1#!/usr/bin/env python3 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14debug=0 15#debugsym='ignorableWhitespaceSAXFunc' 16debugsym=None 17 18# 19# C parser analysis code 20# 21ignored_files = { 22 "trio": "too many non standard macros", 23 "trio.c": "too many non standard macros", 24 "trionan.c": "too many non standard macros", 25 "triostr.c": "too many non standard macros", 26 "config.h": "generated portability layer", 27 "libxml.h": "internal only", 28 "testOOM.c": "out of memory tester", 29 "testOOMlib.h": "out of memory tester", 30 "testOOMlib.c": "out of memory tester", 31 "rngparser.c": "not yet integrated", 32 "testModule.c": "test tool", 33 "testThreads.c": "test tool", 34 "testapi.c": "generated regression tests", 35 "runtest.c": "regression tests program", 36 "runsuite.c": "regression tests program", 37 "tst.c": "not part of the library", 38 "test.c": "not part of the library", 39 "testdso.c": "test for dynamid shared libraries", 40 "testrecurse.c": "test for entities recursions", 41 "timsort.h": "Internal header only for xpath.c 2.9.0", 42} 43 44ignored_words = { 45 "WINAPI": (0, "Windows keyword"), 46 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 47 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 48 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 49 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 50 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 51 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 52 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 53 "XSLTCALL": (0, "Special macro for win32 calls"), 54 "EXSLTCALL": (0, "Special macro for win32 calls"), 55 "__declspec": (3, "Windows keyword"), 56 "__stdcall": (0, "Windows keyword"), 57 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 58 "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"), 59 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 60 "X_IN_Y": (5, "macro function builder"), 61 "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"), 62 "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"), 63 "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"), 64 "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"), 65 "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"), 66 "XML_DEPRECATED": (0, "macro keyword"), 67} 68 69def escape(raw): 70 raw = raw.replace('&', '&') 71 raw = raw.replace('<', '<') 72 raw = raw.replace('>', '>') 73 raw = raw.replace("'", ''') 74 raw = raw.replace('"', '"') 75 return raw 76 77class identifier: 78 def __init__(self, name, header=None, module=None, type=None, lineno = 0, 79 info=None, extra=None, conditionals = None): 80 self.name = name 81 self.header = header 82 self.module = module 83 self.type = type 84 self.info = info 85 self.extra = extra 86 self.lineno = lineno 87 self.static = 0 88 if conditionals == None or len(conditionals) == 0: 89 self.conditionals = None 90 else: 91 self.conditionals = conditionals[:] 92 if self.name == debugsym: 93 print("=> define %s : %s" % (debugsym, (module, type, info, 94 extra, conditionals))) 95 96 def __repr__(self): 97 r = "%s %s:" % (self.type, self.name) 98 if self.static: 99 r = r + " static" 100 if self.module != None: 101 r = r + " from %s" % (self.module) 102 if self.info != None: 103 r = r + " " + repr(self.info) 104 if self.extra != None: 105 r = r + " " + repr(self.extra) 106 if self.conditionals != None: 107 r = r + " " + repr(self.conditionals) 108 return r 109 110 111 def set_header(self, header): 112 self.header = header 113 def set_module(self, module): 114 self.module = module 115 def set_type(self, type): 116 self.type = type 117 def set_info(self, info): 118 self.info = info 119 def set_extra(self, extra): 120 self.extra = extra 121 def set_lineno(self, lineno): 122 self.lineno = lineno 123 def set_static(self, static): 124 self.static = static 125 def set_conditionals(self, conditionals): 126 if conditionals == None or len(conditionals) == 0: 127 self.conditionals = None 128 else: 129 self.conditionals = conditionals[:] 130 131 def get_name(self): 132 return self.name 133 def get_header(self): 134 return self.module 135 def get_module(self): 136 return self.module 137 def get_type(self): 138 return self.type 139 def get_info(self): 140 return self.info 141 def get_lineno(self): 142 return self.lineno 143 def get_extra(self): 144 return self.extra 145 def get_static(self): 146 return self.static 147 def get_conditionals(self): 148 return self.conditionals 149 150 def update(self, header, module, type = None, info = None, extra=None, 151 conditionals=None): 152 if self.name == debugsym: 153 print("=> update %s : %s" % (debugsym, (module, type, info, 154 extra, conditionals))) 155 if header != None and self.header == None: 156 self.set_header(module) 157 if module != None and (self.module == None or self.header == self.module): 158 self.set_module(module) 159 if type != None and self.type == None: 160 self.set_type(type) 161 if info != None: 162 self.set_info(info) 163 if extra != None: 164 self.set_extra(extra) 165 if conditionals != None: 166 self.set_conditionals(conditionals) 167 168class index: 169 def __init__(self, name = "noname"): 170 self.name = name 171 self.identifiers = {} 172 self.functions = {} 173 self.variables = {} 174 self.includes = {} 175 self.structs = {} 176 self.enums = {} 177 self.typedefs = {} 178 self.macros = {} 179 self.references = {} 180 self.info = {} 181 182 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 183 if name[0:2] == '__': 184 return None 185 d = None 186 if name in self.identifiers: 187 d = self.identifiers[name] 188 d.update(header, module, type, info, extra, conditionals) 189 else: 190 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 191 self.identifiers[name] = d 192 193 if d != None and static == 1: 194 d.set_static(1) 195 196 if d != None and name != None and type != None: 197 self.references[name] = d 198 199 if name == debugsym: 200 print("New ref: %s" % (d)) 201 202 return d 203 204 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 205 if name[0:2] == '__': 206 return None 207 d = None 208 if name in self.identifiers: 209 d = self.identifiers[name] 210 d.update(header, module, type, info, extra, conditionals) 211 else: 212 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 213 self.identifiers[name] = d 214 215 if d != None and static == 1: 216 d.set_static(1) 217 218 if d != None and name != None and type != None: 219 if type == "function": 220 self.functions[name] = d 221 elif type == "functype": 222 self.functions[name] = d 223 elif type == "variable": 224 self.variables[name] = d 225 elif type == "include": 226 self.includes[name] = d 227 elif type == "struct": 228 self.structs[name] = d 229 elif type == "enum": 230 self.enums[name] = d 231 elif type == "typedef": 232 self.typedefs[name] = d 233 elif type == "macro": 234 self.macros[name] = d 235 else: 236 print("Unable to register type ", type) 237 238 if name == debugsym: 239 print("New symbol: %s" % (d)) 240 241 return d 242 243 def merge(self, idx): 244 for id in list(idx.functions.keys()): 245 # 246 # macro might be used to override functions or variables 247 # definitions 248 # 249 if id in self.macros: 250 del self.macros[id] 251 if id in self.functions: 252 print("function %s from %s redeclared in %s" % ( 253 id, self.functions[id].header, idx.functions[id].header)) 254 else: 255 self.functions[id] = idx.functions[id] 256 self.identifiers[id] = idx.functions[id] 257 for id in list(idx.variables.keys()): 258 # 259 # macro might be used to override functions or variables 260 # definitions 261 # 262 if id in self.macros: 263 del self.macros[id] 264 if id in self.variables: 265 print("variable %s from %s redeclared in %s" % ( 266 id, self.variables[id].header, idx.variables[id].header)) 267 else: 268 self.variables[id] = idx.variables[id] 269 self.identifiers[id] = idx.variables[id] 270 for id in list(idx.structs.keys()): 271 if id in self.structs: 272 print("struct %s from %s redeclared in %s" % ( 273 id, self.structs[id].header, idx.structs[id].header)) 274 else: 275 self.structs[id] = idx.structs[id] 276 self.identifiers[id] = idx.structs[id] 277 for id in list(idx.typedefs.keys()): 278 if id in self.typedefs: 279 print("typedef %s from %s redeclared in %s" % ( 280 id, self.typedefs[id].header, idx.typedefs[id].header)) 281 else: 282 self.typedefs[id] = idx.typedefs[id] 283 self.identifiers[id] = idx.typedefs[id] 284 for id in list(idx.macros.keys()): 285 # 286 # macro might be used to override functions or variables 287 # definitions 288 # 289 if id in self.variables: 290 continue 291 if id in self.functions: 292 continue 293 if id in self.enums: 294 continue 295 if id in self.macros: 296 print("macro %s from %s redeclared in %s" % ( 297 id, self.macros[id].header, idx.macros[id].header)) 298 else: 299 self.macros[id] = idx.macros[id] 300 self.identifiers[id] = idx.macros[id] 301 for id in list(idx.enums.keys()): 302 if id in self.enums: 303 print("enum %s from %s redeclared in %s" % ( 304 id, self.enums[id].header, idx.enums[id].header)) 305 else: 306 self.enums[id] = idx.enums[id] 307 self.identifiers[id] = idx.enums[id] 308 309 def merge_public(self, idx): 310 for id in list(idx.functions.keys()): 311 if id in self.functions: 312 # check that function condition agrees with header 313 if idx.functions[id].conditionals != \ 314 self.functions[id].conditionals: 315 print("Header condition differs from Function for %s:" \ 316 % id) 317 print(" H: %s" % self.functions[id].conditionals) 318 print(" C: %s" % idx.functions[id].conditionals) 319 up = idx.functions[id] 320 self.functions[id].update(None, up.module, up.type, up.info, up.extra) 321 # else: 322 # print "Function %s from %s is not declared in headers" % ( 323 # id, idx.functions[id].module) 324 325 for id in list(idx.variables.keys()): 326 if id in self.variables: 327 # check that variable condition agrees with header 328 # TODO: produces many false positives 329 #if idx.variables[id].conditionals != \ 330 # self.variables[id].conditionals: 331 # print("Header condition differs from Variable for %s:" \ 332 # % id) 333 # print(" H: %s" % self.variables[id].conditionals) 334 # print(" C: %s" % idx.variables[id].conditionals) 335 up = idx.variables[id] 336 self.variables[id].update(None, up.module, up.type, up.info, up.extra) 337 338 def analyze_dict(self, type, dict): 339 count = 0 340 public = 0 341 for name in list(dict.keys()): 342 id = dict[name] 343 count = count + 1 344 if id.static == 0: 345 public = public + 1 346 if count != public: 347 print(" %d %s , %d public" % (count, type, public)) 348 elif count != 0: 349 print(" %d public %s" % (count, type)) 350 351 352 def analyze(self): 353 self.analyze_dict("functions", self.functions) 354 self.analyze_dict("variables", self.variables) 355 self.analyze_dict("structs", self.structs) 356 self.analyze_dict("typedefs", self.typedefs) 357 self.analyze_dict("macros", self.macros) 358 359class CLexer: 360 """A lexer for the C language, tokenize the input by reading and 361 analyzing it line by line""" 362 def __init__(self, input): 363 self.input = input 364 self.tokens = [] 365 self.line = "" 366 self.lineno = 0 367 368 def getline(self): 369 line = '' 370 while line == '': 371 line = self.input.readline() 372 if not line: 373 return None 374 self.lineno = self.lineno + 1 375 line = line.lstrip() 376 line = line.rstrip() 377 if line == '': 378 continue 379 while line[-1] == '\\': 380 line = line[:-1] 381 n = self.input.readline() 382 self.lineno = self.lineno + 1 383 n = n.lstrip() 384 n = n.rstrip() 385 if not n: 386 break 387 else: 388 line = line + n 389 return line 390 391 def getlineno(self): 392 return self.lineno 393 394 def push(self, token): 395 self.tokens.insert(0, token); 396 397 def debug(self): 398 print("Last token: ", self.last) 399 print("Token queue: ", self.tokens) 400 print("Line %d end: " % (self.lineno), self.line) 401 402 def token(self): 403 while self.tokens == []: 404 if self.line == "": 405 line = self.getline() 406 else: 407 line = self.line 408 self.line = "" 409 if line == None: 410 return None 411 412 if line[0] == '#': 413 self.tokens = list(map((lambda x: ('preproc', x)), 414 line.split())) 415 break; 416 l = len(line) 417 if line[0] == '"' or line[0] == "'": 418 end = line[0] 419 line = line[1:] 420 found = 0 421 tok = "" 422 while found == 0: 423 i = 0 424 l = len(line) 425 while i < l: 426 if line[i] == end: 427 self.line = line[i+1:] 428 line = line[:i] 429 l = i 430 found = 1 431 break 432 if line[i] == '\\': 433 i = i + 1 434 i = i + 1 435 tok = tok + line 436 if found == 0: 437 line = self.getline() 438 if line == None: 439 return None 440 self.last = ('string', tok) 441 return self.last 442 443 if l >= 2 and line[0] == '/' and line[1] == '*': 444 line = line[2:] 445 found = 0 446 tok = "" 447 while found == 0: 448 i = 0 449 l = len(line) 450 while i < l: 451 if line[i] == '*' and i+1 < l and line[i+1] == '/': 452 self.line = line[i+2:] 453 line = line[:i-1] 454 l = i 455 found = 1 456 break 457 i = i + 1 458 if tok != "": 459 tok = tok + "\n" 460 tok = tok + line 461 if found == 0: 462 line = self.getline() 463 if line == None: 464 return None 465 self.last = ('comment', tok) 466 return self.last 467 if l >= 2 and line[0] == '/' and line[1] == '/': 468 line = line[2:] 469 self.last = ('comment', line) 470 return self.last 471 i = 0 472 while i < l: 473 if line[i] == '/' and i+1 < l and line[i+1] == '/': 474 self.line = line[i:] 475 line = line[:i] 476 break 477 if line[i] == '/' and i+1 < l and line[i+1] == '*': 478 self.line = line[i:] 479 line = line[:i] 480 break 481 if line[i] == '"' or line[i] == "'": 482 self.line = line[i:] 483 line = line[:i] 484 break 485 i = i + 1 486 l = len(line) 487 i = 0 488 while i < l: 489 if line[i] == ' ' or line[i] == '\t': 490 i = i + 1 491 continue 492 o = ord(line[i]) 493 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 494 (o >= 48 and o <= 57): 495 s = i 496 while i < l: 497 o = ord(line[i]) 498 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 499 (o >= 48 and o <= 57) or \ 500 (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: 501 i = i + 1 502 else: 503 break 504 self.tokens.append(('name', line[s:i])) 505 continue 506 if "(){}:;,[]".find(line[i]) != -1: 507# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 508# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 509# line[i] == ',' or line[i] == '[' or line[i] == ']': 510 self.tokens.append(('sep', line[i])) 511 i = i + 1 512 continue 513 if "+-*><=/%&!|.".find(line[i]) != -1: 514# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 515# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 516# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 517# line[i] == '!' or line[i] == '|' or line[i] == '.': 518 if line[i] == '.' and i + 2 < l and \ 519 line[i+1] == '.' and line[i+2] == '.': 520 self.tokens.append(('name', '...')) 521 i = i + 3 522 continue 523 524 j = i + 1 525 if j < l and ( 526 "+-*><=/%&!|".find(line[j]) != -1): 527# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 528# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 529# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 530# line[j] == '!' or line[j] == '|'): 531 self.tokens.append(('op', line[i:j+1])) 532 i = j + 1 533 else: 534 self.tokens.append(('op', line[i])) 535 i = i + 1 536 continue 537 s = i 538 while i < l: 539 o = ord(line[i]) 540 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 541 (o >= 48 and o <= 57) or ( 542 " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): 543# line[i] != ' ' and line[i] != '\t' and 544# line[i] != '(' and line[i] != ')' and 545# line[i] != '{' and line[i] != '}' and 546# line[i] != ':' and line[i] != ';' and 547# line[i] != ',' and line[i] != '+' and 548# line[i] != '-' and line[i] != '*' and 549# line[i] != '/' and line[i] != '%' and 550# line[i] != '&' and line[i] != '!' and 551# line[i] != '|' and line[i] != '[' and 552# line[i] != ']' and line[i] != '=' and 553# line[i] != '*' and line[i] != '>' and 554# line[i] != '<'): 555 i = i + 1 556 else: 557 break 558 self.tokens.append(('name', line[s:i])) 559 560 tok = self.tokens[0] 561 self.tokens = self.tokens[1:] 562 self.last = tok 563 return tok 564 565class CParser: 566 """The C module parser""" 567 def __init__(self, filename, idx = None): 568 self.filename = filename 569 if len(filename) > 2 and filename[-2:] == '.h': 570 self.is_header = 1 571 else: 572 self.is_header = 0 573 self.input = open(filename) 574 self.lexer = CLexer(self.input) 575 if idx == None: 576 self.index = index() 577 else: 578 self.index = idx 579 self.top_comment = "" 580 self.last_comment = "" 581 self.comment = None 582 self.collect_ref = 0 583 self.doc_disable = 0 584 self.conditionals = [] 585 self.defines = [] 586 587 def collect_references(self): 588 self.collect_ref = 1 589 590 def disable(self): 591 self.doc_disable = 1 592 593 def enable(self): 594 self.doc_disable = 0 595 596 def lineno(self): 597 return self.lexer.getlineno() 598 599 def index_add(self, name, module, static, type, info=None, extra = None): 600 if self.doc_disable: 601 return 602 if self.is_header == 1: 603 self.index.add(name, module, module, static, type, self.lineno(), 604 info, extra, self.conditionals) 605 else: 606 self.index.add(name, None, module, static, type, self.lineno(), 607 info, extra, self.conditionals) 608 609 def index_add_ref(self, name, module, static, type, info=None, 610 extra = None): 611 if self.is_header == 1: 612 self.index.add_ref(name, module, module, static, type, 613 self.lineno(), info, extra, self.conditionals) 614 else: 615 self.index.add_ref(name, None, module, static, type, self.lineno(), 616 info, extra, self.conditionals) 617 618 def warning(self, msg): 619 if self.doc_disable: 620 return 621 print(msg) 622 623 def error(self, msg, token=-1): 624 if self.doc_disable: 625 return 626 627 print("Parse Error: " + msg) 628 if token != -1: 629 print("Got token ", token) 630 self.lexer.debug() 631 sys.exit(1) 632 633 def debug(self, msg, token=-1): 634 print("Debug: " + msg) 635 if token != -1: 636 print("Got token ", token) 637 self.lexer.debug() 638 639 def parseTopComment(self, comment): 640 res = {} 641 lines = comment.split("\n") 642 item = None 643 for line in lines: 644 while line != "" and (line[0] == ' ' or line[0] == '\t'): 645 line = line[1:] 646 while line != "" and line[0] == '*': 647 line = line[1:] 648 while line != "" and (line[0] == ' ' or line[0] == '\t'): 649 line = line[1:] 650 try: 651 (it, line) = line.split(":", 1) 652 item = it 653 while line != "" and (line[0] == ' ' or line[0] == '\t'): 654 line = line[1:] 655 if item in res: 656 res[item] = res[item] + " " + line 657 else: 658 res[item] = line 659 except: 660 if item != None: 661 if item in res: 662 res[item] = res[item] + " " + line 663 else: 664 res[item] = line 665 self.index.info = res 666 667 def parseComment(self, token): 668 if self.top_comment == "": 669 self.top_comment = token[1] 670 if self.comment == None or token[1][0] == '*': 671 self.comment = token[1]; 672 else: 673 self.comment = self.comment + token[1] 674 token = self.lexer.token() 675 676 if self.comment.find("DOC_DISABLE") != -1: 677 self.disable() 678 679 if self.comment.find("DOC_ENABLE") != -1: 680 self.enable() 681 682 return token 683 684 # 685 # Parse a simple comment block for typedefs or global variables 686 # 687 def parseSimpleComment(self, name, quiet = False): 688 if name[0:2] == '__': 689 quiet = 1 690 691 args = [] 692 desc = "" 693 694 if self.comment == None: 695 if not quiet: 696 self.warning("Missing comment for %s" % (name)) 697 return(None) 698 if self.comment[0] != '*': 699 if not quiet: 700 self.warning("Missing * in comment for %s" % (name)) 701 return(None) 702 lines = self.comment.split('\n') 703 if lines[0] == '*': 704 del lines[0] 705 if lines[0] != "* %s:" % (name): 706 if not quiet: 707 self.warning("Misformatted comment for %s" % (name)) 708 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 709 return(None) 710 del lines[0] 711 while len(lines) > 0 and lines[0] == '*': 712 del lines[0] 713 desc = "" 714 while len(lines) > 0: 715 l = lines[0] 716 while len(l) > 0 and l[0] == '*': 717 l = l[1:] 718 l = l.strip() 719 desc = desc + " " + l 720 del lines[0] 721 722 desc = desc.strip() 723 724 if quiet == 0: 725 if desc == "": 726 self.warning("Comment for %s lacks description" % (name)) 727 728 return(desc) 729 # 730 # Parse a comment block associate to a macro 731 # 732 def parseMacroComment(self, name, quiet = 0): 733 if name[0:2] == '__': 734 quiet = 1 735 736 args = [] 737 desc = "" 738 739 if self.comment == None: 740 if not quiet: 741 self.warning("Missing comment for macro %s" % (name)) 742 return((args, desc)) 743 if self.comment[0] != '*': 744 if not quiet: 745 self.warning("Missing * in macro comment for %s" % (name)) 746 return((args, desc)) 747 lines = self.comment.split('\n') 748 if lines[0] == '*': 749 del lines[0] 750 if lines[0] != "* %s:" % (name): 751 if not quiet: 752 self.warning("Misformatted macro comment for %s" % (name)) 753 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 754 return((args, desc)) 755 del lines[0] 756 while lines[0] == '*': 757 del lines[0] 758 while len(lines) > 0 and lines[0][0:3] == '* @': 759 l = lines[0][3:] 760 try: 761 (arg, desc) = l.split(':', 1) 762 desc=desc.strip() 763 arg=arg.strip() 764 except: 765 if not quiet: 766 self.warning("Misformatted macro comment for %s" % (name)) 767 self.warning(" problem with '%s'" % (lines[0])) 768 del lines[0] 769 continue 770 del lines[0] 771 l = lines[0].strip() 772 while len(l) > 2 and l[0:3] != '* @': 773 while l[0] == '*': 774 l = l[1:] 775 desc = desc + ' ' + l.strip() 776 del lines[0] 777 if len(lines) == 0: 778 break 779 l = lines[0] 780 args.append((arg, desc)) 781 while len(lines) > 0 and lines[0] == '*': 782 del lines[0] 783 desc = "" 784 while len(lines) > 0: 785 l = lines[0] 786 while len(l) > 0 and l[0] == '*': 787 l = l[1:] 788 l = l.strip() 789 desc = desc + " " + l 790 del lines[0] 791 792 desc = desc.strip() 793 794 if quiet == 0: 795 if desc == "": 796 self.warning("Macro comment for %s lack description of the macro" % (name)) 797 798 return((args, desc)) 799 800 # 801 # Parse a comment block and merge the information found in the 802 # parameters descriptions, finally returns a block as complete 803 # as possible 804 # 805 def mergeFunctionComment(self, name, description, quiet = 0): 806 if name == 'main': 807 quiet = 1 808 if name[0:2] == '__': 809 quiet = 1 810 811 (ret, args) = description 812 desc = "" 813 retdesc = "" 814 815 if self.comment == None: 816 if not quiet: 817 self.warning("Missing comment for function %s" % (name)) 818 return(((ret[0], retdesc), args, desc)) 819 if self.comment[0] != '*': 820 if not quiet: 821 self.warning("Missing * in function comment for %s" % (name)) 822 return(((ret[0], retdesc), args, desc)) 823 lines = self.comment.split('\n') 824 if lines[0] == '*': 825 del lines[0] 826 if lines[0] != "* %s:" % (name): 827 if not quiet: 828 self.warning("Misformatted function comment for %s" % (name)) 829 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 830 return(((ret[0], retdesc), args, desc)) 831 del lines[0] 832 while lines[0] == '*': 833 del lines[0] 834 nbargs = len(args) 835 while len(lines) > 0 and lines[0][0:3] == '* @': 836 l = lines[0][3:] 837 try: 838 (arg, desc) = l.split(':', 1) 839 desc=desc.strip() 840 arg=arg.strip() 841 except: 842 if not quiet: 843 self.warning("Misformatted function comment for %s" % (name)) 844 self.warning(" problem with '%s'" % (lines[0])) 845 del lines[0] 846 continue 847 del lines[0] 848 l = lines[0].strip() 849 while len(l) > 2 and l[0:3] != '* @': 850 while l[0] == '*': 851 l = l[1:] 852 desc = desc + ' ' + l.strip() 853 del lines[0] 854 if len(lines) == 0: 855 break 856 l = lines[0] 857 i = 0 858 while i < nbargs: 859 if args[i][1] == arg: 860 args[i] = (args[i][0], arg, desc) 861 break; 862 i = i + 1 863 if i >= nbargs: 864 if not quiet: 865 self.warning("Unable to find arg %s from function comment for %s" % ( 866 arg, name)) 867 while len(lines) > 0 and lines[0] == '*': 868 del lines[0] 869 desc = "" 870 while len(lines) > 0: 871 l = lines[0] 872 while len(l) > 0 and l[0] == '*': 873 l = l[1:] 874 l = l.strip() 875 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 876 try: 877 l = l.split(' ', 1)[1] 878 except: 879 l = "" 880 retdesc = l.strip() 881 del lines[0] 882 while len(lines) > 0: 883 l = lines[0] 884 while len(l) > 0 and l[0] == '*': 885 l = l[1:] 886 l = l.strip() 887 retdesc = retdesc + " " + l 888 del lines[0] 889 else: 890 desc = desc + " " + l 891 del lines[0] 892 893 retdesc = retdesc.strip() 894 desc = desc.strip() 895 896 if quiet == 0: 897 # 898 # report missing comments 899 # 900 i = 0 901 while i < nbargs: 902 if args[i][2] == None and args[i][0] != "void" and \ 903 ((args[i][1] != None) or (args[i][1] == '')): 904 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) 905 i = i + 1 906 if retdesc == "" and ret[0] != "void": 907 self.warning("Function comment for %s lacks description of return value" % (name)) 908 if desc == "": 909 self.warning("Function comment for %s lacks description of the function" % (name)) 910 911 return(((ret[0], retdesc), args, desc)) 912 913 def parsePreproc(self, token): 914 if debug: 915 print("=> preproc ", token, self.lexer.tokens) 916 name = token[1] 917 if name == "#include": 918 token = self.lexer.token() 919 if token == None: 920 return None 921 if token[0] == 'preproc': 922 self.index_add(token[1], self.filename, not self.is_header, 923 "include") 924 return self.lexer.token() 925 return token 926 if name == "#define": 927 token = self.lexer.token() 928 if token == None: 929 return None 930 if token[0] == 'preproc': 931 # TODO macros with arguments 932 name = token[1] 933 lst = [] 934 token = self.lexer.token() 935 while token != None and token[0] == 'preproc' and \ 936 token[1][0] != '#': 937 lst.append(token[1]) 938 token = self.lexer.token() 939 try: 940 name = name.split('(') [0] 941 except: 942 pass 943 info = self.parseMacroComment(name, True) 944 self.index_add(name, self.filename, not self.is_header, 945 "macro", info) 946 return token 947 948 # 949 # Processing of conditionals modified by Bill 1/1/05 950 # 951 # We process conditionals (i.e. tokens from #ifdef, #ifndef, 952 # #if, #else and #endif) for headers and mainline code, 953 # store the ones from the header in libxml2-api.xml, and later 954 # (in the routine merge_public) verify that the two (header and 955 # mainline code) agree. 956 # 957 # There is a small problem with processing the headers. Some of 958 # the variables are not concerned with enabling / disabling of 959 # library functions (e.g. '__XML_PARSER_H__'), and we don't want 960 # them to be included in libxml2-api.xml, or involved in 961 # the check between the header and the mainline code. To 962 # accomplish this, we ignore any conditional which doesn't include 963 # the string 'ENABLED' 964 # 965 if name == "#ifdef": 966 apstr = self.lexer.tokens[0][1] 967 try: 968 self.defines.append(apstr) 969 if apstr.find('ENABLED') != -1: 970 self.conditionals.append("defined(%s)" % apstr) 971 except: 972 pass 973 elif name == "#ifndef": 974 apstr = self.lexer.tokens[0][1] 975 try: 976 self.defines.append(apstr) 977 if apstr.find('ENABLED') != -1: 978 self.conditionals.append("!defined(%s)" % apstr) 979 except: 980 pass 981 elif name == "#if": 982 apstr = "" 983 for tok in self.lexer.tokens: 984 if apstr != "": 985 apstr = apstr + " " 986 apstr = apstr + tok[1] 987 try: 988 self.defines.append(apstr) 989 if apstr.find('ENABLED') != -1: 990 self.conditionals.append(apstr) 991 except: 992 pass 993 elif name == "#else": 994 if self.conditionals != [] and \ 995 self.defines[-1].find('ENABLED') != -1: 996 self.conditionals[-1] = "!(%s)" % self.conditionals[-1] 997 elif name == "#endif": 998 if self.conditionals != [] and \ 999 self.defines[-1].find('ENABLED') != -1: 1000 self.conditionals = self.conditionals[:-1] 1001 self.defines = self.defines[:-1] 1002 token = self.lexer.token() 1003 while token != None and token[0] == 'preproc' and \ 1004 token[1][0] != '#': 1005 token = self.lexer.token() 1006 return token 1007 1008 # 1009 # token acquisition on top of the lexer, it handle internally 1010 # preprocessor and comments since they are logically not part of 1011 # the program structure. 1012 # 1013 def token(self): 1014 global ignored_words 1015 1016 token = self.lexer.token() 1017 while token != None: 1018 if token[0] == 'comment': 1019 token = self.parseComment(token) 1020 continue 1021 elif token[0] == 'preproc': 1022 token = self.parsePreproc(token) 1023 continue 1024 elif token[0] == "name" and token[1] == "__const": 1025 token = ("name", "const") 1026 return token 1027 elif token[0] == "name" and token[1] == "__attribute": 1028 token = self.lexer.token() 1029 while token != None and token[1] != ";": 1030 token = self.lexer.token() 1031 return token 1032 elif token[0] == "name" and token[1] in ignored_words: 1033 (n, info) = ignored_words[token[1]] 1034 i = 0 1035 while i < n: 1036 token = self.lexer.token() 1037 i = i + 1 1038 token = self.lexer.token() 1039 continue 1040 else: 1041 if debug: 1042 print("=> ", token) 1043 return token 1044 return None 1045 1046 # 1047 # Parse a typedef, it records the type and its name. 1048 # 1049 def parseTypedef(self, token): 1050 if token == None: 1051 return None 1052 token = self.parseType(token) 1053 if token == None: 1054 self.error("parsing typedef") 1055 return None 1056 base_type = self.type 1057 type = base_type 1058 #self.debug("end typedef type", token) 1059 while token != None: 1060 if token[0] == "name": 1061 name = token[1] 1062 signature = self.signature 1063 if signature != None: 1064 type = type.split('(')[0] 1065 d = self.mergeFunctionComment(name, 1066 ((type, None), signature), 1) 1067 self.index_add(name, self.filename, not self.is_header, 1068 "functype", d) 1069 else: 1070 if base_type == "struct": 1071 self.index_add(name, self.filename, not self.is_header, 1072 "struct", type) 1073 base_type = "struct " + name 1074 else: 1075 # TODO report missing or misformatted comments 1076 info = self.parseSimpleComment(name, True) 1077 self.index_add(name, self.filename, not self.is_header, 1078 "typedef", type, info) 1079 token = self.token() 1080 else: 1081 self.error("parsing typedef: expecting a name") 1082 return token 1083 #self.debug("end typedef", token) 1084 if token != None and token[0] == 'sep' and token[1] == ',': 1085 type = base_type 1086 token = self.token() 1087 while token != None and token[0] == "op": 1088 type = type + token[1] 1089 token = self.token() 1090 elif token != None and token[0] == 'sep' and token[1] == ';': 1091 break; 1092 elif token != None and token[0] == 'name': 1093 type = base_type 1094 continue; 1095 else: 1096 self.error("parsing typedef: expecting ';'", token) 1097 return token 1098 token = self.token() 1099 return token 1100 1101 # 1102 # Parse a C code block, used for functions it parse till 1103 # the balancing } included 1104 # 1105 def parseBlock(self, token): 1106 while token != None: 1107 if token[0] == "sep" and token[1] == "{": 1108 token = self.token() 1109 token = self.parseBlock(token) 1110 elif token[0] == "sep" and token[1] == "}": 1111 token = self.token() 1112 return token 1113 else: 1114 if self.collect_ref == 1: 1115 oldtok = token 1116 token = self.token() 1117 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 1118 if token[0] == "sep" and token[1] == "(": 1119 self.index_add_ref(oldtok[1], self.filename, 1120 0, "function") 1121 token = self.token() 1122 elif token[0] == "name": 1123 token = self.token() 1124 if token[0] == "sep" and (token[1] == ";" or 1125 token[1] == "," or token[1] == "="): 1126 self.index_add_ref(oldtok[1], self.filename, 1127 0, "type") 1128 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 1129 self.index_add_ref(oldtok[1], self.filename, 1130 0, "typedef") 1131 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 1132 self.index_add_ref(oldtok[1], self.filename, 1133 0, "typedef") 1134 1135 else: 1136 token = self.token() 1137 return token 1138 1139 # 1140 # Parse a C struct definition till the balancing } 1141 # 1142 def parseStruct(self, token): 1143 fields = [] 1144 #self.debug("start parseStruct", token) 1145 while token != None: 1146 if token[0] == "sep" and token[1] == "{": 1147 token = self.token() 1148 token = self.parseTypeBlock(token) 1149 elif token[0] == "sep" and token[1] == "}": 1150 self.struct_fields = fields 1151 #self.debug("end parseStruct", token) 1152 #print fields 1153 token = self.token() 1154 return token 1155 else: 1156 base_type = self.type 1157 #self.debug("before parseType", token) 1158 token = self.parseType(token) 1159 #self.debug("after parseType", token) 1160 if token != None and token[0] == "name": 1161 fname = token[1] 1162 token = self.token() 1163 if token[0] == "sep" and token[1] == ";": 1164 self.comment = None 1165 token = self.token() 1166 fields.append((self.type, fname, self.comment)) 1167 self.comment = None 1168 else: 1169 self.error("parseStruct: expecting ;", token) 1170 elif token != None and token[0] == "sep" and token[1] == "{": 1171 token = self.token() 1172 token = self.parseTypeBlock(token) 1173 if token != None and token[0] == "name": 1174 token = self.token() 1175 if token != None and token[0] == "sep" and token[1] == ";": 1176 token = self.token() 1177 else: 1178 self.error("parseStruct: expecting ;", token) 1179 else: 1180 self.error("parseStruct: name", token) 1181 token = self.token() 1182 self.type = base_type; 1183 self.struct_fields = fields 1184 #self.debug("end parseStruct", token) 1185 #print fields 1186 return token 1187 1188 # 1189 # Parse a C enum block, parse till the balancing } 1190 # 1191 def parseEnumBlock(self, token): 1192 self.enums = [] 1193 name = None 1194 self.comment = None 1195 comment = "" 1196 value = "0" 1197 while token != None: 1198 if token[0] == "sep" and token[1] == "{": 1199 token = self.token() 1200 token = self.parseTypeBlock(token) 1201 elif token[0] == "sep" and token[1] == "}": 1202 if name != None: 1203 if self.comment != None: 1204 comment = self.comment 1205 self.comment = None 1206 self.enums.append((name, value, comment)) 1207 token = self.token() 1208 return token 1209 elif token[0] == "name": 1210 if name != None: 1211 if self.comment != None: 1212 comment = self.comment.strip() 1213 self.comment = None 1214 self.enums.append((name, value, comment)) 1215 name = token[1] 1216 comment = "" 1217 token = self.token() 1218 if token[0] == "op" and token[1][0] == "=": 1219 value = "" 1220 if len(token[1]) > 1: 1221 value = token[1][1:] 1222 token = self.token() 1223 while token[0] != "sep" or (token[1] != ',' and 1224 token[1] != '}'): 1225 value = value + token[1] 1226 token = self.token() 1227 else: 1228 try: 1229 value = "%d" % (int(value) + 1) 1230 except: 1231 self.warning("Failed to compute value of enum %s" % (name)) 1232 value="" 1233 if token[0] == "sep" and token[1] == ",": 1234 token = self.token() 1235 else: 1236 token = self.token() 1237 return token 1238 1239 # 1240 # Parse a C definition block, used for structs it parse till 1241 # the balancing } 1242 # 1243 def parseTypeBlock(self, token): 1244 while token != None: 1245 if token[0] == "sep" and token[1] == "{": 1246 token = self.token() 1247 token = self.parseTypeBlock(token) 1248 elif token[0] == "sep" and token[1] == "}": 1249 token = self.token() 1250 return token 1251 else: 1252 token = self.token() 1253 return token 1254 1255 # 1256 # Parse a type: the fact that the type name can either occur after 1257 # the definition or within the definition makes it a little harder 1258 # if inside, the name token is pushed back before returning 1259 # 1260 def parseType(self, token): 1261 self.type = "" 1262 self.struct_fields = [] 1263 self.signature = None 1264 if token == None: 1265 return token 1266 1267 while token[0] == "name" and ( 1268 token[1] == "const" or \ 1269 token[1] == "unsigned" or \ 1270 token[1] == "signed"): 1271 if self.type == "": 1272 self.type = token[1] 1273 else: 1274 self.type = self.type + " " + token[1] 1275 token = self.token() 1276 1277 if token[0] == "name" and (token[1] == "long" or token[1] == "short"): 1278 if self.type == "": 1279 self.type = token[1] 1280 else: 1281 self.type = self.type + " " + token[1] 1282 if token[0] == "name" and token[1] == "int": 1283 if self.type == "": 1284 self.type = tmp[1] 1285 else: 1286 self.type = self.type + " " + tmp[1] 1287 1288 elif token[0] == "name" and token[1] == "struct": 1289 if self.type == "": 1290 self.type = token[1] 1291 else: 1292 self.type = self.type + " " + token[1] 1293 token = self.token() 1294 nametok = None 1295 if token[0] == "name": 1296 nametok = token 1297 token = self.token() 1298 if token != None and token[0] == "sep" and token[1] == "{": 1299 token = self.token() 1300 token = self.parseStruct(token) 1301 elif token != None and token[0] == "op" and token[1] == "*": 1302 self.type = self.type + " " + nametok[1] + " *" 1303 token = self.token() 1304 while token != None and token[0] == "op" and token[1] == "*": 1305 self.type = self.type + " *" 1306 token = self.token() 1307 if token[0] == "name": 1308 nametok = token 1309 token = self.token() 1310 else: 1311 self.error("struct : expecting name", token) 1312 return token 1313 elif token != None and token[0] == "name" and nametok != None: 1314 self.type = self.type + " " + nametok[1] 1315 return token 1316 1317 if nametok != None: 1318 self.lexer.push(token) 1319 token = nametok 1320 return token 1321 1322 elif token[0] == "name" and token[1] == "enum": 1323 if self.type == "": 1324 self.type = token[1] 1325 else: 1326 self.type = self.type + " " + token[1] 1327 self.enums = [] 1328 token = self.token() 1329 if token != None and token[0] == "sep" and token[1] == "{": 1330 token = self.token() 1331 token = self.parseEnumBlock(token) 1332 else: 1333 self.error("parsing enum: expecting '{'", token) 1334 enum_type = None 1335 if token != None and token[0] != "name": 1336 self.lexer.push(token) 1337 token = ("name", "enum") 1338 else: 1339 enum_type = token[1] 1340 for enum in self.enums: 1341 self.index_add(enum[0], self.filename, 1342 not self.is_header, "enum", 1343 (enum[1], enum[2], enum_type)) 1344 return token 1345 1346 elif token[0] == "name": 1347 if self.type == "": 1348 self.type = token[1] 1349 else: 1350 self.type = self.type + " " + token[1] 1351 else: 1352 self.error("parsing type %s: expecting a name" % (self.type), 1353 token) 1354 return token 1355 token = self.token() 1356 while token != None and (token[0] == "op" or 1357 token[0] == "name" and token[1] == "const"): 1358 self.type = self.type + " " + token[1] 1359 token = self.token() 1360 1361 # 1362 # if there is a parenthesis here, this means a function type 1363 # 1364 if token != None and token[0] == "sep" and token[1] == '(': 1365 self.type = self.type + token[1] 1366 token = self.token() 1367 while token != None and token[0] == "op" and token[1] == '*': 1368 self.type = self.type + token[1] 1369 token = self.token() 1370 if token == None or token[0] != "name" : 1371 self.error("parsing function type, name expected", token); 1372 return token 1373 self.type = self.type + token[1] 1374 nametok = token 1375 token = self.token() 1376 if token != None and token[0] == "sep" and token[1] == ')': 1377 self.type = self.type + token[1] 1378 token = self.token() 1379 if token != None and token[0] == "sep" and token[1] == '(': 1380 token = self.token() 1381 type = self.type; 1382 token = self.parseSignature(token); 1383 self.type = type; 1384 else: 1385 self.error("parsing function type, '(' expected", token); 1386 return token 1387 else: 1388 self.error("parsing function type, ')' expected", token); 1389 return token 1390 self.lexer.push(token) 1391 token = nametok 1392 return token 1393 1394 # 1395 # do some lookahead for arrays 1396 # 1397 if token != None and token[0] == "name": 1398 nametok = token 1399 token = self.token() 1400 if token != None and token[0] == "sep" and token[1] == '[': 1401 self.type = self.type + nametok[1] 1402 while token != None and token[0] == "sep" and token[1] == '[': 1403 self.type = self.type + token[1] 1404 token = self.token() 1405 while token != None and token[0] != 'sep' and \ 1406 token[1] != ']' and token[1] != ';': 1407 self.type = self.type + token[1] 1408 token = self.token() 1409 if token != None and token[0] == 'sep' and token[1] == ']': 1410 self.type = self.type + token[1] 1411 token = self.token() 1412 else: 1413 self.error("parsing array type, ']' expected", token); 1414 return token 1415 elif token != None and token[0] == "sep" and token[1] == ':': 1416 # remove :12 in case it's a limited int size 1417 token = self.token() 1418 token = self.token() 1419 self.lexer.push(token) 1420 token = nametok 1421 1422 return token 1423 1424 # 1425 # Parse a signature: '(' has been parsed and we scan the type definition 1426 # up to the ')' included 1427 def parseSignature(self, token): 1428 signature = [] 1429 if token != None and token[0] == "sep" and token[1] == ')': 1430 self.signature = [] 1431 token = self.token() 1432 return token 1433 while token != None: 1434 token = self.parseType(token) 1435 if token != None and token[0] == "name": 1436 signature.append((self.type, token[1], None)) 1437 token = self.token() 1438 elif token != None and token[0] == "sep" and token[1] == ',': 1439 token = self.token() 1440 continue 1441 elif token != None and token[0] == "sep" and token[1] == ')': 1442 # only the type was provided 1443 if self.type == "...": 1444 signature.append((self.type, "...", None)) 1445 else: 1446 signature.append((self.type, None, None)) 1447 if token != None and token[0] == "sep": 1448 if token[1] == ',': 1449 token = self.token() 1450 continue 1451 elif token[1] == ')': 1452 token = self.token() 1453 break 1454 self.signature = signature 1455 return token 1456 1457 # 1458 # Parse a global definition, be it a type, variable or function 1459 # the extern "C" blocks are a bit nasty and require it to recurse. 1460 # 1461 def parseGlobal(self, token): 1462 static = 0 1463 if token[1] == 'extern': 1464 token = self.token() 1465 if token == None: 1466 return token 1467 if token[0] == 'string': 1468 if token[1] == 'C': 1469 token = self.token() 1470 if token == None: 1471 return token 1472 if token[0] == 'sep' and token[1] == "{": 1473 token = self.token() 1474# print 'Entering extern "C line ', self.lineno() 1475 while token != None and (token[0] != 'sep' or 1476 token[1] != "}"): 1477 if token[0] == 'name': 1478 token = self.parseGlobal(token) 1479 else: 1480 self.error( 1481 "token %s %s unexpected at the top level" % ( 1482 token[0], token[1])) 1483 token = self.parseGlobal(token) 1484# print 'Exiting extern "C" line', self.lineno() 1485 token = self.token() 1486 return token 1487 else: 1488 return token 1489 elif token[1] == 'static': 1490 static = 1 1491 token = self.token() 1492 if token == None or token[0] != 'name': 1493 return token 1494 1495 if token[1] == 'typedef': 1496 token = self.token() 1497 return self.parseTypedef(token) 1498 else: 1499 token = self.parseType(token) 1500 type_orig = self.type 1501 if token == None or token[0] != "name": 1502 return token 1503 type = type_orig 1504 self.name = token[1] 1505 token = self.token() 1506 while token != None and (token[0] == "sep" or token[0] == "op"): 1507 if token[0] == "sep": 1508 if token[1] == "[": 1509 type = type + token[1] 1510 token = self.token() 1511 while token != None and (token[0] != "sep" or \ 1512 token[1] != ";"): 1513 type = type + token[1] 1514 token = self.token() 1515 1516 if token != None and token[0] == "op" and token[1] == "=": 1517 # 1518 # Skip the initialization of the variable 1519 # 1520 token = self.token() 1521 if token[0] == 'sep' and token[1] == '{': 1522 token = self.token() 1523 token = self.parseBlock(token) 1524 else: 1525 while token != None and (token[0] != "sep" or \ 1526 (token[1] != ';' and token[1] != ',')): 1527 token = self.token() 1528 if token == None or token[0] != "sep" or (token[1] != ';' and 1529 token[1] != ','): 1530 self.error("missing ';' or ',' after value") 1531 1532 if token != None and token[0] == "sep": 1533 if token[1] == ";": 1534 if type == "struct": 1535 self.index_add(self.name, self.filename, 1536 not self.is_header, "struct", self.struct_fields) 1537 else: 1538 info = self.parseSimpleComment(self.name, True) 1539 self.index_add(self.name, self.filename, 1540 not self.is_header, "variable", type, info) 1541 self.comment = None 1542 token = self.token() 1543 break 1544 elif token[1] == "(": 1545 token = self.token() 1546 token = self.parseSignature(token) 1547 if token == None: 1548 return None 1549 if token[0] == "sep" and token[1] == ";": 1550 d = self.mergeFunctionComment(self.name, 1551 ((type, None), self.signature), 1) 1552 self.index_add(self.name, self.filename, static, 1553 "function", d) 1554 self.comment = None 1555 token = self.token() 1556 elif token[0] == "sep" and token[1] == "{": 1557 d = self.mergeFunctionComment(self.name, 1558 ((type, None), self.signature), static) 1559 self.index_add(self.name, self.filename, static, 1560 "function", d) 1561 self.comment = None 1562 token = self.token() 1563 token = self.parseBlock(token); 1564 elif token[1] == ',': 1565 self.index_add(self.name, self.filename, static, 1566 "variable", type) 1567 self.comment = None 1568 type = type_orig 1569 token = self.token() 1570 while token != None and token[0] == "sep": 1571 type = type + token[1] 1572 token = self.token() 1573 if token != None and token[0] == "name": 1574 self.name = token[1] 1575 token = self.token() 1576 else: 1577 break 1578 1579 return token 1580 1581 def parse(self): 1582 self.warning("Parsing %s" % (self.filename)) 1583 token = self.token() 1584 while token != None: 1585 if token[0] == 'name': 1586 token = self.parseGlobal(token) 1587 else: 1588 self.error("token %s %s unexpected at the top level" % ( 1589 token[0], token[1])) 1590 token = self.parseGlobal(token) 1591 return 1592 self.parseTopComment(self.top_comment) 1593 return self.index 1594 1595 1596class docBuilder: 1597 """A documentation builder""" 1598 def __init__(self, name, directories=['.'], excludes=[]): 1599 self.name = name 1600 self.directories = directories 1601 self.excludes = excludes + list(ignored_files.keys()) 1602 self.modules = {} 1603 self.headers = {} 1604 self.idx = index() 1605 self.index = {} 1606 if name == 'libxml2': 1607 self.basename = 'libxml' 1608 else: 1609 self.basename = name 1610 1611 def analyze(self): 1612 print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys())))) 1613 self.idx.analyze() 1614 1615 def scanHeaders(self): 1616 for header in list(self.headers.keys()): 1617 parser = CParser(header) 1618 idx = parser.parse() 1619 self.headers[header] = idx; 1620 self.idx.merge(idx) 1621 1622 def scanModules(self): 1623 for module in list(self.modules.keys()): 1624 parser = CParser(module) 1625 idx = parser.parse() 1626 # idx.analyze() 1627 self.modules[module] = idx 1628 self.idx.merge_public(idx) 1629 1630 def scan(self): 1631 for directory in self.directories: 1632 files = glob.glob(directory + "/*.c") 1633 for file in files: 1634 skip = 0 1635 for excl in self.excludes: 1636 if file.find(excl) != -1: 1637 print("Skipping %s" % file) 1638 skip = 1 1639 break 1640 if skip == 0: 1641 self.modules[file] = None; 1642 files = glob.glob(directory + "/*.h") 1643 for file in files: 1644 skip = 0 1645 for excl in self.excludes: 1646 if file.find(excl) != -1: 1647 print("Skipping %s" % file) 1648 skip = 1 1649 break 1650 if skip == 0: 1651 self.headers[file] = None; 1652 self.scanHeaders() 1653 self.scanModules() 1654 1655 def modulename_file(self, file): 1656 module = os.path.basename(file) 1657 if module[-2:] == '.h': 1658 module = module[:-2] 1659 elif module[-2:] == '.c': 1660 module = module[:-2] 1661 return module 1662 1663 def serialize_enum(self, output, name): 1664 id = self.idx.enums[name] 1665 output.write(" <enum name='%s' file='%s'" % (name, 1666 self.modulename_file(id.header))) 1667 if id.info != None: 1668 info = id.info 1669 if info[0] != None and info[0] != '': 1670 try: 1671 val = eval(info[0]) 1672 except: 1673 val = info[0] 1674 output.write(" value='%s'" % (val)); 1675 if info[2] != None and info[2] != '': 1676 output.write(" type='%s'" % info[2]); 1677 if info[1] != None and info[1] != '': 1678 output.write(" info='%s'" % escape(info[1])); 1679 output.write("/>\n") 1680 1681 def serialize_macro(self, output, name): 1682 id = self.idx.macros[name] 1683 output.write(" <macro name='%s' file='%s'>\n" % (name, 1684 self.modulename_file(id.header))) 1685 if id.info != None: 1686 try: 1687 (args, desc) = id.info 1688 if desc != None and desc != "": 1689 output.write(" <info>%s</info>\n" % (escape(desc))) 1690 for arg in args: 1691 (name, desc) = arg 1692 if desc != None and desc != "": 1693 output.write(" <arg name='%s' info='%s'/>\n" % ( 1694 name, escape(desc))) 1695 else: 1696 output.write(" <arg name='%s'/>\n" % (name)) 1697 except: 1698 pass 1699 output.write(" </macro>\n") 1700 1701 def serialize_typedef(self, output, name): 1702 id = self.idx.typedefs[name] 1703 if id.info[0:7] == 'struct ': 1704 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1705 name, self.modulename_file(id.header), id.info)) 1706 name = id.info[7:] 1707 if name in self.idx.structs and ( \ 1708 type(self.idx.structs[name].info) == type(()) or 1709 type(self.idx.structs[name].info) == type([])): 1710 output.write(">\n"); 1711 try: 1712 for field in self.idx.structs[name].info: 1713 desc = field[2] 1714 if desc == None: 1715 desc = '' 1716 else: 1717 desc = escape(desc) 1718 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) 1719 except: 1720 print("Failed to serialize struct %s" % (name)) 1721 output.write(" </struct>\n") 1722 else: 1723 output.write("/>\n"); 1724 else : 1725 output.write(" <typedef name='%s' file='%s' type='%s'" % ( 1726 name, self.modulename_file(id.header), id.info)) 1727 try: 1728 desc = id.extra 1729 if desc != None and desc != "": 1730 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1731 output.write(" </typedef>\n") 1732 else: 1733 output.write("/>\n") 1734 except: 1735 output.write("/>\n") 1736 1737 def serialize_variable(self, output, name): 1738 id = self.idx.variables[name] 1739 if id.info != None: 1740 output.write(" <variable name='%s' file='%s' type='%s'" % ( 1741 name, self.modulename_file(id.header), id.info)) 1742 else: 1743 output.write(" <variable name='%s' file='%s'" % ( 1744 name, self.modulename_file(id.header))) 1745 desc = id.extra 1746 if desc != None and desc != "": 1747 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1748 output.write(" </variable>\n") 1749 else: 1750 output.write("/>\n") 1751 1752 def serialize_function(self, output, name): 1753 id = self.idx.functions[name] 1754 if name == debugsym: 1755 print("=>", id) 1756 1757 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, 1758 name, self.modulename_file(id.header), 1759 self.modulename_file(id.module))) 1760 # 1761 # Processing of conditionals modified by Bill 1/1/05 1762 # 1763 if id.conditionals != None: 1764 apstr = "" 1765 for cond in id.conditionals: 1766 if apstr != "": 1767 apstr = apstr + " && " 1768 apstr = apstr + cond 1769 output.write(" <cond>%s</cond>\n"% (apstr)); 1770 try: 1771 (ret, params, desc) = id.info 1772 if (desc == None or desc == '') and \ 1773 name[0:9] != "xmlThrDef" and name != "xmlDllMain": 1774 print("%s %s from %s has no description" % (id.type, name, 1775 self.modulename_file(id.module))) 1776 1777 output.write(" <info>%s</info>\n" % (escape(desc))) 1778 if ret[0] != None: 1779 if ret[0] == "void": 1780 output.write(" <return type='void'/>\n") 1781 else: 1782 output.write(" <return type='%s' info='%s'/>\n" % ( 1783 ret[0], escape(ret[1]))) 1784 for param in params: 1785 if param[0] == 'void': 1786 continue 1787 if param[2] == None: 1788 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1789 else: 1790 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1791 except: 1792 print("Failed to save function %s info: " % name, repr(id.info)) 1793 output.write(" </%s>\n" % (id.type)) 1794 1795 def serialize_exports(self, output, file): 1796 module = self.modulename_file(file) 1797 output.write(" <file name='%s'>\n" % (module)) 1798 dict = self.headers[file] 1799 if dict.info != None: 1800 for data in ('Summary', 'Description', 'Author'): 1801 try: 1802 output.write(" <%s>%s</%s>\n" % ( 1803 data.lower(), 1804 escape(dict.info[data]), 1805 data.lower())) 1806 except: 1807 print("Header %s lacks a %s description" % (module, data)) 1808 if 'Description' in dict.info: 1809 desc = dict.info['Description'] 1810 if desc.find("DEPRECATED") != -1: 1811 output.write(" <deprecated/>\n") 1812 1813 ids = list(dict.macros.keys()) 1814 ids.sort() 1815 for id in ids: 1816 # Macros are sometime used to masquerade other types. 1817 if id in dict.functions: 1818 continue 1819 if id in dict.variables: 1820 continue 1821 if id in dict.typedefs: 1822 continue 1823 if id in dict.structs: 1824 continue 1825 if id in dict.enums: 1826 continue 1827 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1828 ids = list(dict.enums.keys()) 1829 ids.sort() 1830 for id in ids: 1831 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1832 ids = list(dict.typedefs.keys()) 1833 ids.sort() 1834 for id in ids: 1835 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1836 ids = list(dict.structs.keys()) 1837 ids.sort() 1838 for id in ids: 1839 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1840 ids = list(dict.variables.keys()) 1841 ids.sort() 1842 for id in ids: 1843 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1844 ids = list(dict.functions.keys()) 1845 ids.sort() 1846 for id in ids: 1847 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1848 output.write(" </file>\n") 1849 1850 def serialize(self): 1851 filename = "%s-api.xml" % self.name 1852 print("Saving XML description %s" % (filename)) 1853 output = open(filename, "w") 1854 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 1855 output.write("<api name='%s'>\n" % self.name) 1856 output.write(" <files>\n") 1857 headers = list(self.headers.keys()) 1858 headers.sort() 1859 for file in headers: 1860 self.serialize_exports(output, file) 1861 output.write(" </files>\n") 1862 output.write(" <symbols>\n") 1863 macros = list(self.idx.macros.keys()) 1864 macros.sort() 1865 for macro in macros: 1866 self.serialize_macro(output, macro) 1867 enums = list(self.idx.enums.keys()) 1868 enums.sort() 1869 for enum in enums: 1870 self.serialize_enum(output, enum) 1871 typedefs = list(self.idx.typedefs.keys()) 1872 typedefs.sort() 1873 for typedef in typedefs: 1874 self.serialize_typedef(output, typedef) 1875 variables = list(self.idx.variables.keys()) 1876 variables.sort() 1877 for variable in variables: 1878 self.serialize_variable(output, variable) 1879 functions = list(self.idx.functions.keys()) 1880 functions.sort() 1881 for function in functions: 1882 self.serialize_function(output, function) 1883 output.write(" </symbols>\n") 1884 output.write("</api>\n") 1885 output.close() 1886 1887 1888def rebuild(): 1889 builder = None 1890 if glob.glob("parser.c") != [] : 1891 print("Rebuilding API description for libxml2") 1892 builder = docBuilder("libxml2", [".", "."], 1893 ["tst.c"]) 1894 elif glob.glob("../parser.c") != [] : 1895 print("Rebuilding API description for libxml2") 1896 builder = docBuilder("libxml2", ["..", "../include/libxml"], 1897 ["tst.c"]) 1898 elif glob.glob("../libxslt/transform.c") != [] : 1899 print("Rebuilding API description for libxslt") 1900 builder = docBuilder("libxslt", ["../libxslt"], 1901 ["win32config.h", "libxslt.h", "tst.c"]) 1902 else: 1903 print("rebuild() failed, unable to guess the module") 1904 return None 1905 builder.scan() 1906 builder.analyze() 1907 builder.serialize() 1908 if glob.glob("../libexslt/exslt.c") != [] : 1909 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 1910 extra.scan() 1911 extra.analyze() 1912 extra.serialize() 1913 return builder 1914 1915# 1916# for debugging the parser 1917# 1918def parse(filename): 1919 parser = CParser(filename) 1920 idx = parser.parse() 1921 return idx 1922 1923if __name__ == "__main__": 1924 if len(sys.argv) > 1: 1925 debug = 1 1926 parse(sys.argv[1]) 1927 else: 1928 rebuild() 1929