1#!/usr/bin/env python3 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14debug=0 15#debugsym='ignorableWhitespaceSAXFunc' 16debugsym=None 17 18# 19# C parser analysis code 20# 21ignored_files = { 22 "config.h": "generated portability layer", 23 "libxml.h": "internal only", 24 "legacy.c": "legacy code", 25 "testModule.c": "test tool", 26 "testapi.c": "generated regression tests", 27 "runtest.c": "regression tests program", 28 "runsuite.c": "regression tests program", 29 "tst.c": "not part of the library", 30 "test.c": "not part of the library", 31 "testdso.c": "test for dynamid shared libraries", 32 "testrecurse.c": "test for entities recursions", 33 "timsort.h": "Internal header only for xpath.c 2.9.0", 34 "nanoftp.h": "empty", 35 "SAX.h": "empty", 36} 37 38ignored_words = { 39 "WINAPI": (0, "Windows keyword"), 40 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 41 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 42 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 43 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 44 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 45 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 46 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 47 "XSLTCALL": (0, "Special macro for win32 calls"), 48 "EXSLTCALL": (0, "Special macro for win32 calls"), 49 "__declspec": (3, "Windows keyword"), 50 "__stdcall": (0, "Windows keyword"), 51 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 52 "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"), 53 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 54 "X_IN_Y": (5, "macro function builder"), 55 "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"), 56 "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"), 57 "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"), 58 "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"), 59 "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"), 60 "ATTRIBUTE_NO_SANITIZE_INTEGER": (0, "macro keyword"), 61 "ATTRIBUTE_COUNTED_BY": (3, "macro keyword"), 62 "XML_DEPRECATED": (0, "macro keyword"), 63 "XML_DEPRECATED_MEMBER": (0, "macro keyword"), 64 "XML_GLOBALS_ALLOC": (0, "macro keyword"), 65 "XML_GLOBALS_ERROR": (0, "macro keyword"), 66 "XML_GLOBALS_IO": (0, "macro keyword"), 67 "XML_GLOBALS_PARSER": (0, "macro keyword"), 68 "XML_GLOBALS_TREE": (0, "macro keyword"), 69 "XML_THREAD_LOCAL": (0, "macro keyword"), 70} 71 72def escape(raw): 73 raw = raw.replace('&', '&') 74 raw = raw.replace('<', '<') 75 raw = raw.replace('>', '>') 76 raw = raw.replace("'", ''') 77 raw = raw.replace('"', '"') 78 return raw 79 80class identifier: 81 def __init__(self, name, header=None, module=None, type=None, lineno = 0, 82 info=None, extra=None, conditionals = None): 83 self.name = name 84 self.header = header 85 self.module = module 86 self.type = type 87 self.info = info 88 self.extra = extra 89 self.lineno = lineno 90 self.static = 0 91 if conditionals == None or len(conditionals) == 0: 92 self.conditionals = None 93 else: 94 self.conditionals = conditionals[:] 95 if self.name == debugsym: 96 print("=> define %s : %s" % (debugsym, (module, type, info, 97 extra, conditionals))) 98 99 def __repr__(self): 100 r = "%s %s:" % (self.type, self.name) 101 if self.static: 102 r = r + " static" 103 if self.module != None: 104 r = r + " from %s" % (self.module) 105 if self.info != None: 106 r = r + " " + repr(self.info) 107 if self.extra != None: 108 r = r + " " + repr(self.extra) 109 if self.conditionals != None: 110 r = r + " " + repr(self.conditionals) 111 return r 112 113 114 def set_header(self, header): 115 self.header = header 116 def set_module(self, module): 117 self.module = module 118 def set_type(self, type): 119 self.type = type 120 def set_info(self, info): 121 self.info = info 122 def set_extra(self, extra): 123 self.extra = extra 124 def set_lineno(self, lineno): 125 self.lineno = lineno 126 def set_static(self, static): 127 self.static = static 128 def set_conditionals(self, conditionals): 129 if conditionals == None or len(conditionals) == 0: 130 self.conditionals = None 131 else: 132 self.conditionals = conditionals[:] 133 134 def get_name(self): 135 return self.name 136 def get_header(self): 137 return self.module 138 def get_module(self): 139 return self.module 140 def get_type(self): 141 return self.type 142 def get_info(self): 143 return self.info 144 def get_lineno(self): 145 return self.lineno 146 def get_extra(self): 147 return self.extra 148 def get_static(self): 149 return self.static 150 def get_conditionals(self): 151 return self.conditionals 152 153 def update(self, header, module, type = None, info = None, extra=None, 154 conditionals=None): 155 if self.name == debugsym: 156 print("=> update %s : %s" % (debugsym, (module, type, info, 157 extra, conditionals))) 158 if header != None and self.header == None: 159 self.set_header(module) 160 if module != None and (self.module == None or self.header == self.module): 161 self.set_module(module) 162 if type != None and self.type == None: 163 self.set_type(type) 164 if info != None: 165 self.set_info(info) 166 if extra != None: 167 self.set_extra(extra) 168 if conditionals != None: 169 self.set_conditionals(conditionals) 170 171class index: 172 def __init__(self, name = "noname"): 173 self.name = name 174 self.identifiers = {} 175 self.functions = {} 176 self.variables = {} 177 self.includes = {} 178 self.structs = {} 179 self.enums = {} 180 self.typedefs = {} 181 self.macros = {} 182 self.references = {} 183 self.info = {} 184 185 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 186 if name[0:2] == '__': 187 return None 188 d = None 189 if name in self.identifiers: 190 d = self.identifiers[name] 191 d.update(header, module, type, info, extra, conditionals) 192 else: 193 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 194 self.identifiers[name] = d 195 196 if d != None and static == 1: 197 d.set_static(1) 198 199 if d != None and name != None and type != None: 200 self.references[name] = d 201 202 if name == debugsym: 203 print("New ref: %s" % (d)) 204 205 return d 206 207 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 208 if name[0:2] == '__': 209 return None 210 d = None 211 if name in self.identifiers: 212 d = self.identifiers[name] 213 d.update(header, module, type, info, extra, conditionals) 214 else: 215 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 216 self.identifiers[name] = d 217 218 if d != None and static == 1: 219 d.set_static(1) 220 221 if d != None and name != None and type != None: 222 if type == "function": 223 self.functions[name] = d 224 elif type == "functype": 225 self.functions[name] = d 226 elif type == "variable": 227 self.variables[name] = d 228 elif type == "include": 229 self.includes[name] = d 230 elif type == "struct": 231 self.structs[name] = d 232 elif type == "enum": 233 self.enums[name] = d 234 elif type == "typedef": 235 self.typedefs[name] = d 236 elif type == "macro": 237 self.macros[name] = d 238 else: 239 print("Unable to register type ", type) 240 241 if name == debugsym: 242 print("New symbol: %s" % (d)) 243 244 return d 245 246 def merge(self, idx): 247 for id in list(idx.functions.keys()): 248 # 249 # macro might be used to override functions or variables 250 # definitions 251 # 252 if id in self.macros: 253 del self.macros[id] 254 if id in self.functions: 255 print("function %s from %s redeclared in %s" % ( 256 id, self.functions[id].header, idx.functions[id].header)) 257 else: 258 self.functions[id] = idx.functions[id] 259 self.identifiers[id] = idx.functions[id] 260 for id in list(idx.variables.keys()): 261 # 262 # macro might be used to override functions or variables 263 # definitions 264 # 265 if id in self.macros: 266 del self.macros[id] 267 if id in self.variables: 268 print("variable %s from %s redeclared in %s" % ( 269 id, self.variables[id].header, idx.variables[id].header)) 270 else: 271 self.variables[id] = idx.variables[id] 272 self.identifiers[id] = idx.variables[id] 273 for id in list(idx.structs.keys()): 274 if id in self.structs: 275 print("struct %s from %s redeclared in %s" % ( 276 id, self.structs[id].header, idx.structs[id].header)) 277 else: 278 self.structs[id] = idx.structs[id] 279 self.identifiers[id] = idx.structs[id] 280 for id in list(idx.typedefs.keys()): 281 if id in self.typedefs: 282 print("typedef %s from %s redeclared in %s" % ( 283 id, self.typedefs[id].header, idx.typedefs[id].header)) 284 else: 285 self.typedefs[id] = idx.typedefs[id] 286 self.identifiers[id] = idx.typedefs[id] 287 for id in list(idx.macros.keys()): 288 # 289 # macro might be used to override functions or variables 290 # definitions 291 # 292 if id in self.variables: 293 continue 294 if id in self.functions: 295 continue 296 if id in self.enums: 297 continue 298 if id in self.macros and id != 'XML_OP': 299 print("macro %s from %s redeclared in %s" % ( 300 id, self.macros[id].header, idx.macros[id].header)) 301 else: 302 self.macros[id] = idx.macros[id] 303 self.identifiers[id] = idx.macros[id] 304 for id in list(idx.enums.keys()): 305 if id in self.enums: 306 print("enum %s from %s redeclared in %s" % ( 307 id, self.enums[id].header, idx.enums[id].header)) 308 else: 309 self.enums[id] = idx.enums[id] 310 self.identifiers[id] = idx.enums[id] 311 312 def merge_public(self, idx): 313 for id in list(idx.functions.keys()): 314 if id in self.functions: 315 # check that function condition agrees with header 316 if idx.functions[id].conditionals != \ 317 self.functions[id].conditionals: 318 print("Header condition differs from Function for %s:" \ 319 % id) 320 print(" H: %s" % self.functions[id].conditionals) 321 print(" C: %s" % idx.functions[id].conditionals) 322 up = idx.functions[id] 323 self.functions[id].update(None, up.module, up.type, up.info, up.extra) 324 # else: 325 # print "Function %s from %s is not declared in headers" % ( 326 # id, idx.functions[id].module) 327 328 for id in list(idx.variables.keys()): 329 if id in self.variables: 330 # check that variable condition agrees with header 331 # TODO: produces many false positives 332 #if idx.variables[id].conditionals != \ 333 # self.variables[id].conditionals: 334 # print("Header condition differs from Variable for %s:" \ 335 # % id) 336 # print(" H: %s" % self.variables[id].conditionals) 337 # print(" C: %s" % idx.variables[id].conditionals) 338 up = idx.variables[id] 339 self.variables[id].update(None, up.module, up.type, up.info, up.extra) 340 341 def analyze_dict(self, type, dict): 342 count = 0 343 public = 0 344 for name in list(dict.keys()): 345 id = dict[name] 346 count = count + 1 347 if id.static == 0: 348 public = public + 1 349 if count != public: 350 print(" %d %s , %d public" % (count, type, public)) 351 elif count != 0: 352 print(" %d public %s" % (count, type)) 353 354 355 def analyze(self): 356 self.analyze_dict("functions", self.functions) 357 self.analyze_dict("variables", self.variables) 358 self.analyze_dict("structs", self.structs) 359 self.analyze_dict("typedefs", self.typedefs) 360 self.analyze_dict("macros", self.macros) 361 362class CLexer: 363 """A lexer for the C language, tokenize the input by reading and 364 analyzing it line by line""" 365 def __init__(self, input): 366 self.input = input 367 self.tokens = [] 368 self.line = "" 369 self.lineno = 0 370 371 def getline(self): 372 line = '' 373 while line == '': 374 line = self.input.readline() 375 if not line: 376 return None 377 self.lineno = self.lineno + 1 378 line = line.lstrip() 379 line = line.rstrip() 380 if line == '': 381 continue 382 while line[-1] == '\\': 383 line = line[:-1] 384 n = self.input.readline() 385 self.lineno = self.lineno + 1 386 n = n.lstrip() 387 n = n.rstrip() 388 if not n: 389 break 390 else: 391 line = line + n 392 return line 393 394 def getlineno(self): 395 return self.lineno 396 397 def push(self, token): 398 self.tokens.insert(0, token); 399 400 def debug(self): 401 print("Last token: ", self.last) 402 print("Token queue: ", self.tokens) 403 print("Line %d end: " % (self.lineno), self.line) 404 405 def token(self): 406 while self.tokens == []: 407 if self.line == "": 408 line = self.getline() 409 else: 410 line = self.line 411 self.line = "" 412 if line == None: 413 return None 414 415 if line[0] == '#': 416 self.tokens = list(map((lambda x: ('preproc', x)), 417 line.split())) 418 break; 419 l = len(line) 420 if line[0] == '"' or line[0] == "'": 421 end = line[0] 422 line = line[1:] 423 found = 0 424 tok = "" 425 while found == 0: 426 i = 0 427 l = len(line) 428 while i < l: 429 if line[i] == end: 430 self.line = line[i+1:] 431 line = line[:i] 432 l = i 433 found = 1 434 break 435 if line[i] == '\\': 436 i = i + 1 437 i = i + 1 438 tok = tok + line 439 if found == 0: 440 line = self.getline() 441 if line == None: 442 return None 443 self.last = ('string', tok) 444 return self.last 445 446 if l >= 2 and line[0] == '/' and line[1] == '*': 447 line = line[2:] 448 found = 0 449 tok = "" 450 while found == 0: 451 i = 0 452 l = len(line) 453 while i < l: 454 if line[i] == '*' and i+1 < l and line[i+1] == '/': 455 self.line = line[i+2:] 456 line = line[:i-1] 457 l = i 458 found = 1 459 break 460 i = i + 1 461 if tok != "": 462 tok = tok + "\n" 463 tok = tok + line 464 if found == 0: 465 line = self.getline() 466 if line == None: 467 return None 468 self.last = ('comment', tok) 469 return self.last 470 if l >= 2 and line[0] == '/' and line[1] == '/': 471 line = line[2:] 472 self.last = ('comment', line) 473 return self.last 474 i = 0 475 while i < l: 476 if line[i] == '/' and i+1 < l and line[i+1] == '/': 477 self.line = line[i:] 478 line = line[:i] 479 break 480 if line[i] == '/' and i+1 < l and line[i+1] == '*': 481 self.line = line[i:] 482 line = line[:i] 483 break 484 if line[i] == '"' or line[i] == "'": 485 self.line = line[i:] 486 line = line[:i] 487 break 488 i = i + 1 489 l = len(line) 490 i = 0 491 while i < l: 492 if line[i] == ' ' or line[i] == '\t': 493 i = i + 1 494 continue 495 o = ord(line[i]) 496 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 497 (o >= 48 and o <= 57): 498 s = i 499 while i < l: 500 o = ord(line[i]) 501 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 502 (o >= 48 and o <= 57) or \ 503 (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: 504 i = i + 1 505 else: 506 break 507 self.tokens.append(('name', line[s:i])) 508 continue 509 if "(){}:;,[]".find(line[i]) != -1: 510# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 511# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 512# line[i] == ',' or line[i] == '[' or line[i] == ']': 513 self.tokens.append(('sep', line[i])) 514 i = i + 1 515 continue 516 if "+-*><=/%&!|.".find(line[i]) != -1: 517# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 518# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 519# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 520# line[i] == '!' or line[i] == '|' or line[i] == '.': 521 if line[i] == '.' and i + 2 < l and \ 522 line[i+1] == '.' and line[i+2] == '.': 523 self.tokens.append(('name', '...')) 524 i = i + 3 525 continue 526 527 j = i + 1 528 if j < l and ( 529 "+-*><=/%&!|".find(line[j]) != -1): 530# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 531# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 532# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 533# line[j] == '!' or line[j] == '|'): 534 self.tokens.append(('op', line[i:j+1])) 535 i = j + 1 536 else: 537 self.tokens.append(('op', line[i])) 538 i = i + 1 539 continue 540 s = i 541 while i < l: 542 o = ord(line[i]) 543 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 544 (o >= 48 and o <= 57) or ( 545 " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): 546# line[i] != ' ' and line[i] != '\t' and 547# line[i] != '(' and line[i] != ')' and 548# line[i] != '{' and line[i] != '}' and 549# line[i] != ':' and line[i] != ';' and 550# line[i] != ',' and line[i] != '+' and 551# line[i] != '-' and line[i] != '*' and 552# line[i] != '/' and line[i] != '%' and 553# line[i] != '&' and line[i] != '!' and 554# line[i] != '|' and line[i] != '[' and 555# line[i] != ']' and line[i] != '=' and 556# line[i] != '*' and line[i] != '>' and 557# line[i] != '<'): 558 i = i + 1 559 else: 560 break 561 self.tokens.append(('name', line[s:i])) 562 563 tok = self.tokens[0] 564 self.tokens = self.tokens[1:] 565 self.last = tok 566 return tok 567 568class CParser: 569 """The C module parser""" 570 def __init__(self, filename, idx = None): 571 self.filename = filename 572 if len(filename) > 2 and filename[-2:] == '.h': 573 self.is_header = 1 574 else: 575 self.is_header = 0 576 self.input = open(filename) 577 self.lexer = CLexer(self.input) 578 if idx == None: 579 self.index = index() 580 else: 581 self.index = idx 582 self.top_comment = "" 583 self.last_comment = "" 584 self.comment = None 585 self.collect_ref = 0 586 self.doc_disable = 0 587 self.conditionals = [] 588 self.defines = [] 589 590 def collect_references(self): 591 self.collect_ref = 1 592 593 def disable(self): 594 self.doc_disable = 1 595 596 def enable(self): 597 self.doc_disable = 0 598 599 def lineno(self): 600 return self.lexer.getlineno() 601 602 def index_add(self, name, module, static, type, info=None, extra = None): 603 if self.doc_disable: 604 return 605 if self.is_header == 1: 606 self.index.add(name, module, module, static, type, self.lineno(), 607 info, extra, self.conditionals) 608 else: 609 self.index.add(name, None, module, static, type, self.lineno(), 610 info, extra, self.conditionals) 611 612 def index_add_ref(self, name, module, static, type, info=None, 613 extra = None): 614 if self.is_header == 1: 615 self.index.add_ref(name, module, module, static, type, 616 self.lineno(), info, extra, self.conditionals) 617 else: 618 self.index.add_ref(name, None, module, static, type, self.lineno(), 619 info, extra, self.conditionals) 620 621 def warning(self, msg): 622 if self.doc_disable: 623 return 624 print(msg) 625 626 def error(self, msg, token=-1): 627 if self.doc_disable: 628 return 629 630 print("Parse Error: " + msg) 631 if token != -1: 632 print("Got token ", token) 633 self.lexer.debug() 634 sys.exit(1) 635 636 def debug(self, msg, token=-1): 637 print("Debug: " + msg) 638 if token != -1: 639 print("Got token ", token) 640 self.lexer.debug() 641 642 def parseTopComment(self, comment): 643 res = {} 644 lines = comment.split("\n") 645 item = None 646 for line in lines: 647 while line != "" and (line[0] == ' ' or line[0] == '\t'): 648 line = line[1:] 649 while line != "" and line[0] == '*': 650 line = line[1:] 651 while line != "" and (line[0] == ' ' or line[0] == '\t'): 652 line = line[1:] 653 try: 654 (it, line) = line.split(":", 1) 655 item = it 656 while line != "" and (line[0] == ' ' or line[0] == '\t'): 657 line = line[1:] 658 if item in res: 659 res[item] = res[item] + " " + line 660 else: 661 res[item] = line 662 except: 663 if item != None: 664 if item in res: 665 res[item] = res[item] + " " + line 666 else: 667 res[item] = line 668 self.index.info = res 669 670 def parseComment(self, token): 671 if self.top_comment == "": 672 self.top_comment = token[1] 673 if self.comment == None or token[1][0] == '*': 674 self.comment = token[1]; 675 else: 676 self.comment = self.comment + token[1] 677 token = self.lexer.token() 678 679 if self.comment.find("DOC_DISABLE") != -1: 680 self.disable() 681 682 if self.comment.find("DOC_ENABLE") != -1: 683 self.enable() 684 685 return token 686 687 # 688 # Parse a simple comment block for typedefs or global variables 689 # 690 def parseSimpleComment(self, name, quiet = False): 691 if name[0:2] == '__': 692 quiet = 1 693 694 args = [] 695 desc = "" 696 697 if self.comment == None: 698 if not quiet: 699 self.warning("Missing comment for %s" % (name)) 700 return(None) 701 if self.comment[0] != '*': 702 if not quiet: 703 self.warning("Missing * in comment for %s" % (name)) 704 return(None) 705 lines = self.comment.split('\n') 706 if lines[0] == '*': 707 del lines[0] 708 if lines[0] != "* %s:" % (name): 709 if not quiet: 710 self.warning("Misformatted comment for %s" % (name)) 711 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 712 return(None) 713 del lines[0] 714 while len(lines) > 0 and lines[0] == '*': 715 del lines[0] 716 desc = "" 717 while len(lines) > 0: 718 l = lines[0] 719 while len(l) > 0 and l[0] == '*': 720 l = l[1:] 721 l = l.strip() 722 desc = desc + " " + l 723 del lines[0] 724 725 desc = desc.strip() 726 727 if quiet == 0: 728 if desc == "": 729 self.warning("Comment for %s lacks description" % (name)) 730 731 return(desc) 732 # 733 # Parse a comment block associate to a macro 734 # 735 def parseMacroComment(self, name, quiet = 0): 736 if name[0:2] == '__': 737 quiet = 1 738 739 args = [] 740 desc = "" 741 742 if self.comment == None: 743 if not quiet: 744 self.warning("Missing comment for macro %s" % (name)) 745 return((args, desc)) 746 if self.comment[0] != '*': 747 if not quiet: 748 self.warning("Missing * in macro comment for %s" % (name)) 749 return((args, desc)) 750 lines = self.comment.split('\n') 751 if lines[0] == '*': 752 del lines[0] 753 if lines[0] != "* %s:" % (name): 754 if not quiet: 755 self.warning("Misformatted macro comment for %s" % (name)) 756 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 757 return((args, desc)) 758 del lines[0] 759 while lines[0] == '*': 760 del lines[0] 761 while len(lines) > 0 and lines[0][0:3] == '* @': 762 l = lines[0][3:] 763 try: 764 (arg, desc) = l.split(':', 1) 765 desc=desc.strip() 766 arg=arg.strip() 767 except: 768 if not quiet: 769 self.warning("Misformatted macro comment for %s" % (name)) 770 self.warning(" problem with '%s'" % (lines[0])) 771 del lines[0] 772 continue 773 del lines[0] 774 l = lines[0].strip() 775 while len(l) > 2 and l[0:3] != '* @': 776 while l[0] == '*': 777 l = l[1:] 778 desc = desc + ' ' + l.strip() 779 del lines[0] 780 if len(lines) == 0: 781 break 782 l = lines[0] 783 args.append((arg, desc)) 784 while len(lines) > 0 and lines[0] == '*': 785 del lines[0] 786 desc = "" 787 while len(lines) > 0: 788 l = lines[0] 789 while len(l) > 0 and l[0] == '*': 790 l = l[1:] 791 l = l.strip() 792 desc = desc + " " + l 793 del lines[0] 794 795 desc = desc.strip() 796 797 if quiet == 0: 798 if desc == "": 799 self.warning("Macro comment for %s lack description of the macro" % (name)) 800 801 return((args, desc)) 802 803 # 804 # Parse a comment block and merge the information found in the 805 # parameters descriptions, finally returns a block as complete 806 # as possible 807 # 808 def mergeFunctionComment(self, name, description, quiet = 0): 809 if name == 'main': 810 quiet = 1 811 if name[0:2] == '__': 812 quiet = 1 813 814 (ret, args) = description 815 desc = "" 816 retdesc = "" 817 818 if self.comment == None: 819 if not quiet: 820 self.warning("Missing comment for function %s" % (name)) 821 return(((ret[0], retdesc), args, desc)) 822 if self.comment[0] != '*': 823 if not quiet: 824 self.warning("Missing * in function comment for %s" % (name)) 825 return(((ret[0], retdesc), args, desc)) 826 lines = self.comment.split('\n') 827 if lines[0] == '*': 828 del lines[0] 829 if lines[0] != "* %s:" % (name): 830 if not quiet: 831 self.warning("Misformatted function comment for %s" % (name)) 832 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 833 return(((ret[0], retdesc), args, desc)) 834 del lines[0] 835 while lines[0] == '*': 836 del lines[0] 837 nbargs = len(args) 838 while len(lines) > 0 and lines[0][0:3] == '* @': 839 l = lines[0][3:] 840 try: 841 (arg, desc) = l.split(':', 1) 842 desc=desc.strip() 843 arg=arg.strip() 844 except: 845 if not quiet: 846 self.warning("Misformatted function comment for %s" % (name)) 847 self.warning(" problem with '%s'" % (lines[0])) 848 del lines[0] 849 continue 850 del lines[0] 851 l = lines[0].strip() 852 while len(l) > 2 and l[0:3] != '* @': 853 while l[0] == '*': 854 l = l[1:] 855 desc = desc + ' ' + l.strip() 856 del lines[0] 857 if len(lines) == 0: 858 break 859 l = lines[0] 860 i = 0 861 while i < nbargs: 862 if args[i][1] == arg: 863 args[i] = (args[i][0], arg, desc) 864 break; 865 i = i + 1 866 if i >= nbargs: 867 if not quiet: 868 self.warning("Unable to find arg %s from function comment for %s" % ( 869 arg, name)) 870 while len(lines) > 0 and lines[0] == '*': 871 del lines[0] 872 desc = "" 873 while len(lines) > 0: 874 l = lines[0] 875 while len(l) > 0 and l[0] == '*': 876 l = l[1:] 877 l = l.strip() 878 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 879 try: 880 l = l.split(' ', 1)[1] 881 except: 882 l = "" 883 retdesc = l.strip() 884 del lines[0] 885 while len(lines) > 0: 886 l = lines[0] 887 while len(l) > 0 and l[0] == '*': 888 l = l[1:] 889 l = l.strip() 890 retdesc = retdesc + " " + l 891 del lines[0] 892 else: 893 desc = desc + " " + l 894 del lines[0] 895 896 retdesc = retdesc.strip() 897 desc = desc.strip() 898 899 if quiet == 0: 900 # 901 # report missing comments 902 # 903 i = 0 904 while i < nbargs: 905 if args[i][2] == None and args[i][0] != "void" and \ 906 ((args[i][1] != None) or (args[i][1] == '')): 907 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) 908 i = i + 1 909 if retdesc == "" and ret[0] != "void": 910 self.warning("Function comment for %s lacks description of return value" % (name)) 911 if desc == "" and retdesc == "": 912 self.warning("Function comment for %s lacks description of the function" % (name)) 913 914 return(((ret[0], retdesc), args, desc)) 915 916 def parsePreproc(self, token): 917 if debug: 918 print("=> preproc ", token, self.lexer.tokens) 919 name = token[1] 920 if name == "#include": 921 token = self.lexer.token() 922 if token == None: 923 return None 924 if token[0] == 'preproc': 925 self.index_add(token[1], self.filename, not self.is_header, 926 "include") 927 return self.lexer.token() 928 return token 929 if name == "#define": 930 token = self.lexer.token() 931 if token == None: 932 return None 933 if token[0] == 'preproc': 934 # TODO macros with arguments 935 name = token[1] 936 lst = [] 937 token = self.lexer.token() 938 while token != None and token[0] == 'preproc' and \ 939 token[1][0] != '#': 940 lst.append(token[1]) 941 token = self.lexer.token() 942 try: 943 name = name.split('(') [0] 944 except: 945 pass 946 info = self.parseMacroComment(name, True) 947 self.index_add(name, self.filename, not self.is_header, 948 "macro", info) 949 return token 950 951 # 952 # Processing of conditionals modified by Bill 1/1/05 953 # 954 # We process conditionals (i.e. tokens from #ifdef, #ifndef, 955 # #if, #else and #endif) for headers and mainline code, 956 # store the ones from the header in libxml2-api.xml, and later 957 # (in the routine merge_public) verify that the two (header and 958 # mainline code) agree. 959 # 960 # There is a small problem with processing the headers. Some of 961 # the variables are not concerned with enabling / disabling of 962 # library functions (e.g. '__XML_PARSER_H__'), and we don't want 963 # them to be included in libxml2-api.xml, or involved in 964 # the check between the header and the mainline code. To 965 # accomplish this, we ignore any conditional which doesn't include 966 # the string 'ENABLED' 967 # 968 if name == "#ifdef": 969 apstr = self.lexer.tokens[0][1] 970 try: 971 self.defines.append(apstr) 972 if apstr.find('ENABLED') != -1: 973 self.conditionals.append("defined(%s)" % apstr) 974 except: 975 pass 976 elif name == "#ifndef": 977 apstr = self.lexer.tokens[0][1] 978 try: 979 self.defines.append(apstr) 980 if apstr.find('ENABLED') != -1: 981 self.conditionals.append("!defined(%s)" % apstr) 982 except: 983 pass 984 elif name == "#if": 985 apstr = "" 986 for tok in self.lexer.tokens: 987 if apstr != "": 988 apstr = apstr + " " 989 apstr = apstr + tok[1] 990 try: 991 self.defines.append(apstr) 992 if apstr.find('ENABLED') != -1: 993 self.conditionals.append(apstr) 994 except: 995 pass 996 elif name == "#else": 997 if self.conditionals != [] and \ 998 self.defines[-1].find('ENABLED') != -1: 999 self.conditionals[-1] = "!(%s)" % self.conditionals[-1] 1000 elif name == "#endif": 1001 if self.conditionals != [] and \ 1002 self.defines[-1].find('ENABLED') != -1: 1003 self.conditionals = self.conditionals[:-1] 1004 self.defines = self.defines[:-1] 1005 token = self.lexer.token() 1006 while token != None and token[0] == 'preproc' and \ 1007 token[1][0] != '#': 1008 token = self.lexer.token() 1009 return token 1010 1011 # 1012 # token acquisition on top of the lexer, it handle internally 1013 # preprocessor and comments since they are logically not part of 1014 # the program structure. 1015 # 1016 def token(self): 1017 global ignored_words 1018 1019 token = self.lexer.token() 1020 while token != None: 1021 if token[0] == 'comment': 1022 token = self.parseComment(token) 1023 continue 1024 elif token[0] == 'preproc': 1025 token = self.parsePreproc(token) 1026 continue 1027 elif token[0] == "name" and token[1] == "__const": 1028 token = ("name", "const") 1029 return token 1030 elif token[0] == "name" and token[1] == "__attribute": 1031 token = self.lexer.token() 1032 while token != None and token[1] != ";": 1033 token = self.lexer.token() 1034 return token 1035 elif token[0] == "name" and token[1] in ignored_words: 1036 (n, info) = ignored_words[token[1]] 1037 i = 0 1038 while i < n: 1039 token = self.lexer.token() 1040 i = i + 1 1041 token = self.lexer.token() 1042 continue 1043 else: 1044 if debug: 1045 print("=> ", token) 1046 return token 1047 return None 1048 1049 # 1050 # Parse a typedef, it records the type and its name. 1051 # 1052 def parseTypedef(self, token): 1053 if token == None: 1054 return None 1055 token = self.parseType(token) 1056 if token == None: 1057 self.error("parsing typedef") 1058 return None 1059 base_type = self.type 1060 type = base_type 1061 #self.debug("end typedef type", token) 1062 while token != None: 1063 if token[0] == "name": 1064 name = token[1] 1065 signature = self.signature 1066 if signature != None: 1067 type = type.split('(')[0] 1068 d = self.mergeFunctionComment(name, 1069 ((type, None), signature), 1) 1070 self.index_add(name, self.filename, not self.is_header, 1071 "functype", d) 1072 else: 1073 if base_type == "struct": 1074 self.index_add(name, self.filename, not self.is_header, 1075 "struct", type) 1076 base_type = "struct " + name 1077 else: 1078 # TODO report missing or misformatted comments 1079 info = self.parseSimpleComment(name, True) 1080 self.index_add(name, self.filename, not self.is_header, 1081 "typedef", type, info) 1082 token = self.token() 1083 else: 1084 self.error("parsing typedef: expecting a name") 1085 return token 1086 #self.debug("end typedef", token) 1087 if token != None and token[0] == 'sep' and token[1] == ',': 1088 type = base_type 1089 token = self.token() 1090 while token != None and token[0] == "op": 1091 type = type + token[1] 1092 token = self.token() 1093 elif token != None and token[0] == 'sep' and token[1] == ';': 1094 break; 1095 elif token != None and token[0] == 'name': 1096 type = base_type 1097 continue; 1098 else: 1099 self.error("parsing typedef: expecting ';'", token) 1100 return token 1101 token = self.token() 1102 return token 1103 1104 # 1105 # Parse a C code block, used for functions it parse till 1106 # the balancing } included 1107 # 1108 def parseBlock(self, token): 1109 while token != None: 1110 if token[0] == "sep" and token[1] == "{": 1111 token = self.token() 1112 token = self.parseBlock(token) 1113 elif token[0] == "sep" and token[1] == "}": 1114 token = self.token() 1115 return token 1116 else: 1117 if self.collect_ref == 1: 1118 oldtok = token 1119 token = self.token() 1120 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 1121 if token[0] == "sep" and token[1] == "(": 1122 self.index_add_ref(oldtok[1], self.filename, 1123 0, "function") 1124 token = self.token() 1125 elif token[0] == "name": 1126 token = self.token() 1127 if token[0] == "sep" and (token[1] == ";" or 1128 token[1] == "," or token[1] == "="): 1129 self.index_add_ref(oldtok[1], self.filename, 1130 0, "type") 1131 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 1132 self.index_add_ref(oldtok[1], self.filename, 1133 0, "typedef") 1134 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 1135 self.index_add_ref(oldtok[1], self.filename, 1136 0, "typedef") 1137 1138 else: 1139 token = self.token() 1140 return token 1141 1142 # 1143 # Parse a C struct definition till the balancing } 1144 # 1145 def parseStruct(self, token): 1146 fields = [] 1147 #self.debug("start parseStruct", token) 1148 while token != None: 1149 if token[0] == "sep" and token[1] == "{": 1150 token = self.token() 1151 token = self.parseTypeBlock(token) 1152 elif token[0] == "sep" and token[1] == "}": 1153 self.struct_fields = fields 1154 #self.debug("end parseStruct", token) 1155 #print fields 1156 token = self.token() 1157 return token 1158 else: 1159 base_type = self.type 1160 #self.debug("before parseType", token) 1161 token = self.parseType(token) 1162 #self.debug("after parseType", token) 1163 if token != None and token[0] == "name": 1164 fname = token[1] 1165 token = self.token() 1166 if token[0] == "sep" and token[1] == ";": 1167 token = self.token() 1168 fields.append((self.type, fname)) 1169 else: 1170 self.error("parseStruct: expecting ;", token) 1171 elif token != None and token[0] == "sep" and token[1] == "{": 1172 token = self.token() 1173 token = self.parseTypeBlock(token) 1174 if token != None and token[0] == "name": 1175 token = self.token() 1176 if token != None and token[0] == "sep" and token[1] == ";": 1177 token = self.token() 1178 else: 1179 self.error("parseStruct: expecting ;", token) 1180 else: 1181 self.error("parseStruct: name", token) 1182 token = self.token() 1183 self.type = base_type; 1184 self.struct_fields = fields 1185 #self.debug("end parseStruct", token) 1186 #print fields 1187 return token 1188 1189 # 1190 # Parse a C enum block, parse till the balancing } 1191 # 1192 def parseEnumBlock(self, token): 1193 self.enums = [] 1194 name = None 1195 self.comment = None 1196 comment = "" 1197 value = "0" 1198 while token != None: 1199 if token[0] == "sep" and token[1] == "{": 1200 token = self.token() 1201 token = self.parseTypeBlock(token) 1202 elif token[0] == "sep" and token[1] == "}": 1203 if name != None: 1204 if self.comment != None: 1205 comment = self.comment 1206 self.comment = None 1207 self.enums.append((name, value, comment)) 1208 token = self.token() 1209 return token 1210 elif token[0] == "name": 1211 if name != None: 1212 if self.comment != None: 1213 comment = self.comment.strip() 1214 self.comment = None 1215 self.enums.append((name, value, comment)) 1216 name = token[1] 1217 comment = "" 1218 token = self.token() 1219 if token[0] == "op" and token[1][0] == "=": 1220 value = "" 1221 if len(token[1]) > 1: 1222 value = token[1][1:] 1223 token = self.token() 1224 while token[0] != "sep" or (token[1] != ',' and 1225 token[1] != '}'): 1226 value = value + token[1] 1227 token = self.token() 1228 else: 1229 try: 1230 value = "%d" % (int(value) + 1) 1231 except: 1232 self.warning("Failed to compute value of enum %s" % (name)) 1233 value="" 1234 if token[0] == "sep" and token[1] == ",": 1235 token = self.token() 1236 else: 1237 token = self.token() 1238 return token 1239 1240 # 1241 # Parse a C definition block, used for structs it parse till 1242 # the balancing } 1243 # 1244 def parseTypeBlock(self, token): 1245 while token != None: 1246 if token[0] == "sep" and token[1] == "{": 1247 token = self.token() 1248 token = self.parseTypeBlock(token) 1249 elif token[0] == "sep" and token[1] == "}": 1250 token = self.token() 1251 return token 1252 else: 1253 token = self.token() 1254 return token 1255 1256 # 1257 # Parse a type: the fact that the type name can either occur after 1258 # the definition or within the definition makes it a little harder 1259 # if inside, the name token is pushed back before returning 1260 # 1261 def parseType(self, token): 1262 self.type = "" 1263 self.struct_fields = [] 1264 self.signature = None 1265 if token == None: 1266 return token 1267 1268 have_sign = 0 1269 done = 0 1270 1271 while token[0] == "name" and ( 1272 token[1] == "const" or \ 1273 token[1] == "unsigned" or \ 1274 token[1] == "signed"): 1275 if token[1] == "unsigned" or token[1] == "signed": 1276 have_sign = 1 1277 if self.type == "": 1278 self.type = token[1] 1279 else: 1280 self.type = self.type + " " + token[1] 1281 token = self.token() 1282 1283 if token[0] == "name" and token[1] in ("char", "short", "int", "long"): 1284 if self.type == "": 1285 self.type = token[1] 1286 else: 1287 self.type = self.type + " " + token[1] 1288 1289 elif have_sign: 1290 done = 1 1291 1292 elif token[0] == "name" and token[1] == "struct": 1293 if self.type == "": 1294 self.type = token[1] 1295 else: 1296 self.type = self.type + " " + token[1] 1297 token = self.token() 1298 nametok = None 1299 if token[0] == "name": 1300 nametok = token 1301 token = self.token() 1302 if token != None and token[0] == "sep" and token[1] == "{": 1303 token = self.token() 1304 token = self.parseStruct(token) 1305 elif token != None and token[0] == "op" and token[1] == "*": 1306 self.type = self.type + " " + nametok[1] + " *" 1307 token = self.token() 1308 while token != None and token[0] == "op" and token[1] == "*": 1309 self.type = self.type + " *" 1310 token = self.token() 1311 if token[0] == "name": 1312 nametok = token 1313 token = self.token() 1314 else: 1315 self.error("struct : expecting name", token) 1316 return token 1317 elif token != None and token[0] == "name" and nametok != None: 1318 self.type = self.type + " " + nametok[1] 1319 return token 1320 1321 if nametok != None: 1322 self.lexer.push(token) 1323 token = nametok 1324 return token 1325 1326 elif token[0] == "name" and token[1] == "enum": 1327 if self.type == "": 1328 self.type = token[1] 1329 else: 1330 self.type = self.type + " " + token[1] 1331 self.enums = [] 1332 token = self.token() 1333 if token != None and token[0] == "sep" and token[1] == "{": 1334 token = self.token() 1335 token = self.parseEnumBlock(token) 1336 else: 1337 self.error("parsing enum: expecting '{'", token) 1338 enum_type = None 1339 if token != None and token[0] != "name": 1340 self.lexer.push(token) 1341 token = ("name", "enum") 1342 else: 1343 enum_type = token[1] 1344 for enum in self.enums: 1345 self.index_add(enum[0], self.filename, 1346 not self.is_header, "enum", 1347 (enum[1], enum[2], enum_type)) 1348 return token 1349 1350 elif token[0] == "name": 1351 if self.type == "": 1352 self.type = token[1] 1353 else: 1354 self.type = self.type + " " + token[1] 1355 else: 1356 self.error("parsing type %s: expecting a name" % (self.type), 1357 token) 1358 return token 1359 if not done: 1360 token = self.token() 1361 while token != None and (token[0] == "op" or 1362 token[0] == "name" and token[1] == "const"): 1363 self.type = self.type + " " + token[1] 1364 token = self.token() 1365 1366 # 1367 # if there is a parenthesis here, this means a function type 1368 # 1369 if token != None and token[0] == "sep" and token[1] == '(': 1370 self.type = self.type + token[1] 1371 token = self.token() 1372 while token != None and token[0] == "op" and token[1] == '*': 1373 self.type = self.type + token[1] 1374 token = self.token() 1375 if token == None or token[0] != "name" : 1376 self.error("parsing function type, name expected", token); 1377 return token 1378 self.type = self.type + token[1] 1379 nametok = token 1380 token = self.token() 1381 if token != None and token[0] == "sep" and token[1] == ')': 1382 self.type = self.type + token[1] 1383 token = self.token() 1384 if token != None and token[0] == "sep" and token[1] == '(': 1385 token = self.token() 1386 type = self.type; 1387 token = self.parseSignature(token); 1388 self.type = type; 1389 else: 1390 self.error("parsing function type, '(' expected", token); 1391 return token 1392 else: 1393 self.error("parsing function type, ')' expected", token); 1394 return token 1395 self.lexer.push(token) 1396 token = nametok 1397 return token 1398 1399 # 1400 # do some lookahead for arrays 1401 # 1402 if token != None and token[0] == "name": 1403 nametok = token 1404 token = self.token() 1405 if token != None and token[0] == "sep" and token[1] == '[': 1406 self.type = self.type + nametok[1] 1407 while token != None and token[0] == "sep" and token[1] == '[': 1408 self.type = self.type + token[1] 1409 token = self.token() 1410 while token != None and token[0] != 'sep' and \ 1411 token[1] != ']' and token[1] != ';': 1412 self.type = self.type + token[1] 1413 token = self.token() 1414 if token != None and token[0] == 'sep' and token[1] == ']': 1415 self.type = self.type + token[1] 1416 token = self.token() 1417 else: 1418 self.error("parsing array type, ']' expected", token); 1419 return token 1420 elif token != None and token[0] == "sep" and token[1] == ':': 1421 # remove :12 in case it's a limited int size 1422 token = self.token() 1423 token = self.token() 1424 self.lexer.push(token) 1425 token = nametok 1426 1427 return token 1428 1429 # 1430 # Parse a signature: '(' has been parsed and we scan the type definition 1431 # up to the ')' included 1432 def parseSignature(self, token): 1433 signature = [] 1434 if token != None and token[0] == "sep" and token[1] == ')': 1435 self.signature = [] 1436 token = self.token() 1437 return token 1438 while token != None: 1439 token = self.parseType(token) 1440 if token != None and token[0] == "name": 1441 signature.append((self.type, token[1], None)) 1442 token = self.token() 1443 elif token != None and token[0] == "sep" and token[1] == ',': 1444 token = self.token() 1445 continue 1446 elif token != None and token[0] == "sep" and token[1] == ')': 1447 # only the type was provided 1448 if self.type == "...": 1449 signature.append((self.type, "...", None)) 1450 else: 1451 signature.append((self.type, None, None)) 1452 if token != None and token[0] == "sep": 1453 if token[1] == ',': 1454 token = self.token() 1455 continue 1456 elif token[1] == ')': 1457 token = self.token() 1458 break 1459 self.signature = signature 1460 return token 1461 1462 # 1463 # Parse a global definition, be it a type, variable or function 1464 # the extern "C" blocks are a bit nasty and require it to recurse. 1465 # 1466 def parseGlobal(self, token): 1467 static = 0 1468 if token[1] == 'extern': 1469 token = self.token() 1470 if token == None: 1471 return token 1472 if token[0] == 'string': 1473 if token[1] == 'C': 1474 token = self.token() 1475 if token == None: 1476 return token 1477 if token[0] == 'sep' and token[1] == "{": 1478 token = self.token() 1479# print 'Entering extern "C line ', self.lineno() 1480 while token != None and (token[0] != 'sep' or 1481 token[1] != "}"): 1482 if token[0] == 'name': 1483 token = self.parseGlobal(token) 1484 else: 1485 self.error( 1486 "token %s %s unexpected at the top level" % ( 1487 token[0], token[1])) 1488 token = self.parseGlobal(token) 1489# print 'Exiting extern "C" line', self.lineno() 1490 token = self.token() 1491 return token 1492 else: 1493 return token 1494 elif token[1] == 'static': 1495 static = 1 1496 token = self.token() 1497 if token == None or token[0] != 'name': 1498 return token 1499 1500 if token[1] == 'typedef': 1501 token = self.token() 1502 return self.parseTypedef(token) 1503 else: 1504 token = self.parseType(token) 1505 type_orig = self.type 1506 if token == None or token[0] != "name": 1507 return token 1508 type = type_orig 1509 self.name = token[1] 1510 token = self.token() 1511 while token != None and (token[0] == "sep" or token[0] == "op"): 1512 if token[0] == "sep": 1513 if token[1] == "[": 1514 type = type + token[1] 1515 token = self.token() 1516 while token != None and (token[0] != "sep" or \ 1517 token[1] != ";"): 1518 type = type + token[1] 1519 token = self.token() 1520 1521 if token != None and token[0] == "op" and token[1] == "=": 1522 # 1523 # Skip the initialization of the variable 1524 # 1525 token = self.token() 1526 if token[0] == 'sep' and token[1] == '{': 1527 token = self.token() 1528 token = self.parseBlock(token) 1529 else: 1530 while token != None and (token[0] != "sep" or \ 1531 (token[1] != ';' and token[1] != ',')): 1532 token = self.token() 1533 if token == None or token[0] != "sep" or (token[1] != ';' and 1534 token[1] != ','): 1535 self.error("missing ';' or ',' after value") 1536 1537 if token != None and token[0] == "sep": 1538 if token[1] == ";": 1539 if type == "struct": 1540 self.index_add(self.name, self.filename, 1541 not self.is_header, "struct", self.struct_fields) 1542 else: 1543 info = self.parseSimpleComment(self.name, True) 1544 self.index_add(self.name, self.filename, 1545 not self.is_header, "variable", type, info) 1546 self.comment = None 1547 token = self.token() 1548 break 1549 elif token[1] == "(": 1550 token = self.token() 1551 token = self.parseSignature(token) 1552 if token == None: 1553 return None 1554 if token[0] == "sep" and token[1] == ";": 1555 d = self.mergeFunctionComment(self.name, 1556 ((type, None), self.signature), 1) 1557 self.index_add(self.name, self.filename, static, 1558 "function", d) 1559 self.comment = None 1560 token = self.token() 1561 elif token[0] == "sep" and token[1] == "{": 1562 d = self.mergeFunctionComment(self.name, 1563 ((type, None), self.signature), static) 1564 self.index_add(self.name, self.filename, static, 1565 "function", d) 1566 self.comment = None 1567 token = self.token() 1568 token = self.parseBlock(token); 1569 elif token[1] == ',': 1570 self.index_add(self.name, self.filename, static, 1571 "variable", type) 1572 self.comment = None 1573 type = type_orig 1574 token = self.token() 1575 while token != None and token[0] == "sep": 1576 type = type + token[1] 1577 token = self.token() 1578 if token != None and token[0] == "name": 1579 self.name = token[1] 1580 token = self.token() 1581 else: 1582 break 1583 1584 return token 1585 1586 def parse(self): 1587 self.warning("Parsing %s" % (self.filename)) 1588 token = self.token() 1589 while token != None: 1590 if token[0] == 'name': 1591 token = self.parseGlobal(token) 1592 else: 1593 self.error("token %s %s unexpected at the top level" % ( 1594 token[0], token[1])) 1595 token = self.parseGlobal(token) 1596 return 1597 self.parseTopComment(self.top_comment) 1598 return self.index 1599 1600 1601class docBuilder: 1602 """A documentation builder""" 1603 def __init__(self, name, directories=['.'], excludes=[]): 1604 self.name = name 1605 self.directories = directories 1606 self.excludes = excludes + list(ignored_files.keys()) 1607 self.modules = {} 1608 self.headers = {} 1609 self.idx = index() 1610 self.index = {} 1611 if name == 'libxml2': 1612 self.basename = 'libxml' 1613 else: 1614 self.basename = name 1615 1616 def analyze(self): 1617 print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys())))) 1618 self.idx.analyze() 1619 1620 def scanHeaders(self): 1621 for header in list(self.headers.keys()): 1622 parser = CParser(header) 1623 idx = parser.parse() 1624 self.headers[header] = idx; 1625 self.idx.merge(idx) 1626 1627 def scanModules(self): 1628 for module in list(self.modules.keys()): 1629 parser = CParser(module) 1630 idx = parser.parse() 1631 # idx.analyze() 1632 self.modules[module] = idx 1633 self.idx.merge_public(idx) 1634 1635 def scan(self): 1636 for directory in self.directories: 1637 files = glob.glob(directory + "/*.c") 1638 for file in files: 1639 skip = 0 1640 for excl in self.excludes: 1641 if file.find(excl) != -1: 1642 print("Skipping %s" % file) 1643 skip = 1 1644 break 1645 if skip == 0: 1646 self.modules[file] = None; 1647 files = glob.glob(directory + "/*.h") 1648 for file in files: 1649 skip = 0 1650 for excl in self.excludes: 1651 if file.find(excl) != -1: 1652 print("Skipping %s" % file) 1653 skip = 1 1654 break 1655 if skip == 0: 1656 self.headers[file] = None; 1657 self.scanHeaders() 1658 self.scanModules() 1659 1660 def modulename_file(self, file): 1661 module = os.path.basename(file) 1662 if module[-2:] == '.h': 1663 module = module[:-2] 1664 elif module[-2:] == '.c': 1665 module = module[:-2] 1666 return module 1667 1668 def serialize_enum(self, output, name): 1669 id = self.idx.enums[name] 1670 output.write(" <enum name='%s' file='%s'" % (name, 1671 self.modulename_file(id.header))) 1672 if id.info != None: 1673 info = id.info 1674 if info[0] != None and info[0] != '': 1675 try: 1676 val = eval(info[0]) 1677 except: 1678 val = info[0] 1679 output.write(" value='%s'" % (val)); 1680 if info[2] != None and info[2] != '': 1681 output.write(" type='%s'" % info[2]); 1682 if info[1] != None and info[1] != '': 1683 output.write(" info='%s'" % escape(info[1])); 1684 output.write("/>\n") 1685 1686 def serialize_macro(self, output, name): 1687 id = self.idx.macros[name] 1688 output.write(" <macro name='%s' file='%s'>\n" % (name, 1689 self.modulename_file(id.header))) 1690 if id.info != None: 1691 try: 1692 (args, desc) = id.info 1693 if desc != None and desc != "": 1694 output.write(" <info>%s</info>\n" % (escape(desc))) 1695 for arg in args: 1696 (name, desc) = arg 1697 if desc != None and desc != "": 1698 output.write(" <arg name='%s' info='%s'/>\n" % ( 1699 name, escape(desc))) 1700 else: 1701 output.write(" <arg name='%s'/>\n" % (name)) 1702 except: 1703 pass 1704 output.write(" </macro>\n") 1705 1706 def serialize_typedef(self, output, name): 1707 id = self.idx.typedefs[name] 1708 if id.info[0:7] == 'struct ': 1709 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1710 name, self.modulename_file(id.header), id.info)) 1711 name = id.info[7:] 1712 if name in self.idx.structs and ( \ 1713 type(self.idx.structs[name].info) == type(()) or 1714 type(self.idx.structs[name].info) == type([])): 1715 output.write(">\n"); 1716 try: 1717 for field in self.idx.structs[name].info: 1718 output.write(" <field name='%s' type='%s'/>\n" % (field[1] , field[0])) 1719 except: 1720 print("Failed to serialize struct %s" % (name)) 1721 output.write(" </struct>\n") 1722 else: 1723 output.write("/>\n"); 1724 else : 1725 output.write(" <typedef name='%s' file='%s' type='%s'" % ( 1726 name, self.modulename_file(id.header), id.info)) 1727 try: 1728 desc = id.extra 1729 if desc != None and desc != "": 1730 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1731 output.write(" </typedef>\n") 1732 else: 1733 output.write("/>\n") 1734 except: 1735 output.write("/>\n") 1736 1737 def serialize_variable(self, output, name): 1738 id = self.idx.variables[name] 1739 if id.info != None: 1740 output.write(" <variable name='%s' file='%s' type='%s'" % ( 1741 name, self.modulename_file(id.header), id.info)) 1742 else: 1743 output.write(" <variable name='%s' file='%s'" % ( 1744 name, self.modulename_file(id.header))) 1745 desc = id.extra 1746 if desc != None and desc != "": 1747 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1748 output.write(" </variable>\n") 1749 else: 1750 output.write("/>\n") 1751 1752 def serialize_function(self, output, name): 1753 id = self.idx.functions[name] 1754 if name == debugsym: 1755 print("=>", id) 1756 1757 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, 1758 name, self.modulename_file(id.header), 1759 self.modulename_file(id.module))) 1760 # 1761 # Processing of conditionals modified by Bill 1/1/05 1762 # 1763 if id.conditionals != None: 1764 apstr = "" 1765 for cond in id.conditionals: 1766 if apstr != "": 1767 apstr = apstr + " && " 1768 apstr = apstr + cond 1769 output.write(" <cond>%s</cond>\n"% (apstr)); 1770 try: 1771 (ret, params, desc) = id.info 1772 if (desc == None or desc == '') and \ 1773 name[0:9] != "xmlThrDef" and name != "xmlDllMain" and \ 1774 ret[1] == '': 1775 print("%s %s from %s has no description" % (id.type, name, 1776 self.modulename_file(id.module))) 1777 1778 output.write(" <info>%s</info>\n" % (escape(desc))) 1779 if ret[0] != None: 1780 if ret[0] == "void": 1781 output.write(" <return type='void'/>\n") 1782 else: 1783 output.write(" <return type='%s' info='%s'/>\n" % ( 1784 ret[0], escape(ret[1]))) 1785 for param in params: 1786 if param[0] == 'void': 1787 continue 1788 if param[2] == None: 1789 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1790 else: 1791 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1792 except: 1793 print("Failed to save function %s info: " % name, repr(id.info)) 1794 output.write(" </%s>\n" % (id.type)) 1795 1796 def serialize_exports(self, output, file): 1797 module = self.modulename_file(file) 1798 output.write(" <file name='%s'>\n" % (module)) 1799 dict = self.headers[file] 1800 if dict.info != None: 1801 for data in ('Summary', 'Description', 'Author'): 1802 try: 1803 output.write(" <%s>%s</%s>\n" % ( 1804 data.lower(), 1805 escape(dict.info[data]), 1806 data.lower())) 1807 except: 1808 if data != 'Author': 1809 print("Header %s lacks a %s description" % (module, data)) 1810 if 'Description' in dict.info: 1811 desc = dict.info['Description'] 1812 if desc.find("DEPRECATED") != -1: 1813 output.write(" <deprecated/>\n") 1814 1815 ids = list(dict.macros.keys()) 1816 ids.sort() 1817 for id in ids: 1818 # Macros are sometime used to masquerade other types. 1819 if id in dict.functions: 1820 continue 1821 if id in dict.variables: 1822 continue 1823 if id in dict.typedefs: 1824 continue 1825 if id in dict.structs: 1826 continue 1827 if id in dict.enums: 1828 continue 1829 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1830 ids = list(dict.enums.keys()) 1831 ids.sort() 1832 for id in ids: 1833 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1834 ids = list(dict.typedefs.keys()) 1835 ids.sort() 1836 for id in ids: 1837 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1838 ids = list(dict.structs.keys()) 1839 ids.sort() 1840 for id in ids: 1841 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1842 ids = list(dict.variables.keys()) 1843 ids.sort() 1844 for id in ids: 1845 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1846 ids = list(dict.functions.keys()) 1847 ids.sort() 1848 for id in ids: 1849 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1850 output.write(" </file>\n") 1851 1852 def serialize(self): 1853 filename = "%s-api.xml" % self.name 1854 print("Saving XML description %s" % (filename)) 1855 output = open(filename, "w") 1856 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 1857 output.write("<api name='%s'>\n" % self.name) 1858 output.write(" <files>\n") 1859 headers = list(self.headers.keys()) 1860 headers.sort() 1861 for file in headers: 1862 self.serialize_exports(output, file) 1863 output.write(" </files>\n") 1864 output.write(" <symbols>\n") 1865 macros = list(self.idx.macros.keys()) 1866 macros.sort() 1867 for macro in macros: 1868 self.serialize_macro(output, macro) 1869 enums = list(self.idx.enums.keys()) 1870 enums.sort() 1871 for enum in enums: 1872 self.serialize_enum(output, enum) 1873 typedefs = list(self.idx.typedefs.keys()) 1874 typedefs.sort() 1875 for typedef in typedefs: 1876 self.serialize_typedef(output, typedef) 1877 variables = list(self.idx.variables.keys()) 1878 variables.sort() 1879 for variable in variables: 1880 self.serialize_variable(output, variable) 1881 functions = list(self.idx.functions.keys()) 1882 functions.sort() 1883 for function in functions: 1884 self.serialize_function(output, function) 1885 output.write(" </symbols>\n") 1886 output.write("</api>\n") 1887 output.close() 1888 1889 1890def rebuild(): 1891 builder = None 1892 if glob.glob("parser.c") != [] : 1893 print("Rebuilding API description for libxml2") 1894 builder = docBuilder("libxml2", [".", "."], 1895 ["tst.c"]) 1896 elif glob.glob("../parser.c") != [] : 1897 print("Rebuilding API description for libxml2") 1898 builder = docBuilder("libxml2", ["..", "../include/libxml"], 1899 ["tst.c"]) 1900 elif glob.glob("../libxslt/transform.c") != [] : 1901 print("Rebuilding API description for libxslt") 1902 builder = docBuilder("libxslt", ["../libxslt"], 1903 ["win32config.h", "libxslt.h", "tst.c"]) 1904 else: 1905 print("rebuild() failed, unable to guess the module") 1906 return None 1907 builder.scan() 1908 builder.analyze() 1909 builder.serialize() 1910 if glob.glob("../libexslt/exslt.c") != [] : 1911 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 1912 extra.scan() 1913 extra.analyze() 1914 extra.serialize() 1915 return builder 1916 1917# 1918# for debugging the parser 1919# 1920def parse(filename): 1921 parser = CParser(filename) 1922 idx = parser.parse() 1923 return idx 1924 1925if __name__ == "__main__": 1926 if len(sys.argv) > 1: 1927 debug = 1 1928 parse(sys.argv[1]) 1929 else: 1930 rebuild() 1931