1#!/usr/bin/python -u 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14debug=0 15#debugsym='ignorableWhitespaceSAXFunc' 16debugsym=None 17 18# 19# C parser analysis code 20# 21ignored_files = { 22 "trio": "too many non standard macros", 23 "trio.c": "too many non standard macros", 24 "trionan.c": "too many non standard macros", 25 "triostr.c": "too many non standard macros", 26 "acconfig.h": "generated portability layer", 27 "config.h": "generated portability layer", 28 "libxml.h": "internal only", 29 "testOOM.c": "out of memory tester", 30 "testOOMlib.h": "out of memory tester", 31 "testOOMlib.c": "out of memory tester", 32 "rngparser.c": "not yet integrated", 33 "rngparser.h": "not yet integrated", 34 "elfgcchack.h": "not a normal header", 35 "testHTML.c": "test tool", 36 "testReader.c": "test tool", 37 "testSchemas.c": "test tool", 38 "testXPath.c": "test tool", 39 "testAutomata.c": "test tool", 40 "testModule.c": "test tool", 41 "testRegexp.c": "test tool", 42 "testThreads.c": "test tool", 43 "testC14N.c": "test tool", 44 "testRelax.c": "test tool", 45 "testThreadsWin32.c": "test tool", 46 "testSAX.c": "test tool", 47 "testURI.c": "test tool", 48 "testapi.c": "generated regression tests", 49 "runtest.c": "regression tests program", 50 "runsuite.c": "regression tests program", 51 "tst.c": "not part of the library", 52 "test.c": "not part of the library", 53 "testdso.c": "test for dynamid shared libraries", 54 "testrecurse.c": "test for entities recursions", 55 "xzlib.h": "Internal API only 2.8.0", 56 "buf.h": "Internal API only 2.9.0", 57 "enc.h": "Internal API only 2.9.0", 58 "/save.h": "Internal API only 2.9.0", 59 "timsort.h": "Internal header only for xpath.c 2.9.0", 60} 61 62ignored_words = { 63 "WINAPI": (0, "Windows keyword"), 64 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 65 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 66 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 67 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 68 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 69 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 70 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 71 "XMLCALL": (0, "Special macro for win32 calls"), 72 "XSLTCALL": (0, "Special macro for win32 calls"), 73 "XMLCDECL": (0, "Special macro for win32 calls"), 74 "EXSLTCALL": (0, "Special macro for win32 calls"), 75 "__declspec": (3, "Windows keyword"), 76 "__stdcall": (0, "Windows keyword"), 77 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 78 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 79 "X_IN_Y": (5, "macro function builder"), 80 "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"), 81 "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"), 82 "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"), 83 "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"), 84 "__XML_EXTERNC": (0, "Special macro added for os400"), 85} 86 87def escape(raw): 88 raw = raw.replace('&', '&') 89 raw = raw.replace('<', '<') 90 raw = raw.replace('>', '>') 91 raw = raw.replace("'", ''') 92 raw = raw.replace('"', '"') 93 return raw 94 95def uniq(items): 96 d = {} 97 for item in items: 98 d[item]=1 99 return list(d.keys()) 100 101class identifier: 102 def __init__(self, name, header=None, module=None, type=None, lineno = 0, 103 info=None, extra=None, conditionals = None): 104 self.name = name 105 self.header = header 106 self.module = module 107 self.type = type 108 self.info = info 109 self.extra = extra 110 self.lineno = lineno 111 self.static = 0 112 if conditionals == None or len(conditionals) == 0: 113 self.conditionals = None 114 else: 115 self.conditionals = conditionals[:] 116 if self.name == debugsym: 117 print("=> define %s : %s" % (debugsym, (module, type, info, 118 extra, conditionals))) 119 120 def __repr__(self): 121 r = "%s %s:" % (self.type, self.name) 122 if self.static: 123 r = r + " static" 124 if self.module != None: 125 r = r + " from %s" % (self.module) 126 if self.info != None: 127 r = r + " " + repr(self.info) 128 if self.extra != None: 129 r = r + " " + repr(self.extra) 130 if self.conditionals != None: 131 r = r + " " + repr(self.conditionals) 132 return r 133 134 135 def set_header(self, header): 136 self.header = header 137 def set_module(self, module): 138 self.module = module 139 def set_type(self, type): 140 self.type = type 141 def set_info(self, info): 142 self.info = info 143 def set_extra(self, extra): 144 self.extra = extra 145 def set_lineno(self, lineno): 146 self.lineno = lineno 147 def set_static(self, static): 148 self.static = static 149 def set_conditionals(self, conditionals): 150 if conditionals == None or len(conditionals) == 0: 151 self.conditionals = None 152 else: 153 self.conditionals = conditionals[:] 154 155 def get_name(self): 156 return self.name 157 def get_header(self): 158 return self.module 159 def get_module(self): 160 return self.module 161 def get_type(self): 162 return self.type 163 def get_info(self): 164 return self.info 165 def get_lineno(self): 166 return self.lineno 167 def get_extra(self): 168 return self.extra 169 def get_static(self): 170 return self.static 171 def get_conditionals(self): 172 return self.conditionals 173 174 def update(self, header, module, type = None, info = None, extra=None, 175 conditionals=None): 176 if self.name == debugsym: 177 print("=> update %s : %s" % (debugsym, (module, type, info, 178 extra, conditionals))) 179 if header != None and self.header == None: 180 self.set_header(module) 181 if module != None and (self.module == None or self.header == self.module): 182 self.set_module(module) 183 if type != None and self.type == None: 184 self.set_type(type) 185 if info != None: 186 self.set_info(info) 187 if extra != None: 188 self.set_extra(extra) 189 if conditionals != None: 190 self.set_conditionals(conditionals) 191 192class index: 193 def __init__(self, name = "noname"): 194 self.name = name 195 self.identifiers = {} 196 self.functions = {} 197 self.variables = {} 198 self.includes = {} 199 self.structs = {} 200 self.enums = {} 201 self.typedefs = {} 202 self.macros = {} 203 self.references = {} 204 self.info = {} 205 206 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 207 if name[0:2] == '__': 208 return None 209 d = None 210 try: 211 d = self.identifiers[name] 212 d.update(header, module, type, lineno, info, extra, conditionals) 213 except: 214 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 215 self.identifiers[name] = d 216 217 if d != None and static == 1: 218 d.set_static(1) 219 220 if d != None and name != None and type != None: 221 self.references[name] = d 222 223 if name == debugsym: 224 print("New ref: %s" % (d)) 225 226 return d 227 228 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 229 if name[0:2] == '__': 230 return None 231 d = None 232 try: 233 d = self.identifiers[name] 234 d.update(header, module, type, lineno, info, extra, conditionals) 235 except: 236 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 237 self.identifiers[name] = d 238 239 if d != None and static == 1: 240 d.set_static(1) 241 242 if d != None and name != None and type != None: 243 if type == "function": 244 self.functions[name] = d 245 elif type == "functype": 246 self.functions[name] = d 247 elif type == "variable": 248 self.variables[name] = d 249 elif type == "include": 250 self.includes[name] = d 251 elif type == "struct": 252 self.structs[name] = d 253 elif type == "enum": 254 self.enums[name] = d 255 elif type == "typedef": 256 self.typedefs[name] = d 257 elif type == "macro": 258 self.macros[name] = d 259 else: 260 print("Unable to register type ", type) 261 262 if name == debugsym: 263 print("New symbol: %s" % (d)) 264 265 return d 266 267 def merge(self, idx): 268 for id in list(idx.functions.keys()): 269 # 270 # macro might be used to override functions or variables 271 # definitions 272 # 273 if id in self.macros: 274 del self.macros[id] 275 if id in self.functions: 276 print("function %s from %s redeclared in %s" % ( 277 id, self.functions[id].header, idx.functions[id].header)) 278 else: 279 self.functions[id] = idx.functions[id] 280 self.identifiers[id] = idx.functions[id] 281 for id in list(idx.variables.keys()): 282 # 283 # macro might be used to override functions or variables 284 # definitions 285 # 286 if id in self.macros: 287 del self.macros[id] 288 if id in self.variables: 289 print("variable %s from %s redeclared in %s" % ( 290 id, self.variables[id].header, idx.variables[id].header)) 291 else: 292 self.variables[id] = idx.variables[id] 293 self.identifiers[id] = idx.variables[id] 294 for id in list(idx.structs.keys()): 295 if id in self.structs: 296 print("struct %s from %s redeclared in %s" % ( 297 id, self.structs[id].header, idx.structs[id].header)) 298 else: 299 self.structs[id] = idx.structs[id] 300 self.identifiers[id] = idx.structs[id] 301 for id in list(idx.typedefs.keys()): 302 if id in self.typedefs: 303 print("typedef %s from %s redeclared in %s" % ( 304 id, self.typedefs[id].header, idx.typedefs[id].header)) 305 else: 306 self.typedefs[id] = idx.typedefs[id] 307 self.identifiers[id] = idx.typedefs[id] 308 for id in list(idx.macros.keys()): 309 # 310 # macro might be used to override functions or variables 311 # definitions 312 # 313 if id in self.variables: 314 continue 315 if id in self.functions: 316 continue 317 if id in self.enums: 318 continue 319 if id in self.macros: 320 print("macro %s from %s redeclared in %s" % ( 321 id, self.macros[id].header, idx.macros[id].header)) 322 else: 323 self.macros[id] = idx.macros[id] 324 self.identifiers[id] = idx.macros[id] 325 for id in list(idx.enums.keys()): 326 if id in self.enums: 327 print("enum %s from %s redeclared in %s" % ( 328 id, self.enums[id].header, idx.enums[id].header)) 329 else: 330 self.enums[id] = idx.enums[id] 331 self.identifiers[id] = idx.enums[id] 332 333 def merge_public(self, idx): 334 for id in list(idx.functions.keys()): 335 if id in self.functions: 336 # check that function condition agrees with header 337 if idx.functions[id].conditionals != \ 338 self.functions[id].conditionals: 339 print("Header condition differs from Function for %s:" \ 340 % id) 341 print(" H: %s" % self.functions[id].conditionals) 342 print(" C: %s" % idx.functions[id].conditionals) 343 up = idx.functions[id] 344 self.functions[id].update(None, up.module, up.type, up.info, up.extra) 345 # else: 346 # print "Function %s from %s is not declared in headers" % ( 347 # id, idx.functions[id].module) 348 # TODO: do the same for variables. 349 350 def analyze_dict(self, type, dict): 351 count = 0 352 public = 0 353 for name in list(dict.keys()): 354 id = dict[name] 355 count = count + 1 356 if id.static == 0: 357 public = public + 1 358 if count != public: 359 print(" %d %s , %d public" % (count, type, public)) 360 elif count != 0: 361 print(" %d public %s" % (count, type)) 362 363 364 def analyze(self): 365 self.analyze_dict("functions", self.functions) 366 self.analyze_dict("variables", self.variables) 367 self.analyze_dict("structs", self.structs) 368 self.analyze_dict("typedefs", self.typedefs) 369 self.analyze_dict("macros", self.macros) 370 371class CLexer: 372 """A lexer for the C language, tokenize the input by reading and 373 analyzing it line by line""" 374 def __init__(self, input): 375 self.input = input 376 self.tokens = [] 377 self.line = "" 378 self.lineno = 0 379 380 def getline(self): 381 line = '' 382 while line == '': 383 line = self.input.readline() 384 if not line: 385 return None 386 self.lineno = self.lineno + 1 387 line = line.lstrip() 388 line = line.rstrip() 389 if line == '': 390 continue 391 while line[-1] == '\\': 392 line = line[:-1] 393 n = self.input.readline() 394 self.lineno = self.lineno + 1 395 n = n.lstrip() 396 n = n.rstrip() 397 if not n: 398 break 399 else: 400 line = line + n 401 return line 402 403 def getlineno(self): 404 return self.lineno 405 406 def push(self, token): 407 self.tokens.insert(0, token); 408 409 def debug(self): 410 print("Last token: ", self.last) 411 print("Token queue: ", self.tokens) 412 print("Line %d end: " % (self.lineno), self.line) 413 414 def token(self): 415 while self.tokens == []: 416 if self.line == "": 417 line = self.getline() 418 else: 419 line = self.line 420 self.line = "" 421 if line == None: 422 return None 423 424 if line[0] == '#': 425 self.tokens = list(map((lambda x: ('preproc', x)), 426 line.split())) 427 break; 428 l = len(line) 429 if line[0] == '"' or line[0] == "'": 430 end = line[0] 431 line = line[1:] 432 found = 0 433 tok = "" 434 while found == 0: 435 i = 0 436 l = len(line) 437 while i < l: 438 if line[i] == end: 439 self.line = line[i+1:] 440 line = line[:i] 441 l = i 442 found = 1 443 break 444 if line[i] == '\\': 445 i = i + 1 446 i = i + 1 447 tok = tok + line 448 if found == 0: 449 line = self.getline() 450 if line == None: 451 return None 452 self.last = ('string', tok) 453 return self.last 454 455 if l >= 2 and line[0] == '/' and line[1] == '*': 456 line = line[2:] 457 found = 0 458 tok = "" 459 while found == 0: 460 i = 0 461 l = len(line) 462 while i < l: 463 if line[i] == '*' and i+1 < l and line[i+1] == '/': 464 self.line = line[i+2:] 465 line = line[:i-1] 466 l = i 467 found = 1 468 break 469 i = i + 1 470 if tok != "": 471 tok = tok + "\n" 472 tok = tok + line 473 if found == 0: 474 line = self.getline() 475 if line == None: 476 return None 477 self.last = ('comment', tok) 478 return self.last 479 if l >= 2 and line[0] == '/' and line[1] == '/': 480 line = line[2:] 481 self.last = ('comment', line) 482 return self.last 483 i = 0 484 while i < l: 485 if line[i] == '/' and i+1 < l and line[i+1] == '/': 486 self.line = line[i:] 487 line = line[:i] 488 break 489 if line[i] == '/' and i+1 < l and line[i+1] == '*': 490 self.line = line[i:] 491 line = line[:i] 492 break 493 if line[i] == '"' or line[i] == "'": 494 self.line = line[i:] 495 line = line[:i] 496 break 497 i = i + 1 498 l = len(line) 499 i = 0 500 while i < l: 501 if line[i] == ' ' or line[i] == '\t': 502 i = i + 1 503 continue 504 o = ord(line[i]) 505 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 506 (o >= 48 and o <= 57): 507 s = i 508 while i < l: 509 o = ord(line[i]) 510 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 511 (o >= 48 and o <= 57) or \ 512 (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: 513 i = i + 1 514 else: 515 break 516 self.tokens.append(('name', line[s:i])) 517 continue 518 if "(){}:;,[]".find(line[i]) != -1: 519# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 520# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 521# line[i] == ',' or line[i] == '[' or line[i] == ']': 522 self.tokens.append(('sep', line[i])) 523 i = i + 1 524 continue 525 if "+-*><=/%&!|.".find(line[i]) != -1: 526# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 527# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 528# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 529# line[i] == '!' or line[i] == '|' or line[i] == '.': 530 if line[i] == '.' and i + 2 < l and \ 531 line[i+1] == '.' and line[i+2] == '.': 532 self.tokens.append(('name', '...')) 533 i = i + 3 534 continue 535 536 j = i + 1 537 if j < l and ( 538 "+-*><=/%&!|".find(line[j]) != -1): 539# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 540# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 541# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 542# line[j] == '!' or line[j] == '|'): 543 self.tokens.append(('op', line[i:j+1])) 544 i = j + 1 545 else: 546 self.tokens.append(('op', line[i])) 547 i = i + 1 548 continue 549 s = i 550 while i < l: 551 o = ord(line[i]) 552 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 553 (o >= 48 and o <= 57) or ( 554 " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): 555# line[i] != ' ' and line[i] != '\t' and 556# line[i] != '(' and line[i] != ')' and 557# line[i] != '{' and line[i] != '}' and 558# line[i] != ':' and line[i] != ';' and 559# line[i] != ',' and line[i] != '+' and 560# line[i] != '-' and line[i] != '*' and 561# line[i] != '/' and line[i] != '%' and 562# line[i] != '&' and line[i] != '!' and 563# line[i] != '|' and line[i] != '[' and 564# line[i] != ']' and line[i] != '=' and 565# line[i] != '*' and line[i] != '>' and 566# line[i] != '<'): 567 i = i + 1 568 else: 569 break 570 self.tokens.append(('name', line[s:i])) 571 572 tok = self.tokens[0] 573 self.tokens = self.tokens[1:] 574 self.last = tok 575 return tok 576 577class CParser: 578 """The C module parser""" 579 def __init__(self, filename, idx = None): 580 self.filename = filename 581 if len(filename) > 2 and filename[-2:] == '.h': 582 self.is_header = 1 583 else: 584 self.is_header = 0 585 self.input = open(filename) 586 self.lexer = CLexer(self.input) 587 if idx == None: 588 self.index = index() 589 else: 590 self.index = idx 591 self.top_comment = "" 592 self.last_comment = "" 593 self.comment = None 594 self.collect_ref = 0 595 self.no_error = 0 596 self.conditionals = [] 597 self.defines = [] 598 599 def collect_references(self): 600 self.collect_ref = 1 601 602 def stop_error(self): 603 self.no_error = 1 604 605 def start_error(self): 606 self.no_error = 0 607 608 def lineno(self): 609 return self.lexer.getlineno() 610 611 def index_add(self, name, module, static, type, info=None, extra = None): 612 if self.is_header == 1: 613 self.index.add(name, module, module, static, type, self.lineno(), 614 info, extra, self.conditionals) 615 else: 616 self.index.add(name, None, module, static, type, self.lineno(), 617 info, extra, self.conditionals) 618 619 def index_add_ref(self, name, module, static, type, info=None, 620 extra = None): 621 if self.is_header == 1: 622 self.index.add_ref(name, module, module, static, type, 623 self.lineno(), info, extra, self.conditionals) 624 else: 625 self.index.add_ref(name, None, module, static, type, self.lineno(), 626 info, extra, self.conditionals) 627 628 def warning(self, msg): 629 if self.no_error: 630 return 631 print(msg) 632 633 def error(self, msg, token=-1): 634 if self.no_error: 635 return 636 637 print("Parse Error: " + msg) 638 if token != -1: 639 print("Got token ", token) 640 self.lexer.debug() 641 sys.exit(1) 642 643 def debug(self, msg, token=-1): 644 print("Debug: " + msg) 645 if token != -1: 646 print("Got token ", token) 647 self.lexer.debug() 648 649 def parseTopComment(self, comment): 650 res = {} 651 lines = comment.split("\n") 652 item = None 653 for line in lines: 654 while line != "" and (line[0] == ' ' or line[0] == '\t'): 655 line = line[1:] 656 while line != "" and line[0] == '*': 657 line = line[1:] 658 while line != "" and (line[0] == ' ' or line[0] == '\t'): 659 line = line[1:] 660 try: 661 (it, line) = line.split(":", 1) 662 item = it 663 while line != "" and (line[0] == ' ' or line[0] == '\t'): 664 line = line[1:] 665 if item in res: 666 res[item] = res[item] + " " + line 667 else: 668 res[item] = line 669 except: 670 if item != None: 671 if item in res: 672 res[item] = res[item] + " " + line 673 else: 674 res[item] = line 675 self.index.info = res 676 677 def parseComment(self, token): 678 if self.top_comment == "": 679 self.top_comment = token[1] 680 if self.comment == None or token[1][0] == '*': 681 self.comment = token[1]; 682 else: 683 self.comment = self.comment + token[1] 684 token = self.lexer.token() 685 686 if self.comment.find("DOC_DISABLE") != -1: 687 self.stop_error() 688 689 if self.comment.find("DOC_ENABLE") != -1: 690 self.start_error() 691 692 return token 693 694 # 695 # Parse a comment block associate to a typedef 696 # 697 def parseTypeComment(self, name, quiet = 0): 698 if name[0:2] == '__': 699 quiet = 1 700 701 args = [] 702 desc = "" 703 704 if self.comment == None: 705 if not quiet: 706 self.warning("Missing comment for type %s" % (name)) 707 return((args, desc)) 708 if self.comment[0] != '*': 709 if not quiet: 710 self.warning("Missing * in type comment for %s" % (name)) 711 return((args, desc)) 712 lines = self.comment.split('\n') 713 if lines[0] == '*': 714 del lines[0] 715 if lines[0] != "* %s:" % (name): 716 if not quiet: 717 self.warning("Misformatted type comment for %s" % (name)) 718 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 719 return((args, desc)) 720 del lines[0] 721 while len(lines) > 0 and lines[0] == '*': 722 del lines[0] 723 desc = "" 724 while len(lines) > 0: 725 l = lines[0] 726 while len(l) > 0 and l[0] == '*': 727 l = l[1:] 728 l = l.strip() 729 desc = desc + " " + l 730 del lines[0] 731 732 desc = desc.strip() 733 734 if quiet == 0: 735 if desc == "": 736 self.warning("Type comment for %s lack description of the macro" % (name)) 737 738 return(desc) 739 # 740 # Parse a comment block associate to a macro 741 # 742 def parseMacroComment(self, name, quiet = 0): 743 if name[0:2] == '__': 744 quiet = 1 745 746 args = [] 747 desc = "" 748 749 if self.comment == None: 750 if not quiet: 751 self.warning("Missing comment for macro %s" % (name)) 752 return((args, desc)) 753 if self.comment[0] != '*': 754 if not quiet: 755 self.warning("Missing * in macro comment for %s" % (name)) 756 return((args, desc)) 757 lines = self.comment.split('\n') 758 if lines[0] == '*': 759 del lines[0] 760 if lines[0] != "* %s:" % (name): 761 if not quiet: 762 self.warning("Misformatted macro comment for %s" % (name)) 763 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 764 return((args, desc)) 765 del lines[0] 766 while lines[0] == '*': 767 del lines[0] 768 while len(lines) > 0 and lines[0][0:3] == '* @': 769 l = lines[0][3:] 770 try: 771 (arg, desc) = l.split(':', 1) 772 desc=desc.strip() 773 arg=arg.strip() 774 except: 775 if not quiet: 776 self.warning("Misformatted macro comment for %s" % (name)) 777 self.warning(" problem with '%s'" % (lines[0])) 778 del lines[0] 779 continue 780 del lines[0] 781 l = lines[0].strip() 782 while len(l) > 2 and l[0:3] != '* @': 783 while l[0] == '*': 784 l = l[1:] 785 desc = desc + ' ' + l.strip() 786 del lines[0] 787 if len(lines) == 0: 788 break 789 l = lines[0] 790 args.append((arg, desc)) 791 while len(lines) > 0 and lines[0] == '*': 792 del lines[0] 793 desc = "" 794 while len(lines) > 0: 795 l = lines[0] 796 while len(l) > 0 and l[0] == '*': 797 l = l[1:] 798 l = l.strip() 799 desc = desc + " " + l 800 del lines[0] 801 802 desc = desc.strip() 803 804 if quiet == 0: 805 if desc == "": 806 self.warning("Macro comment for %s lack description of the macro" % (name)) 807 808 return((args, desc)) 809 810 # 811 # Parse a comment block and merge the informations found in the 812 # parameters descriptions, finally returns a block as complete 813 # as possible 814 # 815 def mergeFunctionComment(self, name, description, quiet = 0): 816 if name == 'main': 817 quiet = 1 818 if name[0:2] == '__': 819 quiet = 1 820 821 (ret, args) = description 822 desc = "" 823 retdesc = "" 824 825 if self.comment == None: 826 if not quiet: 827 self.warning("Missing comment for function %s" % (name)) 828 return(((ret[0], retdesc), args, desc)) 829 if self.comment[0] != '*': 830 if not quiet: 831 self.warning("Missing * in function comment for %s" % (name)) 832 return(((ret[0], retdesc), args, desc)) 833 lines = self.comment.split('\n') 834 if lines[0] == '*': 835 del lines[0] 836 if lines[0] != "* %s:" % (name): 837 if not quiet: 838 self.warning("Misformatted function comment for %s" % (name)) 839 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 840 return(((ret[0], retdesc), args, desc)) 841 del lines[0] 842 while lines[0] == '*': 843 del lines[0] 844 nbargs = len(args) 845 while len(lines) > 0 and lines[0][0:3] == '* @': 846 l = lines[0][3:] 847 try: 848 (arg, desc) = l.split(':', 1) 849 desc=desc.strip() 850 arg=arg.strip() 851 except: 852 if not quiet: 853 self.warning("Misformatted function comment for %s" % (name)) 854 self.warning(" problem with '%s'" % (lines[0])) 855 del lines[0] 856 continue 857 del lines[0] 858 l = lines[0].strip() 859 while len(l) > 2 and l[0:3] != '* @': 860 while l[0] == '*': 861 l = l[1:] 862 desc = desc + ' ' + l.strip() 863 del lines[0] 864 if len(lines) == 0: 865 break 866 l = lines[0] 867 i = 0 868 while i < nbargs: 869 if args[i][1] == arg: 870 args[i] = (args[i][0], arg, desc) 871 break; 872 i = i + 1 873 if i >= nbargs: 874 if not quiet: 875 self.warning("Unable to find arg %s from function comment for %s" % ( 876 arg, name)) 877 while len(lines) > 0 and lines[0] == '*': 878 del lines[0] 879 desc = "" 880 while len(lines) > 0: 881 l = lines[0] 882 while len(l) > 0 and l[0] == '*': 883 l = l[1:] 884 l = l.strip() 885 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 886 try: 887 l = l.split(' ', 1)[1] 888 except: 889 l = "" 890 retdesc = l.strip() 891 del lines[0] 892 while len(lines) > 0: 893 l = lines[0] 894 while len(l) > 0 and l[0] == '*': 895 l = l[1:] 896 l = l.strip() 897 retdesc = retdesc + " " + l 898 del lines[0] 899 else: 900 desc = desc + " " + l 901 del lines[0] 902 903 retdesc = retdesc.strip() 904 desc = desc.strip() 905 906 if quiet == 0: 907 # 908 # report missing comments 909 # 910 i = 0 911 while i < nbargs: 912 if args[i][2] == None and args[i][0] != "void" and \ 913 ((args[i][1] != None) or (args[i][1] == '')): 914 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) 915 i = i + 1 916 if retdesc == "" and ret[0] != "void": 917 self.warning("Function comment for %s lacks description of return value" % (name)) 918 if desc == "": 919 self.warning("Function comment for %s lacks description of the function" % (name)) 920 921 return(((ret[0], retdesc), args, desc)) 922 923 def parsePreproc(self, token): 924 if debug: 925 print("=> preproc ", token, self.lexer.tokens) 926 name = token[1] 927 if name == "#include": 928 token = self.lexer.token() 929 if token == None: 930 return None 931 if token[0] == 'preproc': 932 self.index_add(token[1], self.filename, not self.is_header, 933 "include") 934 return self.lexer.token() 935 return token 936 if name == "#define": 937 token = self.lexer.token() 938 if token == None: 939 return None 940 if token[0] == 'preproc': 941 # TODO macros with arguments 942 name = token[1] 943 lst = [] 944 token = self.lexer.token() 945 while token != None and token[0] == 'preproc' and \ 946 token[1][0] != '#': 947 lst.append(token[1]) 948 token = self.lexer.token() 949 try: 950 name = name.split('(') [0] 951 except: 952 pass 953 info = self.parseMacroComment(name, not self.is_header) 954 self.index_add(name, self.filename, not self.is_header, 955 "macro", info) 956 return token 957 958 # 959 # Processing of conditionals modified by Bill 1/1/05 960 # 961 # We process conditionals (i.e. tokens from #ifdef, #ifndef, 962 # #if, #else and #endif) for headers and mainline code, 963 # store the ones from the header in libxml2-api.xml, and later 964 # (in the routine merge_public) verify that the two (header and 965 # mainline code) agree. 966 # 967 # There is a small problem with processing the headers. Some of 968 # the variables are not concerned with enabling / disabling of 969 # library functions (e.g. '__XML_PARSER_H__'), and we don't want 970 # them to be included in libxml2-api.xml, or involved in 971 # the check between the header and the mainline code. To 972 # accomplish this, we ignore any conditional which doesn't include 973 # the string 'ENABLED' 974 # 975 if name == "#ifdef": 976 apstr = self.lexer.tokens[0][1] 977 try: 978 self.defines.append(apstr) 979 if apstr.find('ENABLED') != -1: 980 self.conditionals.append("defined(%s)" % apstr) 981 except: 982 pass 983 elif name == "#ifndef": 984 apstr = self.lexer.tokens[0][1] 985 try: 986 self.defines.append(apstr) 987 if apstr.find('ENABLED') != -1: 988 self.conditionals.append("!defined(%s)" % apstr) 989 except: 990 pass 991 elif name == "#if": 992 apstr = "" 993 for tok in self.lexer.tokens: 994 if apstr != "": 995 apstr = apstr + " " 996 apstr = apstr + tok[1] 997 try: 998 self.defines.append(apstr) 999 if apstr.find('ENABLED') != -1: 1000 self.conditionals.append(apstr) 1001 except: 1002 pass 1003 elif name == "#else": 1004 if self.conditionals != [] and \ 1005 self.defines[-1].find('ENABLED') != -1: 1006 self.conditionals[-1] = "!(%s)" % self.conditionals[-1] 1007 elif name == "#endif": 1008 if self.conditionals != [] and \ 1009 self.defines[-1].find('ENABLED') != -1: 1010 self.conditionals = self.conditionals[:-1] 1011 self.defines = self.defines[:-1] 1012 token = self.lexer.token() 1013 while token != None and token[0] == 'preproc' and \ 1014 token[1][0] != '#': 1015 token = self.lexer.token() 1016 return token 1017 1018 # 1019 # token acquisition on top of the lexer, it handle internally 1020 # preprocessor and comments since they are logically not part of 1021 # the program structure. 1022 # 1023 def token(self): 1024 global ignored_words 1025 1026 token = self.lexer.token() 1027 while token != None: 1028 if token[0] == 'comment': 1029 token = self.parseComment(token) 1030 continue 1031 elif token[0] == 'preproc': 1032 token = self.parsePreproc(token) 1033 continue 1034 elif token[0] == "name" and token[1] == "__const": 1035 token = ("name", "const") 1036 return token 1037 elif token[0] == "name" and token[1] == "__attribute": 1038 token = self.lexer.token() 1039 while token != None and token[1] != ";": 1040 token = self.lexer.token() 1041 return token 1042 elif token[0] == "name" and token[1] in ignored_words: 1043 (n, info) = ignored_words[token[1]] 1044 i = 0 1045 while i < n: 1046 token = self.lexer.token() 1047 i = i + 1 1048 token = self.lexer.token() 1049 continue 1050 else: 1051 if debug: 1052 print("=> ", token) 1053 return token 1054 return None 1055 1056 # 1057 # Parse a typedef, it records the type and its name. 1058 # 1059 def parseTypedef(self, token): 1060 if token == None: 1061 return None 1062 token = self.parseType(token) 1063 if token == None: 1064 self.error("parsing typedef") 1065 return None 1066 base_type = self.type 1067 type = base_type 1068 #self.debug("end typedef type", token) 1069 while token != None: 1070 if token[0] == "name": 1071 name = token[1] 1072 signature = self.signature 1073 if signature != None: 1074 type = type.split('(')[0] 1075 d = self.mergeFunctionComment(name, 1076 ((type, None), signature), 1) 1077 self.index_add(name, self.filename, not self.is_header, 1078 "functype", d) 1079 else: 1080 if base_type == "struct": 1081 self.index_add(name, self.filename, not self.is_header, 1082 "struct", type) 1083 base_type = "struct " + name 1084 else: 1085 # TODO report missing or misformatted comments 1086 info = self.parseTypeComment(name, 1) 1087 self.index_add(name, self.filename, not self.is_header, 1088 "typedef", type, info) 1089 token = self.token() 1090 else: 1091 self.error("parsing typedef: expecting a name") 1092 return token 1093 #self.debug("end typedef", token) 1094 if token != None and token[0] == 'sep' and token[1] == ',': 1095 type = base_type 1096 token = self.token() 1097 while token != None and token[0] == "op": 1098 type = type + token[1] 1099 token = self.token() 1100 elif token != None and token[0] == 'sep' and token[1] == ';': 1101 break; 1102 elif token != None and token[0] == 'name': 1103 type = base_type 1104 continue; 1105 else: 1106 self.error("parsing typedef: expecting ';'", token) 1107 return token 1108 token = self.token() 1109 return token 1110 1111 # 1112 # Parse a C code block, used for functions it parse till 1113 # the balancing } included 1114 # 1115 def parseBlock(self, token): 1116 while token != None: 1117 if token[0] == "sep" and token[1] == "{": 1118 token = self.token() 1119 token = self.parseBlock(token) 1120 elif token[0] == "sep" and token[1] == "}": 1121 self.comment = None 1122 token = self.token() 1123 return token 1124 else: 1125 if self.collect_ref == 1: 1126 oldtok = token 1127 token = self.token() 1128 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 1129 if token[0] == "sep" and token[1] == "(": 1130 self.index_add_ref(oldtok[1], self.filename, 1131 0, "function") 1132 token = self.token() 1133 elif token[0] == "name": 1134 token = self.token() 1135 if token[0] == "sep" and (token[1] == ";" or 1136 token[1] == "," or token[1] == "="): 1137 self.index_add_ref(oldtok[1], self.filename, 1138 0, "type") 1139 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 1140 self.index_add_ref(oldtok[1], self.filename, 1141 0, "typedef") 1142 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 1143 self.index_add_ref(oldtok[1], self.filename, 1144 0, "typedef") 1145 1146 else: 1147 token = self.token() 1148 return token 1149 1150 # 1151 # Parse a C struct definition till the balancing } 1152 # 1153 def parseStruct(self, token): 1154 fields = [] 1155 #self.debug("start parseStruct", token) 1156 while token != None: 1157 if token[0] == "sep" and token[1] == "{": 1158 token = self.token() 1159 token = self.parseTypeBlock(token) 1160 elif token[0] == "sep" and token[1] == "}": 1161 self.struct_fields = fields 1162 #self.debug("end parseStruct", token) 1163 #print fields 1164 token = self.token() 1165 return token 1166 else: 1167 base_type = self.type 1168 #self.debug("before parseType", token) 1169 token = self.parseType(token) 1170 #self.debug("after parseType", token) 1171 if token != None and token[0] == "name": 1172 fname = token[1] 1173 token = self.token() 1174 if token[0] == "sep" and token[1] == ";": 1175 self.comment = None 1176 token = self.token() 1177 fields.append((self.type, fname, self.comment)) 1178 self.comment = None 1179 else: 1180 self.error("parseStruct: expecting ;", token) 1181 elif token != None and token[0] == "sep" and token[1] == "{": 1182 token = self.token() 1183 token = self.parseTypeBlock(token) 1184 if token != None and token[0] == "name": 1185 token = self.token() 1186 if token != None and token[0] == "sep" and token[1] == ";": 1187 token = self.token() 1188 else: 1189 self.error("parseStruct: expecting ;", token) 1190 else: 1191 self.error("parseStruct: name", token) 1192 token = self.token() 1193 self.type = base_type; 1194 self.struct_fields = fields 1195 #self.debug("end parseStruct", token) 1196 #print fields 1197 return token 1198 1199 # 1200 # Parse a C enum block, parse till the balancing } 1201 # 1202 def parseEnumBlock(self, token): 1203 self.enums = [] 1204 name = None 1205 self.comment = None 1206 comment = "" 1207 value = "0" 1208 while token != None: 1209 if token[0] == "sep" and token[1] == "{": 1210 token = self.token() 1211 token = self.parseTypeBlock(token) 1212 elif token[0] == "sep" and token[1] == "}": 1213 if name != None: 1214 if self.comment != None: 1215 comment = self.comment 1216 self.comment = None 1217 self.enums.append((name, value, comment)) 1218 token = self.token() 1219 return token 1220 elif token[0] == "name": 1221 if name != None: 1222 if self.comment != None: 1223 comment = self.comment.strip() 1224 self.comment = None 1225 self.enums.append((name, value, comment)) 1226 name = token[1] 1227 comment = "" 1228 token = self.token() 1229 if token[0] == "op" and token[1][0] == "=": 1230 value = "" 1231 if len(token[1]) > 1: 1232 value = token[1][1:] 1233 token = self.token() 1234 while token[0] != "sep" or (token[1] != ',' and 1235 token[1] != '}'): 1236 value = value + token[1] 1237 token = self.token() 1238 else: 1239 try: 1240 value = "%d" % (int(value) + 1) 1241 except: 1242 self.warning("Failed to compute value of enum %s" % (name)) 1243 value="" 1244 if token[0] == "sep" and token[1] == ",": 1245 token = self.token() 1246 else: 1247 token = self.token() 1248 return token 1249 1250 # 1251 # Parse a C definition block, used for structs it parse till 1252 # the balancing } 1253 # 1254 def parseTypeBlock(self, token): 1255 while token != None: 1256 if token[0] == "sep" and token[1] == "{": 1257 token = self.token() 1258 token = self.parseTypeBlock(token) 1259 elif token[0] == "sep" and token[1] == "}": 1260 token = self.token() 1261 return token 1262 else: 1263 token = self.token() 1264 return token 1265 1266 # 1267 # Parse a type: the fact that the type name can either occur after 1268 # the definition or within the definition makes it a little harder 1269 # if inside, the name token is pushed back before returning 1270 # 1271 def parseType(self, token): 1272 self.type = "" 1273 self.struct_fields = [] 1274 self.signature = None 1275 if token == None: 1276 return token 1277 1278 while token[0] == "name" and ( 1279 token[1] == "const" or \ 1280 token[1] == "unsigned" or \ 1281 token[1] == "signed"): 1282 if self.type == "": 1283 self.type = token[1] 1284 else: 1285 self.type = self.type + " " + token[1] 1286 token = self.token() 1287 1288 if token[0] == "name" and (token[1] == "long" or token[1] == "short"): 1289 if self.type == "": 1290 self.type = token[1] 1291 else: 1292 self.type = self.type + " " + token[1] 1293 if token[0] == "name" and token[1] == "int": 1294 if self.type == "": 1295 self.type = tmp[1] 1296 else: 1297 self.type = self.type + " " + tmp[1] 1298 1299 elif token[0] == "name" and token[1] == "struct": 1300 if self.type == "": 1301 self.type = token[1] 1302 else: 1303 self.type = self.type + " " + token[1] 1304 token = self.token() 1305 nametok = None 1306 if token[0] == "name": 1307 nametok = token 1308 token = self.token() 1309 if token != None and token[0] == "sep" and token[1] == "{": 1310 token = self.token() 1311 token = self.parseStruct(token) 1312 elif token != None and token[0] == "op" and token[1] == "*": 1313 self.type = self.type + " " + nametok[1] + " *" 1314 token = self.token() 1315 while token != None and token[0] == "op" and token[1] == "*": 1316 self.type = self.type + " *" 1317 token = self.token() 1318 if token[0] == "name": 1319 nametok = token 1320 token = self.token() 1321 else: 1322 self.error("struct : expecting name", token) 1323 return token 1324 elif token != None and token[0] == "name" and nametok != None: 1325 self.type = self.type + " " + nametok[1] 1326 return token 1327 1328 if nametok != None: 1329 self.lexer.push(token) 1330 token = nametok 1331 return token 1332 1333 elif token[0] == "name" and token[1] == "enum": 1334 if self.type == "": 1335 self.type = token[1] 1336 else: 1337 self.type = self.type + " " + token[1] 1338 self.enums = [] 1339 token = self.token() 1340 if token != None and token[0] == "sep" and token[1] == "{": 1341 token = self.token() 1342 token = self.parseEnumBlock(token) 1343 else: 1344 self.error("parsing enum: expecting '{'", token) 1345 enum_type = None 1346 if token != None and token[0] != "name": 1347 self.lexer.push(token) 1348 token = ("name", "enum") 1349 else: 1350 enum_type = token[1] 1351 for enum in self.enums: 1352 self.index_add(enum[0], self.filename, 1353 not self.is_header, "enum", 1354 (enum[1], enum[2], enum_type)) 1355 return token 1356 1357 elif token[0] == "name": 1358 if self.type == "": 1359 self.type = token[1] 1360 else: 1361 self.type = self.type + " " + token[1] 1362 else: 1363 self.error("parsing type %s: expecting a name" % (self.type), 1364 token) 1365 return token 1366 token = self.token() 1367 while token != None and (token[0] == "op" or 1368 token[0] == "name" and token[1] == "const"): 1369 self.type = self.type + " " + token[1] 1370 token = self.token() 1371 1372 # 1373 # if there is a parenthesis here, this means a function type 1374 # 1375 if token != None and token[0] == "sep" and token[1] == '(': 1376 self.type = self.type + token[1] 1377 token = self.token() 1378 while token != None and token[0] == "op" and token[1] == '*': 1379 self.type = self.type + token[1] 1380 token = self.token() 1381 if token == None or token[0] != "name" : 1382 self.error("parsing function type, name expected", token); 1383 return token 1384 self.type = self.type + token[1] 1385 nametok = token 1386 token = self.token() 1387 if token != None and token[0] == "sep" and token[1] == ')': 1388 self.type = self.type + token[1] 1389 token = self.token() 1390 if token != None and token[0] == "sep" and token[1] == '(': 1391 token = self.token() 1392 type = self.type; 1393 token = self.parseSignature(token); 1394 self.type = type; 1395 else: 1396 self.error("parsing function type, '(' expected", token); 1397 return token 1398 else: 1399 self.error("parsing function type, ')' expected", token); 1400 return token 1401 self.lexer.push(token) 1402 token = nametok 1403 return token 1404 1405 # 1406 # do some lookahead for arrays 1407 # 1408 if token != None and token[0] == "name": 1409 nametok = token 1410 token = self.token() 1411 if token != None and token[0] == "sep" and token[1] == '[': 1412 self.type = self.type + nametok[1] 1413 while token != None and token[0] == "sep" and token[1] == '[': 1414 self.type = self.type + token[1] 1415 token = self.token() 1416 while token != None and token[0] != 'sep' and \ 1417 token[1] != ']' and token[1] != ';': 1418 self.type = self.type + token[1] 1419 token = self.token() 1420 if token != None and token[0] == 'sep' and token[1] == ']': 1421 self.type = self.type + token[1] 1422 token = self.token() 1423 else: 1424 self.error("parsing array type, ']' expected", token); 1425 return token 1426 elif token != None and token[0] == "sep" and token[1] == ':': 1427 # remove :12 in case it's a limited int size 1428 token = self.token() 1429 token = self.token() 1430 self.lexer.push(token) 1431 token = nametok 1432 1433 return token 1434 1435 # 1436 # Parse a signature: '(' has been parsed and we scan the type definition 1437 # up to the ')' included 1438 def parseSignature(self, token): 1439 signature = [] 1440 if token != None and token[0] == "sep" and token[1] == ')': 1441 self.signature = [] 1442 token = self.token() 1443 return token 1444 while token != None: 1445 token = self.parseType(token) 1446 if token != None and token[0] == "name": 1447 signature.append((self.type, token[1], None)) 1448 token = self.token() 1449 elif token != None and token[0] == "sep" and token[1] == ',': 1450 token = self.token() 1451 continue 1452 elif token != None and token[0] == "sep" and token[1] == ')': 1453 # only the type was provided 1454 if self.type == "...": 1455 signature.append((self.type, "...", None)) 1456 else: 1457 signature.append((self.type, None, None)) 1458 if token != None and token[0] == "sep": 1459 if token[1] == ',': 1460 token = self.token() 1461 continue 1462 elif token[1] == ')': 1463 token = self.token() 1464 break 1465 self.signature = signature 1466 return token 1467 1468 # 1469 # Parse a global definition, be it a type, variable or function 1470 # the extern "C" blocks are a bit nasty and require it to recurse. 1471 # 1472 def parseGlobal(self, token): 1473 static = 0 1474 if token[1] == 'extern': 1475 token = self.token() 1476 if token == None: 1477 return token 1478 if token[0] == 'string': 1479 if token[1] == 'C': 1480 token = self.token() 1481 if token == None: 1482 return token 1483 if token[0] == 'sep' and token[1] == "{": 1484 token = self.token() 1485# print 'Entering extern "C line ', self.lineno() 1486 while token != None and (token[0] != 'sep' or 1487 token[1] != "}"): 1488 if token[0] == 'name': 1489 token = self.parseGlobal(token) 1490 else: 1491 self.error( 1492 "token %s %s unexpected at the top level" % ( 1493 token[0], token[1])) 1494 token = self.parseGlobal(token) 1495# print 'Exiting extern "C" line', self.lineno() 1496 token = self.token() 1497 return token 1498 else: 1499 return token 1500 elif token[1] == 'static': 1501 static = 1 1502 token = self.token() 1503 if token == None or token[0] != 'name': 1504 return token 1505 1506 if token[1] == 'typedef': 1507 token = self.token() 1508 return self.parseTypedef(token) 1509 else: 1510 token = self.parseType(token) 1511 type_orig = self.type 1512 if token == None or token[0] != "name": 1513 return token 1514 type = type_orig 1515 self.name = token[1] 1516 token = self.token() 1517 while token != None and (token[0] == "sep" or token[0] == "op"): 1518 if token[0] == "sep": 1519 if token[1] == "[": 1520 type = type + token[1] 1521 token = self.token() 1522 while token != None and (token[0] != "sep" or \ 1523 token[1] != ";"): 1524 type = type + token[1] 1525 token = self.token() 1526 1527 if token != None and token[0] == "op" and token[1] == "=": 1528 # 1529 # Skip the initialization of the variable 1530 # 1531 token = self.token() 1532 if token[0] == 'sep' and token[1] == '{': 1533 token = self.token() 1534 token = self.parseBlock(token) 1535 else: 1536 self.comment = None 1537 while token != None and (token[0] != "sep" or \ 1538 (token[1] != ';' and token[1] != ',')): 1539 token = self.token() 1540 self.comment = None 1541 if token == None or token[0] != "sep" or (token[1] != ';' and 1542 token[1] != ','): 1543 self.error("missing ';' or ',' after value") 1544 1545 if token != None and token[0] == "sep": 1546 if token[1] == ";": 1547 self.comment = None 1548 token = self.token() 1549 if type == "struct": 1550 self.index_add(self.name, self.filename, 1551 not self.is_header, "struct", self.struct_fields) 1552 else: 1553 self.index_add(self.name, self.filename, 1554 not self.is_header, "variable", type) 1555 break 1556 elif token[1] == "(": 1557 token = self.token() 1558 token = self.parseSignature(token) 1559 if token == None: 1560 return None 1561 if token[0] == "sep" and token[1] == ";": 1562 d = self.mergeFunctionComment(self.name, 1563 ((type, None), self.signature), 1) 1564 self.index_add(self.name, self.filename, static, 1565 "function", d) 1566 token = self.token() 1567 elif token[0] == "sep" and token[1] == "{": 1568 d = self.mergeFunctionComment(self.name, 1569 ((type, None), self.signature), static) 1570 self.index_add(self.name, self.filename, static, 1571 "function", d) 1572 token = self.token() 1573 token = self.parseBlock(token); 1574 elif token[1] == ',': 1575 self.comment = None 1576 self.index_add(self.name, self.filename, static, 1577 "variable", type) 1578 type = type_orig 1579 token = self.token() 1580 while token != None and token[0] == "sep": 1581 type = type + token[1] 1582 token = self.token() 1583 if token != None and token[0] == "name": 1584 self.name = token[1] 1585 token = self.token() 1586 else: 1587 break 1588 1589 return token 1590 1591 def parse(self): 1592 self.warning("Parsing %s" % (self.filename)) 1593 token = self.token() 1594 while token != None: 1595 if token[0] == 'name': 1596 token = self.parseGlobal(token) 1597 else: 1598 self.error("token %s %s unexpected at the top level" % ( 1599 token[0], token[1])) 1600 token = self.parseGlobal(token) 1601 return 1602 self.parseTopComment(self.top_comment) 1603 return self.index 1604 1605 1606class docBuilder: 1607 """A documentation builder""" 1608 def __init__(self, name, directories=['.'], excludes=[]): 1609 self.name = name 1610 self.directories = directories 1611 self.excludes = excludes + list(ignored_files.keys()) 1612 self.modules = {} 1613 self.headers = {} 1614 self.idx = index() 1615 self.xref = {} 1616 self.index = {} 1617 if name == 'libxml2': 1618 self.basename = 'libxml' 1619 else: 1620 self.basename = name 1621 1622 def indexString(self, id, str): 1623 if str == None: 1624 return 1625 str = str.replace("'", ' ') 1626 str = str.replace('"', ' ') 1627 str = str.replace("/", ' ') 1628 str = str.replace('*', ' ') 1629 str = str.replace("[", ' ') 1630 str = str.replace("]", ' ') 1631 str = str.replace("(", ' ') 1632 str = str.replace(")", ' ') 1633 str = str.replace("<", ' ') 1634 str = str.replace('>', ' ') 1635 str = str.replace("&", ' ') 1636 str = str.replace('#', ' ') 1637 str = str.replace(",", ' ') 1638 str = str.replace('.', ' ') 1639 str = str.replace(';', ' ') 1640 tokens = str.split() 1641 for token in tokens: 1642 try: 1643 c = token[0] 1644 if string.ascii_letters.find(c) < 0: 1645 pass 1646 elif len(token) < 3: 1647 pass 1648 else: 1649 lower = token.lower() 1650 # TODO: generalize this a bit 1651 if lower == 'and' or lower == 'the': 1652 pass 1653 elif token in self.xref: 1654 self.xref[token].append(id) 1655 else: 1656 self.xref[token] = [id] 1657 except: 1658 pass 1659 1660 def analyze(self): 1661 print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys())))) 1662 self.idx.analyze() 1663 1664 def scanHeaders(self): 1665 for header in list(self.headers.keys()): 1666 parser = CParser(header) 1667 idx = parser.parse() 1668 self.headers[header] = idx; 1669 self.idx.merge(idx) 1670 1671 def scanModules(self): 1672 for module in list(self.modules.keys()): 1673 parser = CParser(module) 1674 idx = parser.parse() 1675 # idx.analyze() 1676 self.modules[module] = idx 1677 self.idx.merge_public(idx) 1678 1679 def scan(self): 1680 for directory in self.directories: 1681 files = glob.glob(directory + "/*.c") 1682 for file in files: 1683 skip = 0 1684 for excl in self.excludes: 1685 if file.find(excl) != -1: 1686 print("Skipping %s" % file) 1687 skip = 1 1688 break 1689 if skip == 0: 1690 self.modules[file] = None; 1691 files = glob.glob(directory + "/*.h") 1692 for file in files: 1693 skip = 0 1694 for excl in self.excludes: 1695 if file.find(excl) != -1: 1696 print("Skipping %s" % file) 1697 skip = 1 1698 break 1699 if skip == 0: 1700 self.headers[file] = None; 1701 self.scanHeaders() 1702 self.scanModules() 1703 1704 def modulename_file(self, file): 1705 module = os.path.basename(file) 1706 if module[-2:] == '.h': 1707 module = module[:-2] 1708 elif module[-2:] == '.c': 1709 module = module[:-2] 1710 return module 1711 1712 def serialize_enum(self, output, name): 1713 id = self.idx.enums[name] 1714 output.write(" <enum name='%s' file='%s'" % (name, 1715 self.modulename_file(id.header))) 1716 if id.info != None: 1717 info = id.info 1718 if info[0] != None and info[0] != '': 1719 try: 1720 val = eval(info[0]) 1721 except: 1722 val = info[0] 1723 output.write(" value='%s'" % (val)); 1724 if info[2] != None and info[2] != '': 1725 output.write(" type='%s'" % info[2]); 1726 if info[1] != None and info[1] != '': 1727 output.write(" info='%s'" % escape(info[1])); 1728 output.write("/>\n") 1729 1730 def serialize_macro(self, output, name): 1731 id = self.idx.macros[name] 1732 output.write(" <macro name='%s' file='%s'>\n" % (name, 1733 self.modulename_file(id.header))) 1734 if id.info != None: 1735 try: 1736 (args, desc) = id.info 1737 if desc != None and desc != "": 1738 output.write(" <info>%s</info>\n" % (escape(desc))) 1739 self.indexString(name, desc) 1740 for arg in args: 1741 (name, desc) = arg 1742 if desc != None and desc != "": 1743 output.write(" <arg name='%s' info='%s'/>\n" % ( 1744 name, escape(desc))) 1745 self.indexString(name, desc) 1746 else: 1747 output.write(" <arg name='%s'/>\n" % (name)) 1748 except: 1749 pass 1750 output.write(" </macro>\n") 1751 1752 def serialize_typedef(self, output, name): 1753 id = self.idx.typedefs[name] 1754 if id.info[0:7] == 'struct ': 1755 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1756 name, self.modulename_file(id.header), id.info)) 1757 name = id.info[7:] 1758 if name in self.idx.structs and ( \ 1759 type(self.idx.structs[name].info) == type(()) or 1760 type(self.idx.structs[name].info) == type([])): 1761 output.write(">\n"); 1762 try: 1763 for field in self.idx.structs[name].info: 1764 desc = field[2] 1765 self.indexString(name, desc) 1766 if desc == None: 1767 desc = '' 1768 else: 1769 desc = escape(desc) 1770 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) 1771 except: 1772 print("Failed to serialize struct %s" % (name)) 1773 output.write(" </struct>\n") 1774 else: 1775 output.write("/>\n"); 1776 else : 1777 output.write(" <typedef name='%s' file='%s' type='%s'" % ( 1778 name, self.modulename_file(id.header), id.info)) 1779 try: 1780 desc = id.extra 1781 if desc != None and desc != "": 1782 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1783 output.write(" </typedef>\n") 1784 else: 1785 output.write("/>\n") 1786 except: 1787 output.write("/>\n") 1788 1789 def serialize_variable(self, output, name): 1790 id = self.idx.variables[name] 1791 if id.info != None: 1792 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % ( 1793 name, self.modulename_file(id.header), id.info)) 1794 else: 1795 output.write(" <variable name='%s' file='%s'/>\n" % ( 1796 name, self.modulename_file(id.header))) 1797 1798 def serialize_function(self, output, name): 1799 id = self.idx.functions[name] 1800 if name == debugsym: 1801 print("=>", id) 1802 1803 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, 1804 name, self.modulename_file(id.header), 1805 self.modulename_file(id.module))) 1806 # 1807 # Processing of conditionals modified by Bill 1/1/05 1808 # 1809 if id.conditionals != None: 1810 apstr = "" 1811 for cond in id.conditionals: 1812 if apstr != "": 1813 apstr = apstr + " && " 1814 apstr = apstr + cond 1815 output.write(" <cond>%s</cond>\n"% (apstr)); 1816 try: 1817 (ret, params, desc) = id.info 1818 if (desc == None or desc == '') and \ 1819 name[0:9] != "xmlThrDef" and name != "xmlDllMain": 1820 print("%s %s from %s has no description" % (id.type, name, 1821 self.modulename_file(id.module))) 1822 1823 output.write(" <info>%s</info>\n" % (escape(desc))) 1824 self.indexString(name, desc) 1825 if ret[0] != None: 1826 if ret[0] == "void": 1827 output.write(" <return type='void'/>\n") 1828 else: 1829 output.write(" <return type='%s' info='%s'/>\n" % ( 1830 ret[0], escape(ret[1]))) 1831 self.indexString(name, ret[1]) 1832 for param in params: 1833 if param[0] == 'void': 1834 continue 1835 if param[2] == None: 1836 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1837 else: 1838 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1839 self.indexString(name, param[2]) 1840 except: 1841 print("Failed to save function %s info: " % name, repr(id.info)) 1842 output.write(" </%s>\n" % (id.type)) 1843 1844 def serialize_exports(self, output, file): 1845 module = self.modulename_file(file) 1846 output.write(" <file name='%s'>\n" % (module)) 1847 dict = self.headers[file] 1848 if dict.info != None: 1849 for data in ('Summary', 'Description', 'Author'): 1850 try: 1851 output.write(" <%s>%s</%s>\n" % ( 1852 data.lower(), 1853 escape(dict.info[data]), 1854 data.lower())) 1855 except: 1856 print("Header %s lacks a %s description" % (module, data)) 1857 if 'Description' in dict.info: 1858 desc = dict.info['Description'] 1859 if desc.find("DEPRECATED") != -1: 1860 output.write(" <deprecated/>\n") 1861 1862 ids = list(dict.macros.keys()) 1863 ids.sort() 1864 for id in uniq(ids): 1865 # Macros are sometime used to masquerade other types. 1866 if id in dict.functions: 1867 continue 1868 if id in dict.variables: 1869 continue 1870 if id in dict.typedefs: 1871 continue 1872 if id in dict.structs: 1873 continue 1874 if id in dict.enums: 1875 continue 1876 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1877 ids = list(dict.enums.keys()) 1878 ids.sort() 1879 for id in uniq(ids): 1880 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1881 ids = list(dict.typedefs.keys()) 1882 ids.sort() 1883 for id in uniq(ids): 1884 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1885 ids = list(dict.structs.keys()) 1886 ids.sort() 1887 for id in uniq(ids): 1888 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1889 ids = list(dict.variables.keys()) 1890 ids.sort() 1891 for id in uniq(ids): 1892 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1893 ids = list(dict.functions.keys()) 1894 ids.sort() 1895 for id in uniq(ids): 1896 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1897 output.write(" </file>\n") 1898 1899 def serialize_xrefs_files(self, output): 1900 headers = list(self.headers.keys()) 1901 headers.sort() 1902 for file in headers: 1903 module = self.modulename_file(file) 1904 output.write(" <file name='%s'>\n" % (module)) 1905 dict = self.headers[file] 1906 ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \ 1907 list(dict.macros.keys()) + list(dict.typedefs.keys()) + \ 1908 list(dict.structs.keys()) + list(dict.enums.keys())) 1909 ids.sort() 1910 for id in ids: 1911 output.write(" <ref name='%s'/>\n" % (id)) 1912 output.write(" </file>\n") 1913 pass 1914 1915 def serialize_xrefs_functions(self, output): 1916 funcs = {} 1917 for name in list(self.idx.functions.keys()): 1918 id = self.idx.functions[name] 1919 try: 1920 (ret, params, desc) = id.info 1921 for param in params: 1922 if param[0] == 'void': 1923 continue 1924 if param[0] in funcs: 1925 funcs[param[0]].append(name) 1926 else: 1927 funcs[param[0]] = [name] 1928 except: 1929 pass 1930 typ = list(funcs.keys()) 1931 typ.sort() 1932 for type in typ: 1933 if type == '' or type == 'void' or type == "int" or \ 1934 type == "char *" or type == "const char *" : 1935 continue 1936 output.write(" <type name='%s'>\n" % (type)) 1937 ids = funcs[type] 1938 ids.sort() 1939 pid = '' # not sure why we have dups, but get rid of them! 1940 for id in ids: 1941 if id != pid: 1942 output.write(" <ref name='%s'/>\n" % (id)) 1943 pid = id 1944 output.write(" </type>\n") 1945 1946 def serialize_xrefs_constructors(self, output): 1947 funcs = {} 1948 for name in list(self.idx.functions.keys()): 1949 id = self.idx.functions[name] 1950 try: 1951 (ret, params, desc) = id.info 1952 if ret[0] == "void": 1953 continue 1954 if ret[0] in funcs: 1955 funcs[ret[0]].append(name) 1956 else: 1957 funcs[ret[0]] = [name] 1958 except: 1959 pass 1960 typ = list(funcs.keys()) 1961 typ.sort() 1962 for type in typ: 1963 if type == '' or type == 'void' or type == "int" or \ 1964 type == "char *" or type == "const char *" : 1965 continue 1966 output.write(" <type name='%s'>\n" % (type)) 1967 ids = funcs[type] 1968 ids.sort() 1969 for id in ids: 1970 output.write(" <ref name='%s'/>\n" % (id)) 1971 output.write(" </type>\n") 1972 1973 def serialize_xrefs_alpha(self, output): 1974 letter = None 1975 ids = list(self.idx.identifiers.keys()) 1976 ids.sort() 1977 for id in ids: 1978 if id[0] != letter: 1979 if letter != None: 1980 output.write(" </letter>\n") 1981 letter = id[0] 1982 output.write(" <letter name='%s'>\n" % (letter)) 1983 output.write(" <ref name='%s'/>\n" % (id)) 1984 if letter != None: 1985 output.write(" </letter>\n") 1986 1987 def serialize_xrefs_references(self, output): 1988 typ = list(self.idx.identifiers.keys()) 1989 typ.sort() 1990 for id in typ: 1991 idf = self.idx.identifiers[id] 1992 module = idf.header 1993 output.write(" <reference name='%s' href='%s'/>\n" % (id, 1994 'html/' + self.basename + '-' + 1995 self.modulename_file(module) + '.html#' + 1996 id)) 1997 1998 def serialize_xrefs_index(self, output): 1999 index = self.xref 2000 typ = list(index.keys()) 2001 typ.sort() 2002 letter = None 2003 count = 0 2004 chunk = 0 2005 chunks = [] 2006 for id in typ: 2007 if len(index[id]) > 30: 2008 continue 2009 if id[0] != letter: 2010 if letter == None or count > 200: 2011 if letter != None: 2012 output.write(" </letter>\n") 2013 output.write(" </chunk>\n") 2014 count = 0 2015 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 2016 output.write(" <chunk name='chunk%s'>\n" % (chunk)) 2017 first_letter = id[0] 2018 chunk = chunk + 1 2019 elif letter != None: 2020 output.write(" </letter>\n") 2021 letter = id[0] 2022 output.write(" <letter name='%s'>\n" % (letter)) 2023 output.write(" <word name='%s'>\n" % (id)) 2024 tokens = index[id]; 2025 tokens.sort() 2026 tok = None 2027 for token in tokens: 2028 if tok == token: 2029 continue 2030 tok = token 2031 output.write(" <ref name='%s'/>\n" % (token)) 2032 count = count + 1 2033 output.write(" </word>\n") 2034 if letter != None: 2035 output.write(" </letter>\n") 2036 output.write(" </chunk>\n") 2037 if count != 0: 2038 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 2039 output.write(" <chunks>\n") 2040 for ch in chunks: 2041 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % ( 2042 ch[0], ch[1], ch[2])) 2043 output.write(" </chunks>\n") 2044 2045 def serialize_xrefs(self, output): 2046 output.write(" <references>\n") 2047 self.serialize_xrefs_references(output) 2048 output.write(" </references>\n") 2049 output.write(" <alpha>\n") 2050 self.serialize_xrefs_alpha(output) 2051 output.write(" </alpha>\n") 2052 output.write(" <constructors>\n") 2053 self.serialize_xrefs_constructors(output) 2054 output.write(" </constructors>\n") 2055 output.write(" <functions>\n") 2056 self.serialize_xrefs_functions(output) 2057 output.write(" </functions>\n") 2058 output.write(" <files>\n") 2059 self.serialize_xrefs_files(output) 2060 output.write(" </files>\n") 2061 output.write(" <index>\n") 2062 self.serialize_xrefs_index(output) 2063 output.write(" </index>\n") 2064 2065 def serialize(self): 2066 filename = "%s-api.xml" % self.name 2067 print("Saving XML description %s" % (filename)) 2068 output = open(filename, "w") 2069 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 2070 output.write("<api name='%s'>\n" % self.name) 2071 output.write(" <files>\n") 2072 headers = list(self.headers.keys()) 2073 headers.sort() 2074 for file in headers: 2075 self.serialize_exports(output, file) 2076 output.write(" </files>\n") 2077 output.write(" <symbols>\n") 2078 macros = list(self.idx.macros.keys()) 2079 macros.sort() 2080 for macro in macros: 2081 self.serialize_macro(output, macro) 2082 enums = list(self.idx.enums.keys()) 2083 enums.sort() 2084 for enum in enums: 2085 self.serialize_enum(output, enum) 2086 typedefs = list(self.idx.typedefs.keys()) 2087 typedefs.sort() 2088 for typedef in typedefs: 2089 self.serialize_typedef(output, typedef) 2090 variables = list(self.idx.variables.keys()) 2091 variables.sort() 2092 for variable in variables: 2093 self.serialize_variable(output, variable) 2094 functions = list(self.idx.functions.keys()) 2095 functions.sort() 2096 for function in functions: 2097 self.serialize_function(output, function) 2098 output.write(" </symbols>\n") 2099 output.write("</api>\n") 2100 output.close() 2101 2102 filename = "%s-refs.xml" % self.name 2103 print("Saving XML Cross References %s" % (filename)) 2104 output = open(filename, "w") 2105 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 2106 output.write("<apirefs name='%s'>\n" % self.name) 2107 self.serialize_xrefs(output) 2108 output.write("</apirefs>\n") 2109 output.close() 2110 2111 2112def rebuild(): 2113 builder = None 2114 if glob.glob("parser.c") != [] : 2115 print("Rebuilding API description for libxml2") 2116 builder = docBuilder("libxml2", [".", "."], 2117 ["xmlwin32version.h", "tst.c"]) 2118 elif glob.glob("../parser.c") != [] : 2119 print("Rebuilding API description for libxml2") 2120 builder = docBuilder("libxml2", ["..", "../include/libxml"], 2121 ["xmlwin32version.h", "tst.c"]) 2122 elif glob.glob("../libxslt/transform.c") != [] : 2123 print("Rebuilding API description for libxslt") 2124 builder = docBuilder("libxslt", ["../libxslt"], 2125 ["win32config.h", "libxslt.h", "tst.c"]) 2126 else: 2127 print("rebuild() failed, unable to guess the module") 2128 return None 2129 builder.scan() 2130 builder.analyze() 2131 builder.serialize() 2132 if glob.glob("../libexslt/exslt.c") != [] : 2133 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 2134 extra.scan() 2135 extra.analyze() 2136 extra.serialize() 2137 return builder 2138 2139# 2140# for debugging the parser 2141# 2142def parse(filename): 2143 parser = CParser(filename) 2144 idx = parser.parse() 2145 return idx 2146 2147if __name__ == "__main__": 2148 if len(sys.argv) > 1: 2149 debug = 1 2150 parse(sys.argv[1]) 2151 else: 2152 rebuild() 2153