#!/usr/bin/python3
#
# Copyright 2016-2024 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

# Utility functions for automatic ref page generation and other script stuff

import io
import re
import sys
import subprocess

# global errFile, warnFile, diagFile

# Destinations for error, warning, and diagnostic messages. A value of None
# suppresses output for that severity.
errFile = sys.stderr
warnFile = sys.stdout
diagFile = None

# Optional metadata included in the prefix of each log message (see logHeader)
logSourcefile = None
logProcname = None
logLine = None


def unescapeQuotes(s):
    """Remove backslash escapes preceding single quotes in a string
    (refpage description)"""
    return s.replace('\\\'', '\'')


def write(*args, **kwargs):
    """Write the arguments, converted to strings and joined by single
    spaces, followed by a terminator.

    - file (keyword) - handle to write to; defaults to sys.stdout
    - end (keyword) - terminator string; defaults to a newline"""
    file = kwargs.pop('file', sys.stdout)
    end = kwargs.pop('end', '\n')
    file.write(' '.join(str(arg) for arg in args))
    file.write(end)


def setLogSourcefile(filename):
    """Metadata which may be printed (if not None) for diagnostic messages"""
    global logSourcefile
    logSourcefile = filename


def setLogProcname(procname):
    """Set the procedure name printed (if not None) in diagnostic messages"""
    global logProcname
    logProcname = procname


def setLogLine(line):
    """Set the line number printed (if not None) in diagnostic messages"""
    global logLine
    logLine = line


def logHeader(severity):
    """Generate prefix for a diagnostic line using metadata and severity

    - severity - string tag such as 'DIAG', 'WARN', or 'ERROR'

    Returns the prefix string, ending in a space."""
    msg = severity + ': '
    if logProcname:
        msg = msg + ' in ' + logProcname
    if logSourcefile:
        msg = msg + ' for ' + logSourcefile
    # Compare against None explicitly: line 0 is a legitimate line number
    # and must not be dropped from the prefix.
    if logLine is not None:
        msg = msg + ' line ' + str(logLine)
    return msg + ' '


def setLogFile(setDiag, setWarn, filename):
    """Set the file handle to log either or both warnings and diagnostics to.

    - setDiag and setWarn are True if the corresponding handle is to be set.
    - filename is None for no logging, '-' for stdout, or a pathname."""
    global diagFile, warnFile

    if filename is None:
        return

    if filename == '-':
        fp = sys.stdout
    else:
        # The handle is deliberately left open: it is stored in a module
        # global and used by later logDiag / logWarn calls.
        fp = open(filename, 'w', encoding='utf-8')

    if setDiag:
        diagFile = fp
    if setWarn:
        warnFile = fp


def logDiag(*args, **kwargs):
    """Write a diagnostic message built from the arguments.

    - file (keyword) - handle to write to; defaults to diagFile.
      Nothing is written if it is None.
    - end (keyword) - terminator string; defaults to a newline"""
    file = kwargs.pop('file', diagFile)
    end = kwargs.pop('end', '\n')
    if file is not None:
        file.write(logHeader('DIAG') + ' '.join(str(arg) for arg in args))
        file.write(end)


def logWarn(*args, **kwargs):
    """Write a warning message built from the arguments.

    - file (keyword) - handle to write to; defaults to warnFile.
      Nothing is written if it is None.
    - end (keyword) - terminator string; defaults to a newline"""
    file = kwargs.pop('file', warnFile)
    end = kwargs.pop('end', '\n')
    if file is not None:
        file.write(logHeader('WARN') + ' '.join(str(arg) for arg in args))
        file.write(end)


def logErr(*args, **kwargs):
    """Write an error message built from the arguments, then raise
    UserWarning carrying the same message.

    - file (keyword) - handle to write to; defaults to errFile.
      Nothing is written if it is None, but the exception is still raised.
    - end (keyword) - terminator string; defaults to a newline"""
    file = kwargs.pop('file', errFile)
    end = kwargs.pop('end', '\n')

    message = logHeader('ERROR') + ' '.join(str(arg) for arg in args) + end

    if file is not None:
        file.write(message)
    raise UserWarning(message)


def isempty(s):
    """Return True if s is nothing but white space, False otherwise"""
    return not s.strip()


class pageInfo:
    """Information about a ref page relative to the file it is extracted from."""
    def __init__(self):
        self.extractPage = True
        """True if page should be extracted"""

        self.Warning = None
        """string warning if page is suboptimal or cannot be generated"""

        self.embed = False
        """False or the name of the ref page this include is embedded within"""

        self.type = None
        """refpage type attribute - 'structs', 'protos', 'freeform', etc."""

        self.name = None
        """struct/proto/enumerant/etc. name"""

        self.desc = None
        """short description of ref page"""

        self.begin = None
        """index of first line of the page (heuristic or // refBegin)"""

        self.include = None
        """index of include:: line defining the page"""

        self.param = None
        """index of first line of parameter/member definitions"""

        self.body = None
        """index of first line of body text"""

        self.validity = None
        """index of validity include"""

        self.end = None
        """index of last line of the page (heuristic validity include, or // refEnd)"""

        self.alias = ''
        """aliases of this name, if supplied, or ''"""

        self.refs = ''
        """cross-references on // refEnd line, if supplied"""

        self.spec = None
        """'spec' attribute in refpage open block, if supplied, or None for the default ('api') type"""

        self.anchor = None
        """'anchor' attribute in refpage open block, if supplied, or inferred to be the same as the 'name'"""


def printPageInfoField(desc, line, file):
    """Print a single field of a pageInfo struct, possibly None.

    - desc - string description of field
    - line - field value or None
    - file - indexed by line"""
    if line is not None:
        # Report a one-based line number followed by the line itself. The
        # line text supplies the terminator, hence end=''.
        logDiag(desc + ':', line + 1, '\t-> ', file[line], end='')
    else:
        logDiag(desc + ':', line)


def printPageInfo(pi, file):
    """Print out fields of a pageInfo struct

    - pi - pageInfo
    - file - indexed by pageInfo"""
    logDiag('TYPE: ', pi.type)
    logDiag('NAME: ', pi.name)
    logDiag('WARNING:', pi.Warning)
    logDiag('EXTRACT:', pi.extractPage)
    logDiag('EMBED: ', pi.embed)
    logDiag('DESC: ', pi.desc)
    printPageInfoField('BEGIN ', pi.begin, file)
    printPageInfoField('INCLUDE ', pi.include, file)
    printPageInfoField('PARAM ', pi.param, file)
    printPageInfoField('BODY ', pi.body, file)
    printPageInfoField('VALIDITY', pi.validity, file)
    printPageInfoField('END ', pi.end, file)
    logDiag('REFS: "' + pi.refs + '"')


def prevPara(file, line):
    """Go back one paragraph from the specified line and return the line number
    of the first line of that paragraph.

    Paragraphs are delimited by blank lines. It is assumed that the
    current line is the first line of a paragraph.

    - file is an array of strings
    - line is the starting point (zero-based)"""
    # Skip over current paragraph
    while line >= 0 and not isempty(file[line]):
        line = line - 1
    # Skip over white space
    while line >= 0 and isempty(file[line]):
        line = line - 1
    # Skip to first line of previous paragraph
    while line >= 1 and not isempty(file[line - 1]):
        line = line - 1
    return line


def nextPara(file, line):
    """Go forward one paragraph from the specified line and return the line
    number of the first line of that paragraph.

    Paragraphs are delimited by blank lines. It is assumed that the
    current line is standalone (which is bogus).

    - file is an array of strings
    - line is the starting point (zero-based)

    The result never advances past the last line of the file. If line is
    already at or beyond the last line (including when the file is empty),
    it is returned unchanged."""
    maxLine = len(file) - 1
    # Use '<' rather than '!=' so that an empty file or an out-of-range
    # starting point terminates immediately instead of indexing past the
    # end of the list.
    # Skip over current paragraph
    while line < maxLine and not isempty(file[line]):
        line = line + 1
    # Skip over white space
    while line < maxLine and isempty(file[line]):
        line = line + 1
    return line


def lookupPage(pageMap, name):
    """Return (creating if needed) the pageInfo entry in pageMap for name"""
    if name not in pageMap:
        pi = pageInfo()
        pi.name = name
        pageMap[name] = pi
    else:
        pi = pageMap[name]
    return pi


def loadFile(filename):
    """Load a file into a list of strings. Return the (list, newline_string) or (None, None) on failure

    newline_string is "\\r\\n" if the raw file contents contain more than one
    CRLF pair, and "\\n" otherwise. The file is decoded as UTF-8."""
    newline_string = "\n"
    try:
        # Inspect the raw bytes first, since text-mode reading below
        # translates line endings.
        with open(filename, 'rb') as fp:
            contents = fp.read()
        # NOTE(review): a single CRLF is not enough to select "\r\n";
        # presumably a deliberate heuristic - confirm.
        if contents.count(b"\r\n") > 1:
            newline_string = "\r\n"

        with open(filename, 'r', encoding='utf-8') as fp:
            lines = fp.readlines()
    except Exception:
        # Best-effort load: report and signal failure to the caller, but
        # do not swallow KeyboardInterrupt / SystemExit as a bare except
        # would.
        logWarn('Cannot open file', filename, ':', sys.exc_info()[0])
        return None, None

    return lines, newline_string


def clampToBlock(line, minline, maxline):
    """Clamp a line number to be in the range [minline,maxline].

    If the line number is None, just return it.
    If minline is None, do not clamp to that value."""
    if line is None:
        return line
    # Test against None explicitly so that a lower bound of 0 is honored
    if minline is not None and line < minline:
        return minline
    if line > maxline:
        return maxline

    return line


def fixupRefs(pageMap, specFile, file):
    """Fill in missing fields in pageInfo structures, to the extent they can be
    inferred.

    - pageMap - dictionary of pageInfo structures
    - specFile - filename
    - file - list of strings making up the file, indexed by pageInfo"""
    # All potential ref pages are now in pageMap. Process them to
    # identify actual page start/end/description boundaries, if
    # not already determined from the text.
    for name in sorted(pageMap):
        pi = pageMap[name]

        # # If nothing is found but an include line with no begin, validity,
        # # or end, this is not intended as a ref page (yet). Set the begin
        # # line to the include line, so autogeneration can at least
        # # pull the include out, but mark it not to be extracted.
        # # Examples include the host sync table includes in
        # # chapters/fundamentals.adoc and the table of Vk*Flag types in
        # # appendices/boilerplate.adoc.
        # if pi.begin is None and pi.validity is None and pi.end is None:
        #     pi.begin = pi.include
        #     pi.extractPage = False
        #     pi.Warning = 'No begin, validity, or end lines identified'
        #     continue

        # Using open block delimiters, ref pages must *always* have a
        # defined begin and end. If either is undefined, that is fatal.
        if pi.begin is None:
            pi.extractPage = False
            pi.Warning = 'Can\'t identify begin of ref page open block'
            continue

        if pi.end is None:
            pi.extractPage = False
            pi.Warning = 'Can\'t identify end of ref page open block'
            continue

        # If there is no description of the page, infer one from the type
        if pi.desc is None:
            if pi.type is not None:
                # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)'
                pi.Warning = 'No short description available; could infer from the type and name'
            else:
                pi.extractPage = False
                pi.Warning = 'No short description available, cannot infer from the type'
                continue

        # Try to determine where the parameter and body sections of the page
        # begin. funcpointer, proto, and struct pages infer the location of
        # the parameter and body sections. Other pages infer the location of
        # the body, but have no parameter sections.
        #
        # Probably some other types infer this as well - refer to list of
        # all page types in genRef.py:emitPage()
        if pi.include is not None:
            if pi.type in ['funcpointers', 'protos', 'structs']:
                pi.param = nextPara(file, pi.include)
                if pi.body is None:
                    pi.body = nextPara(file, pi.param)
            else:
                if pi.body is None:
                    pi.body = nextPara(file, pi.include)
        else:
            pi.Warning = 'Page does not have an API definition include::'

        # It is possible for the inferred param and body lines to run past
        # the end of block, if, for example, there is no parameter section.
        pi.param = clampToBlock(pi.param, pi.include, pi.end)
        pi.body = clampToBlock(pi.body, pi.param, pi.end)

        # We can get to this point with .include, .param, and .validity
        # all being None, indicating those sections were not found.

        logDiag('fixupRefs: after processing,', pi.name, 'looks like:')
        printPageInfo(pi, file)

    # Now that all the valid pages have been found, try to make some
    # inferences about invalid pages.
    #
    # If a reference without a .end is entirely inside a valid reference,
    # then it is intentionally embedded - may want to create an indirect
    # page that links into the embedding page. This is done by a very
    # inefficient double loop, but the loop depth is small.
    for name in sorted(pageMap):
        pi = pageMap[name]

        if pi.end is None:
            for embedName in sorted(pageMap):
                logDiag('fixupRefs: comparing', pi.name, 'to', embedName)
                embed = pageMap[embedName]
                # Do not check embeddings which are themselves invalid
                if not embed.extractPage:
                    logDiag('Skipping check for embedding in:', embed.name)
                    continue
                if embed.begin is None or embed.end is None:
                    logDiag('fixupRefs:', name + ':',
                            'can\'t compare to unanchored ref:', embed.name,
                            'in', specFile, 'at line', pi.include)
                    printPageInfo(pi, file)
                    printPageInfo(embed, file)
                # If an embed is found, change the error to a warning
                elif (pi.include is not None
                      and embed.begin <= pi.include <= embed.end):
                    logDiag('fixupRefs: Found embed for:', name,
                            'inside:', embedName,
                            'in', specFile, 'at line', pi.include)
                    pi.embed = embed.name
                    pi.Warning = 'Embedded in definition for ' + embed.name
                    break
                else:
                    logDiag('fixupRefs: No embed match for:', name,
                            'inside:', embedName, 'in', specFile,
                            'at line', pi.include)


def compatiblePageTypes(refpage_type, pagemap_type):
    """Returns whether two refpage 'types' (categories) are compatible -
    this is only true for 'consts' and 'enums' types."""

    constsEnums = ('consts', 'enums')

    if refpage_type == pagemap_type:
        return True
    return refpage_type in constsEnums and pagemap_type in constsEnums

# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.
endifPat = re.compile(r'^endif::(?P<condition>[\w_+,]+)\[\]')
beginPat = re.compile(r'^\[open,(?P<attribs>refpage=.*)\]')
# attribute key/value pairs of an open block
attribStr = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'"
attribPat = re.compile(attribStr)
bodyPat = re.compile(r'^// *refBody')
errorPat = re.compile(r'^// *refError')

# This regex transplanted from check_spec_links
# It looks for various generated file conventions, and for the api/validity
# include (generated_type), protos/struct/etc path (category), and API name
# (entity_name).
# It could be put into the API conventions object, instead of being
# generalized for all the different specs.
# NOTE(review): the dots in '(../)' are unescaped, so each one matches any
# character rather than only a literal '.' - confirm whether that is
# intended before tightening the pattern.
INCLUDE = re.compile(
        r'include::(?P<directory_traverse>((../){1,4}|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.(adoc|txt)[\[][\]]')


def _openBlockPage(pageMap, attribs):
    """Parse the attribute string of an '[open,refpage=...]' line and record
    it in pageMap.

    - pageMap - dictionary of pageInfo structures, updated in place
    - attribs - text of the attribute list, starting at 'refpage='

    Returns the pageInfo for the block, or None if any of the required
    'refpage', 'desc', or 'type' attributes is missing."""
    # Extract each attribute
    name = None
    desc = None
    refpage_type = None
    spec_type = None
    anchor = None
    alias = None
    xrefs = None

    for (key, value) in attribPat.findall(attribs):
        logDiag('got attribute', key, '=', value)
        if key == 'refpage':
            name = value
        elif key == 'desc':
            desc = unescapeQuotes(value)
        elif key == 'type':
            refpage_type = value
        elif key == 'spec':
            spec_type = value
        elif key == 'anchor':
            anchor = value
        elif key == 'alias':
            alias = value
        elif key == 'xrefs':
            xrefs = value
        else:
            logWarn('unknown open block attribute:', key)

    if name is None or desc is None or refpage_type is None:
        logWarn('missing one or more required open block attributes: '
                'refpage, desc, or type')
        # No pageInfo, so the open block delimiters are ignored
        return None

    pi = lookupPage(pageMap, name)
    pi.desc = desc
    # Must match later type definitions in interface/validity includes
    pi.type = refpage_type
    pi.spec = spec_type
    pi.anchor = anchor
    if alias:
        pi.alias = alias
    if xrefs:
        pi.refs = xrefs
    logDiag('open block for', name, 'added DESC =', desc,
            'TYPE =', refpage_type, 'ALIAS =', alias,
            'XREFS =', xrefs, 'SPEC =', spec_type,
            'ANCHOR =', anchor)
    return pi


def _scanRefs(file, filename):
    """Scan the lines of file for reference page markup and return the
    resulting dictionary of pageInfo entries.

    - file - list of strings making up the file
    - filename - name of the file, used only in messages

    Raises UserWarning (via logErr) on a nested or unclosed open block."""
    # To reliably detect the open blocks around reference pages, we must
    # first detect the '[open,refpage=...]' markup delimiting the block;
    # skip past the '--' block delimiter on the next line; and identify the
    # '--' block delimiter closing the page.
    # This cannot be done solely with pattern matching, and requires state to
    # track 'inside/outside block'.
    # When looking for open blocks, possible states are:
    #   'outside' - outside a block
    #   'start' - have found the '[open...]' line
    #   'inside' - have found the following '--' line
    openBlockState = 'outside'

    # Dictionary of interesting line numbers and strings related to an API
    # name
    pageMap = {}

    # Track the pageInfo object corresponding to the current open block
    pi = None

    for line, text in enumerate(file):
        setLogLine(line)

        # Only one of the patterns can possibly match. Add it to
        # the dictionary for that name.

        # [open,refpage=...] starting a refpage block
        matches = beginPat.search(text)
        if matches is not None:
            logDiag('Matched open block pattern')
            attribs = matches.group('attribs')

            # If the previous open block was not closed, raise an error
            if openBlockState != 'outside':
                logErr('Nested open block starting at line', line, 'of',
                       filename)

            openBlockState = 'start'

            # Parse the block attributes. pi is None here (no block is
            # open), and stays None if required attributes are missing.
            pi = _openBlockPage(pageMap, attribs)
            continue

        # '--' starting or ending an open block
        if text.rstrip() == '--':
            if openBlockState == 'outside':
                # Only refpage open blocks should use -- delimiters
                logWarn('Unexpected double-dash block delimiters')
            elif openBlockState == 'start':
                # -- delimiter following [open,refpage=...]
                openBlockState = 'inside'

                if pi is None:
                    logWarn('no pageInfo available for opening -- delimiter')
                else:
                    pi.begin = line + 1
                    logDiag('opening -- delimiter: added BEGIN =', pi.begin)
            elif openBlockState == 'inside':
                # -- delimiter ending an open block
                if pi is None:
                    logWarn('no pageInfo available for closing -- delimiter')
                else:
                    pi.end = line - 1
                    logDiag('closing -- delimiter: added END =', pi.end)

                openBlockState = 'outside'
                pi = None
            else:
                logWarn('unknown openBlockState:', openBlockState)

            continue

        matches = INCLUDE.search(text)
        if matches is not None:
            # Something got included, not sure what yet.
            gen_type = matches.group('generated_type')
            refpage_type = matches.group('category')
            name = matches.group('entity_name')

            # This will never match in OpenCL
            if gen_type == 'validity':
                logDiag('Matched validity pattern')
                if pi is not None:
                    if pi.type and not compatiblePageTypes(refpage_type, pi.type):
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.validity = line
                    logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity)
                else:
                    logWarn('validity include:: line NOT inside block')

                continue

            if gen_type == 'api':
                logDiag('Matched include pattern')
                if pi is not None:
                    if pi.include is not None:
                        logDiag('found multiple includes for this block')
                    if pi.type and not compatiblePageTypes(refpage_type, pi.type):
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.include = line
                    logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include)
                else:
                    logWarn('interface include:: line NOT inside block')

                continue

            # Any other include falls through to the remaining checks
            logDiag('ignoring unrecognized include line ', matches.group())

        # Vulkan 1.1 markup allows the last API include construct to be
        # followed by an asciidoctor endif:: construct (and also preceded,
        # at some distance).
        # This looks for endif:: immediately following an include:: line
        # and, if found, moves the include boundary to this line.
        matches = endifPat.search(text)
        if matches is not None and pi is not None:
            if pi.include == line - 1:
                logDiag('Matched endif pattern following include; moving include')
                pi.include = line
            else:
                logDiag('Matched endif pattern (not following include)')

            continue

        if bodyPat.search(text) is not None:
            logDiag('Matched // refBody pattern')
            if pi is not None:
                pi.body = line
                logDiag('added BODY =', pi.body)
            else:
                logWarn('// refBody line NOT inside block')

            continue

        # OpenCL spec uses // refError to tag "validity" (Errors) language,
        # instead of /validity/ includes.
        if errorPat.search(text) is not None:
            logDiag('Matched // refError pattern')
            if pi is not None:
                pi.validity = line
                logDiag('added VALIDITY (refError) =', pi.validity)
            else:
                logWarn('// refError line NOT inside block')

            continue

    if pi is not None:
        logErr('Unclosed open block at EOF!')

    return pageMap


def findRefs(file, filename):
    """Identify reference pages in a list of strings, returning a dictionary of
    pageInfo entries for each one found.

    - file - list of strings making up the file
    - filename - name of the file, used in log messages

    Raises UserWarning (via logErr) if an open block is nested inside
    another, or is still open at the end of the file."""
    setLogSourcefile(filename)
    setLogProcname('findRefs')

    try:
        return _scanRefs(file, filename)
    finally:
        # Always clear the log metadata, including when logErr raises, so
        # later messages are not attributed to this file.
        setLogSourcefile(None)
        setLogProcname(None)
        setLogLine(None)


def getBranch():
    """Determine current git branch

    Returns (branch name, ''), or (None, error output) if the branch name
    cannot be determined. The error output is a bytes object: the stderr of
    the git command, or the text of the OS error if git could not be run."""

    command = ['git', 'symbolic-ref', '--short', 'HEAD']
    try:
        results = subprocess.run(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
    except OSError as exc:
        # git executable missing or not runnable. Encode the message so
        # the error value has the same type as captured stderr.
        return (None, str(exc).encode())

    # git command failed
    if results.returncode != 0 or len(results.stderr) > 0:
        return (None, results.stderr)

    # Remove trailing newline (LF or CRLF) from output and convert to a
    # string. Slicing off exactly one character would leave a stray '\r'
    # or truncate output that lacks a trailing newline.
    branch = results.stdout.rstrip().decode()

    return (branch, '')