#!/usr/bin/python3
#
# Copyright 2016-2022 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""Used for automatic reflow of spec sources to satisfy the agreed layout to
minimize git churn. Most of the logic has to do with detecting asciidoc
markup or block types that should not be reflowed (tables, code) and
ignoring them. It is very likely there are many asciidoc constructs not yet
accounted for in the script, our usage of asciidoc markup is intentionally
somewhat limited.

Also used to insert identifying tags on explicit Valid Usage statements.

Usage: `reflow.py [-noflow] [-tagvu] [-nextvu #] [-maxvu #] [-branch name]
                  [-overwrite] [-nowrite] [-out dir] [-suffix str]
                  [-margin #] [-check FAIL|WARN] [-checkVUID FAIL|WARN]
                  [-diag file] [-warn file] [-log file] files`

- `-noflow` acts as a passthrough, instead of reflowing text. Other
  processing may occur.
- `-tagvu` generates explicit VUID tag for Valid Usage statements which
  do not already have them.
- `-nextvu #` starts VUID tag generation at the specified # instead of
  the value wired into the `reflow.py` script.
- `-maxvu #` specifies the maximum VUID to assign.
- `-branch name` specifies the branch to assign VUIDs for.
- `-overwrite` updates in place (can be risky, make sure there are backups)
- `-nowrite` does not write output files, for use with `-check`.
- `-check FAIL|WARN` runs some consistency checks on markup. If the checks
  fail and the WARN option is given, the script will simply print a warning
  message. If the checks fail and the FAIL option is given, the script will
  exit with an error code. FAIL is for use with continuous integration
  scripts enforcing the checks.
- `-checkVUID FAIL|WARN` detects duplicated VUID numbers, with the same
  WARN / FAIL meaning as `-check`.
- `-out` specifies directory to create output file in, default 'out'
- `-suffix` specifies suffix to add to output files, default ''
- `-margin #` specifies the width to reflow text to, default 76.
- `-diag`, `-warn`, `-log` set the diagnostic file, the warning file, or a
  single log file for both.
- `files` are asciidoc source files from the spec to reflow. If none are
  given, all files under the spec reflow path defined by the API
  conventions are processed.
"""
# reflib is used for the error-logging and file-loading interfaces only
import argparse
import os
import re
import sys
from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
from pathlib import Path

# Vulkan-specific - will consolidate into scripts/ like OpenXR soon
# The 'xml' directory must be on the module search path before the
# apiconventions import below.
sys.path.insert(0, 'xml')

from apiconventions import APIConventions
conventions = APIConventions()

# Markup that always ends a paragraph
#   empty line or whitespace
#   [block options]
#   [[anchor]]
#   //                  comment
#   <<<<                page break
#   :attribute-setting
#   macro-directive::terms
#   +                   standalone list item continuation
#   label::             labelled list - label must be standalone
endPara = re.compile(r'^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$')

# Special case of markup ending a paragraph, used to track the current
# command/structure. This allows for either OpenXR or Vulkan API path
# conventions. Nominally it should use the file suffix defined by the API
# conventions (conventions.file_suffix), except that XR uses '.txt' for
# generated API include files, not '.adoc' like its other includes.
# Matches an include:: line for a generated API include file and captures
# the generated type, the category, and the entity name.
# The literal dots in '../' and '.adoc' are escaped so that they only match
# an actual '.' character rather than any character.
includePat = re.compile(
        r'include::(?P<directory_traverse>((\.\./){1,4}|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.adoc[\[][\]]')

# Find the first pname: or code: pattern in a Valid Usage statement
pnamePat = re.compile(r'pname:(?P<param>\{?\w+\}?)')
codePat = re.compile(r'code:(?P<param>\w+)')

# Markup that is OK in a contiguous paragraph but otherwise passed through
#   .anything (except .., which indicates a literal block)
#   === Section Titles
#   image::path_to_image[attributes]  (apparently a single colon is OK but less idiomatic)
endParaContinue = re.compile(r'^(\.[^.].*|=+ .*|image:.*\[.*\])$')

# Markup for block delimiters whose contents *should* be reformatted
#   --   (exactly two)  (open block)
#   **** (4 or more)    (sidebar block)
#   ==== (4 or more)    (example block)
#   ____ (4 or more)    (quote block)
blockReflow = re.compile(r'^(--|[*=_]{4,})$')

# Fake block delimiters for "common" VU statements
blockCommonReflow = '// Common Valid Usage\n'

# Markup for block delimiters whose contents should *not* be reformatted
#   |=== (3 or more)  (table)
#   ```  (exactly 3 are matched by this pattern)  (listing block)
#   //// (4 or more)  (comment block)
#   ---- (4 or more)  (listing block)
#   .... (4 or more)  (literal block)
#   ++++ (4 or more)  (passthrough block)
#   ~~~~ (4 or more)  (alternate open block delimiter, supported via extension)
blockPassthrough = re.compile(r'^(\|={3,}|[`]{3}|[\-+./~]{4,})$')

# Markup for introducing lists (hanging paragraphs)
#   * bullet
#     ** bullet
#     -- bullet
#   . bullet
#   :: bullet (no longer supported by asciidoctor 2)
#   {empty}:: bullet
#   1.
#      list item
#   <1> source listing callout
beginBullet = re.compile(r'^ *([-*.]+|\{empty\}::|::|[0-9]+[.]|<([0-9]+)>) ')

# Start of an asciidoctor conditional
#   ifdef::
#   ifndef::
conditionalStart = re.compile(r'^(ifdef|ifndef)::')

# Text that (may) not end sentences

# A single letter followed by a period, typically a middle initial.
endInitial = re.compile(r'^[A-Z]\.$')
# An abbreviation, which does not (usually) end a line.
endAbbrev = re.compile(r'(e\.g|i\.e|c\.f|vs)\.$', re.IGNORECASE)

class ReflowState:
    """State machine for reflowing.

    Represents the state of the reflow operation"""
    def __init__(self,
                 filename,
                 margin = 76,
                 file = sys.stdout,
                 breakPeriod = True,
                 reflow = True,
                 nextvu = None,
                 maxvu = None):

        self.blockStack = [ None ]
        """The last element is a line with the asciidoc block delimiter that is currently in effect,
        such as '--', '----', '****', '====', or '++++'.
        This affects whether or not the block contents should be formatted."""

        self.reflowStack = [ True ]
        """The last element is True or False if the current blockStack contents
        should be reflowed."""
        self.vuStack = [ False ]
        """the last element is True or False if the current blockStack contents
        are an explicit Valid Usage block."""

        self.margin = margin
        """margin to reflow text to."""

        self.para = []
        """list of lines in the paragraph being accumulated.
        When this is non-empty, there is a current paragraph."""

        self.lastTitle = False
        """true if the previous line was a document title line
        (e.g. :leveloffset: 0 - no attempt to track changes to this is made)."""

        self.leadIndent = 0
        """indent level (in spaces) of the first line of a paragraph."""

        self.hangIndent = 0
        """indent level of the remaining lines of a paragraph."""

        self.file = file
        """file handle to write to."""

        self.filename = filename
        """base name of file being read from."""

        self.lineNumber = 0
        """line number being read from the input file."""

        self.breakPeriod = breakPeriod
        """True if justification should break to a new line after the end of a sentence."""

        self.breakInitial = True
        """True if justification should break to a new line after
        something that appears to be an initial in someone's name. **TBD**"""

        self.reflow = reflow
        """True if text should be reflowed, False to pass through unchanged."""

        self.vuPrefix = 'VUID'
        """Prefix of generated Valid Usage tags"""

        self.vuFormat = '{0}-{1}-{2}-{3:0>5d}'
        """Format string for generating Valid Usage tags.
        First argument is vuPrefix, second is command/struct name, third is parameter name, fourth is the tag number."""

        self.nextvu = nextvu
        """Integer to start tagging un-numbered Valid Usage statements with,
        or None if no tagging should be done."""

        self.maxvu = maxvu
        """Maximum tag to use for Valid Usage statements. If None while
        nextvu is set, no upper bound is enforced."""

        self.defaultApiName = '{refpage}'
        self.apiName = self.defaultApiName
        """String name of an API structure or command for VUID tag
        generation, or {refpage} if one has not been included in this file
        yet."""

    def incrLineNumber(self):
        """Advance the input line counter by one."""
        self.lineNumber = self.lineNumber + 1

    def printLines(self, lines):
        """Print an array of lines with newlines already present.

        Nothing is written when the output file handle is None."""
        if len(lines) > 0:
            logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='')

        if self.file is not None:
            for line in lines:
                print(line, file=self.file, end='')

    def endSentence(self, word):
        """Return True if word ends with a sentence-period, False otherwise.

        Allows for contraction cases which will not end a line:

         - A single letter (if breakInitial is True)
         - Abbreviations: 'c.f.', 'e.g.', 'i.e.' (or mixed-case versions)"""
        if (word[-1:] != '.' or
            endAbbrev.search(word) or
                (self.breakInitial and endInitial.match(word))):
            return False

        return True

    def vuidAnchor(self, word):
        """Return True if word is a Valid Usage ID Tag anchor."""
        return (word[0:7] == '[[VUID-')

    def isOpenBlockDelimiter(self, line):
        """Returns True if line is an open block delimiter."""
        return line[0:2] == '--'

    def reflowPara(self):
        """Reflow the current paragraph, respecting the paragraph lead and
        hanging indentation levels.

        The algorithm also respects trailing '+' signs that indicate embedded newlines,
        and will not reflow a very long word immediately after a bullet point.

        Just return the paragraph unchanged if the -noflow argument was
        given.

        Returns the list of output lines, each ending in a newline."""
        if not self.reflow:
            return self.para

        logDiag('reflowPara lead indent = ', self.leadIndent,
                'hangIndent =', self.hangIndent,
                'para:', self.para[0], end='')

        # Total words processed (we care about the *first* word vs. others)
        wordCount = 0

        # Tracks the *previous* word processed. It must not be empty.
        prevWord = ' '

        # Track the previous line and paragraph being indented, if any
        outLine = None
        outPara = []

        for line in self.para:
            words = line.rstrip().split()
            lastIndex = len(words) - 1

            for i, word in enumerate(words):
                wordLen = len(word)
                wordCount += 1

                # Trailing ' +' must stay on the same line
                endEscape = (i == lastIndex and word == '+')

                if wordCount == 1:
                    # The first word of the paragraph is treated specially.
                    # The loop logic becomes trickier if all this code is
                    # done prior to looping over lines and words, so all the
                    # setup logic is done here.

                    outPara = []
                    outLine = ''.ljust(self.leadIndent) + word
                    outLineLen = self.leadIndent + wordLen

                    # If the paragraph begins with a bullet point, generate
                    # a hanging indent level if there is not one already.
                    if beginBullet.match(self.para[0]):
                        bulletPoint = True
                        if len(self.para) > 1:
                            logDiag('reflowPara first line matches bullet point',
                                    'but indent already hanging @ input line',
                                    self.lineNumber)
                        else:
                            logDiag('reflowPara first line matches bullet point -'
                                    'single line, assuming hangIndent @ input line',
                                    self.lineNumber)
                            self.hangIndent = outLineLen + 1
                    else:
                        bulletPoint = False
                else:
                    # Possible actions to take with this word
                    #
                    # addWord - add word to current line
                    # closeLine - append line and start a new (null) one
                    # startLine - add word to a new line

                    # Default behavior if all the tests below fail is to add
                    # this word to the current line, and keep accumulating
                    # that line.
                    (addWord, closeLine, startLine) = (True, False, False)

                    # How long would this line be if the word were added?
                    newLen = outLineLen + 1 + wordLen

                    # Are we on the first word following a bullet point?
                    firstBullet = (wordCount == 2 and bulletPoint)

                    if endEscape:
                        # If the new word ends the input line with ' +',
                        # add it to the current line.

                        (addWord, closeLine, startLine) = (True, True, False)
                    elif self.vuidAnchor(word):
                        # If the new word is a Valid Usage anchor, break the
                        # line afterwards. Note that this should only happen
                        # immediately after a bullet point, but we do not
                        # currently check for this.
                        (addWord, closeLine, startLine) = (True, True, False)
                    elif newLen > self.margin:
                        if firstBullet:
                            # If the word follows a bullet point, add it to
                            # the current line no matter its length.

                            (addWord, closeLine, startLine) = (True, True, False)
                        elif beginBullet.match(word + ' '):
                            # If the word *is* a bullet point, add it to
                            # the current line no matter its length.
                            # This avoids an innocent inline '-' or '*'
                            # turning into a bogus bullet point.

                            (addWord, closeLine, startLine) = (True, True, False)
                        else:
                            # The word overflows, so add it to a new line.

                            (addWord, closeLine, startLine) = (False, True, True)
                    elif (self.breakPeriod and
                          (wordCount > 2 or not firstBullet) and
                          self.endSentence(prevWord)):
                        # If the previous word ends a sentence and
                        # breakPeriod is set, start a new line.
                        # The complicated logic allows for leading bullet
                        # points which are periods (implicitly numbered lists).
                        # @@@ But not yet for explicitly numbered lists.

                        (addWord, closeLine, startLine) = (False, True, True)

                    # Add a word to the current line
                    if addWord:
                        if outLine:
                            outLine += ' ' + word
                            outLineLen = newLen
                        else:
                            # Fall through to startLine case if there is no
                            # current line yet.
                            startLine = True

                    # Add current line to the output paragraph. Force
                    # starting a new line, although we do not yet know if it
                    # will ever have contents.
                    if closeLine:
                        if outLine:
                            outPara.append(outLine + '\n')
                            outLine = None

                    # Start a new line and add a word to it
                    if startLine:
                        outLine = ''.ljust(self.hangIndent) + word
                        outLineLen = self.hangIndent + wordLen

                # Track the previous word, for use in breaking at end of
                # a sentence
                prevWord = word

        # Add this line to the output paragraph.
        if outLine:
            outPara.append(outLine + '\n')

        return outPara

    def emitPara(self):
        """Emit a paragraph, possibly reflowing it depending on the block context.

        If the paragraph is inside a Valid Usage block and VUID tagging is
        enabled (nextvu is not None), an untagged list item first gets a
        VUID anchor inserted after its bullet.

        Resets the paragraph accumulator."""
        if self.para != []:
            if self.vuStack[-1] and self.nextvu is not None:
                # If:
                #   - this paragraph is in a Valid Usage block,
                #   - VUID tags are being assigned,
                # Try to assign VUIDs

                if nestedVuPat.search(self.para[0]):
                    # Check for nested bullet points. These should not be
                    # assigned VUIDs, nor present at all, because they break
                    # the VU extractor.
                    logWarn(self.filename + ': Invalid nested bullet point in VU block:', self.para[0])
                elif self.vuPrefix not in self.para[0]:
                    # If:
                    #   - a tag is not already present, and
                    #   - the paragraph is a properly marked-up list item
                    # Then add a VUID tag starting with the next free ID.

                    # Split the first line after the bullet point
                    matches = vuPat.search(self.para[0])
                    if matches is not None:
                        logDiag('findRefs: Matched vuPat on line:', self.para[0], end='')
                        head = matches.group('head')
                        tail = matches.group('tail')

                        # Use the first pname: or code: tag in the paragraph as
                        # the parameter name in the VUID tag. This will not always
                        # be correct, but should be highly reliable.
                        for vuLine in self.para:
                            matches = pnamePat.search(vuLine)
                            if matches is not None:
                                break
                            matches = codePat.search(vuLine)
                            if matches is not None:
                                break

                        if matches is not None:
                            paramName = matches.group('param')
                        else:
                            paramName = 'None'
                            logWarn(self.filename,
                                    'No param name found for VUID tag on line:',
                                    self.para[0])

                        newline = (head + ' [[' +
                                   self.vuFormat.format(self.vuPrefix,
                                                        self.apiName,
                                                        paramName,
                                                        self.nextvu) + ']] ' + tail)

                        logDiag('Assigning', self.vuPrefix, self.apiName, self.nextvu,
                                ' on line:', self.para[0], '->', newline, 'END')

                        # Do not actually assign the VUID unless it is in the
                        # reserved range. When no maximum was supplied (maxvu
                        # is None, e.g. a start value given without a limit)
                        # there is no range to enforce; comparing an int
                        # against None would raise TypeError.
                        if self.maxvu is None or self.nextvu <= self.maxvu:
                            if self.nextvu == self.maxvu:
                                # The last VUID of the range is still
                                # assigned below; later ones are not.
                                logWarn('Skipping VUID assignment, no more VUIDs available')
                            self.para[0] = newline
                            self.nextvu = self.nextvu + 1
                    # else:
                    #     There are only a few cases of this, and they are all
                    #     legitimate. Leave detecting this case to another tool
                    #     or hand inspection.
                    #     logWarn(self.filename + ': Unexpected non-bullet item in VU block (harmless if following an ifdef):',
                    #             self.para[0])

            if self.reflowStack[-1]:
                self.printLines(self.reflowPara())
            else:
                self.printLines(self.para)

        # Reset the paragraph, including its indentation level
        self.para = []
        self.leadIndent = 0
        self.hangIndent = 0

    def endPara(self, line):
        """'line' ends a paragraph and should itself be emitted.
        line may be None to indicate EOF or other exception."""
        logDiag('endPara line', self.lineNumber, ': emitting paragraph')

        # Emit current paragraph, this line, and reset tracker
        self.emitPara()

        if line:
            self.printLines( [ line ] )

    def endParaContinue(self, line):
        """'line' ends a paragraph (unless there is already a paragraph being
        accumulated, e.g. len(para) > 0 - currently not implemented)"""
        self.endPara(line)

    def endBlock(self, line, reflow = False, vuBlock = False):
        """'line' begins or ends a block.

        The line ends the current block if it equals the delimiter on top
        of blockStack; otherwise it starts a new block.

        If beginning a block, tag whether or not to reflow the contents.

        vuBlock is True if the previous line indicates this is a Valid Usage block."""
        self.endPara(line)

        if self.blockStack[-1] == line:
            logDiag('endBlock line', self.lineNumber,
                    ': popping block end depth:', len(self.blockStack),
                    ':', line, end='')

            # Reset apiName at the end of an open block.
            # Open blocks cannot be nested (at present), so this is safe.
            if self.isOpenBlockDelimiter(line):
                logDiag('reset apiName to empty at line', self.lineNumber)
                self.apiName = self.defaultApiName
            else:
                logDiag('NOT resetting apiName to default at line', self.lineNumber)

            self.blockStack.pop()
            self.reflowStack.pop()
            self.vuStack.pop()
        else:
            # Start a block
            self.blockStack.append(line)
            self.reflowStack.append(reflow)
            self.vuStack.append(vuBlock)

            logDiag('endBlock reflow =', reflow, ' line', self.lineNumber,
                    ': pushing block start depth', len(self.blockStack),
                    ':', line, end='')

    def endParaBlockReflow(self, line, vuBlock):
        """'line' begins or ends a block. The paragraphs in the block *should* be
        reformatted (e.g. a NOTE)."""
        self.endBlock(line, reflow = True, vuBlock = vuBlock)

    def endParaBlockPassthrough(self, line):
        """'line' begins or ends a block. The paragraphs in the block should
        *not* be reformatted (e.g. a code listing)."""
        self.endBlock(line, reflow = False)

    def addLine(self, line):
        """'line' starts or continues a paragraph.

        Paragraphs may have "hanging indent", e.g.

        ```
          * Bullet point...
            ... continued
        ```

        In this case, when the higher indentation level ends, so does the
        paragraph."""
        logDiag('addLine line', self.lineNumber, ':', line, end='')

        # See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string
        indent = len(line) - len(line.lstrip())

        # A hanging paragraph ends due to a less-indented line.
        if self.para != [] and indent < self.hangIndent:
            logDiag('addLine: line reduces indentation, emit paragraph')
            self.emitPara()

        # A bullet point (or something that looks like one) always ends the
        # current paragraph.
        if beginBullet.match(line):
            logDiag('addLine: line matches beginBullet, emit paragraph')
            self.emitPara()

        if self.para == []:
            # Begin a new paragraph
            self.para = [ line ]
            self.leadIndent = indent
            self.hangIndent = indent
        else:
            # Add a line to a paragraph. Increase the hanging indentation
            # level - once.
            if self.hangIndent == self.leadIndent:
                self.hangIndent = indent
            self.para.append(line)

def apiMatch(oldname, newname):
    """Returns whether oldname and newname match, up to an API suffix.
    This should use the API map instead of this heuristic, since aliases
    like VkPhysicalDeviceVariablePointerFeatures ->
    VkPhysicalDeviceVariablePointersFeatures are not recognized."""
    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    return oldname.rstrip(upper) == newname.rstrip(upper)

def reflowFile(filename, args):
    """Reflow (and optionally VUID-tag and check) a single asciidoc file.

    filename is the path of the file to read. args is the parsed
    command-line namespace; this function reads overwrite, outDir, suffix,
    nowrite, margin, noflow, nextvu, maxvu and check from it, and updates
    args.vuidDict, args.warnCount and args.nextvu.

    Returns None. Returns early if the file could not be loaded or the
    output file could not be opened."""
    logDiag('reflow: filename', filename)

    lines, newline_string = loadFile(filename)
    if lines is None:
        return

    # Output file handle and reflow object for this file. There are no race
    # conditions on overwriting the input, but it is not recommended unless
    # you have backing store such as git.

    if args.overwrite:
        outFilename = filename
    else:
        outDir = Path(args.outDir).resolve()
        # TOCTOU-safe directory creation
        try:
            outDir.mkdir()
        except FileExistsError:
            pass

        outFilename = str(outDir / (os.path.basename(filename) + args.suffix))

    if args.nowrite:
        fp = None
    else:
        try:
            fp = open(outFilename, 'w', encoding='utf8', newline=newline_string)
        except (OSError, ValueError):
            logWarn('Cannot open output file', outFilename, ':', sys.exc_info()[0])
            return

    state = ReflowState(filename,
                        margin = args.margin,
                        file = fp,
                        reflow = not args.noflow,
                        nextvu = args.nextvu,
                        maxvu = args.maxvu)

    # The try/finally makes sure the output file is closed even if
    # processing a line raises.
    try:
        for line in lines:
            state.incrLineNumber()

            # Is this a title line (leading '= ' followed by text)?
            thisTitle = False

            matches = vuidPat.search(line)
            if matches is not None:
                # If we found a VUID pattern, add the (filename,line) it was
                # found at to a list for that VUID, to find duplicates.
                vuid = matches.group('vuid')
                args.vuidDict.setdefault(vuid, []).append([filename, line])

            # The logic here is broken. If we are in a non-reflowable block and
            # this line *does not* end the block, it should always be
            # accumulated.

            # Test for a blockCommonReflow delimiter comment first, to avoid
            # treating it solely as a end-Paragraph marker comment.
            if line == blockCommonReflow:
                # Starting or ending a pseudo-block for "common" VU statements.
                state.endParaBlockReflow(line, vuBlock = True)

            elif blockReflow.match(line):
                # Starting or ending a block whose contents may be reflowed.
                # Blocks cannot be nested.

                # Is this is an explicit Valid Usage block?
                vuBlock = (state.lineNumber > 1 and
                           lines[state.lineNumber-2] == '.Valid Usage\n')

                state.endParaBlockReflow(line, vuBlock)

            elif endPara.match(line):
                # Ending a paragraph. Emit the current paragraph, if any, and
                # prepare to begin a new paragraph.

                state.endPara(line)

                # If this is an include:: line starting the definition of a
                # structure or command, track that for use in VUID generation.

                matches = includePat.search(line)
                if matches is not None:
                    generated_type = matches.group('generated_type')
                    include_type = matches.group('category')
                    if generated_type == 'api' and include_type in ('protos', 'structs', 'funcpointers'):
                        apiName = matches.group('entity_name')
                        if state.apiName != state.defaultApiName:
                            # This happens when there are multiple API include
                            # lines in a single block. The style guideline is to
                            # always place the API which others are promoted to
                            # first. In virtually all cases, the promoted API
                            # will differ solely in the vendor suffix (or
                            # absence of it), which is benign.
                            if not apiMatch(state.apiName, apiName):
                                logDiag(f'Promoted API name mismatch at line {state.lineNumber}: {apiName} does not match state.apiName (this is OK if it is just a spelling alias)')
                        else:
                            state.apiName = apiName

            elif endParaContinue.match(line):
                # For now, always just end the paragraph.
                # Could check see if len(para) > 0 to accumulate.

                state.endParaContinue(line)

                # If it is a title line, track that
                if line[0:2] == '= ':
                    thisTitle = True

            elif blockPassthrough.match(line):
                # Starting or ending a block whose contents must not be reflowed.
                # These are tables, etc. Blocks cannot be nested.

                state.endParaBlockPassthrough(line)
            elif state.lastTitle:
                # The previous line was a document title line. This line
                # is the author / credits line and must not be reflowed.

                state.endPara(line)
            else:
                # Just accumulate a line to the current paragraph. Watch out for
                # hanging indents / bullet-points and track that indent level.

                state.addLine(line)

                # This test looks for disallowed conditionals inside Valid Usage
                # blocks, by checking if (a) this line does not start a new VU
                # (bullet point) and (b) the previous line starts an asciidoctor
                # conditional (ifdef:: or ifndef::).

                if (args.check
                    and state.vuStack[-1]
                    and not beginBullet.match(line)
                    and conditionalStart.match(lines[state.lineNumber-2])):

                    logWarn('Detected embedded Valid Usage conditional: {}:{}'.format(
                            filename, state.lineNumber - 1))
                    # Keep track of warning check count
                    args.warnCount = args.warnCount + 1

            state.lastTitle = thisTitle

        # Cleanup at end of file
        state.endPara(None)

        # Check for sensible block nesting
        if len(state.blockStack) > 1:
            logWarn('file', filename,
                    'mismatched asciidoc block delimiters at EOF:',
                    state.blockStack[-1])
    finally:
        if fp is not None:
            fp.close()

    # Update the 'nextvu' value
    if args.nextvu != state.nextvu:
        logWarn('Updated nextvu to', state.nextvu, 'after file', filename)
        args.nextvu = state.nextvu

def reflowAllAdocFiles(folder_to_reflow, args):
    """Reflow every file below folder_to_reflow whose name ends with
    conventions.file_suffix.

    Sub-folders whose lower-cased name is in
    conventions.spec_no_reflow_dirs are skipped, together with everything
    below them. os.walk already descends into sub-folders, so the list it
    yields is pruned in place instead of recursing here; recursing as well
    would process each nested file more than once and would still visit
    the excluded folders."""
    for root, subdirs, files in os.walk(folder_to_reflow):
        for file in files:
            if file.endswith(conventions.file_suffix):
                file_path = os.path.join(root, file)
                reflowFile(file_path, args)

        keep = []
        for subdir in subdirs:
            sub_folder = os.path.join(root, subdir)
            print('Sub-folder = %s' % sub_folder)
            if subdir.lower() not in conventions.spec_no_reflow_dirs:
                print(' Parsing = %s' % sub_folder)
                keep.append(subdir)
            else:
                print(' Skipping = %s' % sub_folder)

        # In-place assignment tells os.walk (top-down mode) which
        # sub-folders to descend into.
        subdirs[:] = keep

# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.

# Explicit Valid Usage list item with one or more leading asterisks
# The re.DOTALL is needed to prevent vuPat.search() from stripping
# the trailing newline.
# Splits a list item into the bullet ('head': leading space plus one or more
# asterisks) and the remaining text ('tail'). re.DOTALL lets 'tail' keep the
# trailing newline.
# NOTE(review): the run of spaces before the bullet in this pattern and in
# nestedVuPat is reproduced exactly as it appears in the reviewed text -
# confirm it matches the indentation actually used for Valid Usage bullets.
vuPat = re.compile(r'^(?P<head> [*]+)( *)(?P<tail>.*)', re.DOTALL)

# VUID with the numeric portion captured in the match object
vuidPat = re.compile(r'VUID-[^-]+-[^-]+-(?P<vuid>[0-9]+)')

# Pattern matching leading nested bullet points
nestedVuPat = re.compile(r'^ \*\*')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('-diag', action='store', dest='diagFile',
                        help='Set the diagnostic file')
    parser.add_argument('-warn', action='store', dest='warnFile',
                        help='Set the warning file')
    parser.add_argument('-log', action='store', dest='logFile',
                        help='Set the log file for both diagnostics and warnings')
    parser.add_argument('-overwrite', action='store_true',
                        help='Overwrite input filenames instead of writing different output filenames')
    parser.add_argument('-out', action='store', dest='outDir',
                        default='out',
                        help='Set the output directory in which updated files are generated (default: out)')
    parser.add_argument('-nowrite', action='store_true',
                        help='Do not write output files, for use with -check')
    parser.add_argument('-check', action='store', dest='check',
                        help='Run markup checks and warn if WARN option is given, error exit if FAIL option is given')
    parser.add_argument('-checkVUID', action='store', dest='checkVUID',
                        help='Detect duplicated VUID numbers and warn if WARN option is given, error exit if FAIL option is given')
    parser.add_argument('-tagvu', action='store_true',
                        help='Tag un-tagged Valid Usage statements starting at the value wired into reflow.py')
    parser.add_argument('-nextvu', action='store', dest='nextvu', type=int,
                        default=None,
                        help='Tag un-tagged Valid Usage statements starting at the specified base VUID instead of the value wired into reflow.py')
    parser.add_argument('-maxvu', action='store', dest='maxvu', type=int,
                        default=None,
                        help='Specify maximum VUID instead of the value wired into vuidCounts.py')
    parser.add_argument('-branch', action='store', dest='branch',
                        help='Specify branch to assign VUIDs for')
    parser.add_argument('-noflow', action='store_true', dest='noflow',
                        help='Do not reflow text. Other actions may apply')
    parser.add_argument('-margin', action='store', type=int, dest='margin',
                        default=76,
                        help='Width to reflow text, defaults to 76 characters')
    parser.add_argument('-suffix', action='store', dest='suffix',
                        default='',
                        help='Set the suffix added to updated file names (default: none)')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='a filename to reflow text in')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    setLogFile(True,  True, args.logFile)
    setLogFile(True, False, args.diagFile)
    setLogFile(False, True, args.warnFile)

    if args.overwrite:
        logWarn("reflow.py: will overwrite all input files")

    errors = ''
    if args.branch is None:
        (args.branch, errors) = getBranch()
    if args.branch is None:
        # This is not fatal unless VUID assignment is required
        if args.tagvu:
            logErr('Cannot determine current git branch, so cannot assign VUIDs:', errors)

    # These are only given real values when the wired-in VUID ranges are
    # loaded below. Binding them here keeps the end-of-run update from
    # raising NameError when -nextvu was supplied on the command line.
    vuidCounts = None
    startVUID = None

    if args.tagvu and args.nextvu is None:
        # Moved here since vuidCounts is only needed in the internal
        # repository
        from vuidCounts import vuidCounts

        if args.branch not in vuidCounts:
            logErr('Branch', args.branch, 'not in vuidCounts, cannot continue')
        maxVUID = vuidCounts[args.branch][1]
        startVUID = vuidCounts[args.branch][2]
        args.nextvu = startVUID
        args.maxvu = maxVUID

    if args.nextvu is not None:
        logWarn('Tagging untagged Valid Usage statements starting at', args.nextvu)

    # Count of markup check warnings encountered
    # This is added to the argparse structure
    args.warnCount = 0

    # Dictionary of VUID numbers found, containing a list of (file, line) on
    # which that number was found
    # This is added to the argparse structure
    args.vuidDict = {}

    # If no files are specified, reflow the entire specification chapters folder
    if not args.files:
        folder_to_reflow = conventions.spec_reflow_path
        logWarn('Reflowing all asciidoc files under', folder_to_reflow)
        reflowAllAdocFiles(folder_to_reflow, args)
    else:
        for file in args.files:
            reflowFile(file, args)

    if args.warnCount > 0:
        if args.check == 'FAIL':
            logErr('Failed with', args.warnCount, 'markup errors detected.\n' +
                   'To fix these, you can take actions such as:\n' +
                   ' * Moving conditionals outside VU start / end without changing VU meaning\n' +
                   ' * Refactor conditional text using terminology defined conditionally outside the VU itself\n' +
                   ' * Remove the conditional (allowable when this just affects command / structure / enum names)\n')
        else:
            logWarn('Total warning count for markup issues is', args.warnCount)

    # Look for duplicated VUID numbers
    if args.checkVUID:
        dupVUIDs = 0
        for vuid in sorted(args.vuidDict):
            found = args.vuidDict[vuid]
            if len(found) > 1:
                logWarn('Duplicate VUID number {} found in files:'.format(vuid))
                for (file, line) in found:
                    logWarn(' {}: {}'.format(file, line))
                dupVUIDs = dupVUIDs + 1

        if dupVUIDs > 0:
            if args.checkVUID == 'FAIL':
                logErr('Failed with', dupVUIDs, 'duplicated VUID numbers found.\n' +
                       'To fix this, either convert these to commonvalidity VUs if possible, or strip\n' +
                       'the VUIDs from all but one of the duplicates and regenerate new ones.')
            else:
                logWarn('Total number of duplicated VUID numbers is', dupVUIDs)

    # The counts file can only be rewritten when it was loaded above, i.e.
    # when the starting VUID came from vuidCounts rather than from -nextvu.
    if (vuidCounts is not None
            and args.nextvu is not None
            and args.nextvu != startVUID):
        # Update next free VUID to assign
        vuidCounts[args.branch][2] = args.nextvu
        try:
            reflow_count_file_path = os.path.dirname(os.path.realpath(__file__))
            reflow_count_file_path += '/vuidCounts.py'
            # The context manager closes the file even if a write fails
            with open(reflow_count_file_path, 'w', encoding='utf8') as reflow_count_file:
                print('# Do not edit this file!', file=reflow_count_file)
                print('# VUID ranges reserved for branches', file=reflow_count_file)
                print('# Key is branch name, value is [ start, end, nextfree ]', file=reflow_count_file)
                print('vuidCounts = {', file=reflow_count_file)
                for key in sorted(vuidCounts):
                    print(" '{}': [ {}, {}, {} ],".format(
                        key,
                        vuidCounts[key][0],
                        vuidCounts[key][1],
                        vuidCounts[key][2]),
                        file=reflow_count_file)
                print('}', file=reflow_count_file)
        except OSError:
            logWarn('Cannot open output count file vuidCounts.py', ':', sys.exc_info()[0])