#!/usr/bin/python3
#
# Copyright (c) 2016-2018 The Khronos Group Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Used for automatic reflow of Vulkan spec to satisfy the agreed layout to
# minimize git churn. Most of the logic has to do with detecting asciidoc
# markup or block types that *shouldn't* be reflowed (tables, code) and
# ignoring them. It's very likely there are many asciidoc constructs not yet
# accounted for in the script, our usage of asciidoc markup is intentionally
# somewhat limited.
#
# Also used to insert identifying tags on explicit Valid Usage statements.

# Usage: reflow.py [-noflow] [-tagvu] [-nextvu #] [-overwrite] [-out dir] [-suffix str] files
#   -noflow acts as a passthrough, instead of reflowing text. Other
#       processing may occur.
#   -tagvu generates explicit VUID tag for Valid Usage statements which
#       don't already have them.
#   -nextvu # starts VUID tag generation at the specified # instead of
#       the value wired into the reflow.py script.
#   -overwrite updates in place (can be risky, make sure there are backups)
#   -out specifies directory to create output file in, default 'out'
#   -suffix specifies suffix to add to output files, default ''
#   files are asciidoc source files from the Vulkan spec to reflow.

# For error and file-loading interfaces only
from reflib import *
from reflow_count import startVUID

import argparse, copy, os, pdb, re, string, sys

# All patterns below are raw strings so that regex escapes such as '\[' or
# '\w' reach the re module untouched instead of being (mis)interpreted as
# Python string escapes.

# Markup that always ends a paragraph
#   empty line or whitespace
#   [block options]
#   [[anchor]]
#   //                  comment
#   <<<<                page break
#   :attribute-setting
#   macro-directive::terms
#   +                   standalone list item continuation
#   label::             labelled list - label must be standalone
endPara = re.compile(r'^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$')

# Special case of markup ending a paragraph, used to track the current command/structure
includePat = re.compile(r'^include::(\.\./)+api/+(?P<type>\w+)/(?P<name>\w+).txt\[\]')

# Find the first pname: pattern in a Valid Usage statement
pnamePat = re.compile(r'pname:(?P<param>\w+)')

# Markup that's OK in a contiguous paragraph but otherwise passed through
#   .anything
#   === Section Titles
endParaContinue = re.compile(r'^(\..*|=+ .*)$')

# Markup for block delimiters whose contents *should* be reformatted
#   --   (exactly two)  (open block)
#   **** (4 or more)    (sidebar block - why do we have these?!)
#   ==== (4 or more)    (example block)
#   ____ (4 or more)    (quote block)
blockReflow = re.compile(r'^(--|[*=_]{4,})$')

# Markup for block delimiters whose contents should *not* be reformatted
#   |=== (3 or more)  (table)
#   ++++ (4 or more)  (passthrough block)
#   .... (4 or more)  (literal block)
#   //// (4 or more)  (comment block)
#   ---- (4 or more)  (listing block)
# Note that '****' sidebar blocks are *not* matched here; they are matched
# by blockReflow above and so their contents are reflowed.
blockPassthrough = re.compile(r'^(\|={3,}|[-+./]{4,})$')

# Markup for introducing bullet points (hanging paragraphs)
#   *   bullet
#   **  bullet
#   --  bullet
#   .   bullet
#   ::  bullet
# The '-' must come first in the character class so that it is a literal
# hyphen; written as '[*-.]' it is a range which also matches '+' and ','.
beginBullet = re.compile(r'^ *([-*.]+|::) ')

# Text that (may) not end sentences

# A single letter followed by a period, typically a middle initial.
endInitial = re.compile(r'^[A-Z]\.$')
# An abbreviation, which doesn't (usually) end a line.
endAbbrev = re.compile(r'(e\.g|i\.e|c\.f)\.$', re.IGNORECASE)

# State machine for reflowing.
#
# blockStack - The last element is a line with the asciidoc block delimiter
#   that's currently in effect, such as
#   '--', '----', '****', '======', or '+++++++++'.
#   This affects whether or not the block contents should be formatted.
# reflowStack - The last element is True or False if the current blockStack
#   contents should be reflowed.
# vuStack - the last element is True or False if the current blockStack
#   contents are an explicit Valid Usage block.
# margin - margin to reflow text to.
# para - list of lines in the paragraph being accumulated. When this is
#   non-empty, there is a current paragraph.
# lastTitle - true if the previous line was a document title line (e.g.
#   :leveloffset: 0 - no attempt to track changes to this is made).
# leadIndent - indent level (in spaces) of the first line of a paragraph.
# hangIndent - indent level of the remaining lines of a paragraph.
# file - file pointer to write to.
# filename - base name of file being read from.
# lineNumber - line number being read from the input file.
# breakPeriod - True if justification should break to a new line after
#   the end of a sentence.
# breakInitial - True if justification should break to a new line after
#   something that appears to be an initial in someone's name. **TBD**
# reflow - True if text should be reflowed, False to pass through unchanged.
# vuPrefix - Prefix of generated Valid Usage tags
# vuFormat - Format string for generating Valid Usage tags. First argument
#   is vuPrefix, second is command/struct name, third is parameter name,
#   fourth is the tag number.
# nextvu - Integer to start tagging un-numbered Valid Usage statements with,
#   or None if no tagging should be done.
# apiName - String name of a Vulkan structure or command for VUID tag
#   generation, or the empty string if one hasn't been included in this
#   file yet.
class ReflowState:
    """Represents the state of the reflow operation"""
    def __init__(self,
                 filename,
                 margin = 76,
                 file = sys.stdout,
                 breakPeriod = True,
                 reflow = True,
                 nextvu = None):
        # Each stack starts with one entry describing the "no enclosing
        # block" context: reflow enabled, not a Valid Usage block.
        self.blockStack = [ None ]
        self.reflowStack = [ True ]
        self.vuStack = [ False ]
        self.margin = margin
        self.para = []
        self.lastTitle = False
        self.leadIndent = 0
        self.hangIndent = 0
        self.file = file
        self.filename = filename
        self.lineNumber = 0
        self.breakPeriod = breakPeriod
        self.breakInitial = True
        self.reflow = reflow
        self.vuPrefix = 'VUID'
        self.vuFormat = '{0}-{1}-{2}-{3:0>5d}'
        self.nextvu = nextvu
        self.apiName = ''

    # Advance the count of input lines read so far by one.
    def incrLineNumber(self):
        self.lineNumber = self.lineNumber + 1

    # Print an array of lines with newlines already present.
    # An empty array prints nothing.
    def printLines(self, lines):
        if not lines:
            return

        logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='')
        for line in lines:
            print(line, file=self.file, end='')

    # Returns True if word ends with a sentence-period, False otherwise.
    # Allows for contraction cases which won't end a line:
    #  - A single letter (if breakInitial is True)
    #  - Abbreviations: 'c.f.', 'e.g.', 'i.e.' (or mixed-case versions)
    def endSentence(self, word):
        if (word[-1:] != '.' or
            endAbbrev.search(word) or
            (self.breakInitial and endInitial.match(word))):
            return False
        else:
            return True

    # Returns True if word is a Valid Usage ID Tag anchor.
    def vuidAnchor(self, word):
        return (word[0:7] == '[[VUID-')

    # Reflow the current paragraph, respecting the paragraph lead and
    # hanging indentation levels. The algorithm also respects trailing '+'
    # signs that indicate imbedded newlines, and will not reflow a very long
    # word immediately after a bullet point.
    # Just return the paragraph unchanged if the -noflow argument was
    # given, or if the paragraph contains no words at all.
    # Returns a list of output lines, each ending in a newline.
    def reflowPara(self):
        if not self.reflow or not self.para:
            return self.para

        logDiag('reflowPara lead indent = ', self.leadIndent,
                'hangIndent =', self.hangIndent,
                'para:', self.para[0], end='')

        # Total words processed (we care about the *first* word vs. others)
        wordCount = 0

        # Tracks the *previous* word processed. It must not be empty.
        prevWord = ' '

        # Output accumulated so far. These are initialized up front so they
        # are always bound, even for a paragraph that contains no words.
        outPara = []
        outLine = None
        outLineLen = 0
        bulletPoint = False

        for line in self.para:
            words = line.rstrip().split()
            lastIndex = len(words) - 1

            for i, word in enumerate(words):
                wordLen = len(word)
                wordCount += 1

                # Trailing ' +' must stay on the same line
                endEscape = (i == lastIndex and word == '+')

                if wordCount == 1:
                    # The first word of the paragraph is treated specially.
                    # The loop logic becomes trickier if all this code is
                    # done prior to looping over lines and words, so all the
                    # setup logic is done here.

                    outLine = ''.ljust(self.leadIndent) + word
                    outLineLen = self.leadIndent + wordLen

                    # If the paragraph begins with a bullet point, generate
                    # a hanging indent level if there isn't one already.
                    if beginBullet.match(self.para[0]):
                        bulletPoint = True
                        if len(self.para) > 1:
                            logDiag('reflowPara first line matches bullet point',
                                    'but indent already hanging @ input line',
                                    self.lineNumber)
                        else:
                            logDiag('reflowPara first line matches bullet point -'
                                    'single line, assuming hangIndent @ input line',
                                    self.lineNumber)
                            self.hangIndent = outLineLen + 1
                    else:
                        bulletPoint = False
                else:
                    # Possible actions to take with this word
                    #
                    # addWord - add word to current line
                    # closeLine - append line and start a new (null) one
                    # startLine - add word to a new line

                    # Default behavior if all the tests below fail is to add
                    # this word to the current line, and keep accumulating
                    # that line.
                    (addWord, closeLine, startLine) = (True, False, False)

                    # How long would this line be if the word were added?
                    newLen = outLineLen + 1 + wordLen

                    # Are we on the first word following a bullet point?
                    firstBullet = (wordCount == 2 and bulletPoint)

                    if endEscape:
                        # If the new word ends the input line with ' +',
                        # add it to the current line.

                        (addWord, closeLine, startLine) = (True, True, False)
                    elif self.vuidAnchor(word):
                        # If the new word is a Valid Usage anchor, break the
                        # line afterwards. Note that this should only happen
                        # immediately after a bullet point, but we don't
                        # currently check for this.
                        (addWord, closeLine, startLine) = (True, True, False)
                    elif newLen > self.margin:
                        if firstBullet:
                            # If the word follows a bullet point, add it to
                            # the current line no matter its length.

                            (addWord, closeLine, startLine) = (True, True, False)
                        else:
                            # The word overflows, so add it to a new line.

                            (addWord, closeLine, startLine) = (False, True, True)
                    elif (self.breakPeriod and
                          (wordCount > 2 or not firstBullet) and
                          self.endSentence(prevWord)):
                        # If the previous word ends a sentence and
                        # breakPeriod is set, start a new line.
                        # The complicated logic allows for leading bullet
                        # points which are periods (implicitly numbered lists).
                        # @@@ But not yet for explicitly numbered lists.

                        (addWord, closeLine, startLine) = (False, True, True)

                    # Add a word to the current line
                    if addWord:
                        if outLine:
                            outLine += ' ' + word
                            outLineLen = newLen
                        else:
                            # Fall through to startLine case if there's no
                            # current line yet.
                            startLine = True

                    # Add current line to the output paragraph. Force
                    # starting a new line, although we don't yet know if it
                    # will ever have contents.
                    if closeLine:
                        if outLine:
                            outPara.append(outLine + '\n')
                            outLine = None

                    # Start a new line and add a word to it
                    if startLine:
                        outLine = ''.ljust(self.hangIndent) + word
                        outLineLen = self.hangIndent + wordLen

                # Track the previous word, for use in breaking at end of
                # a sentence
                prevWord = word

        # A paragraph with no words (e.g. only tab characters) cannot be
        # reflowed; pass it through untouched rather than dropping it.
        if wordCount == 0:
            return self.para

        # Add this line to the output paragraph.
        if outLine:
            outPara.append(outLine + '\n')

        return outPara

    # Emit a paragraph, possibly reflowing it depending on the block
    # context. Reset the paragraph accumulator.
    # When VUID tagging is enabled this may also rewrite the first line of
    # the paragraph to insert a new tag, and advance self.nextvu.
    def emitPara(self):
        if self.para != []:
            if (self.vuStack[-1] and
                self.nextvu is not None and
                self.vuPrefix not in self.para[0]):
                # If:
                #   - this paragraph is in a Valid Usage block,
                #   - VUID tags are being assigned,
                #   - a tag is not already present, and
                #   - the paragraph is a properly marked-up list item
                # Then add a VUID tag starting with the next free ID.

                # Split the first line after the bullet point.
                # vuPat is a module-level pattern looked up at call time.
                matches = vuPat.search(self.para[0])
                if matches is not None:
                    logDiag('findRefs: Matched vuPat on line:', self.para[0], end='')
                    head = matches.group('head')
                    tail = matches.group('tail')

                    # Use the first pname: statement in the paragraph as
                    # the parameter name in the VUID tag. This won't always
                    # be correct, but should be highly reliable.
                    for vuLine in self.para:
                        matches = pnamePat.search(vuLine)
                        if matches is not None:
                            break

                    if matches is not None:
                        paramName = matches.group('param')
                    else:
                        paramName = 'None'
                        logWarn(self.filename,
                                'No param name found for VUID tag on line:',
                                self.para[0])

                    newline = (head + ' [[' +
                               self.vuFormat.format(self.vuPrefix,
                                                    self.apiName,
                                                    paramName,
                                                    self.nextvu) + ']] ' + tail)

                    logDiag('Assigning', self.vuPrefix, self.apiName, self.nextvu,
                            ' on line:', self.para[0], '->', newline, 'END')

                    self.para[0] = newline
                    self.nextvu = self.nextvu + 1
                # else:
                #     There are only a few cases of this, and they're all
                #     legitimate. Leave detecting this case to another tool
                #     or hand inspection.
                #     logWarn(self.filename + ': Unexpected non-bullet item in VU block (harmless if following an ifdef):',
                #             self.para[0])

            if self.reflowStack[-1]:
                self.printLines(self.reflowPara())
            else:
                self.printLines(self.para)

        # Reset the paragraph, including its indentation level
        self.para = []
        self.leadIndent = 0
        self.hangIndent = 0

    # 'line' ends a paragraph and should itself be emitted.
    # line may be None to indicate EOF or other exception.
    def endPara(self, line):
        logDiag('endPara line', self.lineNumber, ': emitting paragraph')

        # Emit current paragraph, this line, and reset tracker
        self.emitPara()

        if line:
            self.printLines( [ line ] )

    # 'line' ends a paragraph (unless there's already a paragraph being
    # accumulated, e.g. len(para) > 0 - currently not implemented)
    def endParaContinue(self, line):
        self.endPara(line)

    # 'line' begins or ends a block. If beginning a block, tag whether or
    # not to reflow the contents.
    # A line identical to the delimiter on top of blockStack closes that
    # block; any other delimiter opens a new one.
    # vuBlock is True if the previous line indicates this is a Valid Usage
    # block.
    def endBlock(self, line, reflow = False, vuBlock = False):
        self.endPara(line)

        if self.blockStack[-1] == line:
            logDiag('endBlock line', self.lineNumber,
                    ': popping block end depth:', len(self.blockStack),
                    ':', line, end='')
            self.blockStack.pop()
            self.reflowStack.pop()
            self.vuStack.pop()
        else:
            # Start a block
            self.blockStack.append(line)
            self.reflowStack.append(reflow)
            self.vuStack.append(vuBlock)

            logDiag('endBlock reflow =', reflow, ' line', self.lineNumber,
                    ': pushing block start depth', len(self.blockStack),
                    ':', line, end='')

    # 'line' begins or ends a block. The paragraphs in the block *should* be
    # reformatted (e.g. a NOTE).
    def endParaBlockReflow(self, line, vuBlock):
        self.endBlock(line, reflow = True, vuBlock = vuBlock)

    # 'line' begins or ends a block. The paragraphs in the block should
    # *not* be reformatted (e.g. a table or code listing).
    def endParaBlockPassthrough(self, line):
        self.endBlock(line, reflow = False)

    # 'line' starts or continues a paragraph.
    # Paragraphs may have "hanging indent", e.g.
    #   * Bullet point...
    #     ... continued
    # In this case, when the higher indentation level ends, so does the
    # paragraph.
    def addLine(self, line):
        logDiag('addLine line', self.lineNumber, ':', line, end='')

        # See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string
        indent = len(line) - len(line.lstrip())

        # A hanging paragraph ends due to a less-indented line.
        if self.para != [] and indent < self.hangIndent:
            logDiag('addLine: line reduces indentation, emit paragraph')
            self.emitPara()

        # A bullet point (or something that looks like one) always ends the
        # current paragraph.
        if beginBullet.match(line):
            logDiag('addLine: line matches beginBullet, emit paragraph')
            self.emitPara()

        if self.para == []:
            # Begin a new paragraph
            self.para = [ line ]
            self.leadIndent = indent
            self.hangIndent = indent
        else:
            # Add a line to a paragraph. Increase the hanging indentation
            # level - once.
            if self.hangIndent == self.leadIndent:
                self.hangIndent = indent
            self.para.append(line)

# Reflow a single asciidoc file.
#   filename - path of the file to read.
#   args - parsed command-line options; the attributes read here are
#     overwrite, outDir, suffix, noflow and nextvu. args.nextvu is updated
#     if VUID tags were assigned while processing this file.
# Returns None in all cases. A warning is logged and nothing is written if
# the output file cannot be opened.
def reflowFile(filename, args):
    logDiag('reflow: filename', filename)

    lines = loadFile(filename)
    if lines is None:
        return

    # Output file handle and reflow object for this file. There are no race
    # conditions on overwriting the input, but it's not recommended unless
    # you have backing store such as git.

    if args.overwrite:
        outFilename = filename
    else:
        outFilename = args.outDir + '/' + os.path.basename(filename) + args.suffix

    try:
        fp = open(outFilename, 'w', encoding='utf8')
    except OSError:
        # Report the file that actually failed to open (the output file)
        logWarn('Cannot open output file', outFilename, ':', sys.exc_info()[0])
        return None

    # The 'with' guarantees the output file is closed even if processing
    # raises part-way through.
    with fp:
        state = ReflowState(filename,
                            file = fp,
                            reflow = not args.noflow,
                            nextvu = args.nextvu)

        for line in lines:
            state.incrLineNumber()

            # Is this a title line (leading '= ' followed by text)?
            thisTitle = False

            # The logic here is broken. If we're in a non-reflowable block and
            # this line *doesn't* end the block, it should always be
            # accumulated.

            if endPara.match(line):
                # Ending a paragraph. Emit the current paragraph, if any, and
                # prepare to begin a new paragraph.

                state.endPara(line)

                # If this is an include:: line starting the definition of a
                # structure or command, track that for use in VUID generation.

                matches = includePat.search(line)
                if matches is not None:
                    apiType = matches.group('type')
                    if (apiType == 'protos' or apiType == 'structs'):
                        state.apiName = matches.group('name')

            elif endParaContinue.match(line):
                # For now, always just end the paragraph.
                # Could check see if len(para) > 0 to accumulate.

                state.endParaContinue(line)

                # If it's a title line, track that
                if line[0:2] == '= ':
                    thisTitle = True

            elif blockReflow.match(line):
                # Starting or ending a block whose contents may be reflowed.
                # Blocks cannot be nested.

                # First see if this is an explicit Valid Usage block.
                # lineNumber is 1-based, so the previous input line is at
                # index lineNumber-2.
                vuBlock = (state.lineNumber > 1 and
                           lines[state.lineNumber-2] == '.Valid Usage\n')

                state.endParaBlockReflow(line, vuBlock)
            elif blockPassthrough.match(line):
                # Starting or ending a block whose contents must not be reflowed.
                # These are tables, etc. Blocks cannot be nested.

                state.endParaBlockPassthrough(line)
            elif state.lastTitle:
                # The previous line was a document title line. This line
                # is the author / credits line and must not be reflowed.

                state.endPara(line)
            else:
                # Just accumulate a line to the current paragraph. Watch out for
                # hanging indents / bullet-points and track that indent level.

                state.addLine(line)

            state.lastTitle = thisTitle

        # Cleanup at end of file
        state.endPara(None)

        # Sanity check on block nesting
        if len(state.blockStack) > 1:
            logWarn('file', filename,
                    'mismatched asciidoc block delimiters at EOF:',
                    state.blockStack[-1])

    # Update the 'nextvu' value
    if (args.nextvu != state.nextvu):
        logWarn('Updated nextvu to', state.nextvu, 'after file', filename)
        args.nextvu = state.nextvu

# Reflow every '.txt' file found under folder_to_reflow, visiting each
# directory exactly once. Sub-folders named 'scripts' or 'style' (compared
# case-insensitively) are skipped together with everything beneath them.
#   folder_to_reflow - root directory of the tree to process.
#   args - parsed command-line options, passed through to reflowFile.
def reflowAllAdocFiles(folder_to_reflow, args):
    for root, subdirs, files in os.walk(folder_to_reflow):
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                reflowFile(file_path, args)

        # os.walk already descends into sub-folders by itself, so there must
        # be no explicit recursion here. Unwanted sub-folders are pruned by
        # editing 'subdirs' in place, which os.walk honors when walking
        # top-down (the default).
        keptSubdirs = []
        for subdir in subdirs:
            sub_folder = os.path.join(root, subdir)
            print('Sub-folder = %s' % sub_folder)
            if subdir.lower() in ('scripts', 'style'):
                print(' Skipping = %s' % sub_folder)
            else:
                print(' Parsing = %s' % sub_folder)
                keptSubdirs.append(subdir)
        subdirs[:] = keptSubdirs

# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.

# Explicit Valid Usage list item with one or more leading asterisks
# The re.DOTALL is needed to prevent vuPat.search() from stripping
# the trailing newline.
vuPat = re.compile('^(?P<head> [*]+)( *)(?P<tail>.*)', re.DOTALL)


# Command-line driver: parse options, configure logging, reflow the
# requested files (or every '.txt' file under the current directory if none
# are given), then persist the next free VUID number if it changed.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('-diag', action='store', dest='diagFile',
                        help='Set the diagnostic file')
    parser.add_argument('-warn', action='store', dest='warnFile',
                        help='Set the warning file')
    parser.add_argument('-log', action='store', dest='logFile',
                        help='Set the log file for both diagnostics and warnings')
    parser.add_argument('-overwrite', action='store_true',
                        help='Overwrite input filenames instead of writing different output filenames')
    parser.add_argument('-out', action='store', dest='outDir',
                        default='out',
                        help='Set the output directory in which updated files are generated (default: out)')
    parser.add_argument('-tagvu', action='store_true',
                        help='Tag un-tagged Valid Usage statements starting at the value wired into reflow.py')
    parser.add_argument('-nextvu', action='store', dest='nextvu', type=int,
                        default=None,
                        help='Tag un-tagged Valid Usage statements starting at the specified base VUID instead of the value wired into reflow.py')
    parser.add_argument('-noflow', action='store_true', dest='noflow',
                        help='Do not reflow text. Other actions may apply.')
    parser.add_argument('-suffix', action='store', dest='suffix',
                        default='',
                        help='Set the suffix added to updated file names (default: none)')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='a filename to reflow text in')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    setLogFile(True,  True, args.logFile)
    setLogFile(True, False, args.diagFile)
    setLogFile(False, True, args.warnFile)

    if args.overwrite:
        logWarn('reflow.py: will overwrite all input files')

    # -tagvu without an explicit -nextvu starts at the recorded value
    if args.tagvu and args.nextvu is None:
        args.nextvu = startVUID

    if args.nextvu is not None:
        logWarn('Tagging untagged Valid Usage statements starting at', args.nextvu)

    # If no files are specified, reflow the entire specification chapters folder
    if len(args.files) == 0:
        folder_to_reflow = os.getcwd()
        # folder_to_reflow += '/chapters'
        reflowAllAdocFiles(folder_to_reflow, args)
    else:
        for file in args.files:
            reflowFile(file, args)

    # reflowFile advances args.nextvu as tags are assigned. If it moved,
    # rewrite reflow_count.py (next to this script) so that the next run
    # starts from the first unused number.
    if args.nextvu is not None and args.nextvu != startVUID:
        reflow_count_file_path = os.path.dirname(os.path.realpath(__file__))
        reflow_count_file_path += '/reflow_count.py'
        try:
            # 'with' closes the file even if one of the writes fails
            with open(reflow_count_file_path, 'w', encoding='utf8') as reflow_count_file:
                print('# The value to start tagging VU statements at, unless overridden by -nextvu\n', file=reflow_count_file, end='')
                count_string = 'startVUID = %d\n' % args.nextvu
                print(count_string, file=reflow_count_file, end='')
        except OSError:
            logWarn('Cannot open output count file reflow_count.py', ':', sys.exc_info()[0])