• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2#
3# Copyright 2016-2021 The Khronos Group Inc.
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""Used for automatic reflow of spec sources to satisfy the agreed layout to
8minimize git churn. Most of the logic has to do with detecting asciidoc
9markup or block types that *shouldn't* be reflowed (tables, code) and
10ignoring them. It's very likely there are many asciidoc constructs not yet
11accounted for in the script, our usage of asciidoc markup is intentionally
12somewhat limited.
13
14Also used to insert identifying tags on explicit Valid Usage statements.
15
16Usage: `reflow.py [-noflow] [-tagvu] [-nextvu #] [-overwrite] [-out dir] [-suffix str] files`
17
18- `-noflow` acts as a passthrough, instead of reflowing text. Other
19  processing may occur.
20- `-tagvu` generates explicit VUID tag for Valid Usage statements which
21  don't already have them.
22- `-nextvu #` starts VUID tag generation at the specified # instead of
23  the value wired into the `reflow.py` script.
24- `-overwrite` updates in place (can be risky, make sure there are backups)
25- `-check FAIL|WARN` runs some simple sanity checks on markup. If the checks
26  fail and the WARN option is given, the script will simply print a warning
27  message. If the checks fail and the FAIL option is given, the script will
28  exit with an error code. FAIL is for use with continuous integration
29  scripts enforcing the checks.
30- `-out` specifies directory to create output file in, default 'out'
31- `-suffix` specifies suffix to add to output files, default ''
32- `files` are asciidoc source files from the spec to reflow.
33"""
34# For error and file-loading interfaces only
35import argparse
36import os
37import re
38import sys
39from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
40
41# Vulkan-specific - will consolidate into scripts/ like OpenXR soon
42sys.path.insert(0, 'xml')
43
44from vkconventions import VulkanConventions as APIConventions
45conventions = APIConventions()
46
# Markup that always ends a paragraph. The pattern is anchored at both
# ends, so the *entire* line must consist of one of these forms:
#   empty line or whitespace (spaces only)
#   [block options]
#   [[anchor]]          (covered by the same '[...]' alternative)
#   //                  comment
#   <<<<                page break
#   :attribute-setting
#   macro-directive::terms
#   +                   standalone list item continuation
#   label::             labelled list - label must be standalone
endPara = re.compile(r'^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$')
58
# Special case of markup ending a paragraph, used to track the current
# command/structure. This allows for either OpenXR or Vulkan API path
# conventions. Nominally it should use the file suffix defined by the API
# conventions (conventions.file_suffix), except that XR uses '.txt' for
# generated API include files, not '.adoc' like its other includes.
#
# Named groups:
#   directory_traverse - leading '../' (1 to 4 times), '{INCS-VAR}/' or
#                        '{generated}/', optionally followed by 'generated/'
#   generated_type     - first path component after the prefix (e.g. 'api')
#   category           - second path component (e.g. 'protos', 'structs')
#   entity_name        - file base name, containing neither '.' nor '/'
#
# The dots in '../' and '.txt' are escaped so that they only match a literal
# period, rather than any character.
includePat = re.compile(
        r'include::(?P<directory_traverse>((\.\./){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.txt[\[][\]]')
66
# Find the first pname: or code: pattern in a Valid Usage statement.
# Both capture the referenced name in the 'param' group; pnamePat also
# accepts a name wrapped in optional braces (e.g. an attribute reference).
pnamePat = re.compile(r'pname:(?P<param>\{?\w+\}?)')
codePat = re.compile(r'code:(?P<param>\w+)')

# Markup that's OK in a contiguous paragraph but otherwise passed through
#   .anything (except .., which indicates a literal block)
#   === Section Titles (one or more '=' followed by a space)
endParaContinue = re.compile(r'^(\.[^.].*|=+ .*)$')

# Markup for block delimiters whose contents *should* be reformatted
#   --   (exactly two)  (open block)
#   **** (4 or more)    (sidebar block - why do we have these?!)
#   ==== (4 or more)    (example block)
#   ____ (4 or more)    (quote block)
blockReflow = re.compile(r'^(--|[*=_]{4,})$')

# Fake block delimiters for "common" VU statements.
# This is compared against whole input lines, hence the trailing newline.
blockCommonReflow = '// Common Valid Usage\n'

# Markup for block delimiters whose contents should *not* be reformatted
#   |=== (3 or more '=')  (table)
#   ++++ (4 or more)      (passthrough block)
#   .... (4 or more)      (literal block)
#   //// (4 or more)      (comment block)
#   ---- (4 or more)      (listing block)
#   ~~~~ (4 or more)      (also accepted by the pattern)
#   ```  (exactly three)  (listing block)
# Note that '****' sidebar blocks are matched by blockReflow above, not by
# this pattern.
blockPassthrough = re.compile(r'^(\|={3,}|[`]{3}|[\-+./~]{4,})$')

# Markup for introducing lists (hanging paragraphs).
# The marker may be preceded by spaces and must be followed by a space.
#   * bullet
#     ** bullet
#     -- bullet
#   . bullet
#   :: bullet (no longer supported by asciidoctor 2)
#   {empty}:: bullet
#   1. list item
beginBullet = re.compile(r'^ *([*\-.]+|\{empty\}::|::|[0-9]+[.]) ')

# Start of an asciidoctor conditional
#   ifdef::
#   ifndef::
conditionalStart = re.compile(r'^(ifdef|ifndef)::')

# Text that (may) not end sentences

# A single capital letter followed by a period, typically a middle initial.
endInitial = re.compile(r'^[A-Z]\.$')
# An abbreviation ('e.g.', 'i.e.', 'c.f.', 'vs.', in any letter case), which
# doesn't (usually) end a line.
endAbbrev = re.compile(r'(e\.g|i\.e|c\.f|vs)\.$', re.IGNORECASE)
117
118class ReflowState:
119    """State machine for reflowing.
120
121    Represents the state of the reflow operation"""
122    def __init__(self,
123                 filename,
124                 margin = 76,
125                 file = sys.stdout,
126                 breakPeriod = True,
127                 reflow = True,
128                 nextvu = None,
129                 maxvu = None):
130
131        self.blockStack = [ None ]
132        """The last element is a line with the asciidoc block delimiter that's currently in effect,
133        such as '--', '----', '****', '======', or '+++++++++'.
134        This affects whether or not the block contents should be formatted."""
135
136        self.reflowStack = [ True ]
137        """The last element is True or False if the current blockStack contents
138        should be reflowed."""
139        self.vuStack = [ False ]
140        """the last element is True or False if the current blockStack contents
141        are an explicit Valid Usage block."""
142
143        self.margin = margin
144        """margin to reflow text to."""
145
146        self.para = []
147        """list of lines in the paragraph being accumulated.
148        When this is non-empty, there is a current paragraph."""
149
150        self.lastTitle = False
151        """true if the previous line was a document title line
152        (e.g. :leveloffset: 0 - no attempt to track changes to this is made)."""
153
154        self.leadIndent = 0
155        """indent level (in spaces) of the first line of a paragraph."""
156
157        self.hangIndent = 0
158        """indent level of the remaining lines of a paragraph."""
159
160        self.file = file
161        """file handle to write to."""
162
163        self.filename = filename
164        """base name of file being read from."""
165
166        self.lineNumber = 0
167        """line number being read from the input file."""
168
169        self.breakPeriod = breakPeriod
170        """True if justification should break to a new line after the end of a sentence."""
171
172        self.breakInitial = True
173        """True if justification should break to a new line after
174        something that appears to be an initial in someone's name. **TBD**"""
175
176        self.reflow = reflow
177        """True if text should be reflowed, False to pass through unchanged."""
178
179        self.vuPrefix = 'VUID'
180        """Prefix of generated Valid Usage tags"""
181
182        self.vuFormat = '{0}-{1}-{2}-{3:0>5d}'
183        """Format string for generating Valid Usage tags.
184        First argument is vuPrefix, second is command/struct name, third is parameter name, fourth is the tag number."""
185
186        self.nextvu = nextvu
187        """Integer to start tagging un-numbered Valid Usage statements with,
188        or None if no tagging should be done."""
189
190        self.maxvu = maxvu
191        """Maximum tag to use for Valid Usage statements, or None if no
192        tagging should be done."""
193
194        self.defaultApiName = '{refpage}'
195        self.apiName = self.defaultApiName
196        """String name of a Vulkan structure or command for VUID tag
197        generation, or {refpage} if one hasn't been included in this file
198        yet."""
199
200    def incrLineNumber(self):
201        self.lineNumber = self.lineNumber + 1
202
203    def printLines(self, lines):
204        """Print an array of lines with newlines already present"""
205        if len(lines) > 0:
206            logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='')
207
208        if self.file is not None:
209            for line in lines:
210                print(line, file=self.file, end='')
211
212    def endSentence(self, word):
213        """Return True if word ends with a sentence-period, False otherwise.
214
215        Allows for contraction cases which won't end a line:
216
217         - A single letter (if breakInitial is True)
218         - Abbreviations: 'c.f.', 'e.g.', 'i.e.' (or mixed-case versions)"""
219        if (word[-1:] != '.' or
220            endAbbrev.search(word) or
221                (self.breakInitial and endInitial.match(word))):
222            return False
223
224        return True
225
226    def vuidAnchor(self, word):
227        """Return True if word is a Valid Usage ID Tag anchor."""
228        return (word[0:7] == '[[VUID-')
229
230    def isOpenBlockDelimiter(self, line):
231        """Returns True if line is an open block delimiter."""
232        return line[0:2] == '--'
233
    def reflowPara(self):
        """Reflow the current paragraph, respecting the paragraph lead and
        hanging indentation levels.

        The algorithm also respects trailing '+' signs that indicate embedded newlines,
        and will not reflow a very long word immediately after a bullet point.

        Just return the paragraph unchanged if the -noflow argument was
        given.

        Returns a list of newline-terminated output lines (or self.para
        itself when reflowing is disabled).

        Side effect: when the paragraph is a single line starting with a
        bullet point, self.hangIndent is set so that continuation lines
        align after the bullet."""
        if not self.reflow:
            return self.para

        logDiag('reflowPara lead indent = ', self.leadIndent,
                'hangIndent =', self.hangIndent,
                'para:', self.para[0], end='')

        # Total words processed (we care about the *first* word vs. others)
        wordCount = 0

        # Tracks the *previous* word processed. It must not be empty.
        prevWord = ' '

        # Track the previous line and paragraph being indented, if any
        outLine = None
        outPara = []

        for line in self.para:
            line = line.rstrip()
            words = line.split()

            # logDiag('reflowPara: input line =', line)
            # Index of the last word on this input line
            numWords = len(words) - 1

            for i in range(0, numWords + 1):
                word = words[i]
                wordLen = len(word)
                wordCount += 1

                # endEscape is False, or the '+' word itself (truthy) when
                # this word is a trailing ' +' on the input line.
                endEscape = False
                if i == numWords and word == '+':
                    # Trailing ' +' must stay on the same line
                    endEscape = word
                    # logDiag('reflowPara last word of line =', word, 'prevWord =', prevWord, 'endEscape =', endEscape)
                else:
                    pass
                    # logDiag('reflowPara wordCount =', wordCount, 'word =', word, 'prevWord =', prevWord)

                if wordCount == 1:
                    # The first word of the paragraph is treated specially.
                    # The loop logic becomes trickier if all this code is
                    # done prior to looping over lines and words, so all the
                    # setup logic is done here.

                    outPara = []
                    outLine = ''.ljust(self.leadIndent) + word
                    outLineLen = self.leadIndent + wordLen

                    # If the paragraph begins with a bullet point, generate
                    # a hanging indent level if there isn't one already.
                    if beginBullet.match(self.para[0]):
                        bulletPoint = True
                        if len(self.para) > 1:
                            # Multi-line input: keep the hanging indent
                            # already recorded for this paragraph.
                            logDiag('reflowPara first line matches bullet point',
                                    'but indent already hanging @ input line',
                                    self.lineNumber)
                        else:
                            logDiag('reflowPara first line matches bullet point -'
                                    'single line, assuming hangIndent @ input line',
                                    self.lineNumber)
                            # Align continuation lines one column past the
                            # bullet marker (marker plus one space).
                            self.hangIndent = outLineLen + 1
                    else:
                        bulletPoint = False
                else:
                    # Possible actions to take with this word
                    #
                    # addWord - add word to current line
                    # closeLine - append line and start a new (null) one
                    # startLine - add word to a new line

                    # Default behavior if all the tests below fail is to add
                    # this word to the current line, and keep accumulating
                    # that line.
                    (addWord, closeLine, startLine) = (True, False, False)

                    # How long would this line be if the word were added?
                    newLen = outLineLen + 1 + wordLen

                    # Are we on the first word following a bullet point?
                    firstBullet = (wordCount == 2 and bulletPoint)

                    if endEscape:
                        # If the new word ends the input line with ' +',
                        # add it to the current line.

                        (addWord, closeLine, startLine) = (True, True, False)
                    elif self.vuidAnchor(word):
                        # If the new word is a Valid Usage anchor, break the
                        # line afterwards. Note that this should only happen
                        # immediately after a bullet point, but we don't
                        # currently check for this.
                        (addWord, closeLine, startLine) = (True, True, False)
                    elif newLen > self.margin:
                        if firstBullet:
                            # If the word follows a bullet point, add it to
                            # the current line no matter its length.

                            (addWord, closeLine, startLine) = (True, True, False)
                        elif beginBullet.match(word + ' '):
                            # If the word *is* a bullet point, add it to
                            # the current line no matter its length.
                            # This avoids an innocent inline '-' or '*'
                            # turning into a bogus bullet point.

                            (addWord, closeLine, startLine) = (True, True, False)
                        else:
                            # The word overflows, so add it to a new line.

                            (addWord, closeLine, startLine) = (False, True, True)
                    elif (self.breakPeriod and
                          (wordCount > 2 or not firstBullet) and
                          self.endSentence(prevWord)):
                        # If the previous word ends a sentence and
                        # breakPeriod is set, start a new line.
                        # The complicated logic allows for leading bullet
                        # points which are periods (implicitly numbered lists).
                        # @@@ But not yet for explicitly numbered lists.

                        (addWord, closeLine, startLine) = (False, True, True)

                    # The three actions below are applied in this fixed
                    # order: add, then close, then start.

                    # Add a word to the current line
                    if addWord:
                        if outLine:
                            outLine += ' ' + word
                            outLineLen = newLen
                        else:
                            # Fall through to startLine case if there's no
                            # current line yet.
                            startLine = True

                    # Add current line to the output paragraph. Force
                    # starting a new line, although we don't yet know if it
                    # will ever have contents.
                    if closeLine:
                        if outLine:
                            outPara.append(outLine + '\n')
                            outLine = None

                    # Start a new line and add a word to it
                    if startLine:
                        outLine = ''.ljust(self.hangIndent) + word
                        outLineLen = self.hangIndent + wordLen

                # Track the previous word, for use in breaking at end of
                # a sentence
                prevWord = word

        # Add this line to the output paragraph.
        if outLine:
            outPara.append(outLine + '\n')

        return outPara
395
396    def emitPara(self):
397        """Emit a paragraph, possibly reflowing it depending on the block context.
398
399        Resets the paragraph accumulator."""
400        if self.para != []:
401            if self.vuStack[-1] and self.nextvu is not None:
402                # If:
403                #   - this paragraph is in a Valid Usage block,
404                #   - VUID tags are being assigned,
405                # Try to assign VUIDs
406
407                if nestedVuPat.search(self.para[0]):
408                    # Check for nested bullet points. These should not be
409                    # assigned VUIDs, nor present at all, because they break
410                    # the VU extractor.
411                    logWarn(self.filename + ': Invalid nested bullet point in VU block:', self.para[0])
412                elif self.vuPrefix not in self.para[0]:
413                    # If:
414                    #   - a tag is not already present, and
415                    #   - the paragraph is a properly marked-up list item
416                    # Then add a VUID tag starting with the next free ID.
417
418                    # Split the first line after the bullet point
419                    matches = vuPat.search(self.para[0])
420                    if matches is not None:
421                        logDiag('findRefs: Matched vuPat on line:', self.para[0], end='')
422                        head = matches.group('head')
423                        tail = matches.group('tail')
424
425                        # Use the first pname: or code: tag in the paragraph as
426                        # the parameter name in the VUID tag. This won't always
427                        # be correct, but should be highly reliable.
428                        for vuLine in self.para:
429                            matches = pnamePat.search(vuLine)
430                            if matches is not None:
431                                break
432                            matches = codePat.search(vuLine)
433                            if matches is not None:
434                                break
435
436                        if matches is not None:
437                            paramName = matches.group('param')
438                        else:
439                            paramName = 'None'
440                            logWarn(self.filename,
441                                    'No param name found for VUID tag on line:',
442                                    self.para[0])
443
444                        newline = (head + ' [[' +
445                                   self.vuFormat.format(self.vuPrefix,
446                                                        self.apiName,
447                                                        paramName,
448                                                        self.nextvu) + ']] ' + tail)
449
450                        logDiag('Assigning', self.vuPrefix, self.apiName, self.nextvu,
451                                ' on line:', self.para[0], '->', newline, 'END')
452
453                        # Don't actually assign the VUID unless it's in the reserved range
454                        if self.nextvu <= self.maxvu:
455                            if self.nextvu == self.maxvu:
456                                logWarn('Skipping VUID assignment, no more VUIDs available')
457                            self.para[0] = newline
458                            self.nextvu = self.nextvu + 1
459                # else:
460                #     There are only a few cases of this, and they're all
461                #     legitimate. Leave detecting this case to another tool
462                #     or hand inspection.
463                #     logWarn(self.filename + ': Unexpected non-bullet item in VU block (harmless if following an ifdef):',
464                #             self.para[0])
465
466            if self.reflowStack[-1]:
467                self.printLines(self.reflowPara())
468            else:
469                self.printLines(self.para)
470
471        # Reset the paragraph, including its indentation level
472        self.para = []
473        self.leadIndent = 0
474        self.hangIndent = 0
475
476    def endPara(self, line):
477        """'line' ends a paragraph and should itself be emitted.
478        line may be None to indicate EOF or other exception."""
479        logDiag('endPara line', self.lineNumber, ': emitting paragraph')
480
481        # Emit current paragraph, this line, and reset tracker
482        self.emitPara()
483
484        if line:
485            self.printLines( [ line ] )
486
487    def endParaContinue(self, line):
488        """'line' ends a paragraph (unless there's already a paragraph being
489        accumulated, e.g. len(para) > 0 - currently not implemented)"""
490        self.endPara(line)
491
492    def endBlock(self, line, reflow = False, vuBlock = False):
493        """'line' begins or ends a block.
494
495        If beginning a block, tag whether or not to reflow the contents.
496
497        vuBlock is True if the previous line indicates this is a Valid Usage block."""
498        self.endPara(line)
499
500        if self.blockStack[-1] == line:
501            logDiag('endBlock line', self.lineNumber,
502                    ': popping block end depth:', len(self.blockStack),
503                    ':', line, end='')
504
505            # Reset apiName at the end of an open block.
506            # Open blocks cannot be nested (at present), so this is safe.
507            if self.isOpenBlockDelimiter(line):
508                logDiag('reset apiName to empty at line', self.lineNumber)
509                self.apiName = self.defaultApiName
510            else:
511                logDiag('NOT resetting apiName to default at line', self.lineNumber)
512
513            self.blockStack.pop()
514            self.reflowStack.pop()
515            self.vuStack.pop()
516        else:
517            # Start a block
518            self.blockStack.append(line)
519            self.reflowStack.append(reflow)
520            self.vuStack.append(vuBlock)
521
522            logDiag('endBlock reflow =', reflow, ' line', self.lineNumber,
523                    ': pushing block start depth', len(self.blockStack),
524                    ':', line, end='')
525
526    def endParaBlockReflow(self, line, vuBlock):
527        """'line' begins or ends a block. The paragraphs in the block *should* be
528        reformatted (e.g. a NOTE)."""
529        self.endBlock(line, reflow = True, vuBlock = vuBlock)
530
531    def endParaBlockPassthrough(self, line):
532        """'line' begins or ends a block. The paragraphs in the block should
533        *not* be reformatted (e.g. a code listing)."""
534        self.endBlock(line, reflow = False)
535
536    def addLine(self, line):
537        """'line' starts or continues a paragraph.
538
539        Paragraphs may have "hanging indent", e.g.
540
541        ```
542          * Bullet point...
543            ... continued
544        ```
545
546        In this case, when the higher indentation level ends, so does the
547        paragraph."""
548        logDiag('addLine line', self.lineNumber, ':', line, end='')
549
550        # See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string
551        indent = len(line) - len(line.lstrip())
552
553        # A hanging paragraph ends due to a less-indented line.
554        if self.para != [] and indent < self.hangIndent:
555            logDiag('addLine: line reduces indentation, emit paragraph')
556            self.emitPara()
557
558        # A bullet point (or something that looks like one) always ends the
559        # current paragraph.
560        if beginBullet.match(line):
561            logDiag('addLine: line matches beginBullet, emit paragraph')
562            self.emitPara()
563
564        if self.para == []:
565            # Begin a new paragraph
566            self.para = [ line ]
567            self.leadIndent = indent
568            self.hangIndent = indent
569        else:
570            # Add a line to a paragraph. Increase the hanging indentation
571            # level - once.
572            if self.hangIndent == self.leadIndent:
573                self.hangIndent = indent
574            self.para.append(line)
575
def apiMatch(oldname, newname):
    """Return True if oldname and newname are equal once any trailing run
       of upper-case ASCII letters (such as an API suffix) is removed from
       each.

       This is a heuristic. It should use the API map instead, since
       aliases like VkPhysicalDeviceVariablePointerFeatures ->
       VkPhysicalDeviceVariablePointersFeatures are not recognized."""
    suffixChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    strippedOld = oldname.rstrip(suffixChars)
    strippedNew = newname.rstrip(suffixChars)
    return strippedOld == strippedNew
583
def reflowFile(filename, args):
    """Reflow a single asciidoc source file and write the result.

    - filename: path of the file to read.
    - args: options object. This function reads its overwrite, outDir,
      suffix, nowrite, margin, noflow, nextvu, maxvu and check attributes,
      and updates its vuidDict, warnCount and nextvu attributes.

    The output goes to 'filename' itself when args.overwrite is set,
    otherwise to args.outDir + '/' + basename + args.suffix. Nothing is
    written when args.nowrite is set.

    Returns None. Returns early, without processing, if the input cannot
    be loaded or the output file cannot be opened."""
    logDiag('reflow: filename', filename)

    lines = loadFile(filename)
    if lines is None:
        return

    # Output file handle and reflow object for this file. There are no race
    # conditions on overwriting the input, but it's not recommended unless
    # you have backing store such as git.

    if args.overwrite:
        outFilename = filename
    else:
        outFilename = args.outDir + '/' + os.path.basename(filename) + args.suffix

    if args.nowrite:
        fp = None
    else:
        try:
            fp = open(outFilename, 'w', encoding='utf8')
        except OSError:
            # Only failures to open the file are reported and skipped;
            # unrelated exceptions (e.g. KeyboardInterrupt) propagate.
            logWarn('Cannot open output file', outFilename, ':', sys.exc_info()[0])
            return

    state = ReflowState(filename,
                        margin = args.margin,
                        file = fp,
                        reflow = not args.noflow,
                        nextvu = args.nextvu,
                        maxvu = args.maxvu)

    # The try/finally guarantees the output file is closed even if
    # processing a line raises.
    try:
        for line in lines:
            state.incrLineNumber()

            # Is this a title line (leading '= ' followed by text)?
            thisTitle = False

            matches = vuidPat.search(line)
            if matches is not None:
                # If we found a VUID pattern, add the (filename,line) it was
                # found at to a list for that VUID, to find duplicates.
                vuid = matches.group('vuid')
                if vuid not in args.vuidDict:
                    args.vuidDict[vuid] = []
                args.vuidDict[vuid].append([filename, line])

            # The logic here is broken. If we're in a non-reflowable block and
            # this line *doesn't* end the block, it should always be
            # accumulated.

            # Test for a blockCommonReflow delimiter comment first, to avoid
            # treating it solely as a end-Paragraph marker comment.
            if line == blockCommonReflow:
                # Starting or ending a pseudo-block for "common" VU statements.
                state.endParaBlockReflow(line, vuBlock = True)

            elif blockReflow.match(line):
                # Starting or ending a block whose contents may be reflowed.
                # Blocks cannot be nested.

                # Is this an explicit Valid Usage block?
                # (lineNumber is 1-based, so the previous line is at
                # index lineNumber-2.)
                vuBlock = (state.lineNumber > 1 and
                           lines[state.lineNumber-2] == '.Valid Usage\n')

                state.endParaBlockReflow(line, vuBlock)

            elif endPara.match(line):
                # Ending a paragraph. Emit the current paragraph, if any, and
                # prepare to begin a new paragraph.

                state.endPara(line)

                # If this is an include:: line starting the definition of a
                # structure or command, track that for use in VUID generation.

                matches = includePat.search(line)
                if matches is not None:
                    generated_type = matches.group('generated_type')
                    include_type = matches.group('category')
                    if generated_type == 'api' and include_type in ('protos', 'structs', 'funcpointers'):
                        apiName = matches.group('entity_name')
                        if state.apiName != state.defaultApiName:
                            # This happens when there are multiple API include
                            # lines in a single block. The style guideline is to
                            # always place the API which others are promoted to
                            # first. In virtually all cases, the promoted API
                            # will differ solely in the vendor suffix (or
                            # absence of it), which is benign.
                            if not apiMatch(state.apiName, apiName):
                                logDiag(f'Promoted API name mismatch at line {state.lineNumber}: {apiName} does not match {state.apiName} (this is OK if it is just a spelling alias)')
                        else:
                            state.apiName = apiName

            elif endParaContinue.match(line):
                # For now, always just end the paragraph.
                # Could check see if len(para) > 0 to accumulate.

                state.endParaContinue(line)

                # If it's a title line, track that
                if line[0:2] == '= ':
                    thisTitle = True

            elif blockPassthrough.match(line):
                # Starting or ending a block whose contents must not be reflowed.
                # These are tables, etc. Blocks cannot be nested.

                state.endParaBlockPassthrough(line)
            elif state.lastTitle:
                # The previous line was a document title line. This line
                # is the author / credits line and must not be reflowed.

                state.endPara(line)
            else:
                # Just accumulate a line to the current paragraph. Watch out for
                # hanging indents / bullet-points and track that indent level.

                state.addLine(line)

                # This test looks for disallowed conditionals inside Valid Usage
                # blocks, by checking if (a) this line does not start a new VU
                # (bullet point) and (b) the previous line starts an asciidoctor
                # conditional (ifdef:: or ifndef::).
                # The lineNumber > 1 guard keeps the index from wrapping
                # around to the last line of the file on the first line.

                if (args.check
                    and state.vuStack[-1]
                    and not beginBullet.match(line)
                    and state.lineNumber > 1
                    and conditionalStart.match(lines[state.lineNumber-2])):

                    logWarn('Detected embedded Valid Usage conditional: {}:{}'.format(
                            filename, state.lineNumber - 1))
                    # Keep track of warning check count
                    args.warnCount = args.warnCount + 1

            state.lastTitle = thisTitle

        # Cleanup at end of file
        state.endPara(None)
    finally:
        if fp is not None:
            fp.close()

    # Check for sensible block nesting
    if len(state.blockStack) > 1:
        logWarn('file', filename,
                'mismatched asciidoc block delimiters at EOF:',
                state.blockStack[-1])

    # Update the 'nextvu' value
    if args.nextvu != state.nextvu:
        logWarn('Updated nextvu to', state.nextvu, 'after file', filename)
        args.nextvu = state.nextvu
737
def reflowAllAdocFiles(folder_to_reflow, args):
    """Reflow every asciidoc file found beneath a folder.

    Walks `folder_to_reflow` top-down and calls reflowFile() exactly once
    on each file whose name ends with `conventions.file_suffix`.
    Sub-folders whose lower-cased name appears in
    `conventions.spec_no_reflow_dirs` are pruned from the walk, so nothing
    beneath them is visited.

    - folder_to_reflow - path of the root folder to search
    - args - parsed command-line options, passed through to reflowFile()
    """
    for root, subdirs, files in os.walk(folder_to_reflow):
        for file in files:
            if file.endswith(conventions.file_suffix):
                file_path = os.path.join(root, file)
                reflowFile(file_path, args)

        # os.walk() itself descends into every directory left in
        # `subdirs`. Recursing here as well would reflow nested files more
        # than once and would not stop the walk from entering excluded
        # directories, so prune the list in place instead.
        kept = []
        for subdir in subdirs:
            sub_folder = os.path.join(root, subdir)
            print('Sub-folder = %s' % sub_folder)
            if subdir.lower() not in conventions.spec_no_reflow_dirs:
                print('   Parsing = %s' % sub_folder)
                kept.append(subdir)
            else:
                print('   Skipping = %s' % sub_folder)
        subdirs[:] = kept
752
# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.

# Explicit Valid Usage list item with one or more leading asterisks.
# Named group 'head' is the two-space indent plus the asterisks; 'tail' is
# everything following any spaces after the asterisks.
# The re.DOTALL is needed so that '.*' in 'tail' also captures the
# trailing newline of the line instead of dropping it.
vuPat = re.compile(r'^(?P<head>  [*]+)( *)(?P<tail>.*)', re.DOTALL)

# VUID with the numeric portion captured in the match object
vuidPat = re.compile(r'VUID-[^-]+-[^-]+-(?P<vuid>[0-9]+)')

# Pattern matching leading nested bullet points (two or more asterisks
# after the two-space indent)
nestedVuPat = re.compile(r'^  \*\*')
767
if __name__ == '__main__':
    # Command-line entry point: parse options, reflow the requested files
    # (or the whole spec tree when none are given), report the results of
    # the optional markup / duplicate-VUID checks, and write the next free
    # VUID back to vuidCounts.py if it changed.
    parser = argparse.ArgumentParser()

    parser.add_argument('-diag', action='store', dest='diagFile',
                        help='Set the diagnostic file')
    parser.add_argument('-warn', action='store', dest='warnFile',
                        help='Set the warning file')
    parser.add_argument('-log', action='store', dest='logFile',
                        help='Set the log file for both diagnostics and warnings')
    parser.add_argument('-overwrite', action='store_true',
                        help='Overwrite input filenames instead of writing different output filenames')
    parser.add_argument('-out', action='store', dest='outDir',
                        default='out',
                        help='Set the output directory in which updated files are generated (default: out)')
    parser.add_argument('-nowrite', action='store_true',
                        help='Do not write output files, for use with -check')
    parser.add_argument('-check', action='store', dest='check',
                        help='Run markup checks and warn if WARN option is given, error exit if FAIL option is given')
    parser.add_argument('-checkVUID', action='store', dest='checkVUID',
                        help='Detect duplicated VUID numbers and warn if WARN option is given, error exit if FAIL option is given')
    parser.add_argument('-tagvu', action='store_true',
                        help='Tag un-tagged Valid Usage statements starting at the value wired into reflow.py')
    parser.add_argument('-nextvu', action='store', dest='nextvu', type=int,
                        default=None,
                        help='Specify start VUID to use instead of the value wired into vuidCounts.py')
    parser.add_argument('-maxvu', action='store', dest='maxvu', type=int,
                        default=None,
                        help='Specify maximum VUID instead of the value wired into vuidCounts.py')
    parser.add_argument('-branch', action='store', dest='branch',
                        help='Specify branch to assign VUIDs for')
    parser.add_argument('-noflow', action='store_true', dest='noflow',
                        help='Do not reflow text. Other actions may apply')
    # The default is given as an int to match type=int (argparse would
    # have converted the string '76' to the same value).
    parser.add_argument('-margin', action='store', type=int, dest='margin',
                        default=76,
                        help='Width to reflow text, defaults to 76 characters')
    parser.add_argument('-suffix', action='store', dest='suffix',
                        default='',
                        help='Set the suffix added to updated file names (default: none)')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='a filename to reflow text in')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    setLogFile(True,  True, args.logFile)
    setLogFile(True, False, args.diagFile)
    setLogFile(False, True, args.warnFile)

    print('args.margin = ', args.margin)

    if args.overwrite:
        logWarn("reflow.py: will overwrite all input files")

    errors = ''
    if args.branch is None:
        (args.branch, errors) = getBranch()
    if args.branch is None:
        # This is not fatal unless VUID assignment is required
        if args.tagvu:
            logErr('Cannot determine current git branch, so cannot assign VUIDs:', errors)

    # These are only filled in when the VUID range is loaded from
    # vuidCounts.py below. They must exist regardless, because the final
    # "update next free VUID" step tests them even when -nextvu was given
    # explicitly on the command line.
    vuidCounts = None
    startVUID = None

    if args.tagvu and args.nextvu is None:
        # Moved here since vuidCounts is only needed in the internal
        # repository
        from vuidCounts import vuidCounts

        if args.branch not in vuidCounts:
            logErr('Branch', args.branch, 'not in vuidCounts, cannot continue')
        maxVUID = vuidCounts[args.branch][1]
        startVUID = vuidCounts[args.branch][2]
        args.nextvu = startVUID
        args.maxvu = maxVUID

    if args.nextvu is not None:
        logWarn('Tagging untagged Valid Usage statements starting at', args.nextvu)

    # Count of markup check warnings encountered
    # This is added to the argparse structure
    args.warnCount = 0

    # Dictionary of VUID numbers found, containing a list of (file, line) on
    # which that number was found
    # This is added to the argparse structure
    args.vuidDict = {}

    # If no files are specified, reflow the entire specification chapters folder
    if not args.files:
        folder_to_reflow = conventions.spec_reflow_path
        logWarn('Reflowing all asciidoc files under', folder_to_reflow)
        reflowAllAdocFiles(folder_to_reflow, args)
    else:
        for file in args.files:
            reflowFile(file, args)

    if args.warnCount > 0:
        if args.check == 'FAIL':
            logErr('Failed with', args.warnCount, 'markup errors detected.\n' +
                   'To fix these, you can take actions such as:\n' +
                   '  * Moving conditionals outside VU start / end without changing VU meaning\n' +
                   '  * Refactor conditional text using terminology defined conditionally outside the VU itself\n' +
                   '  * Remove the conditional (allowable when this just affects command / structure / enum names)\n')
        else:
            logWarn('Total warning count for markup issues is', args.warnCount)

    # Look for duplicated VUID numbers
    if args.checkVUID:
        dupVUIDs = 0
        for vuid in sorted(args.vuidDict):
            found = args.vuidDict[vuid]
            if len(found) > 1:
                logWarn('Duplicate VUID number {} found in files:'.format(vuid))
                for (file, line) in found:
                    logWarn('    {}: {}'.format(file, line))
                dupVUIDs = dupVUIDs + 1

        if dupVUIDs > 0:
            if args.checkVUID == 'FAIL':
                logErr('Failed with', dupVUIDs, 'duplicated VUID numbers found.\n' +
                       'To fix this, either convert these to commonvalidity VUs if possible, or strip\n' +
                       'the VUIDs from all but one of the duplicates and regenerate new ones.')
            else:
                logWarn('Total number of duplicated VUID numbers is', dupVUIDs)

    # Only rewrite vuidCounts.py when the counts were actually loaded from
    # it. With an explicit -nextvu there is no loaded table to update.
    if (vuidCounts is not None
        and args.nextvu is not None
        and args.nextvu != startVUID):

        # Update next free VUID to assign
        vuidCounts[args.branch][2] = args.nextvu
        try:
            reflow_count_file_path = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                'vuidCounts.py')
            # The context manager closes the file even if a write fails
            with open(reflow_count_file_path, 'w', encoding='utf8') as reflow_count_file:
                print('# Do not edit this file!', file=reflow_count_file)
                print('# VUID ranges reserved for branches', file=reflow_count_file)
                print('# Key is branch name, value is [ start, end, nextfree ]', file=reflow_count_file)
                print('vuidCounts = {', file=reflow_count_file)
                for key in sorted(vuidCounts):
                    print("    '{}': [ {}, {}, {} ],".format(
                        key,
                        vuidCounts[key][0],
                        vuidCounts[key][1],
                        vuidCounts[key][2]),
                        file=reflow_count_file)
                print('}', file=reflow_count_file)
        except OSError:
            # Best effort: failing to record the new count is only a warning
            logWarn('Cannot open output count file vuidCounts.py', ':', sys.exc_info()[0])
913