• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2#
3# Copyright 2016-2024 The Khronos Group Inc.
4#
5# SPDX-License-Identifier: Apache-2.0
6
7# Utility functions for automatic ref page generation and other script stuff
8
9import io
10import re
11import sys
12import subprocess
13
# Module-level logging state shared by the setLog*() and log*() functions
# in this file.

# Default stream for logErr()
errFile = sys.stderr
# Default stream for logWarn(); may be replaced by setLogFile()
warnFile = sys.stdout
# Default stream for logDiag(); None means diagnostics are not written.
# May be replaced by setLogFile()
diagFile = None
# Optional metadata added to message prefixes by logHeader() when set
logSourcefile = None
logProcname = None
logLine = None
22
def unescapeQuotes(s):
    """Return s with every backslash-escaped single quote replaced by a
    plain single quote (used for refpage descriptions)."""
    escaped_quote = "\\'"
    return "'".join(s.split(escaped_quote))
26
def write(*args, **kwargs):
    """Minimal print()-like helper.

    Writes the str() of each positional argument, separated by single
    spaces, to the stream given by the 'file' keyword (default sys.stdout),
    then writes the 'end' keyword (default newline). Other keyword
    arguments are ignored."""
    stream = kwargs.pop('file', sys.stdout)
    terminator = kwargs.pop('end', '\n')
    text = ' '.join(map(str, args))
    stream.write(text)
    stream.write(terminator)
32
def setLogSourcefile(filename):
    """Record the source filename that logHeader() adds to message
    prefixes; pass None to omit it."""
    global logSourcefile

    logSourcefile = filename
37
def setLogProcname(procname):
    """Record the procedure name that logHeader() adds to message
    prefixes; pass None to omit it."""
    global logProcname

    logProcname = procname
41
def setLogLine(line):
    """Record the line number that logHeader() adds to message prefixes;
    pass None to omit it."""
    global logLine

    logLine = line
45
def logHeader(severity):
    """Generate the prefix for a diagnostic line from severity and metadata.

    - severity - tag placed at the start of the prefix, e.g. 'DIAG',
      'WARN', or 'ERROR'

    The module-level logProcname, logSourcefile, and logLine values are
    appended (in that order) when they are set. Returns the prefix string,
    which always ends with a space."""
    parts = [severity + ': ']
    if logProcname:
        parts.append(' in ' + logProcname)
    if logSourcefile:
        parts.append(' for ' + logSourcefile)
    # Compare against None instead of testing truthiness: findRefs() passes
    # zero-based line indices to setLogLine(), so 0 is a legitimate line
    # and must not be dropped from the prefix.
    if logLine is not None:
        parts.append(' line ' + str(logLine))
    parts.append(' ')
    return ''.join(parts)
58
def setLogFile(setDiag, setWarn, filename):
    """Set the file handle to log either or both warnings and diagnostics to.

    - setDiag and setWarn are True if the corresponding handle is to be set.
    - filename is None for no logging, '-' for stdout, or a pathname.

    When a pathname is given it is opened for writing as UTF-8 (truncating
    any existing file). The handle is stored in the module-level
    diagFile / warnFile variables and is not closed by this function."""
    global diagFile, warnFile

    if filename is None:
        return

    # Neither handle was requested: return before open() so that no file is
    # created or truncated and no unused handle is leaked.
    if not (setDiag or setWarn):
        return

    if filename == '-':
        fp = sys.stdout
    else:
        fp = open(filename, 'w', encoding='utf-8')

    if setDiag:
        diagFile = fp
    if setWarn:
        warnFile = fp
78
def logDiag(*args, **kwargs):
    """Write a diagnostic message.

    The message is the logHeader('DIAG') prefix followed by the
    space-joined str() of each positional argument, then the 'end' keyword
    (default newline). It goes to the 'file' keyword, defaulting to the
    module-level diagFile; nothing is written when that stream is None."""
    stream = kwargs.pop('file', diagFile)
    terminator = kwargs.pop('end', '\n')
    if stream is None:
        return
    prefix = logHeader('DIAG')
    stream.write(prefix + ' '.join(map(str, args)))
    stream.write(terminator)
85
def logWarn(*args, **kwargs):
    """Write a warning message.

    The message is the logHeader('WARN') prefix followed by the
    space-joined str() of each positional argument, then the 'end' keyword
    (default newline). It goes to the 'file' keyword, defaulting to the
    module-level warnFile; nothing is written when that stream is None."""
    stream = kwargs.pop('file', warnFile)
    terminator = kwargs.pop('end', '\n')
    if stream is None:
        return
    prefix = logHeader('WARN')
    stream.write(prefix + ' '.join(map(str, args)))
    stream.write(terminator)
92
def logErr(*args, **kwargs):
    """Write an error message, then raise UserWarning carrying the same text.

    The message is the logHeader('ERROR') prefix followed by the
    space-joined str() of each positional argument and the 'end' keyword
    (default newline). It is written to the 'file' keyword (default: the
    module-level errFile) unless that stream is None. The exception is
    raised in either case, so this function never returns normally."""
    stream = kwargs.pop('file', errFile)
    terminator = kwargs.pop('end', '\n')

    message = logHeader('ERROR') + ' '.join(map(str, args)) + terminator

    if stream is not None:
        stream.write(message)
    raise UserWarning(message)
104
def isempty(s):
    """Return True if the string s is empty or consists only of white
    space, False otherwise"""
    return s.strip() == ''
108
class pageInfo:
    """Per-page record describing a ref page and where its parts sit in the
    file it is extracted from."""

    def __init__(self):
        self.extractPage = True
        """Whether the page should be extracted (True by default)"""

        self.Warning = None
        """Warning string when the page is suboptimal or cannot be generated"""

        self.embed = False
        """False, or the name of the ref page this include is embedded within"""

        self.type = None
        """Refpage type attribute - 'structs', 'protos', 'freeform', etc."""

        self.name = None
        """Name of the struct/proto/enumerant/etc."""

        self.desc = None
        """Short description of the ref page"""

        self.begin = None
        """Index of the first line of the page (heuristic or // refBegin)"""

        self.include = None
        """Index of the include:: line defining the page"""

        self.param = None
        """Index of the first line of parameter/member definitions"""

        self.body = None
        """Index of the first line of body text"""

        self.validity = None
        """Index of the validity include"""

        self.end = None
        """Index of the last line of the page (heuristic validity include, or // refEnd)"""

        self.alias = ''
        """Aliases of this name if supplied, otherwise ''"""

        self.refs = ''
        """Cross-references on the // refEnd line, if supplied"""

        self.spec = None
        """'spec' attribute of the refpage open block if supplied, or None for the default ('api') type"""

        self.anchor = None
        """'anchor' attribute of the refpage open block if supplied, or inferred to be the same as the 'name'"""
159
def printPageInfoField(desc, line, file):
    """Log (via logDiag) one line-index field of a pageInfo struct.

    - desc - string description of the field
    - line - zero-based field value, or None if the field is unset
    - file - list of strings indexed by line

    A set field is logged as its one-based line number followed by the
    text of that line; an unset field is logged as None."""
    label = desc + ':'
    if line is None:
        logDiag(label, line)
        return
    # file[line] is assumed to carry its own line terminator, so no extra
    # terminator is written after it.
    logDiag(label, line + 1, '\t-> ', file[line], end='')
170
def printPageInfo(pi, file):
    """Log (via logDiag) the fields of a pageInfo struct.

    - pi - pageInfo to print
    - file - list of strings indexed by the line fields of pi"""
    # Plain value fields, logged as-is
    for label, attr in (('TYPE:   ', 'type'),
                        ('NAME:   ', 'name'),
                        ('WARNING:', 'Warning'),
                        ('EXTRACT:', 'extractPage'),
                        ('EMBED:  ', 'embed'),
                        ('DESC:   ', 'desc')):
        logDiag(label, getattr(pi, attr))

    # Line-index fields, logged together with the text of the line
    for label, attr in (('BEGIN   ', 'begin'),
                        ('INCLUDE ', 'include'),
                        ('PARAM   ', 'param'),
                        ('BODY    ', 'body'),
                        ('VALIDITY', 'validity'),
                        ('END     ', 'end')):
        printPageInfoField(label, getattr(pi, attr), file)

    logDiag('REFS: "' + pi.refs + '"')
189
def prevPara(file, line):
    """Return the index of the first line of the paragraph preceding the
    one that starts at the given line.

    Paragraphs are separated by blank (white space only) lines, and the
    given line is assumed to be the first line of its paragraph. If the
    search runs off the start of the file, -1 is returned.

    - file is an array of strings
    - line is the starting point (zero-based)"""
    idx = line
    # Step back out of the current paragraph
    while idx >= 0 and not isempty(file[idx]):
        idx -= 1
    # Step back over the blank lines separating the paragraphs
    while idx >= 0 and isempty(file[idx]):
        idx -= 1
    # Walk up to the first line of the previous paragraph
    while idx >= 1 and not isempty(file[idx - 1]):
        idx -= 1
    return idx
209
def nextPara(file, line):
    """Return the index of the first line of the paragraph following the
    given line.

    Paragraphs are separated by blank (white space only) lines. The search
    never advances past the last line of the file, so the index of the
    last line is returned when no later paragraph exists.

    - file is an array of strings
    - line is the starting point (zero-based)"""
    last = len(file) - 1
    idx = line
    # Advance past the remainder of the current paragraph
    while idx != last and not isempty(file[idx]):
        idx += 1
    # Advance past the blank lines that follow it
    while idx != last and isempty(file[idx]):
        idx += 1
    return idx
227
def lookupPage(pageMap, name):
    """Return the pageInfo stored in pageMap under name, first creating
    and storing a new one (with its name field set) if there is none."""
    if name in pageMap:
        return pageMap[name]

    created = pageInfo()
    created.name = name
    pageMap[name] = created
    return created
237
def loadFile(filename):
    """Load a file into a list of strings.

    - filename - path of the UTF-8 text file to read

    Returns (lines, newline_string) on success, where lines is the result
    of readlines() in text mode and newline_string is "\\r\\n" if the raw
    file contains more than one CRLF sequence, otherwise "\\n".
    Returns (None, None) and logs a warning if the file cannot be read."""
    newline_string = "\n"
    try:
        # Binary pass: inspect the raw bytes to detect the newline
        # convention, since the text-mode read below does not preserve it.
        with open(filename, 'rb') as fp:
            if fp.read().count(b"\r\n") > 1:
                newline_string = "\r\n"

        with open(filename, 'r', encoding='utf-8') as fp:
            lines = fp.readlines()
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit still propagate to the caller.
        logWarn('Cannot open file', filename, ':', sys.exc_info()[0])
        return None, None

    return lines, newline_string
254
def clampToBlock(line, minline, maxline):
    """Clamp a line number to be in the range [minline,maxline].

    - line - line number to clamp, or None
    - minline - lower bound, or None for no lower bound
    - maxline - upper bound

    If the line number is None, just return it.
    If minline is None, do not clamp to that value."""
    if line is None:
        return line
    # Test against None explicitly: a lower bound of 0 is a valid line
    # index and must still be applied.
    if minline is not None and line < minline:
        return minline
    if line > maxline:
        return maxline

    return line
268
def fixupRefs(pageMap, specFile, file):
    """Fill in missing fields in pageInfo structures, to the extent they can be
    inferred.

    - pageMap - dictionary of pageInfo structures
    - specFile - filename
    - file - list of strings making up the file, indexed by pageInfo

    The pageInfo entries are modified in place; nothing is returned.
    Progress is reported through logDiag()."""
    # All potential ref pages are now in pageMap. Process them to
    # identify actual page start/end/description boundaries, if
    # not already determined from the text.
    for name in sorted(pageMap.keys()):
        pi = pageMap[name]

        # # If nothing is found but an include line with no begin, validity,
        # # or end, this is not intended as a ref page (yet). Set the begin
        # # line to the include line, so autogeneration can at least
        # # pull the include out, but mark it not to be extracted.
        # # Examples include the host sync table includes in
        # # chapters/fundamentals.adoc and the table of Vk*Flag types in
        # # appendices/boilerplate.adoc.
        # if pi.begin is None and pi.validity is None and pi.end is None:
        #     pi.begin = pi.include
        #     pi.extractPage = False
        #     pi.Warning = 'No begin, validity, or end lines identified'
        #     continue

        # Using open block delimiters, ref pages must *always* have a
        # defined begin and end. If either is undefined, that is fatal
        # for this page: it is marked as not extractable and skipped.
        if pi.begin is None:
            pi.extractPage = False
            pi.Warning = 'Can\'t identify begin of ref page open block'
            continue

        if pi.end is None:
            pi.extractPage = False
            pi.Warning = 'Can\'t identify end of ref page open block'
            continue

        # If there is no description of the page, infer one from the type
        if pi.desc is None:
            if pi.type is not None:
                # Only a warning is recorded here; the description itself
                # is left as None (the inference below is disabled).
                # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)'
                pi.Warning = 'No short description available; could infer from the type and name'
            else:
                pi.extractPage = False
                pi.Warning = 'No short description available, cannot infer from the type'
                continue

        # Try to determine where the parameter and body sections of the page
        # begin. funcpointer, proto, and struct pages infer the location of
        # the parameter and body sections. Other pages infer the location of
        # the body, but have no parameter sections.
        #
        # Probably some other types infer this as well - refer to list of
        # all page types in genRef.py:emitPage()
        if pi.include is not None:
            if pi.type in ['funcpointers', 'protos', 'structs']:
                # Parameters start at the paragraph after the include
                pi.param = nextPara(file, pi.include)
                # An explicitly marked body (// refBody) takes precedence
                if pi.body is None:
                    pi.body = nextPara(file, pi.param)
            else:
                if pi.body is None:
                    pi.body = nextPara(file, pi.include)
        else:
            pi.Warning = 'Page does not have an API definition include::'

        # It is possible for the inferred param and body lines to run past
        # the end of block, if, for example, there is no parameter section.
        # clampToBlock() passes None values through unchanged.
        pi.param = clampToBlock(pi.param, pi.include, pi.end)
        pi.body = clampToBlock(pi.body, pi.param, pi.end)

        # We can get to this point with .include, .param, and .validity
        # all being None, indicating those sections were not found.

        logDiag('fixupRefs: after processing,', pi.name, 'looks like:')
        printPageInfo(pi, file)

    # Now that all the valid pages have been found, try to make some
    # inferences about invalid pages.
    #
    # If a reference without a .end is entirely inside a valid reference,
    # then it is intentionally embedded - may want to create an indirect
    # page that links into the embedding page. This is done by a very
    # inefficient double loop, but the loop depth is small.
    for name in sorted(pageMap.keys()):
        pi = pageMap[name]

        if pi.end is None:
            for embedName in sorted(pageMap.keys()):
                logDiag('fixupRefs: comparing', pi.name, 'to', embedName)
                embed = pageMap[embedName]
                # Do not check embeddings which are themselves invalid
                if not embed.extractPage:
                    logDiag('Skipping check for embedding in:', embed.name)
                    continue
                # NOTE(review): the first loop above clears extractPage for
                # every page whose begin or end is None, so this branch
                # looks unreachable unless pageMap changes in between -
                # confirm before relying on it.
                if embed.begin is None or embed.end is None:
                    logDiag('fixupRefs:', name + ':',
                            'can\'t compare to unanchored ref:', embed.name,
                            'in', specFile, 'at line', pi.include )
                    printPageInfo(pi, file)
                    printPageInfo(embed, file)
                # If an embed is found, change the error to a warning
                elif (pi.include is not None and pi.include >= embed.begin and
                      pi.include <= embed.end):
                    logDiag('fixupRefs: Found embed for:', name,
                            'inside:', embedName,
                            'in', specFile, 'at line', pi.include )
                    pi.embed = embed.name
                    pi.Warning = 'Embedded in definition for ' + embed.name
                    # The first enclosing page (in sorted name order) wins
                    break
                else:
                    logDiag('fixupRefs: No embed match for:', name,
                            'inside:', embedName, 'in', specFile,
                            'at line', pi.include)
383
384
def compatiblePageTypes(refpage_type, pagemap_type):
    """Return whether two refpage 'types' (categories) are compatible.

    Two types are compatible when they are equal, or when each of them is
    one of 'consts' and 'enums'."""
    if refpage_type == pagemap_type:
        return True

    interchangeable = ('consts', 'enums')
    return refpage_type in interchangeable and pagemap_type in interchangeable
396
# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.
# endif::<condition>[] at the start of a line; the condition text is
# captured as the 'condition' group
endifPat   = re.compile(r'^endif::(?P<condition>[\w_+,]+)\[\]')
# [open,refpage=...] at the start of a line; everything from 'refpage=' up
# to the last ']' on the line is captured as the 'attribs' group
beginPat   = re.compile(r'^\[open,(?P<attribs>refpage=.*)\]')
# attribute key/value pairs of an open block: a lowercase key followed by a
# single-quoted value, which may contain backslash-escaped characters
attribStr  = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'"
attribPat  = re.compile(attribStr)
# '// refBody' and '// refError' marker comments at the start of a line
bodyPat    = re.compile(r'^// *refBody')
errorPat   = re.compile(r'^// *refError')

# This regex transplanted from check_spec_links
# It looks for various generated file conventions, and for the api/validity
# include (generated_type), protos/struct/etc path (category), and API name
# (entity_name).
# It could be put into the API conventions object, instead of being
# generalized for all the different specs.
# NOTE(review): the dots in '(../)' are not escaped, so that group matches
# any two characters followed by '/', not only a literal '../' - presumably
# a literal '../' was intended; confirm before tightening.
INCLUDE = re.compile(
        r'include::(?P<directory_traverse>((../){1,4}|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.(adoc|txt)[\[][\]]')
415
def findRefs(file, filename):
    """Identify reference pages in a list of strings, returning a dictionary of
    pageInfo entries for each one found.

    - file - list of strings making up the file
    - filename - name of the file, used only in log messages

    Returns a dictionary mapping refpage name to pageInfo.
    A nested open block, or an open block still being tracked at end of
    file, is reported through logErr(), which raises UserWarning."""
    setLogSourcefile(filename)
    setLogProcname('findRefs')

    # To reliably detect the open blocks around reference pages, we must
    # first detect the '[open,refpage=...]' markup delimiting the block;
    # skip past the '--' block delimiter on the next line; and identify the
    # '--' block delimiter closing the page.
    # This cannot be done solely with pattern matching, and requires state to
    # track 'inside/outside block'.
    # When looking for open blocks, possible states are:
    #   'outside' - outside a block
    #   'start' - have found the '[open...]' line
    #   'inside' - have found the following '--' line
    openBlockState = 'outside'

    # Dictionary of interesting line numbers and strings related to an API
    # name
    pageMap = {}

    numLines = len(file)
    line = 0

    # Track the pageInfo object corresponding to the current open block
    pi = None

    while (line < numLines):
        # Note: this is the zero-based index into file, not a one-based
        # line number
        setLogLine(line)

        # Only one of the patterns can possibly match. Add it to
        # the dictionary for that name.

        # [open,refpage=...] starting a refpage block
        matches = beginPat.search(file[line])
        if matches is not None:
            logDiag('Matched open block pattern')
            attribs = matches.group('attribs')

            # If the previous open block was not closed, raise an error
            # (logErr() raises UserWarning, so processing stops here)
            if openBlockState != 'outside':
                logErr('Nested open block starting at line', line, 'of',
                       filename)

            openBlockState = 'start'

            # Parse the block attributes into a list of (key, value) tuples
            matches = attribPat.findall(attribs)

            # Extract each attribute
            name = None
            desc = None
            refpage_type = None
            spec_type = None
            anchor = None
            alias = None
            xrefs = None

            for (key,value) in matches:
                logDiag('got attribute', key, '=', value)
                if key == 'refpage':
                    name = value
                elif key == 'desc':
                    desc = unescapeQuotes(value)
                elif key == 'type':
                    refpage_type = value
                elif key == 'spec':
                    spec_type = value
                elif key == 'anchor':
                    anchor = value
                elif key == 'alias':
                    alias = value
                elif key == 'xrefs':
                    xrefs = value
                else:
                    logWarn('unknown open block attribute:', key)

            if name is None or desc is None or refpage_type is None:
                # NOTE(review): the two adjacent string literals below are
                # concatenated with no space after the colon - confirm
                # whether that is intended.
                logWarn('missing one or more required open block attributes:'
                        'refpage, desc, or type')
                # Leave pi as None so open block delimiters are ignored
            else:
                pi = lookupPage(pageMap, name)
                pi.desc = desc
                # Must match later type definitions in interface/validity includes
                pi.type = refpage_type
                pi.spec = spec_type
                pi.anchor = anchor
                # alias and xrefs keep their pageInfo defaults ('') unless
                # a non-empty value was supplied
                if alias:
                    pi.alias = alias
                if xrefs:
                    pi.refs = xrefs
                logDiag('open block for', name, 'added DESC =', desc,
                        'TYPE =', refpage_type, 'ALIAS =', alias,
                        'XREFS =', xrefs, 'SPEC =', spec_type,
                        'ANCHOR =', anchor)

            line = line + 1
            continue

        # '--' starting or ending an open block
        if file[line].rstrip() == '--':
            if openBlockState == 'outside':
                # Only refpage open blocks should use -- delimiters
                logWarn('Unexpected double-dash block delimiters')
            elif openBlockState == 'start':
                # -- delimiter following [open,refpage=...]
                openBlockState = 'inside'

                if pi is None:
                    logWarn('no pageInfo available for opening -- delimiter')
                else:
                    # The page begins on the line after the delimiter
                    pi.begin = line + 1
                    logDiag('opening -- delimiter: added BEGIN =', pi.begin)
            elif openBlockState == 'inside':
                # -- delimiter ending an open block
                if pi is None:
                    logWarn('no pageInfo available for closing -- delimiter')
                else:
                    # The page ends on the line before the delimiter
                    pi.end = line - 1
                    logDiag('closing -- delimiter: added END =', pi.end)

                openBlockState = 'outside'
                pi = None
            else:
                logWarn('unknown openBlockState:', openBlockState)

            line = line + 1
            continue

        matches = INCLUDE.search(file[line])
        if matches is not None:
            # Something got included, not sure what yet.
            gen_type = matches.group('generated_type')
            refpage_type = matches.group('category')
            name = matches.group('entity_name')

            # This will never match in OpenCL
            if gen_type == 'validity':
                logDiag('Matched validity pattern')
                if pi is not None:
                    if pi.type and not compatiblePageTypes(refpage_type, pi.type):
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    # The include's category replaces the page type even
                    # after a mismatch warning
                    pi.type = refpage_type
                    pi.validity = line
                    logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity)
                else:
                    logWarn('validity include:: line NOT inside block')

                line = line + 1
                continue

            if gen_type == 'api':
                logDiag('Matched include pattern')
                if pi is not None:
                    # With multiple includes in one block, the last one
                    # seen is the one recorded
                    if pi.include is not None:
                        logDiag('found multiple includes for this block')
                    if pi.type and not compatiblePageTypes(refpage_type, pi.type):
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.include = line
                    logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include)
                else:
                    logWarn('interface include:: line NOT inside block')

                line = line + 1
                continue

            # Not an api or validity include; fall through so the remaining
            # patterns are still tried against this line
            logDiag('ignoring unrecognized include line ', matches.group())

        # Vulkan 1.1 markup allows the last API include construct to be
        # followed by an asciidoctor endif:: construct (and also preceded,
        # at some distance).
        # This looks for endif:: immediately following an include:: line
        # and, if found, moves the include boundary to this line.
        matches = endifPat.search(file[line])
        if matches is not None and pi is not None:
            if pi.include == line - 1:
                logDiag('Matched endif pattern following include; moving include')
                pi.include = line
            else:
                logDiag('Matched endif pattern (not following include)')

            line = line + 1
            continue

        matches = bodyPat.search(file[line])
        if matches is not None:
            logDiag('Matched // refBody pattern')
            if pi is not None:
                pi.body = line
                logDiag('added BODY =', pi.body)
            else:
                logWarn('// refBody line NOT inside block')

            line = line + 1
            continue

        # OpenCL spec uses // refError to tag "validity" (Errors) language,
        # instead of /validity/ includes.
        matches = errorPat.search(file[line])
        if matches is not None:
            logDiag('Matched // refError pattern')
            if pi is not None:
                pi.validity = line
                logDiag('added VALIDITY (refError) =', pi.validity)
            else:
                logWarn('// refError line NOT inside block')

            line = line + 1
            continue

        # No pattern matched; move on to the next line
        line = line + 1
        continue

    # NOTE(review): this tests pi rather than openBlockState, so an unclosed
    # block whose [open,...] line lacked required attributes (pi left as
    # None) is not reported here - confirm whether that is intended.
    if pi is not None:
        logErr('Unclosed open block at EOF!')

    # Clear the log metadata set at the top of this function
    setLogSourcefile(None)
    setLogProcname(None)
    setLogLine(None)

    return pageMap
642
643
def getBranch():
    """Determine current git branch

    Runs 'git symbolic-ref --short HEAD' in the current working directory.

    Returns (branch name, ''), or (None, stderr output) if the branch name
    cannot be determined. The stderr output is returned as the raw bytes
    captured from git."""

    command = [ 'git', 'symbolic-ref', '--short', 'HEAD' ]
    results = subprocess.run(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

    # git command failed: either it exited with a non-zero status or it
    # wrote something to stderr
    if results.returncode != 0 or len(results.stderr) > 0:
        return (None, results.stderr)

    # Strip trailing white space (including the newline, or CR/LF pair)
    # exactly once, then convert to a string. Slicing off the last
    # character unconditionally would drop a real character when the output
    # has no trailing newline.
    branch = results.stdout.rstrip().decode()

    return (branch, '')
666