• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2# Copyright 2013-2023 The Khronos Group Inc.
3# SPDX-License-Identifier: Apache-2.0
4
5# linkcheck - check internal links of the specified HTML file against
6# internal anchors and report inconsistencies.
7#
# Usage: linkcheck [-anchors] file.html [file.html ...]
9
10import argparse
11from lxml import etree as et
12
def printSet(s):
    """Print every member of `s` in sorted order, one per line.

    Each line is indented by four spaces. Nothing is printed for an
    empty collection.
    """
    for member in sorted(s):
        print(f'    {member}')
16
def checkLinks(file, args):
    """Cross-check '#name' hrefs against id= anchors in one HTML file.

    The file is parsed with lxml's HTML parser. Internal cross-references
    that have no matching anchor are always printed; anchors that no
    internal cross-reference targets are printed only when `args.anchors`
    is true.

    file -- passed straight to et.parse() as the document to read
    args -- parsed command-line options; only `args.anchors` is consulted
    """
    tree = et.parse(file, et.HTMLParser())

    # <svg> subtrees just add noise to the cross-referencing, so detach
    # them before any attributes are collected.
    for svgNode in tree.findall('//svg'):
        svgNode.getparent().remove(svgNode)

    # Every href= value except those on '<link>' tags from the HTML header
    targets = [elem.get('href')
               for elem in tree.findall('//*[@href]')
               if elem.tag != 'link']

    # A leading '#' marks an internal anchor reference; store the bare name
    internals = {target[1:] for target in targets if target.startswith('#')}
    # Collected for ad-hoc debugging only; nothing below reports it
    externals = {target for target in targets if not target.startswith('#')}

    # Every id= value except those on SVG '<g>' tags
    anchors = {elem.get('id')
               for elem in tree.findall('//*[@id]')
               if elem.tag != 'g'}

    # References that point at an anchor which does not exist
    danglingXrefs = internals - anchors
    print('Internal xrefs not in anchors:', len(danglingXrefs))
    printSet(danglingXrefs)

    if not args.anchors:
        return

    # Anchors that nothing in this file links to
    orphanAnchors = anchors - internals
    print('Internal anchors not in xrefs:', len(orphanAnchors))
    printSet(orphanAnchors)
68
# Command-line driver: every filename given on the command line is checked
# independently, in the order given.
71
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Check internal links of the specified HTML files '
                    'against internal anchors and report inconsistencies.')

    # Zero or more files may be given; with none, nothing is checked.
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='an HTML file whose internal links are checked')
    # NOTE: '-anchors' is a single-dash long option; the spelling is left
    # unchanged so the command-line interface stays the same.
    parser.add_argument('-anchors', action='store_true',
                        help='Report orphaned anchors')

    args = parser.parse_args()

    for file in args.files:
        checkLinks(file, args)
85