• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python3
2#
3# Copyright 2020 The Khronos Group Inc.
4#
5# SPDX-License-Identifier: Apache-2.0
6
7# check_html_xrefs - simple-minded check for internal xrefs in spec HTML
8# that don't exist.
9
10# Usage: check_html_xrefs file
11# Just reports bad xrefs, not where they occur
12
13import argparse, cProfile, pdb, string, sys, time
14import io, os, re, string, sys, copy
15from lxml import etree
16
def local_ref_targets(hrefs):
    """Return the set of anchor names targeted by local hrefs.

    hrefs is an iterable of 'href' attribute strings. Only values that
    start with '#' are local references; the leading '#' is stripped and
    duplicates collapse into a single entry. Everything else (external
    URLs, relative paths, empty strings) is skipped.
    """
    # startswith() rather than href[0], so an empty href="" is skipped
    # instead of raising IndexError.
    return {href[1:] for href in hrefs if href.startswith('#')}


def missing_refs(ids, refs):
    """Return a sorted list of the names in refs that are not in ids.

    ids is an iterable of anchor names defined by 'id' attributes and
    refs an iterable of anchor names referenced by local hrefs.
    """
    # Sorted so the report is the same from run to run; plain set
    # iteration order varies with string hash randomization.
    return sorted(set(refs) - set(ids))


if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument('files', metavar='filename', nargs='*',
                            help='Path to spec HTML file')
    args = arg_parser.parse_args()

    # Only the first filename is checked; with no arguments, do nothing.
    if args.files:
        html_parser = etree.HTMLParser()
        # Close the input as soon as it has been parsed
        with open(args.files[0], 'r') as html_file:
            tree = etree.parse(html_file, html_parser)

        # Find all 'id' attributes. Duplicate IDs are not reported; they
        # collapse into a single set entry.
        ids = {elem.get('id') for elem in tree.findall('.//*[@id]')}

        # Find all 'href' attributes on <a> elements, keeping only the
        # local ('#name') ones
        refs = local_ref_targets(
            elem.get('href') for elem in tree.findall('.//a[@href]'))

        # Check for hrefs not found in ids
        for ref in missing_refs(ids, refs):
            print('Reference not found in HTML: #' + ref)
61