#!/usr/bin/python3
#
# Copyright 2020 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

# check_html_xrefs - simple-minded check for internal xrefs in spec HTML
# that don't exist.

# Usage: check_html_xrefs file
# Just reports bad xrefs, not where they occur

import argparse, cProfile, pdb, string, sys, time
import io, os, re, string, sys, copy
from lxml import etree


def _collect_ids(tree):
    """Return the set of every 'id' attribute value found in `tree`.

    Duplicate IDs are collapsed silently; they are not reported.
    """
    return {elem.get('id') for elem in tree.findall('.//*[@id]')}


def _collect_local_refs(tree):
    """Return the set of local link targets used by <a href="#..."> in `tree`.

    Only hrefs starting with '#' are considered; the leading '#' is
    stripped from the returned names. All other hrefs (external links,
    relative paths, empty values) are skipped.
    """
    local_refs = set()
    for elem in tree.findall('.//a[@href]'):
        href = elem.get('href')
        # startswith() instead of href[0] so that an empty href=""
        # is skipped rather than raising IndexError.
        if href.startswith('#'):
            local_refs.add(href[1:])
    return local_refs


def main():
    """Parse the command line and report local hrefs with no matching id.

    One line is printed per missing target, in sorted order so the
    output is stable from run to run. Nothing is printed, and nothing
    is read, when no filename is given.
    """
    arg_parser = argparse.ArgumentParser()

    # NOTE(review): the help text says "registry XML", but the file is
    # parsed with etree.HTMLParser below. String left unchanged.
    arg_parser.add_argument('files', metavar='filename', nargs='*',
                            help='Path to registry XML')
    args = arg_parser.parse_args()

    if not args.files:
        return

    # Only the first filename is checked; any further ones are ignored.
    # The context manager closes the file as soon as parsing is done.
    with open(args.files[0], 'r') as html_file:
        tree = etree.parse(html_file, etree.HTMLParser())

    ids = _collect_ids(tree)
    refs = _collect_local_refs(tree)

    # Check for hrefs not found in ids
    for ref in sorted(refs - ids):
        print('Reference not found in HTML: #' + ref)


if __name__ == '__main__':
    main()