1from __future__ import absolute_import, division, unicode_literals 2 3import os 4import sys 5import traceback 6import warnings 7import re 8 9warnings.simplefilter("error") 10 11from .support import get_data_files 12from .support import TestData, convert, convertExpected, treeTypes 13from html5lib import html5parser, constants 14 15# Run the parse error checks 16checkParseErrors = False 17 18# XXX - There should just be one function here but for some reason the testcase 19# format differs from the treedump format by a single space character 20 21 22def convertTreeDump(data): 23 return "\n".join(convert(3)(data).split("\n")[1:]) 24 25namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub 26 27 28def runParserTest(innerHTML, input, expected, errors, treeClass, 29 namespaceHTMLElements): 30 with warnings.catch_warnings(record=True) as caughtWarnings: 31 warnings.simplefilter("always") 32 p = html5parser.HTMLParser(tree=treeClass, 33 namespaceHTMLElements=namespaceHTMLElements) 34 35 try: 36 if innerHTML: 37 document = p.parseFragment(input, innerHTML) 38 else: 39 document = p.parse(input) 40 except: 41 errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, 42 "\nTraceback:", traceback.format_exc()]) 43 assert False, errorMsg 44 45 otherWarnings = [x for x in caughtWarnings 46 if not issubclass(x.category, constants.DataLossWarning)] 47 assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings] 48 if len(caughtWarnings): 49 return 50 51 output = convertTreeDump(p.tree.testSerializer(document)) 52 53 expected = convertExpected(expected) 54 if namespaceHTMLElements: 55 expected = namespaceExpected(r"\1<html \2>", expected) 56 57 errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected, 58 "\nReceived:", output]) 59 assert expected == output, errorMsg 60 61 errStr = [] 62 for (line, col), errorcode, datavars in p.errors: 63 assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) 64 errStr.append("Line: %i Col: %i %s" % (line, col, 65 constants.E[errorcode] % datavars)) 66 67 errorMsg2 = "\n".join(["\n\nInput:", input, 68 "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors), 69 "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)]) 70 if checkParseErrors: 71 assert len(p.errors) == len(errors), errorMsg2 72 73 74def test_parser(): 75 sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n") 76 files = get_data_files('tree-construction') 77 78 for filename in files: 79 testName = os.path.basename(filename).replace(".dat", "") 80 if testName in ("template",): 81 continue 82 83 tests = TestData(filename, "data") 84 85 for index, test in enumerate(tests): 86 input, errors, innerHTML, expected = [test[key] for key in 87 ('data', 'errors', 88 'document-fragment', 89 'document')] 90 if errors: 91 errors = errors.split("\n") 92 93 for treeName, treeCls in treeTypes.items(): 94 for namespaceHTMLElements in (True, False): 95 yield (runParserTest, innerHTML, input, expected, errors, treeCls, 96 namespaceHTMLElements) 97