• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import absolute_import, division, unicode_literals
2
3import os
4import sys
5import traceback
6import warnings
7import re
8
# Escalate every warning to an exception from this point on.  This runs
# before the imports below, so warnings raised while those modules are
# imported are escalated as well.
warnings.simplefilter("error")
10
11from .support import get_data_files
12from .support import TestData, convert, convertExpected, treeTypes
13from html5lib import html5parser, constants
14
# Run the parse error checks.  When True, runParserTest additionally asserts
# that the number of errors reported by the parser equals the number of
# expected errors; when False only the tree output is compared.
checkParseErrors = False
17
18# XXX - There should just be one function here but for some reason the testcase
19# format differs from the treedump format by a single space character
20
21
def convertTreeDump(data):
    """Normalise a serialized tree dump and drop its first line.

    ``data`` is passed through the callable returned by ``convert(3)``;
    everything up to and including the first newline of that result is
    discarded and the remainder is returned.
    """
    converted_lines = convert(3)(data).split("\n")
    remaining_lines = converted_lines[1:]
    return "\n".join(remaining_lines)
24
# Matches, at the start of any line (re.M), optional leading whitespace
# followed by a ``<name>`` token that contains no whitespace.  Group 1 is the
# leading whitespace, group 2 the name between the angle brackets.
_namespaceExpectedPattern = re.compile(r"^(\s*)<(\S+)>", re.M)

# Bound ``sub`` method: call as ``namespaceExpected(replacement, text)``.
namespaceExpected = _namespaceExpectedPattern.sub
26
27
def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and assert that the serialized tree equals ``expected``.

    Parameters:
        innerHTML: when truthy, ``input`` is parsed as a fragment and this
            value is passed as the second argument to ``parseFragment``;
            otherwise ``input`` is parsed as a full document.
        input: the markup handed to the parser.
        expected: the expected tree dump in test-case format.
        errors: sequence of expected parse errors (may be empty or None).
            Only its length is compared, and only when the module-level
            ``checkParseErrors`` flag is true.
        treeClass: tree builder passed to ``HTMLParser(tree=...)``.
        namespaceHTMLElements: forwarded to the parser; when true the
            expected dump is rewritten so ``<name>`` becomes ``<html name>``.

    Raises:
        AssertionError: if parsing raises, if a warning other than
            ``DataLossWarning`` is recorded, if the tree differs from the
            expectation, or (with ``checkParseErrors``) if the error count
            differs.

    Returns None without comparing anything if any warning was recorded
    during parsing (at that point only ``DataLossWarning`` can remain).
    """
    with warnings.catch_warnings(record=True) as caughtWarnings:
        # Record warnings instead of raising them so they can be inspected.
        warnings.simplefilter("always")
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)

        try:
            if innerHTML:
                document = p.parseFragment(input, innerHTML)
            else:
                document = p.parse(input)
        except Exception:
            # Only ordinary errors are turned into test failures;
            # KeyboardInterrupt / SystemExit must propagate untouched.
            errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                                  "\nTraceback:", traceback.format_exc()])
            # Explicit raise so the failure is reported even under ``-O``.
            raise AssertionError(errorMsg)

    otherWarnings = [x for x in caughtWarnings
                     if not issubclass(x.category, constants.DataLossWarning)]
    assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings]
    if len(caughtWarnings):
        # Only DataLossWarning instances remain here; the test is skipped.
        return

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        expected = namespaceExpected(r"\1<html \2>", expected)

    errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                          "\nReceived:", output])
    assert expected == output, errorMsg

    # Always format every reported error: this validates that each error
    # carries a dict of variables and that its message template can be
    # filled in, independently of whether error counts are checked.
    errStr = []
    for (line, col), errorcode, datavars in p.errors:
        assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
        errStr.append("Line: %i Col: %i %s" % (line, col,
                                               constants.E[errorcode] % datavars))

    if checkParseErrors:
        # Treat a missing/empty error list as "no errors expected" so that
        # len() and join() below cannot fail on None.
        expectedErrors = errors or []
        errorMsg2 = "\n".join(["\n\nInput:", input,
                               "\nExpected errors (" + str(len(expectedErrors)) + "):\n" + "\n".join(expectedErrors),
                               "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
        assert len(p.errors) == len(expectedErrors), errorMsg2
72
73
def test_parser():
    """Generate the tree-construction parser tests.

    Writes the names of the available tree builders to stderr, then walks
    every data file returned by ``get_data_files('tree-construction')``
    (skipping the one whose base name is ``template``) and yields, for each
    test in the file, each tree builder in ``treeTypes`` and each value of
    ``namespaceHTMLElements`` (True, then False), a tuple of::

        (runParserTest, innerHTML, input, expected, errors, treeCls,
         namespaceHTMLElements)

    ``errors`` is always a list of lines; it is empty when the test lists
    no errors.
    """
    sys.stderr.write('Testing tree builders ' + " ".join(treeTypes.keys()) + "\n")
    files = get_data_files('tree-construction')

    for filename in files:
        # Strip only the real extension; str.replace would also remove a
        # ".dat" occurring in the middle of the file name.
        testName = os.path.splitext(os.path.basename(filename))[0]
        if testName in ("template",):
            continue

        tests = TestData(filename, "data")

        for test in tests:
            source = test['data']
            errors = test['errors']
            innerHTML = test['document-fragment']
            expected = test['document']

            # Normalise to a list so the receiver can always call len() /
            # join() on it, even when the section is empty or absent.
            errors = errors.split("\n") if errors else []

            for treeCls in treeTypes.values():
                for namespaceHTMLElements in (True, False):
                    yield (runParserTest, innerHTML, source, expected, errors, treeCls,
                           namespaceHTMLElements)
97