• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Tests DTD validation through the XmlTextReader interface.
#
6import sys
7import glob
8import os
9import libxml2
# Python 2 ships StringIO as a top-level module; Python 3 only provides
# io.StringIO.  Catch ImportError specifically so that unrelated failures
# (KeyboardInterrupt, SystemExit, ...) are not swallowed by the fallback.
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    import io
    str_io = io.StringIO
16
# Memory debug specific.
# The end of this script calls debugMemory(1) again and reports a non-zero
# return value as leaked bytes; presumably this first call switches the
# tracking on (confirm against the libxml2 Python binding documentation).
libxml2.debugMemory(1)
19
# Text accumulated by the libxml2 error handler; reset before each test.
err = ""

# Directory containing this script, with symlinks resolved.
basedir = os.path.split(os.path.realpath(__file__))[0]

# Canonical path of the shared test documents: <basedir>/../../test/valid
dir_prefix = os.path.realpath(
    os.path.join(basedir, os.pardir, os.pardir, "test", "valid")
)
23
# This dictionary reflects the contents of the files
# ../../test/valid/*.xml.err that are not empty, except for the file paths
# in the messages: the {0} placeholder is filled in with dir_prefix.
#
# NOTE(review): each value is compared for exact equality with the text
# collected by the error handler, so whitespace inside these literals (tabs,
# caret alignment, blank lines, any trailing spaces) is significant.  Verify
# against the reference .err files after touching them.

expect = {
    '766956':
"""{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
%ä%ent;
   ^
{0}/dtds/766956.dtd:2: parser error : Content error in the external subset
%ä%ent;
        ^
Entity: line 1:
value
^
""".format(dir_prefix),
    '781333':
"""{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
<a/>
    ^
{0}/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more child

^
""".format(dir_prefix),
    'cond_sect2':
"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
    %ent;
         ^
Entity: line 1:
]]>
^
{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset

^
""".format(dir_prefix),
    'rss':
"""{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
</rss>
      ^
""".format(dir_prefix),
    't8':
"""{0}/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot; %defmiddle; %deftest;
         ^
Entity: line 1:
&lt;!ELEMENT root (middle) >
^
""".format(dir_prefix),
    't8a':
"""{0}/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot;%defmiddle;%deftest;
         ^
Entity: line 1:
&lt;!ELEMENT root (middle) >
^
""".format(dir_prefix),
    'xlink':
"""{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
	<p><termdef id="dt-arc" term="Arc">An <ter
	                                  ^
validity error : attribute def line 199 references an unknown ID "dt-xlg"
""".format(dir_prefix),
}

# Re-key the table by the full path of each test document
# (dir_prefix/<name>.xml) instead of the bare test name.
expect = {os.path.join(dir_prefix, key + ".xml"): val for key, val in expect.items()}
92
def callback(ctx, str):
    """Error handler: append each reported message to the global ``err``.

    ``ctx`` is not used.  ``str`` is the message; it is rendered with
    ``%s`` formatting before being appended.  (The parameter name shadows
    the builtin but is kept as part of the existing signature.)
    """
    global err
    err += "%s" % str
# Route libxml2 error reports through callback(), which collects them in
# the module-level ``err`` string.
libxml2.registerErrorHandler(callback, "")

# Documents for which Read() is expected to finish with a non-zero status.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files]

valid_files = sorted(glob.glob(os.path.join(dir_prefix, "*.x*")))
if not valid_files:
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let the run succeed without testing
    # anything.  sys.exit(<str>) prints the message to stderr and exits
    # with status 1.
    sys.exit("found no valid files in '{}'".format(dir_prefix))

failures = 0
for xml_path in valid_files:
    err = ""
    reader = libxml2.newTextReaderFilename(xml_path)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

    # Pump the reader until Read() stops returning 1; a final value of 0 is
    # treated as a clean end of document, anything else as an error.
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()

    if ret != 0 and xml_path not in expect_parsing_error:
        # Reported only: this does not count towards ``failures``.
        print("Error parsing and validating %s" % (xml_path))

    # Any collected error text must match the expected text for this file
    # exactly; files without an entry in ``expect`` must produce none.
    # NOTE(review): a file listed in ``expect`` that produces no error text
    # at all is not flagged -- confirm whether that leniency is intended.
    if err and expect.get(xml_path) != err:
        failures += 1
        print("Error: ", err)
        if xml_path in expect:
            print("Expected: ", expect[xml_path])

if failures:
    print("Failed %d tests" % failures)
    sys.exit(1)
126
127#
128# another separate test based on Stephane Bidoul one
129#
130s = """
131<!DOCTYPE test [
132<!ELEMENT test (x,b)>
133<!ELEMENT x (c)>
134<!ELEMENT b (#PCDATA)>
135<!ELEMENT c (#PCDATA)>
136<!ENTITY x "<x><c>xxx</c></x>">
137]>
138<test>
139    &x;
140    <b>bbb</b>
141</test>
142"""
143expect="""10,test
1441,test
14514,#text
1461,x
1471,c
1483,#text
14915,c
15015,x
15114,#text
1521,b
1533,#text
15415,b
15514,#text
15615,test
157"""
158res=""
159err=""
160
161input = libxml2.inputBuffer(str_io(s))
162reader = input.newTextReader("test2")
163reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
164reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
165reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
166reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
167while reader.Read() == 1:
168    res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
169
170if res != expect:
171    print("test2 failed: unexpected output")
172    print(res)
173    sys.exit(1)
174if err != "":
175    print("test2 failed: validation error found")
176    print(err)
177    sys.exit(1)
178
179#
180# Another test for external entity parsing and validation
181#
182
183s = """<!DOCTYPE test [
184<!ELEMENT test (x)>
185<!ELEMENT x (#PCDATA)>
186<!ENTITY e SYSTEM "tst.ent">
187]>
188<test>
189  &e;
190</test>
191"""
192tst_ent = """<x>hello</x>"""
193expect="""10 test
1941 test
19514 #text
1961 x
1973 #text
19815 x
19914 #text
20015 test
201"""
202res=""
203
def myResolver(URL, ID, ctxt):
    """Entity loader used by test3.

    Returns a new in-memory stream over ``tst_ent`` when URL is exactly
    "tst.ent", and None for any other URL.  ID and ctxt are not used.
    """
    if URL != "tst.ent":
        return None
    return str_io(tst_ent)
208
# Install myResolver as the entity loader before building the reader for
# test3.
libxml2.setEntityLoader(myResolver)

# ``input`` and ``reader`` are the names deleted by the final cleanup
# section, so they are reused here.
input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader("test3")
reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
while reader.Read() == 1:
    res += "%s %s\n" % (reader.NodeType(), reader.Name())

# Node stream first, then any text collected by the error handler.
if res != expect:
    print("test3 failed: unexpected output")
    print(res)
    sys.exit(1)
if err:
    print("test3 failed: validation error found")
    print(err)
    sys.exit(1)
228
229#
230# Another test for recursive entity parsing, validation, and replacement of
231# entities, making sure the entity ref node doesn't show up in that case
232#
233
234s = """<!DOCTYPE test [
235<!ELEMENT test (x, x)>
236<!ELEMENT x (y)>
237<!ELEMENT y (#PCDATA)>
238<!ENTITY x "<x>&y;</x>">
239<!ENTITY y "<y>yyy</y>">
240]>
241<test>
242  &x;
243  &x;
244</test>"""
245expect="""10 test 0
2461 test 0
24714 #text 1
2481 x 1
2491 y 2
2503 #text 3
25115 y 2
25215 x 1
25314 #text 1
2541 x 1
2551 y 2
2563 #text 3
25715 y 2
25815 x 1
25914 #text 1
26015 test 0
261"""
262res=""
263err=""
264
265input = libxml2.inputBuffer(str_io(s))
266reader = input.newTextReader("test4")
267reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
268reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
269reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
270reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
271while reader.Read() == 1:
272    res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
273
274if res != expect:
275    print("test4 failed: unexpected output")
276    print(res)
277    sys.exit(1)
278if err != "":
279    print("test4 failed: validation error found")
280    print(err)
281    sys.exit(1)
282
283#
284# The same test but without entity substitution this time
285#
286
287s = """<!DOCTYPE test [
288<!ELEMENT test (x, x)>
289<!ELEMENT x (y)>
290<!ELEMENT y (#PCDATA)>
291<!ENTITY x "<x>&y;</x>">
292<!ENTITY y "<y>yyy</y>">
293]>
294<test>
295  &x;
296  &x;
297</test>"""
298expect="""10 test 0
2991 test 0
30014 #text 1
3015 x 1
30214 #text 1
3035 x 1
30414 #text 1
30515 test 0
306"""
307res=""
308err=""
309
310input = libxml2.inputBuffer(str_io(s))
311reader = input.newTextReader("test5")
312reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
313while reader.Read() == 1:
314    res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
315
316if res != expect:
317    print("test5 failed: unexpected output")
318    print(res)
319    sys.exit(1)
320if err != "":
321    print("test5 failed: validation error found")
322    print(err)
323    sys.exit(1)
324
#
# cleanup: drop the last input buffer and reader before the memory check
#
del input, reader

# Memory debug specific: after cleanupParser(), a non-zero debugMemory(1)
# result is reported as a leak and the outstanding allocations are dumped.
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()
else:
    print("OK")
338