1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# 4# this tests the DTD validation with the XmlTextReader interface 5# 6import sys 7import glob 8import os 9import setup_test 10import libxml2 11try: 12 import StringIO 13 str_io = StringIO.StringIO 14except: 15 import io 16 str_io = io.StringIO 17 18# Memory debug specific 19libxml2.debugMemory(1) 20 21err = "" 22basedir = os.path.dirname(os.path.realpath(__file__)) 23dir_prefix = os.path.realpath(os.path.join(basedir, "..", "..", "test", "valid")) 24 25# This dictionary reflects the contents of the files 26# ../../test/valid/*.xml.err that are not empty, except that 27# the file paths in the messages start with ../../test/ 28 29expect = { 30 '766956': 31"""{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';' 32%ä%ent; 33 ^ 34{0}/dtds/766956.dtd:2: parser error : Content error in the external subset 35%ä%ent; 36 ^ 37Entity: line 1: 38value 39^ 40""".format(dir_prefix), 41 '781333': 42"""{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got 43<a/> 44 ^ 45""".format(dir_prefix), 46 'cond_sect2': 47"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity 48 %ent; 49 ^ 50Entity: line 1: 51]]> 52^ 53{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset 54 55^ 56""".format(dir_prefix), 57 'rss': 58"""{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version 59</rss> 60 ^ 61""".format(dir_prefix), 62 't8': 63"""{0}/t8.xml:6: parser error : Content error in the internal subset 64%defroot; %defmiddle; %deftest; 65 ^ 66Entity: line 1: 67<!ELEMENT root (middle) > 68^ 69""".format(dir_prefix), 70 't8a': 71"""{0}/t8a.xml:6: parser error : Content error in the internal subset 72%defroot;%defmiddle;%deftest; 73 ^ 74Entity: line 1: 75<!ELEMENT root (middle) > 76^ 77""".format(dir_prefix), 78 'xlink': 79"""{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined 80 <p><termdef id="dt-arc" term="Arc">An <ter 81 ^ 82validity error : attribute def line 199 references an unknown ID "dt-xlg" 83""".format(dir_prefix), 84} 85 86# Add prefix_dir and extension to the keys 87expect = {os.path.join(dir_prefix, key + ".xml"): val for key, val in expect.items()} 88 89def callback(ctx, str): 90 global err 91 err = err + "%s" % (str) 92libxml2.registerErrorHandler(callback, "") 93 94parsing_error_files = ["766956", "cond_sect2", "t8", "t8a", "pe-in-text-decl"] 95expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files] 96 97valid_files = glob.glob(os.path.join(dir_prefix, "*.x*")) 98assert valid_files, "found no valid files in '{}'".format(dir_prefix) 99valid_files.sort() 100failures = 0 101for file in valid_files: 102 err = "" 103 reader = libxml2.newTextReaderFilename(file) 104 #print "%s:" % (file) 105 reader.SetParserProp(libxml2.PARSER_VALIDATE, 1) 106 ret = reader.Read() 107 while ret == 1: 108 ret = reader.Read() 109 if ret != 0 and file not in expect_parsing_error: 110 print("Error parsing and validating %s" % (file)) 111 #sys.exit(1) 112 if file in expect and err != expect[file]: 113 failures += 1 114 print("Error: ", err) 115 if file in expect: 116 print("Expected: ", expect[file]) 117 118if failures: 119 print("Failed %d tests" % failures) 120 sys.exit(1) 121 122# 123# another separate test based on Stephane Bidoul one 124# 125s = """ 126<!DOCTYPE test [ 127<!ELEMENT test (x,b)> 128<!ELEMENT x (c)> 129<!ELEMENT b (#PCDATA)> 130<!ELEMENT c (#PCDATA)> 131<!ENTITY x "<x><c>xxx</c></x>"> 132]> 133<test> 134 &x; 135 <b>bbb</b> 136</test> 137""" 138expect="""10,test 1391,test 14014,#text 1411,x 1421,c 1433,#text 14415,c 14515,x 14614,#text 1471,b 1483,#text 14915,b 15014,#text 15115,test 152""" 153res="" 154err="" 155 156input = libxml2.inputBuffer(str_io(s)) 157reader = input.newTextReader("test2") 158reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 159reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 160reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 161reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 162while reader.Read() == 1: 163 res = res + "%s,%s\n" % (reader.NodeType(),reader.Name()) 164 165if res != expect: 166 print("test2 failed: unexpected output") 167 print(res) 168 sys.exit(1) 169if err != "": 170 print("test2 failed: validation error found") 171 print(err) 172 sys.exit(1) 173 174# 175# Another test for external entity parsing and validation 176# 177 178s = """<!DOCTYPE test [ 179<!ELEMENT test (x)> 180<!ELEMENT x (#PCDATA)> 181<!ENTITY e SYSTEM "tst.ent"> 182]> 183<test> 184 &e; 185</test> 186""" 187tst_ent = """<x>hello</x>""" 188expect="""10 test 1891 test 19014 #text 1911 x 1923 #text 19315 x 19414 #text 19515 test 196""" 197res="" 198 199def myResolver(URL, ID, ctxt): 200 if URL == "tst.ent": 201 return(str_io(tst_ent)) 202 return None 203 204libxml2.setEntityLoader(myResolver) 205 206input = libxml2.inputBuffer(str_io(s)) 207reader = input.newTextReader("test3") 208reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 209reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 210reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 211reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 212while reader.Read() == 1: 213 res = res + "%s %s\n" % (reader.NodeType(),reader.Name()) 214 215if res != expect: 216 print("test3 failed: unexpected output") 217 print(res) 218 sys.exit(1) 219if err != "": 220 print("test3 failed: validation error found") 221 print(err) 222 sys.exit(1) 223 224# 225# Another test for recursive entity parsing, validation, and replacement of 226# entities, making sure the entity ref node doesn't show up in that case 227# 228 229s = """<!DOCTYPE test [ 230<!ELEMENT test (x, x)> 231<!ELEMENT x (y)> 232<!ELEMENT y (#PCDATA)> 233<!ENTITY x "<x>&y;</x>"> 234<!ENTITY y "<y>yyy</y>"> 235]> 236<test> 237 &x; 238 &x; 239</test>""" 240expect="""10 test 0 2411 test 0 24214 #text 1 2431 x 1 2441 y 2 2453 #text 3 24615 y 2 24715 x 1 24814 #text 1 2491 x 1 2501 y 2 2513 #text 3 25215 y 2 25315 x 1 25414 #text 1 25515 test 0 256""" 257res="" 258err="" 259 260input = libxml2.inputBuffer(str_io(s)) 261reader = input.newTextReader("test4") 262reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 263reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 264reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 265reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 266while reader.Read() == 1: 267 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 268 269if res != expect: 270 print("test4 failed: unexpected output") 271 print(res) 272 sys.exit(1) 273if err != "": 274 print("test4 failed: validation error found") 275 print(err) 276 sys.exit(1) 277 278# 279# The same test but without entity substitution this time 280# 281 282s = """<!DOCTYPE test [ 283<!ELEMENT test (x, x)> 284<!ELEMENT x (y)> 285<!ELEMENT y (#PCDATA)> 286<!ENTITY x "<x>&y;</x>"> 287<!ENTITY y "<y>yyy</y>"> 288]> 289<test> 290 &x; 291 &x; 292</test>""" 293expect="""10 test 0 2941 test 0 29514 #text 1 2965 x 1 29714 #text 1 2985 x 1 29914 #text 1 30015 test 0 301""" 302res="" 303err="" 304 305input = libxml2.inputBuffer(str_io(s)) 306reader = input.newTextReader("test5") 307reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 308while reader.Read() == 1: 309 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 310 311if res != expect: 312 print("test5 failed: unexpected output") 313 print(res) 314 sys.exit(1) 315if err != "": 316 print("test5 failed: validation error found") 317 print(err) 318 sys.exit(1) 319 320# 321# cleanup 322# 323del input 324del reader 325 326# Memory debug specific 327libxml2.cleanupParser() 328if libxml2.debugMemory(1) == 0: 329 print("OK") 330else: 331 print("Memory leak %d bytes" % (libxml2.debugMemory(1))) 332