1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# 4# this tests the DTD validation with the XmlTextReader interface 5# 6import sys 7import glob 8import os 9import libxml2 10try: 11 import StringIO 12 str_io = StringIO.StringIO 13except: 14 import io 15 str_io = io.StringIO 16 17# Memory debug specific 18libxml2.debugMemory(1) 19 20err = "" 21basedir = os.path.dirname(os.path.realpath(__file__)) 22dir_prefix = os.path.realpath(os.path.join(basedir, "..", "..", "test", "valid")) 23 24# This dictionary reflects the contents of the files 25# ../../test/valid/*.xml.err that are not empty, except that 26# the file paths in the messages start with ../../test/ 27 28expect = { 29 '766956': 30"""{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';' 31%ä%ent; 32 ^ 33{0}/dtds/766956.dtd:2: parser error : Content error in the external subset 34%ä%ent; 35 ^ 36Entity: line 1: 37value 38^ 39""".format(dir_prefix), 40 '781333': 41"""{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got 42<a/> 43 ^ 44{0}/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more child 45 46^ 47""".format(dir_prefix), 48 'cond_sect2': 49"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity 50 %ent; 51 ^ 52Entity: line 1: 53]]> 54^ 55{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset 56 57^ 58""".format(dir_prefix), 59 'rss': 60"""{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version 61</rss> 62 ^ 63""".format(dir_prefix), 64 't8': 65"""{0}/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 66 67%defroot; %defmiddle; %deftest; 68 ^ 69Entity: line 1: 70<!ELEMENT root (middle) > 71^ 72""".format(dir_prefix), 73 't8a': 74"""{0}/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 75 76%defroot;%defmiddle;%deftest; 77 ^ 78Entity: line 1: 79<!ELEMENT root (middle) > 80^ 81""".format(dir_prefix), 82 'xlink': 83"""{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined 84 <p><termdef id="dt-arc" term="Arc">An <ter 85 ^ 86validity error : attribute def line 199 references an unknown ID "dt-xlg" 87""".format(dir_prefix), 88} 89 90# Add prefix_dir and extension to the keys 91expect = {os.path.join(dir_prefix, key + ".xml"): val for key, val in expect.items()} 92 93def callback(ctx, str): 94 global err 95 err = err + "%s" % (str) 96libxml2.registerErrorHandler(callback, "") 97 98parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"] 99expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files] 100 101valid_files = glob.glob(os.path.join(dir_prefix, "*.x*")) 102assert valid_files, "found no valid files in '{}'".format(dir_prefix) 103valid_files.sort() 104failures = 0 105for file in valid_files: 106 err = "" 107 reader = libxml2.newTextReaderFilename(file) 108 #print "%s:" % (file) 109 reader.SetParserProp(libxml2.PARSER_VALIDATE, 1) 110 ret = reader.Read() 111 while ret == 1: 112 ret = reader.Read() 113 if ret != 0 and file not in expect_parsing_error: 114 print("Error parsing and validating %s" % (file)) 115 #sys.exit(1) 116 if (err): 117 if not(file in expect and err == expect[file]): 118 failures += 1 119 print("Error: ", err) 120 if file in expect: 121 print("Expected: ", expect[file]) 122 123if failures: 124 print("Failed %d tests" % failures) 125 sys.exit(1) 126 127# 128# another separate test based on Stephane Bidoul one 129# 130s = """ 131<!DOCTYPE test [ 132<!ELEMENT test (x,b)> 133<!ELEMENT x (c)> 134<!ELEMENT b (#PCDATA)> 135<!ELEMENT c (#PCDATA)> 136<!ENTITY x "<x><c>xxx</c></x>"> 137]> 138<test> 139 &x; 140 <b>bbb</b> 141</test> 142""" 143expect="""10,test 1441,test 14514,#text 1461,x 1471,c 1483,#text 14915,c 15015,x 15114,#text 1521,b 1533,#text 15415,b 15514,#text 15615,test 157""" 158res="" 159err="" 160 161input = libxml2.inputBuffer(str_io(s)) 162reader = input.newTextReader("test2") 163reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 164reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 165reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 166reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 167while reader.Read() == 1: 168 res = res + "%s,%s\n" % (reader.NodeType(),reader.Name()) 169 170if res != expect: 171 print("test2 failed: unexpected output") 172 print(res) 173 sys.exit(1) 174if err != "": 175 print("test2 failed: validation error found") 176 print(err) 177 sys.exit(1) 178 179# 180# Another test for external entity parsing and validation 181# 182 183s = """<!DOCTYPE test [ 184<!ELEMENT test (x)> 185<!ELEMENT x (#PCDATA)> 186<!ENTITY e SYSTEM "tst.ent"> 187]> 188<test> 189 &e; 190</test> 191""" 192tst_ent = """<x>hello</x>""" 193expect="""10 test 1941 test 19514 #text 1961 x 1973 #text 19815 x 19914 #text 20015 test 201""" 202res="" 203 204def myResolver(URL, ID, ctxt): 205 if URL == "tst.ent": 206 return(str_io(tst_ent)) 207 return None 208 209libxml2.setEntityLoader(myResolver) 210 211input = libxml2.inputBuffer(str_io(s)) 212reader = input.newTextReader("test3") 213reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 214reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 215reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 216reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 217while reader.Read() == 1: 218 res = res + "%s %s\n" % (reader.NodeType(),reader.Name()) 219 220if res != expect: 221 print("test3 failed: unexpected output") 222 print(res) 223 sys.exit(1) 224if err != "": 225 print("test3 failed: validation error found") 226 print(err) 227 sys.exit(1) 228 229# 230# Another test for recursive entity parsing, validation, and replacement of 231# entities, making sure the entity ref node doesn't show up in that case 232# 233 234s = """<!DOCTYPE test [ 235<!ELEMENT test (x, x)> 236<!ELEMENT x (y)> 237<!ELEMENT y (#PCDATA)> 238<!ENTITY x "<x>&y;</x>"> 239<!ENTITY y "<y>yyy</y>"> 240]> 241<test> 242 &x; 243 &x; 244</test>""" 245expect="""10 test 0 2461 test 0 24714 #text 1 2481 x 1 2491 y 2 2503 #text 3 25115 y 2 25215 x 1 25314 #text 1 2541 x 1 2551 y 2 2563 #text 3 25715 y 2 25815 x 1 25914 #text 1 26015 test 0 261""" 262res="" 263err="" 264 265input = libxml2.inputBuffer(str_io(s)) 266reader = input.newTextReader("test4") 267reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 268reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 269reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 270reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 271while reader.Read() == 1: 272 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 273 274if res != expect: 275 print("test4 failed: unexpected output") 276 print(res) 277 sys.exit(1) 278if err != "": 279 print("test4 failed: validation error found") 280 print(err) 281 sys.exit(1) 282 283# 284# The same test but without entity substitution this time 285# 286 287s = """<!DOCTYPE test [ 288<!ELEMENT test (x, x)> 289<!ELEMENT x (y)> 290<!ELEMENT y (#PCDATA)> 291<!ENTITY x "<x>&y;</x>"> 292<!ENTITY y "<y>yyy</y>"> 293]> 294<test> 295 &x; 296 &x; 297</test>""" 298expect="""10 test 0 2991 test 0 30014 #text 1 3015 x 1 30214 #text 1 3035 x 1 30414 #text 1 30515 test 0 306""" 307res="" 308err="" 309 310input = libxml2.inputBuffer(str_io(s)) 311reader = input.newTextReader("test5") 312reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 313while reader.Read() == 1: 314 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 315 316if res != expect: 317 print("test5 failed: unexpected output") 318 print(res) 319 sys.exit(1) 320if err != "": 321 print("test5 failed: validation error found") 322 print(err) 323 sys.exit(1) 324 325# 326# cleanup 327# 328del input 329del reader 330 331# Memory debug specific 332libxml2.cleanupParser() 333if libxml2.debugMemory(1) == 0: 334 print("OK") 335else: 336 print("Memory leak %d bytes" % (libxml2.debugMemory(1))) 337 libxml2.dumpMemory() 338