1#!/usr/bin/python -u 2# -*- coding: utf-8 -*- 3# 4# this tests the DTD validation with the XmlTextReader interface 5# 6import sys 7import glob 8import string 9import libxml2 10try: 11 import StringIO 12 str_io = StringIO.StringIO 13except: 14 import io 15 str_io = io.StringIO 16 17# Memory debug specific 18libxml2.debugMemory(1) 19 20err = "" 21dir_prefix = "../../test/valid/" 22# This dictionary reflects the contents of the files 23# ../../test/valid/*.xml.err that are not empty, except that 24# the file paths in the messages start with ../../test/ 25 26expect = { 27 '766956': 28"""../../test/valid/dtds/766956.dtd:2: parser error : PEReference: expecting ';' 29%ä%ent; 30 ^ 31../../test/valid/dtds/766956.dtd:2: parser error : Content error in the external subset 32%ä%ent; 33 ^ 34Entity: line 1: 35value 36^ 37""", 38 '781333': 39"""../../test/valid/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got 40<a/> 41 ^ 42../../test/valid/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more child 43 44^ 45""", 46 'cond_sect2': 47"""../../test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity 48 %ent; 49 ^ 50Entity: line 1: 51]]> 52^ 53../../test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset 54 55^ 56""", 57 'rss': 58"""../../test/valid/rss.xml:177: element rss: validity error : Element rss does not carry attribute version 59</rss> 60 ^ 61""", 62 't8': 63"""../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 64 65%defroot; %defmiddle; %deftest; 66 ^ 67Entity: line 1: 68<!ELEMENT root (middle) > 69^ 70../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 71 72%defroot; %defmiddle; %deftest; 73 ^ 74Entity: line 1: 75<!ELEMENT middle (test) > 76^ 77../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 78 79%defroot; %defmiddle; %deftest; 80 ^ 81Entity: line 1: 82<!ELEMENT test (#PCDATA) > 83^ 84""", 85 't8a': 86"""../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 87 88%defroot;%defmiddle;%deftest; 89 ^ 90Entity: line 1: 91<!ELEMENT root (middle) > 92^ 93../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 94 95%defroot;%defmiddle;%deftest; 96 ^ 97Entity: line 1: 98<!ELEMENT middle (test) > 99^ 100../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration 101 102%defroot;%defmiddle;%deftest; 103 ^ 104Entity: line 1: 105<!ELEMENT test (#PCDATA) > 106^ 107""", 108 'xlink': 109"""../../test/valid/xlink.xml:450: element termdef: validity error : ID dt-arc already defined 110 <p><termdef id="dt-arc" term="Arc">An <ter 111 ^ 112validity error : attribute def line 199 references an unknown ID "dt-xlg" 113""", 114} 115 116# Add prefix_dir and extension to the keys 117expect = {"{}{}.xml".format(dir_prefix, key): val for key, val in expect.items()} 118 119def callback(ctx, str): 120 global err 121 err = err + "%s" % (str) 122libxml2.registerErrorHandler(callback, "") 123 124parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"] 125expect_parsing_error = ["{}{}.xml".format(dir_prefix, f) for f in parsing_error_files] 126 127valid_files = glob.glob(dir_prefix + "*.x*") 128valid_files.sort() 129for file in valid_files: 130 err = "" 131 reader = libxml2.newTextReaderFilename(file) 132 #print "%s:" % (file) 133 reader.SetParserProp(libxml2.PARSER_VALIDATE, 1) 134 ret = reader.Read() 135 while ret == 1: 136 ret = reader.Read() 137 if ret != 0 and file not in expect_parsing_error: 138 print("Error parsing and validating %s" % (file)) 139 #sys.exit(1) 140 if (err): 141 if not(file in expect and err == expect[file]): 142 print("Error: ", err) 143 if file in expect: 144 print("Expected: ", expect[file]) 145# 146# another separate test based on Stephane Bidoul one 147# 148s = """ 149<!DOCTYPE test [ 150<!ELEMENT test (x,b)> 151<!ELEMENT x (c)> 152<!ELEMENT b (#PCDATA)> 153<!ELEMENT c (#PCDATA)> 154<!ENTITY x "<x><c>xxx</c></x>"> 155]> 156<test> 157 &x; 158 <b>bbb</b> 159</test> 160""" 161expect="""10,test 1621,test 16314,#text 1641,x 1651,c 1663,#text 16715,c 16815,x 16914,#text 1701,b 1713,#text 17215,b 17314,#text 17415,test 175""" 176res="" 177err="" 178 179input = libxml2.inputBuffer(str_io(s)) 180reader = input.newTextReader("test2") 181reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 182reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 183reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 184reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 185while reader.Read() == 1: 186 res = res + "%s,%s\n" % (reader.NodeType(),reader.Name()) 187 188if res != expect: 189 print("test2 failed: unexpected output") 190 print(res) 191 sys.exit(1) 192if err != "": 193 print("test2 failed: validation error found") 194 print(err) 195 sys.exit(1) 196 197# 198# Another test for external entity parsing and validation 199# 200 201s = """<!DOCTYPE test [ 202<!ELEMENT test (x)> 203<!ELEMENT x (#PCDATA)> 204<!ENTITY e SYSTEM "tst.ent"> 205]> 206<test> 207 &e; 208</test> 209""" 210tst_ent = """<x>hello</x>""" 211expect="""10 test 2121 test 21314 #text 2141 x 2153 #text 21615 x 21714 #text 21815 test 219""" 220res="" 221 222def myResolver(URL, ID, ctxt): 223 if URL == "tst.ent": 224 return(str_io(tst_ent)) 225 return None 226 227libxml2.setEntityLoader(myResolver) 228 229input = libxml2.inputBuffer(str_io(s)) 230reader = input.newTextReader("test3") 231reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 232reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 233reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 234reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 235while reader.Read() == 1: 236 res = res + "%s %s\n" % (reader.NodeType(),reader.Name()) 237 238if res != expect: 239 print("test3 failed: unexpected output") 240 print(res) 241 sys.exit(1) 242if err != "": 243 print("test3 failed: validation error found") 244 print(err) 245 sys.exit(1) 246 247# 248# Another test for recursive entity parsing, validation, and replacement of 249# entities, making sure the entity ref node doesn't show up in that case 250# 251 252s = """<!DOCTYPE test [ 253<!ELEMENT test (x, x)> 254<!ELEMENT x (y)> 255<!ELEMENT y (#PCDATA)> 256<!ENTITY x "<x>&y;</x>"> 257<!ENTITY y "<y>yyy</y>"> 258]> 259<test> 260 &x; 261 &x; 262</test>""" 263expect="""10 test 0 2641 test 0 26514 #text 1 2661 x 1 2671 y 2 2683 #text 3 26915 y 2 27015 x 1 27114 #text 1 2721 x 1 2731 y 2 2743 #text 3 27515 y 2 27615 x 1 27714 #text 1 27815 test 0 279""" 280res="" 281err="" 282 283input = libxml2.inputBuffer(str_io(s)) 284reader = input.newTextReader("test4") 285reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 286reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 287reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 288reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 289while reader.Read() == 1: 290 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 291 292if res != expect: 293 print("test4 failed: unexpected output") 294 print(res) 295 sys.exit(1) 296if err != "": 297 print("test4 failed: validation error found") 298 print(err) 299 sys.exit(1) 300 301# 302# The same test but without entity substitution this time 303# 304 305s = """<!DOCTYPE test [ 306<!ELEMENT test (x, x)> 307<!ELEMENT x (y)> 308<!ELEMENT y (#PCDATA)> 309<!ENTITY x "<x>&y;</x>"> 310<!ENTITY y "<y>yyy</y>"> 311]> 312<test> 313 &x; 314 &x; 315</test>""" 316expect="""10 test 0 3171 test 0 31814 #text 1 3195 x 1 32014 #text 1 3215 x 1 32214 #text 1 32315 test 0 324""" 325res="" 326err="" 327 328input = libxml2.inputBuffer(str_io(s)) 329reader = input.newTextReader("test5") 330reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 331while reader.Read() == 1: 332 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 333 334if res != expect: 335 print("test5 failed: unexpected output") 336 print(res) 337if err != "": 338 print("test5 failed: validation error found") 339 print(err) 340 341# 342# cleanup 343# 344del input 345del reader 346 347# Memory debug specific 348libxml2.cleanupParser() 349if libxml2.debugMemory(1) == 0: 350 print("OK") 351else: 352 print("Memory leak %d bytes" % (libxml2.debugMemory(1))) 353 libxml2.dumpMemory() 354