"""Tests for HTMLParser.py."""

import HTMLParser
import pprint
import unittest
from test import test_support


class EventCollector(HTMLParser.HTMLParser):
    """HTMLParser subclass that records each handler call as a tuple.

    Every handler appends ``(event_name, *arguments)`` to ``self.events``
    so that a test can compare the whole callback sequence with an
    expected list.
    """

    def __init__(self):
        self.events = []
        # Bound method shortcut used by all the handlers below.
        self.append = self.events.append
        HTMLParser.HTMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        The merged list also replaces ``self.events``.
        """
        # Normalize the list of events so that buffer artefacts don't
        # separate runs of contiguous characters.
        merged = []
        prev_kind = None
        for event in self.events:
            kind = event[0]
            if kind == prev_kind == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
            prev_kind = kind
        self.events = merged
        return merged

    # structure markup

    def handle_starttag(self, tag, attrs):
        self.append(("starttag", tag, attrs))

    def handle_startendtag(self, tag, attrs):
        self.append(("startendtag", tag, attrs))

    def handle_endtag(self, tag):
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        self.append(("comment", data))

    def handle_charref(self, data):
        self.append(("charref", data))

    def handle_data(self, data):
        self.append(("data", data))

    def handle_decl(self, data):
        self.append(("decl", data))

    def handle_entityref(self, data):
        self.append(("entityref", data))

    def handle_pi(self, data):
        self.append(("pi", data))

    def unknown_decl(self, decl):
        self.append(("unknown decl", decl))


class EventCollectorExtra(EventCollector):
    """EventCollector that also records the raw text of each start tag."""

    def handle_starttag(self, tag, attrs):
        EventCollector.handle_starttag(self, tag, attrs)
        self.append(("starttag_text", self.get_starttag_text()))


class TestCaseBase(unittest.TestCase):
    """Shared helpers for feeding markup to a collector and checking it."""

    def _run_check(self, source, expected_events, collector=EventCollector):
        """Feed *source* to a new *collector* and compare the events.

        *source* is iterated and each item is passed to ``feed()``
        separately: a list of strings is fed chunk by chunk, a plain
        string is fed one character at a time.  The test fails with a
        pretty-printed report when the collected events differ from
        *expected_events*.
        """
        parser = collector()
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def _run_check_extra(self, source, events):
        """Like _run_check(), but collect with EventCollectorExtra."""
        self._run_check(source, events, EventCollectorExtra)

    def _parse_error(self, source):
        """Assert that parsing *source* raises HTMLParseError."""
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)


class HTMLParserTestCase(TestCaseBase):

    def test_processing_instruction_only(self):
        self._run_check("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])
        self._run_check("<?processing instruction ?>", [
            ("pi", "processing instruction ?"),
            ])

    def test_simple_html(self):
        # The character references must stay encoded in the input so that
        # the parser reports them through handle_charref().
        self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
<HTML>&entity;&#32;
<!--comment1a
-></foo><bar><<?pi?></foo<bar
comment1b-->
<Img sRc='Bar' isMAP>sample
text
&#x201C;
<!--comment2a-- --comment2b-->
</Html>
""", [
    ("data", "\n"),
    ("decl", "DOCTYPE html PUBLIC 'foo'"),
    ("data", "\n"),
    ("starttag", "html", []),
    ("entityref", "entity"),
    ("charref", "32"),
    ("data", "\n"),
    ("comment", "comment1a\n-></foo><bar><<?pi?></foo<bar\ncomment1b"),
    ("data", "\n"),
    ("starttag", "img", [("src", "Bar"), ("ismap", None)]),
    ("data", "sample\ntext\n"),
    ("charref", "x201C"),
    ("data", "\n"),
    ("comment", "comment2a-- --comment2b"),
    ("data", "\n"),
    ("endtag", "html"),
    ("data", "\n"),
    ])

    def test_unclosed_entityref(self):
        self._run_check("&entityref foo", [
            ("entityref", "entityref"),
            ("data", " foo"),
            ])

    def test_bad_nesting(self):
        # Strangely, this *is* supposed to test that overlapping
        # elements are allowed.  HTMLParser is more geared toward
        # lexing the input than parsing the structure.
        self._run_check("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        self._run_check("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        self._run_check("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_illegal_declarations(self):
        self._run_check('<!spacer type="block" height="25">',
                        [('comment', 'spacer type="block" height="25"')])

    def test_starttag_end_boundary(self):
        self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
        self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])

    def test_buffer_artefacts(self):
        # The same markup split at every position must yield the same
        # events, whatever the chunk boundaries are.
        output = [("starttag", "a", [("b", "<")])]
        self._run_check(["<a b='<'>"], output)
        self._run_check(["<a ", "b='<'>"], output)
        self._run_check(["<a b", "='<'>"], output)
        self._run_check(["<a b=", "'<'>"], output)
        self._run_check(["<a b='<", "'>"], output)
        self._run_check(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self._run_check(["<a b='>'>"], output)
        self._run_check(["<a ", "b='>'>"], output)
        self._run_check(["<a b", "='>'>"], output)
        self._run_check(["<a b=", "'>'>"], output)
        self._run_check(["<a b='>", "'>"], output)
        self._run_check(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self._run_check(["", "<!--abc-->"], output)
        self._run_check(["<", "!--abc-->"], output)
        self._run_check(["<!", "--abc-->"], output)
        self._run_check(["<!-", "-abc-->"], output)
        self._run_check(["<!--", "abc-->"], output)
        self._run_check(["<!--a", "bc-->"], output)
        self._run_check(["<!--ab", "c-->"], output)
        self._run_check(["<!--abc", "-->"], output)
        self._run_check(["<!--abc-", "->"], output)
        self._run_check(["<!--abc--", ">"], output)
        self._run_check(["<!--abc-->", ""], output)

    def test_starttag_junk_chars(self):
        self._run_check("</>", [])
        self._run_check("</$>", [('comment', '$')])
        self._run_check("</", [('data', '</')])
        self._run_check("</a", [('data', '</a')])
        self._run_check("<a<a>", [('starttag', 'a<a', [])])
        self._run_check("</a<a>", [('endtag', 'a<a')])
        self._run_check("<!", [('data', '<!')])
        self._run_check("<a", [('data', '<a')])
        self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
        self._run_check("<a foo='bar", [('data', "<a foo='bar")])
        self._run_check("<a foo='>'", [('data', "<a foo='>'")])
        self._run_check("<a foo='>", [('data', "<a foo='>")])
        self._run_check("<a$>", [('starttag', 'a$', [])])
        self._run_check("<a$b>", [('starttag', 'a$b', [])])
        self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
        self._run_check("<a$b >", [('starttag', 'a$b', [])])
        self._run_check("<a$b />", [('startendtag', 'a$b', [])])

    def test_valid_doctypes(self):
        # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
        dtds = ['HTML',  # HTML5 doctype
                ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd"'),
                ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
                 '"http://www.w3.org/TR/html4/loose.dtd"'),
                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
                ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
                 '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
                ('html PUBLIC "-//W3C//DTD '
                 'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
                 '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
                ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
                 '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
                'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
                'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"']
        for dtd in dtds:
            self._run_check("<!DOCTYPE %s>" % dtd,
                            [('decl', 'DOCTYPE ' + dtd)])

    def test_slashes_in_starttag(self):
        self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
        html = ('<img width=902 height=250px '
                'src="/sites/default/files/images/homepage/foo.jpg" '
                '/*what am I doing here*/ />')
        expected = [(
            'startendtag', 'img',
            [('width', '902'), ('height', '250px'),
             ('src', '/sites/default/files/images/homepage/foo.jpg'),
             ('*what', None), ('am', None), ('i', None),
             ('doing', None), ('here*', None)]
        )]
        self._run_check(html, expected)
        html = ('<a / /foo/ / /=/ / /bar/ / />'
                '<a / /foo/ / /=/ / /bar/ / >')
        expected = [
            ('startendtag', 'a', [('foo', None), ('=', None), ('bar', None)]),
            ('starttag', 'a', [('foo', None), ('=', None), ('bar', None)])
        ]
        self._run_check(html, expected)
        # see issue #14538
        html = ('<meta><meta / ><meta // ><meta / / >'
                '<meta/><meta /><meta //><meta//>')
        expected = [
            ('starttag', 'meta', []), ('starttag', 'meta', []),
            ('starttag', 'meta', []), ('starttag', 'meta', []),
            ('startendtag', 'meta', []), ('startendtag', 'meta', []),
            ('startendtag', 'meta', []), ('startendtag', 'meta', []),
        ]
        self._run_check(html, expected)

    def test_declaration_junk_chars(self):
        self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])

    def test_startendtag(self):
        self._run_check("<p/>", [
            ("startendtag", "p", []),
            ])
        self._run_check("<p></p>", [
            ("starttag", "p", []),
            ("endtag", "p"),
            ])
        self._run_check("<p><img src='foo' /></p>", [
            ("starttag", "p", []),
            ("startendtag", "img", [("src", "foo")]),
            ("endtag", "p"),
            ])

    def test_invalid_end_tags(self):
        # A collection of broken end tags. <br> is used as separator.
        # see http://www.w3.org/TR/html5/tokenization.html#end-tag-open-state
        # and #13993
        html = ('<br></label</p><br></div end tmAd-leaderBoard><br></<h4><br>'
                '</li class="unit"><br></li\r\n\t\t\t\t\t\t</ul><br></><br>')
        expected = [('starttag', 'br', []),
                    # < is part of the name, / is discarded, p is an attribute
                    ('endtag', 'label<'),
                    ('starttag', 'br', []),
                    # text and attributes are discarded
                    ('endtag', 'div'),
                    ('starttag', 'br', []),
                    # comment because the first char after </ is not a-zA-Z
                    ('comment', '<h4'),
                    ('starttag', 'br', []),
                    # attributes are discarded
                    ('endtag', 'li'),
                    ('starttag', 'br', []),
                    # everything till ul (included) is discarded
                    ('endtag', 'li'),
                    ('starttag', 'br', []),
                    # </> is ignored
                    ('starttag', 'br', [])]
        self._run_check(html, expected)

    def test_broken_invalid_end_tag(self):
        # This is technically wrong (the "> shouldn't be included in the 'data')
        # but is probably not worth fixing it (in addition to all the cases of
        # the previous test, it would require a full attribute parsing).
        # see #13993
        html = '<b>This</b attr=">"> confuses the parser'
        expected = [('starttag', 'b', []),
                    ('data', 'This'),
                    ('endtag', 'b'),
                    ('data', '"> confuses the parser')]
        self._run_check(html, expected)

    def test_get_starttag_text(self):
        s = """<foo:bar \n one="1"\ttwo=2 >"""
        self._run_check_extra(s, [
            ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
            ("starttag_text", s)])

    def test_cdata_content(self):
        # Inside <script>/<style> everything up to the matching end tag
        # must be reported verbatim as a single run of data.
        contents = [
            '<!-- not a comment --> &not-an-entity-ref;',
            "<not a='start tag'>",
            '<a href="" /> <p> <span></span>',
            'foo = "</scr" + "ipt>";',
            'foo = "</SCRIPT" + ">";',
            'foo = <\n/script> ',
            '<!-- document.write("</scr" + "ipt>"); -->',
            ('\n//<![CDATA[\n'
             'document.write(\'<s\'+\'cript type="text/javascript" '
             'src="http://www.example.org/r=\'+new '
             'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'),
            '\n<!-- //\nvar foo = 3.14;\n// -->\n',
            'foo = "</sty" + "le>";',
            u'<!-- \u2603 -->',
            # these two should be invalid according to the HTML 5 spec,
            # section 8.1.2.2
            #'foo = </\nscript>',
            #'foo = </ script>',
        ]
        elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style']
        for content in contents:
            for element in elements:
                element_lower = element.lower()
                s = u'<{element}>{content}</{element}>'.format(element=element,
                                                                content=content)
                self._run_check(s, [("starttag", element_lower, []),
                                    ("data", content),
                                    ("endtag", element_lower)])

    def test_cdata_with_closing_tags(self):
        # see issue #13358
        # make sure that HTMLParser calls handle_data only once for each CDATA.
        # The normal event collector normalizes the events in get_events,
        # so we override it to return the original list of events.
        class Collector(EventCollector):
            def get_events(self):
                return self.events

        content = """<!-- not a comment --> &not-an-entity-ref;
                  <a href="" /> </p><p> & <span></span></style>
                  '</script' + '>' </html> </head> </scripter>!"""
        for element in [' script', 'script ', ' script ',
                        '\nscript', 'script\n', '\nscript\n']:
            s = u'<script>{content}</{element}>'.format(element=element,
                                                        content=content)
            self._run_check(s, [("starttag", "script", []),
                                ("data", content),
                                ("endtag", "script")],
                            collector=Collector)

    def test_malformatted_charref(self):
        self._run_check("<p>&#bad;</p>", [
            ("starttag", "p", []),
            ("data", "&#bad;"),
            ("endtag", "p"),
        ])
        # add the [] as a workaround to avoid buffering (see #20288)
        self._run_check(["<div>&#bad;</div>"], [
            ("starttag", "div", []),
            ("data", "&#bad;"),
            ("endtag", "div"),
        ])

    def test_unescape_function(self):
        parser = HTMLParser.HTMLParser()
        # A malformed reference is left alone, a valid one is decoded.
        self.assertEqual(parser.unescape('&#bad;'), '&#bad;')
        self.assertEqual(parser.unescape('&#0038;'), '&')


class AttributesTestCase(TestCaseBase):

    def test_attr_syntax(self):
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
        ]
        self._run_check("""<a b='v' c="v" d=v e>""", output)
        self._run_check("""<a b = 'v' c = "v" d = v e>""", output)
        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                        [("starttag", "a", [("b", "xxx\n\txxx"),
                                            ("c", "yyy\t\nyyy"),
                                            ("d", "\txyz\n")])])
        self._run_check("""<a b='' c="">""",
                        [("starttag", "a", [("b", ""), ("c", "")])])
        # Regression test for SF patch #669683.
        self._run_check("<e a=rgb(1,2,3)>",
                        [("starttag", "e", [("a", "rgb(1,2,3)")])])
        # Regression test for SF bug #921657.
        self._run_check(
            "<a href=mailto:xyz@example.com>",
            [("starttag", "a", [("href", "mailto:xyz@example.com")])])

    def test_attr_nonascii(self):
        # see issue 7311
        self._run_check(
            u"<img src=/foo/bar.png alt=\u4e2d\u6587>",
            [("starttag", "img", [("src", "/foo/bar.png"),
                                  ("alt", u"\u4e2d\u6587")])])
        self._run_check(
            u"<a title='\u30c6\u30b9\u30c8' href='\u30c6\u30b9\u30c8.html'>",
            [("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
                                ("href", u"\u30c6\u30b9\u30c8.html")])])
        self._run_check(
            u'<a title="\u30c6\u30b9\u30c8" href="\u30c6\u30b9\u30c8.html">',
            [("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
                                ("href", u"\u30c6\u30b9\u30c8.html")])])

    def test_attr_entity_replacement(self):
        # The input carries the references in encoded form; the parser is
        # expected to hand the decoded characters to handle_starttag().
        self._run_check(
            "<a b='&amp;&gt;&lt;&quot;&apos;'>",
            [("starttag", "a", [("b", "&><\"'")])])

    def test_attr_funky_names(self):
        self._run_check(
            "<a a.b='v' c:d=v e-f=v>",
            [("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")])])
        self._run_check(
            "<a $><b $=%><c \=/>",
            [("starttag", "a", [("$", None)]),
             ("starttag", "b", [("$", "%")]),
             ("starttag", "c", [("\\", "/")])])

    def test_entityrefs_in_attributes(self):
        self._run_check(
            "<html foo='&euro;&amp;aa&amp;unsupported;'>",
            [("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])])

    def test_entities_in_attribute_value(self):
        # see #1200313
        for entity in ['&amp;', '&#38;', '&#x26;', '&#x0026;']:
            self._run_check('<a href="%s">' % entity,
                            [("starttag", "a", [("href", "&")])])
            self._run_check("<a href='%s'>" % entity,
                            [("starttag", "a", [("href", "&")])])
            self._run_check("<a href=%s>" % entity,
                            [("starttag", "a", [("href", "&")])])

    def test_malformed_attributes(self):
        # see #13357
        html = (
            "<a href=test'style='color:red;bad1'>test - bad1</a>"
            "<a href=test'+style='color:red;ba2'>test - bad2</a>"
            "<a href=test'&nbsp;style='color:red;bad3'>test - bad3</a>"
            "<a href = test'&nbsp;style='color:red;bad4' >test - bad4</a>"
        )
        expected = [
            ('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
            ('data', 'test - bad1'), ('endtag', 'a'),
            ('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
            ('data', 'test - bad2'), ('endtag', 'a'),
            ('starttag', 'a', [('href', u"test'\xa0style='color:red;bad3'")]),
            ('data', 'test - bad3'), ('endtag', 'a'),
            ('starttag', 'a', [('href', u"test'\xa0style='color:red;bad4'")]),
            ('data', 'test - bad4'), ('endtag', 'a')
        ]
        self._run_check(html, expected)

    def test_malformed_adjacent_attributes(self):
        # see #12629
        self._run_check('<x><y z=""o"" /></x>',
                        [('starttag', 'x', []),
                         ('startendtag', 'y', [('z', ''), ('o""', None)]),
                         ('endtag', 'x')])
        self._run_check('<x><y z="""" /></x>',
                        [('starttag', 'x', []),
                         ('startendtag', 'y', [('z', ''), ('""', None)]),
                         ('endtag', 'x')])

    # see #755670 for the following 3 tests
    def test_adjacent_attributes(self):
        self._run_check('<a width="100%"cellspacing=0>',
                        [("starttag", "a",
                          [("width", "100%"), ("cellspacing", "0")])])

        self._run_check('<a id="foo"class="bar">',
                        [("starttag", "a",
                          [("id", "foo"), ("class", "bar")])])

    def test_missing_attribute_value(self):
        self._run_check('<a v=>',
                        [("starttag", "a", [("v", "")])])

    def test_javascript_attribute_value(self):
        self._run_check("<a href=javascript:popup('/popup/help.html')>",
                        [("starttag", "a",
                          [("href", "javascript:popup('/popup/help.html')")])])

    def test_end_tag_in_attribute_value(self):
        # see #1745761
        self._run_check("<a href='http://www.example.org/\">;'>spam</a>",
                        [("starttag", "a",
                          [("href", "http://www.example.org/\">;")]),
                         ("data", "spam"), ("endtag", "a")])

    def test_comments(self):
        html = ("<!-- I'm a valid comment -->"
                '<!--me too!-->'
                '<!------>'
                '<!---->'
                '<!----I have many hyphens---->'
                '<!-- I have a > in the middle -->'
                '<!-- and I have -- in the middle! -->')
        expected = [('comment', " I'm a valid comment "),
                    ('comment', 'me too!'),
                    ('comment', '--'),
                    ('comment', ''),
                    ('comment', '--I have many hyphens--'),
                    ('comment', ' I have a > in the middle '),
                    ('comment', ' and I have -- in the middle! ')]
        self._run_check(html, expected)

    def test_broken_comments(self):
        html = ('<! not really a comment >'
                '<! not a comment either -->'
                '<! -- close enough -->'
                '<!><!<-- this was an empty comment>'
                '<!!! another bogus comment !!!>')
        expected = [
            ('comment', ' not really a comment '),
            ('comment', ' not a comment either --'),
            ('comment', ' -- close enough --'),
            ('comment', ''),
            ('comment', '<-- this was an empty comment'),
            ('comment', '!! another bogus comment !!!'),
        ]
        self._run_check(html, expected)

    def test_condcoms(self):
        html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
                '<!--[if IE 8]>condcoms<![endif]-->'
                '<!--[if lte IE 7]>pretty?<![endif]-->')
        expected = [('comment', "[if IE & !(lte IE 8)]>aren't<![endif]"),
                    ('comment', '[if IE 8]>condcoms<![endif]'),
                    ('comment', '[if lte IE 7]>pretty?<![endif]')]
        self._run_check(html, expected)

    def test_broken_condcoms(self):
        # these condcoms are missing the '--' after '<!' and before the '>'
        html = ('<![if !(IE)]>broken condcom<![endif]>'
                '<![if ! IE]><link href="favicon.tiff"/><![endif]>'
                '<![if !IE 6]><img src="firefox.png" /><![endif]>'
                '<![if !ie 6]><b>foo</b><![endif]>'
                '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>')
        # According to the HTML5 specs sections "8.2.4.44 Bogus comment state"
        # and "8.2.4.45 Markup declaration open state", comment tokens should
        # be emitted instead of 'unknown decl', but calling unknown_decl
        # provides more flexibility.
        # See also Lib/_markupbase.py:parse_declaration
        expected = [
            ('unknown decl', 'if !(IE)'),
            ('data', 'broken condcom'),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if ! IE'),
            ('startendtag', 'link', [('href', 'favicon.tiff')]),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if !IE 6'),
            ('startendtag', 'img', [('src', 'firefox.png')]),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if !ie 6'),
            ('starttag', 'b', []),
            ('data', 'foo'),
            ('endtag', 'b'),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if (!IE)|(lt IE 9)'),
            ('startendtag', 'img', [('src', 'mammoth.bmp')]),
            ('unknown decl', 'endif')
        ]
        self._run_check(html, expected)


def test_main():
    """Run both test case classes through the regression-test runner."""
    test_support.run_unittest(HTMLParserTestCase, AttributesTestCase)


if __name__ == "__main__":
    test_main()