1 /*
2 Copyright (C) 1997 Martin Jones (mjones@kde.org)
3 (C) 1997 Torben Weis (weis@kde.org)
4 (C) 1999,2001 Lars Knoll (knoll@kde.org)
5 (C) 2000,2001 Dirk Mueller (mueller@kde.org)
6 Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public License
19 along with this library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
22 */
23
24 #include "config.h"
25 #include "HTMLParser.h"
26
27 #include "CharacterNames.h"
28 #include "CSSPropertyNames.h"
29 #include "CSSValueKeywords.h"
30 #include "Comment.h"
31 #include "Console.h"
32 #include "DOMWindow.h"
33 #include "DocumentFragment.h"
34 #include "DocumentType.h"
35 #include "Frame.h"
36 #include "HTMLBodyElement.h"
37 #include "HTMLDocument.h"
38 #include "HTMLDivElement.h"
39 #include "HTMLDListElement.h"
40 #include "HTMLElementFactory.h"
41 #include "HTMLFormElement.h"
42 #include "HTMLHeadElement.h"
43 #include "HTMLHRElement.h"
44 #include "HTMLHtmlElement.h"
45 #include "HTMLIsIndexElement.h"
46 #include "HTMLMapElement.h"
47 #include "HTMLNames.h"
48 #include "HTMLTableCellElement.h"
49 #include "HTMLTableRowElement.h"
50 #include "HTMLTableSectionElement.h"
51 #include "HTMLTokenizer.h"
52 #include "LocalizedStrings.h"
53 #include "Settings.h"
54 #include "Text.h"
55 #include <wtf/StdLibExtras.h>
56
57 namespace WebCore {
58
59 using namespace HTMLNames;
60
// Limit on nested redundant tags; presumably enforced by
// allowNestedRedundantTag() -- TODO confirm against its definition.
static const unsigned cMaxRedundantTagDepth = 20;
// NOTE(review): the name suggests a depth limit for residual style handling;
// its use is not visible in this part of the file.
static const unsigned cResidualStyleMaxDepth = 200;

// Elements whose tag priority is at least this value are subject to the
// cMaxBlockDepth cap applied in insertNode().
static const int minBlockLevelTagPriority = 3;

// A cap on the number of tags with priority minBlockLevelTagPriority or higher
// allowed in blockStack. The cap is enforced by adding such new elements as
// siblings instead of children once it is reached.
static const size_t cMaxBlockDepth = 4096;
70
71 struct HTMLStackElem : Noncopyable {
HTMLStackElemWebCore::HTMLStackElem72 HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
73 : tagName(t)
74 , level(lvl)
75 , strayTableContent(false)
76 , node(n)
77 , didRefNode(r)
78 , next(nx)
79 {
80 }
81
derefNodeWebCore::HTMLStackElem82 void derefNode()
83 {
84 if (didRefNode)
85 node->deref();
86 }
87
88 AtomicString tagName;
89 int level;
90 bool strayTableContent;
91 Node* node;
92 bool didRefNode;
93 HTMLStackElem* next;
94 };
95
96 /**
97 * The parser parses tokenized input into the document, building up the
98 * document tree. If the document is well-formed, parsing it is straightforward.
99 *
100 * Unfortunately, we have to handle many HTML documents that are not well-formed,
101 * so the parser has to be tolerant about errors.
102 *
103 * We have to take care of at least the following error conditions:
104 *
105 * 1. The element being added is explicitly forbidden inside some outer tag.
106 * In this case we should close all tags up to the one, which forbids
107 * the element, and add it afterwards.
108 *
109 * 2. We are not allowed to add the element directly. It could be that
110 * the person writing the document forgot some tag in between (or that the
111 * tag in between is optional). This could be the case with the following
112 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
113 *
 * 3. We want to add a block element inside an inline element. Close all
 * inline elements up to the next higher block element.
116 *
117 * 4. If this doesn't help, close elements until we are allowed to add the
118 * element or ignore the tag.
119 *
120 */
121
HTMLParser(HTMLDocument * doc,bool reportErrors)122 HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
123 : document(doc)
124 , current(doc)
125 , didRefCurrent(false)
126 , blockStack(0)
127 , m_blocksInStack(0)
128 , m_hasPElementInScope(NotInScope)
129 , head(0)
130 , inBody(false)
131 , haveContent(false)
132 , haveFrameSet(false)
133 , m_isParsingFragment(false)
134 , m_reportErrors(reportErrors)
135 , m_handlingResidualStyleAcrossBlocks(false)
136 , inStrayTableContent(0)
137 {
138 }
139
HTMLParser(DocumentFragment * frag)140 HTMLParser::HTMLParser(DocumentFragment* frag)
141 : document(frag->document())
142 , current(frag)
143 , didRefCurrent(true)
144 , blockStack(0)
145 , m_blocksInStack(0)
146 , m_hasPElementInScope(NotInScope)
147 , head(0)
148 , inBody(true)
149 , haveContent(false)
150 , haveFrameSet(false)
151 , m_isParsingFragment(true)
152 , m_reportErrors(false)
153 , m_handlingResidualStyleAcrossBlocks(false)
154 , inStrayTableContent(0)
155 {
156 if (frag)
157 frag->ref();
158 }
159
~HTMLParser()160 HTMLParser::~HTMLParser()
161 {
162 freeBlock();
163 if (didRefCurrent)
164 current->deref();
165 }
166
reset()167 void HTMLParser::reset()
168 {
169 ASSERT(!m_isParsingFragment);
170
171 setCurrent(document);
172
173 freeBlock();
174
175 inBody = false;
176 haveFrameSet = false;
177 haveContent = false;
178 inStrayTableContent = 0;
179
180 m_currentFormElement = 0;
181 m_currentMapElement = 0;
182 head = 0;
183 m_isindexElement = 0;
184
185 m_skipModeTag = nullAtom;
186 }
187
setCurrent(Node * newCurrent)188 void HTMLParser::setCurrent(Node* newCurrent)
189 {
190 bool didRefNewCurrent = newCurrent && newCurrent != document;
191 if (didRefNewCurrent)
192 newCurrent->ref();
193 if (didRefCurrent)
194 current->deref();
195 current = newCurrent;
196 didRefCurrent = didRefNewCurrent;
197 }
198
parseToken(Token * t)199 PassRefPtr<Node> HTMLParser::parseToken(Token* t)
200 {
201 if (!m_skipModeTag.isNull()) {
202 if (!t->beginTag && t->tagName == m_skipModeTag)
203 // Found the end tag for the current skip mode, so we're done skipping.
204 m_skipModeTag = nullAtom;
205 else if (current->localName() == t->tagName)
206 // Do not skip </iframe>.
207 // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
208 ;
209 else
210 return 0;
211 }
212
213 // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
214 if (t->isCloseTag(brTag) && document->inCompatMode()) {
215 reportError(MalformedBRError);
216 t->beginTag = true;
217 }
218
219 if (!t->beginTag) {
220 processCloseTag(t);
221 return 0;
222 }
223
224 // Ignore spaces, if we're not inside a paragraph or other inline code.
225 // Do not alter the text if it is part of a scriptTag.
226 if (t->tagName == textAtom && t->text && current->localName() != scriptTag) {
227 if (inBody && !skipMode() && current->localName() != styleTag &&
228 current->localName() != titleTag && !t->text->containsOnlyWhitespace())
229 haveContent = true;
230
231 RefPtr<Node> n;
232 String text = t->text.get();
233 unsigned charsLeft = text.length();
234 while (charsLeft) {
235 // split large blocks of text to nodes of manageable size
236 n = Text::createWithLengthLimit(document, text, charsLeft);
237 if (!insertNode(n.get(), t->selfClosingTag))
238 return 0;
239 }
240 return n;
241 }
242
243 RefPtr<Node> n = getNode(t);
244 // just to be sure, and to catch currently unimplemented stuff
245 if (!n)
246 return 0;
247
248 // set attributes
249 if (n->isHTMLElement()) {
250 HTMLElement* e = static_cast<HTMLElement*>(n.get());
251 e->setAttributeMap(t->attrs.get());
252
253 // take care of optional close tags
254 if (e->endTagRequirement() == TagStatusOptional)
255 popBlock(t->tagName);
256
257 // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
258 // syntax was used, report an error.
259 if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {
260 if (t->tagName == scriptTag)
261 reportError(IncorrectXMLCloseScriptWarning);
262 else
263 reportError(IncorrectXMLSelfCloseError, &t->tagName);
264 }
265 }
266
267 if (!insertNode(n.get(), t->selfClosingTag)) {
268 // we couldn't insert the node
269
270 if (n->isElementNode()) {
271 Element* e = static_cast<Element*>(n.get());
272 e->setAttributeMap(0);
273 }
274
275 if (m_currentMapElement == n)
276 m_currentMapElement = 0;
277
278 if (m_currentFormElement == n)
279 m_currentFormElement = 0;
280
281 if (head == n)
282 head = 0;
283
284 return 0;
285 }
286 return n;
287 }
288
parseDoctypeToken(DoctypeToken * t)289 void HTMLParser::parseDoctypeToken(DoctypeToken* t)
290 {
291 // Ignore any doctype after the first. Ignore doctypes in fragments.
292 if (document->doctype() || m_isParsingFragment || current != document)
293 return;
294
295 // Make a new doctype node and set it as our doctype.
296 document->addChild(DocumentType::create(document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
297 }
298
isTableSection(Node * n)299 static bool isTableSection(Node* n)
300 {
301 return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
302 }
303
isTablePart(Node * n)304 static bool isTablePart(Node* n)
305 {
306 return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
307 isTableSection(n);
308 }
309
isTableRelated(Node * n)310 static bool isTableRelated(Node* n)
311 {
312 return n->hasTagName(tableTag) || isTablePart(n);
313 }
314
isScopingTag(const AtomicString & tagName)315 static bool isScopingTag(const AtomicString& tagName)
316 {
317 return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag;
318 }
319
insertNode(Node * n,bool flat)320 bool HTMLParser::insertNode(Node* n, bool flat)
321 {
322 RefPtr<Node> protectNode(n);
323
324 const AtomicString& localName = n->localName();
325 int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0;
326
327 // <table> is never allowed inside stray table content. Always pop out of the stray table content
328 // and close up the first table, and then start the second table as a sibling.
329 if (inStrayTableContent && localName == tableTag)
330 popBlock(tableTag);
331
332 if (tagPriority >= minBlockLevelTagPriority) {
333 while (m_blocksInStack >= cMaxBlockDepth)
334 popBlock(blockStack->tagName);
335 }
336
337 // let's be stupid and just try to insert it.
338 // this should work if the document is well-formed
339 Node* newNode = current->addChild(n);
340 if (!newNode)
341 return handleError(n, flat, localName, tagPriority); // Try to handle the error.
342
343 // don't push elements without end tags (e.g., <img>) on the stack
344 bool parentAttached = current->attached();
345 if (tagPriority > 0 && !flat) {
346 if (newNode == current) {
347 // This case should only be hit when a demoted <form> is placed inside a table.
348 ASSERT(localName == formTag);
349 reportError(FormInsideTablePartError, ¤t->localName());
350 } else {
351 // The pushBlock function transfers ownership of current to the block stack
352 // so we're guaranteed that didRefCurrent is false. The code below is an
353 // optimized version of setCurrent that takes advantage of that fact and also
354 // assumes that newNode is neither 0 nor a pointer to the document.
355 pushBlock(localName, tagPriority);
356 newNode->beginParsingChildren();
357 ASSERT(!didRefCurrent);
358 newNode->ref();
359 current = newNode;
360 didRefCurrent = true;
361 }
362 if (parentAttached && !n->attached() && !m_isParsingFragment)
363 n->attach();
364 } else {
365 if (parentAttached && !n->attached() && !m_isParsingFragment)
366 n->attach();
367 n->finishParsingChildren();
368 }
369
370 return true;
371 }
372
handleError(Node * n,bool flat,const AtomicString & localName,int tagPriority)373 bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
374 {
375 // Error handling code. This is just ad hoc handling of specific parent/child combinations.
376 HTMLElement* e;
377 bool handled = false;
378
379 // 1. Check out the element's tag name to decide how to deal with errors.
380 if (n->isHTMLElement()) {
381 HTMLElement* h = static_cast<HTMLElement*>(n);
382 if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
383 if (inStrayTableContent && !isTableRelated(current)) {
384 reportError(MisplacedTablePartError, &localName, ¤t->localName());
385 // pop out to the nearest enclosing table-related tag.
386 while (blockStack && !isTableRelated(current))
387 popOneBlock();
388 return insertNode(n);
389 }
390 } else if (h->hasLocalName(headTag)) {
391 if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) {
392 reportError(MisplacedHeadError);
393 return false;
394 }
395 } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
396 bool createdHead = false;
397 if (!head) {
398 createHead();
399 createdHead = true;
400 }
401 if (head) {
402 if (!createdHead)
403 reportError(MisplacedHeadContentError, &localName, ¤t->localName());
404 if (head->addChild(n)) {
405 if (!n->attached() && !m_isParsingFragment)
406 n->attach();
407 return true;
408 } else
409 return false;
410 }
411 } else if (h->hasLocalName(htmlTag)) {
412 if (!current->isDocumentNode() ) {
413 if (document->documentElement() && document->documentElement()->hasTagName(htmlTag)) {
414 reportError(RedundantHTMLBodyError, &localName);
415 // we have another <HTML> element.... apply attributes to existing one
416 // make sure we don't overwrite already existing attributes
417 NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
418 Element* existingHTML = static_cast<Element*>(document->documentElement());
419 NamedAttrMap* bmap = existingHTML->attributes(false);
420 for (unsigned l = 0; map && l < map->length(); ++l) {
421 Attribute* it = map->attributeItem(l);
422 if (!bmap->getAttributeItem(it->name()))
423 existingHTML->setAttribute(it->name(), it->value());
424 }
425 }
426 return false;
427 }
428 } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) {
429 bool createdHead = false;
430 if (!head) {
431 createHead();
432 createdHead = true;
433 }
434 if (head) {
435 Node* newNode = head->addChild(n);
436 if (!newNode) {
437 setSkipMode(h->tagQName());
438 return false;
439 }
440
441 if (!createdHead)
442 reportError(MisplacedHeadContentError, &localName, ¤t->localName());
443
444 pushBlock(localName, tagPriority);
445 newNode->beginParsingChildren();
446 setCurrent(newNode);
447 if (!n->attached() && !m_isParsingFragment)
448 n->attach();
449 return true;
450 }
451 if (inBody) {
452 setSkipMode(h->tagQName());
453 return false;
454 }
455 } else if (h->hasLocalName(bodyTag)) {
456 if (inBody && document->body()) {
457 // we have another <BODY> element.... apply attributes to existing one
458 // make sure we don't overwrite already existing attributes
459 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
460 reportError(RedundantHTMLBodyError, &localName);
461 NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
462 Element* existingBody = document->body();
463 NamedAttrMap* bmap = existingBody->attributes(false);
464 for (unsigned l = 0; map && l < map->length(); ++l) {
465 Attribute* it = map->attributeItem(l);
466 if (!bmap->getAttributeItem(it->name()))
467 existingBody->setAttribute(it->name(), it->value());
468 }
469 return false;
470 }
471 else if (!current->isDocumentNode())
472 return false;
473 } else if (h->hasLocalName(areaTag)) {
474 if (m_currentMapElement) {
475 reportError(MisplacedAreaError, ¤t->localName());
476 m_currentMapElement->addChild(n);
477 if (!n->attached() && !m_isParsingFragment)
478 n->attach();
479 handled = true;
480 return true;
481 }
482 return false;
483 } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
484 if (isTableRelated(current)) {
485 while (blockStack && isTablePart(current))
486 popOneBlock();
487 return insertNode(n);
488 }
489 }
490 } else if (n->isCommentNode() && !head)
491 return false;
492
493 // 2. Next we examine our currently active element to do some further error handling.
494 if (current->isHTMLElement()) {
495 HTMLElement* h = static_cast<HTMLElement*>(current);
496 const AtomicString& currentTagName = h->localName();
497 if (h->hasLocalName(htmlTag)) {
498 HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
499 if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
500 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
501 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
502 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
503 elt->hasLocalName(baseTag))) {
504 if (!head) {
505 head = new HTMLHeadElement(headTag, document);
506 e = head;
507 insertNode(e);
508 handled = true;
509 }
510 } else {
511 if (n->isTextNode()) {
512 Text* t = static_cast<Text*>(n);
513 if (t->containsOnlyWhitespace())
514 return false;
515 }
516 if (!haveFrameSet) {
517 e = new HTMLBodyElement(bodyTag, document);
518 startBody();
519 insertNode(e);
520 handled = true;
521 } else
522 reportError(MisplacedFramesetContentError, &localName);
523 }
524 } else if (h->hasLocalName(headTag)) {
525 if (n->hasTagName(htmlTag))
526 return false;
527 else {
528 // This means the body starts here...
529 if (!haveFrameSet) {
530 popBlock(currentTagName);
531 e = new HTMLBodyElement(bodyTag, document);
532 startBody();
533 insertNode(e);
534 handled = true;
535 } else
536 reportError(MisplacedFramesetContentError, &localName);
537 }
538 } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag)
539 || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
540 reportError(MisplacedContentRetryError, &localName, ¤tTagName);
541 popBlock(currentTagName);
542 handled = true;
543 } else if (h->hasLocalName(captionTag)) {
544 // Illegal content in a caption. Close the caption and try again.
545 reportError(MisplacedCaptionContentError, &localName);
546 popBlock(currentTagName);
547 if (isTablePart(n))
548 return insertNode(n, flat);
549 } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
550 if (n->hasTagName(tableTag)) {
551 reportError(MisplacedTableError, ¤tTagName);
552 if (m_isParsingFragment && !h->hasLocalName(tableTag))
553 // fragment may contain table parts without <table> ancestor, pop them one by one
554 popBlock(h->localName());
555 popBlock(localName); // end the table
556 handled = true; // ...and start a new one
557 } else {
558 ExceptionCode ec = 0;
559 Node* node = current;
560 Node* parent = node->parentNode();
561 // A script may have removed the current node's parent from the DOM
562 // http://bugs.webkit.org/show_bug.cgi?id=7137
563 // FIXME: we should do real recovery here and re-parent with the correct node.
564 if (!parent)
565 return false;
566 Node* grandparent = parent->parentNode();
567
568 if (n->isTextNode() ||
569 (h->hasLocalName(trTag) &&
570 isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
571 ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
572 !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
573 parent->hasTagName(tableTag))) {
574 node = (node->hasTagName(tableTag)) ? node :
575 ((node->hasTagName(trTag)) ? grandparent : parent);
576 // This can happen with fragments
577 if (!node)
578 return false;
579 Node* parent = node->parentNode();
580 if (!parent)
581 return false;
582 parent->insertBefore(n, node, ec);
583 if (!ec) {
584 reportError(StrayTableContentError, &localName, ¤tTagName);
585 if (n->isHTMLElement() && tagPriority > 0 &&
586 !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
587 {
588 pushBlock(localName, tagPriority);
589 n->beginParsingChildren();
590 setCurrent(n);
591 inStrayTableContent++;
592 blockStack->strayTableContent = true;
593 }
594 return true;
595 }
596 }
597
598 if (!ec) {
599 if (current->hasTagName(trTag)) {
600 reportError(TablePartRequiredError, &localName, &tdTag.localName());
601 e = new HTMLTableCellElement(tdTag, document);
602 } else if (current->hasTagName(tableTag)) {
603 // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
604 // and it isn't really a parse error per se.
605 e = new HTMLTableSectionElement(tbodyTag, document);
606 } else {
607 reportError(TablePartRequiredError, &localName, &trTag.localName());
608 e = new HTMLTableRowElement(trTag, document);
609 }
610
611 insertNode(e);
612 handled = true;
613 }
614 }
615 } else if (h->hasLocalName(objectTag)) {
616 reportError(MisplacedContentRetryError, &localName, ¤tTagName);
617 popBlock(objectTag);
618 handled = true;
619 } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
620 if (!isInline(n)) {
621 popBlock(currentTagName);
622 handled = true;
623 }
624 } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
625 if (localName == optgroupTag) {
626 popBlock(currentTagName);
627 handled = true;
628 } else if (localName == selectTag) {
629 // IE treats a nested select as </select>. Let's do the same
630 popBlock(localName);
631 }
632 } else if (h->hasLocalName(selectTag)) {
633 if (localName == inputTag || localName == textareaTag) {
634 reportError(MisplacedContentRetryError, &localName, ¤tTagName);
635 popBlock(currentTagName);
636 handled = true;
637 }
638 } else if (h->hasLocalName(colgroupTag)) {
639 popBlock(currentTagName);
640 handled = true;
641 } else if (!h->hasLocalName(bodyTag)) {
642 if (isInline(current)) {
643 popInlineBlocks();
644 handled = true;
645 }
646 }
647 } else if (current->isDocumentNode()) {
648 if (n->isTextNode()) {
649 Text* t = static_cast<Text*>(n);
650 if (t->containsOnlyWhitespace())
651 return false;
652 }
653
654 if (!document->documentElement()) {
655 e = new HTMLHtmlElement(htmlTag, document);
656 insertNode(e);
657 handled = true;
658 }
659 }
660
661 // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
662 if (!handled) {
663 reportError(IgnoredContentError, &localName, ¤t->localName());
664 return false;
665 }
666 return insertNode(n);
667 }
668
669 typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
670 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
671
textCreateErrorCheck(Token * t,RefPtr<Node> & result)672 bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
673 {
674 result = new Text(document, t->text.get());
675 return false;
676 }
677
commentCreateErrorCheck(Token * t,RefPtr<Node> & result)678 bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
679 {
680 result = new Comment(document, t->text.get());
681 return false;
682 }
683
headCreateErrorCheck(Token *,RefPtr<Node> & result)684 bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
685 {
686 if (!head || current->localName() == htmlTag) {
687 head = new HTMLHeadElement(headTag, document);
688 result = head;
689 } else
690 reportError(MisplacedHeadError);
691 return false;
692 }
693
bodyCreateErrorCheck(Token *,RefPtr<Node> &)694 bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
695 {
696 // body no longer allowed if we have a frameset
697 if (haveFrameSet)
698 return false;
699 popBlock(headTag);
700 startBody();
701 return true;
702 }
703
framesetCreateErrorCheck(Token *,RefPtr<Node> &)704 bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
705 {
706 popBlock(headTag);
707 if (inBody && !haveFrameSet && !haveContent) {
708 popBlock(bodyTag);
709 // ### actually for IE document.body returns the now hidden "body" element
710 // we can't implement that behaviour now because it could cause too many
711 // regressions and the headaches are not worth the work as long as there is
712 // no site actually relying on that detail (Dirk)
713 if (document->body())
714 document->body()->setAttribute(styleAttr, "display:none");
715 inBody = false;
716 }
717 if ((haveContent || haveFrameSet) && current->localName() == htmlTag)
718 return false;
719 haveFrameSet = true;
720 startBody();
721 return true;
722 }
723
formCreateErrorCheck(Token * t,RefPtr<Node> & result)724 bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
725 {
726 // Only create a new form if we're not already inside one.
727 // This is consistent with other browsers' behavior.
728 if (!m_currentFormElement) {
729 m_currentFormElement = new HTMLFormElement(formTag, document);
730 result = m_currentFormElement;
731 pCloserCreateErrorCheck(t, result);
732 }
733 return false;
734 }
735
isindexCreateErrorCheck(Token * t,RefPtr<Node> & result)736 bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
737 {
738 RefPtr<Node> n = handleIsindex(t);
739 if (!inBody)
740 m_isindexElement = n.release();
741 else {
742 t->selfClosingTag = true;
743 result = n.release();
744 }
745 return false;
746 }
747
selectCreateErrorCheck(Token *,RefPtr<Node> &)748 bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&)
749 {
750 return true;
751 }
752
ddCreateErrorCheck(Token * t,RefPtr<Node> & result)753 bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
754 {
755 pCloserCreateErrorCheck(t, result);
756 popBlock(dtTag);
757 popBlock(ddTag);
758 return true;
759 }
760
dtCreateErrorCheck(Token * t,RefPtr<Node> & result)761 bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
762 {
763 pCloserCreateErrorCheck(t, result);
764 popBlock(ddTag);
765 popBlock(dtTag);
766 return true;
767 }
768
nestedCreateErrorCheck(Token * t,RefPtr<Node> &)769 bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
770 {
771 popBlock(t->tagName);
772 return true;
773 }
774
nestedPCloserCreateErrorCheck(Token * t,RefPtr<Node> & result)775 bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result)
776 {
777 pCloserCreateErrorCheck(t, result);
778 popBlock(t->tagName);
779 return true;
780 }
781
nestedStyleCreateErrorCheck(Token * t,RefPtr<Node> &)782 bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&)
783 {
784 return allowNestedRedundantTag(t->tagName);
785 }
786
tableCellCreateErrorCheck(Token *,RefPtr<Node> &)787 bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&)
788 {
789 popBlock(tdTag);
790 popBlock(thTag);
791 return true;
792 }
793
tableSectionCreateErrorCheck(Token *,RefPtr<Node> &)794 bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&)
795 {
796 popBlock(theadTag);
797 popBlock(tbodyTag);
798 popBlock(tfootTag);
799 return true;
800 }
801
noembedCreateErrorCheck(Token *,RefPtr<Node> &)802 bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&)
803 {
804 setSkipMode(noembedTag);
805 return true;
806 }
807
noframesCreateErrorCheck(Token *,RefPtr<Node> &)808 bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&)
809 {
810 setSkipMode(noframesTag);
811 return true;
812 }
813
noscriptCreateErrorCheck(Token *,RefPtr<Node> &)814 bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
815 {
816 if (!m_isParsingFragment) {
817 Settings* settings = document->settings();
818 if (settings && settings->isJavaScriptEnabled())
819 setSkipMode(noscriptTag);
820 }
821 return true;
822 }
823
pCloserCreateErrorCheck(Token *,RefPtr<Node> &)824 bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&)
825 {
826 if (hasPElementInScope())
827 popBlock(pTag);
828 return true;
829 }
830
pCloserStrictCreateErrorCheck(Token *,RefPtr<Node> &)831 bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
832 {
833 if (document->inCompatMode())
834 return true;
835 if (hasPElementInScope())
836 popBlock(pTag);
837 return true;
838 }
839
mapCreateErrorCheck(Token *,RefPtr<Node> & result)840 bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
841 {
842 m_currentMapElement = new HTMLMapElement(mapTag, document);
843 result = m_currentMapElement;
844 return false;
845 }
846
getNode(Token * t)847 PassRefPtr<Node> HTMLParser::getNode(Token* t)
848 {
849 // Init our error handling table.
850 DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ());
851 if (gFunctionMap.isEmpty()) {
852 gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
853 gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
854 gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
855 gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
856 gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
857 gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
858 gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
859 gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
860 gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
861 gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
862 gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
863 gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
864 gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
865 gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
866 gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
867 gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
868 gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
869 gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
870 gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
871 gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
872 gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
873 gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
874 gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
875 gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
876 gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
877 gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
878 gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
879 gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck);
880 gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
881 gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
882 gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
883 gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
884 gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
885 gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
886 gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
887 gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
888 gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
889 gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
890 gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
891 gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
892 gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
893 gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
894 gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
895 gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck);
896 gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
897 gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
898 gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
899 gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
900 gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
901 gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
902 gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
903 gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
904 gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
905 gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
906 }
907
908 bool proceed = true;
909 RefPtr<Node> result;
910 if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
911 proceed = (this->*errorCheckFunc)(t, result);
912 if (proceed)
913 result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), document, m_currentFormElement.get());
914 return result.release();
915 }
916
allowNestedRedundantTag(const AtomicString & tagName)917 bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName)
918 {
919 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
920 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
921 // nested tags of the same type before just ignoring them all together.
922 unsigned i = 0;
923 for (HTMLStackElem* curr = blockStack;
924 i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
925 curr = curr->next, i++) { }
926 return i != cMaxRedundantTagDepth;
927 }
928
processCloseTag(Token * t)929 void HTMLParser::processCloseTag(Token* t)
930 {
931 // Support for really broken html.
932 // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
933 // let's rely on the end() call to close things.
934 if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
935 return;
936
937 bool checkForCloseTagErrors = true;
938 if (t->tagName == formTag && m_currentFormElement) {
939 m_currentFormElement = 0;
940 checkForCloseTagErrors = false;
941 } else if (t->tagName == mapTag)
942 m_currentMapElement = 0;
943 else if (t->tagName == pTag)
944 checkForCloseTagErrors = false;
945
946 HTMLStackElem* oldElem = blockStack;
947 popBlock(t->tagName, checkForCloseTagErrors);
948 if (oldElem == blockStack && t->tagName == pTag) {
949 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
950 // this as a valid break, i.e., <p></p>. So go ahead and make the empty
951 // paragraph.
952 t->beginTag = true;
953 parseToken(t);
954 popBlock(t->tagName);
955 reportError(StrayParagraphCloseError);
956 }
957 }
958
isHeaderTag(const AtomicString & tagName)959 bool HTMLParser::isHeaderTag(const AtomicString& tagName)
960 {
961 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headerTags, ());
962 if (headerTags.isEmpty()) {
963 headerTags.add(h1Tag.localName().impl());
964 headerTags.add(h2Tag.localName().impl());
965 headerTags.add(h3Tag.localName().impl());
966 headerTags.add(h4Tag.localName().impl());
967 headerTags.add(h5Tag.localName().impl());
968 headerTags.add(h6Tag.localName().impl());
969 }
970
971 return headerTags.contains(tagName.impl());
972 }
973
isInline(Node * node) const974 bool HTMLParser::isInline(Node* node) const
975 {
976 if (node->isTextNode())
977 return true;
978
979 if (node->isHTMLElement()) {
980 HTMLElement* e = static_cast<HTMLElement*>(node);
981 if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) ||
982 e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) ||
983 e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) ||
984 e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) ||
985 e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) ||
986 e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) ||
987 e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) ||
988 e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) ||
989 e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||
990 e->hasLocalName(noembedTag))
991 return true;
992 if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
993 Settings* settings = document->settings();
994 if (settings && settings->isJavaScriptEnabled())
995 return true;
996 }
997 }
998
999 return false;
1000 }
1001
isResidualStyleTag(const AtomicString & tagName)1002 bool HTMLParser::isResidualStyleTag(const AtomicString& tagName)
1003 {
1004 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ());
1005 if (residualStyleTags.isEmpty()) {
1006 residualStyleTags.add(aTag.localName().impl());
1007 residualStyleTags.add(fontTag.localName().impl());
1008 residualStyleTags.add(ttTag.localName().impl());
1009 residualStyleTags.add(uTag.localName().impl());
1010 residualStyleTags.add(bTag.localName().impl());
1011 residualStyleTags.add(iTag.localName().impl());
1012 residualStyleTags.add(sTag.localName().impl());
1013 residualStyleTags.add(strikeTag.localName().impl());
1014 residualStyleTags.add(bigTag.localName().impl());
1015 residualStyleTags.add(smallTag.localName().impl());
1016 residualStyleTags.add(emTag.localName().impl());
1017 residualStyleTags.add(strongTag.localName().impl());
1018 residualStyleTags.add(dfnTag.localName().impl());
1019 residualStyleTags.add(codeTag.localName().impl());
1020 residualStyleTags.add(sampTag.localName().impl());
1021 residualStyleTags.add(kbdTag.localName().impl());
1022 residualStyleTags.add(varTag.localName().impl());
1023 residualStyleTags.add(nobrTag.localName().impl());
1024 }
1025
1026 return residualStyleTags.contains(tagName.impl());
1027 }
1028
isAffectedByResidualStyle(const AtomicString & tagName)1029 bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName)
1030 {
1031 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ());
1032 if (unaffectedTags.isEmpty()) {
1033 unaffectedTags.add(bodyTag.localName().impl());
1034 unaffectedTags.add(tableTag.localName().impl());
1035 unaffectedTags.add(theadTag.localName().impl());
1036 unaffectedTags.add(tbodyTag.localName().impl());
1037 unaffectedTags.add(tfootTag.localName().impl());
1038 unaffectedTags.add(trTag.localName().impl());
1039 unaffectedTags.add(thTag.localName().impl());
1040 unaffectedTags.add(tdTag.localName().impl());
1041 unaffectedTags.add(captionTag.localName().impl());
1042 unaffectedTags.add(colgroupTag.localName().impl());
1043 unaffectedTags.add(colTag.localName().impl());
1044 unaffectedTags.add(optionTag.localName().impl());
1045 unaffectedTags.add(optgroupTag.localName().impl());
1046 unaffectedTags.add(selectTag.localName().impl());
1047 unaffectedTags.add(objectTag.localName().impl());
1048 }
1049
1050 return !unaffectedTags.contains(tagName.impl());
1051 }
1052
handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem * elem)1053 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
1054 {
1055 HTMLStackElem* maxElem = 0;
1056 bool finished = false;
1057 bool strayTableContent = elem->strayTableContent;
1058
1059 m_handlingResidualStyleAcrossBlocks = true;
1060 while (!finished) {
1061 // Find the outermost element that crosses over to a higher level. If there exists another higher-level
1062 // element, we will do another pass, until we have corrected the innermost one.
1063 ExceptionCode ec = 0;
1064 HTMLStackElem* curr = blockStack;
1065 HTMLStackElem* prev = 0;
1066 HTMLStackElem* prevMaxElem = 0;
1067 maxElem = 0;
1068 finished = true;
1069 while (curr && curr != elem) {
1070 if (curr->level > elem->level) {
1071 if (!isAffectedByResidualStyle(curr->tagName))
1072 return;
1073 if (maxElem)
1074 // We will need another pass.
1075 finished = false;
1076 maxElem = curr;
1077 prevMaxElem = prev;
1078 }
1079
1080 prev = curr;
1081 curr = curr->next;
1082 }
1083
1084 if (!curr || !maxElem)
1085 return;
1086
1087 Node* residualElem = prev->node;
1088 Node* blockElem = prevMaxElem ? prevMaxElem->node : current;
1089 Node* parentElem = elem->node;
1090
1091 // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1092 // FIXME: We should either always allow it or perform an additional fixup instead of
1093 // just bailing here.
1094 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1095 if (!parentElem->childAllowed(blockElem))
1096 return;
1097
1098 m_hasPElementInScope = Unknown;
1099
1100 if (maxElem->node->parentNode() != elem->node) {
1101 // Walk the stack and remove any elements that aren't residual style tags. These
1102 // are basically just being closed up. Example:
1103 // <font><span>Moo<p>Goo</font></p>.
1104 // In the above example, the <span> doesn't need to be reopened. It can just close.
1105 HTMLStackElem* currElem = maxElem->next;
1106 HTMLStackElem* prevElem = maxElem;
1107 while (currElem != elem) {
1108 HTMLStackElem* nextElem = currElem->next;
1109 if (!isResidualStyleTag(currElem->tagName)) {
1110 prevElem->next = nextElem;
1111 prevElem->derefNode();
1112 prevElem->node = currElem->node;
1113 prevElem->didRefNode = currElem->didRefNode;
1114 delete currElem;
1115 }
1116 else
1117 prevElem = currElem;
1118 currElem = nextElem;
1119 }
1120
1121 // We have to reopen residual tags in between maxElem and elem. An example of this case is:
1122 // <font><i>Moo<p>Foo</font>.
1123 // In this case, we need to transform the part before the <p> into:
1124 // <font><i>Moo</i></font><i>
1125 // so that the <i> will remain open. This involves the modification of elements
1126 // in the block stack.
1127 // This will also affect how we ultimately reparent the block, since we want it to end up
1128 // under the reopened residual tags (e.g., the <i> in the above example.)
1129 RefPtr<Node> prevNode = 0;
1130 currElem = maxElem;
1131 while (currElem->node != residualElem) {
1132 if (isResidualStyleTag(currElem->node->localName())) {
1133 // Create a clone of this element.
1134 // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
1135 Node* currNode = currElem->node->cloneNode(false).releaseRef();
1136 reportError(ResidualStyleError, &currNode->localName());
1137
1138 // Change the stack element's node to point to the clone.
1139 // The stack element adopts the reference we obtained above by calling release().
1140 currElem->derefNode();
1141 currElem->node = currNode;
1142 currElem->didRefNode = true;
1143
1144 // Attach the previous node as a child of this new node.
1145 if (prevNode)
1146 currNode->appendChild(prevNode, ec);
1147 else // The new parent for the block element is going to be the innermost clone.
1148 parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though.
1149
1150 prevNode = currNode;
1151 }
1152
1153 currElem = currElem->next;
1154 }
1155
1156 // Now append the chain of new residual style elements if one exists.
1157 if (prevNode)
1158 elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
1159 }
1160
1161 // Check if the block is still in the tree. If it isn't, then we don't
1162 // want to remove it from its parent (that would crash) or insert it into
1163 // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
1164 bool isBlockStillInTree = blockElem->parentNode();
1165
1166 // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1167 // All content of |blockElem| is reparented to be under this clone. We then
1168 // reparent |blockElem| using real DOM calls so that attachment/detachment will
1169 // be performed to fix up the rendering tree.
1170 // So for this example: <b>...<p>Foo</b>Goo</p>
1171 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1172 //
1173 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1174 if (isBlockStillInTree)
1175 blockElem->parentNode()->removeChild(blockElem, ec);
1176
1177 Node* newNodePtr = 0;
1178 if (blockElem->firstChild()) {
1179 // Step 2: Clone |residualElem|.
1180 RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1181 newNodePtr = newNode.get();
1182 reportError(ResidualStyleError, &newNode->localName());
1183
1184 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1185 // before we've put |newElem| into the document. That way we'll only do one attachment of all
1186 // the new content (instead of a bunch of individual attachments).
1187 Node* currNode = blockElem->firstChild();
1188 while (currNode) {
1189 Node* nextNode = currNode->nextSibling();
1190 newNode->appendChild(currNode, ec);
1191 currNode = nextNode;
1192 }
1193
1194 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1195 // attachment can occur yet.
1196 blockElem->appendChild(newNode.release(), ec);
1197 } else
1198 finished = true;
1199
1200 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1201 if (isBlockStillInTree)
1202 parentElem->appendChild(blockElem, ec);
1203
1204 // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update
1205 // the node associated with the previous stack element so that when it gets popped,
1206 // it doesn't make the residual element the next current node.
1207 HTMLStackElem* currElem = maxElem;
1208 HTMLStackElem* prevElem = 0;
1209 while (currElem != elem) {
1210 prevElem = currElem;
1211 currElem = currElem->next;
1212 }
1213 prevElem->next = elem->next;
1214 prevElem->derefNode();
1215 prevElem->node = elem->node;
1216 prevElem->didRefNode = elem->didRefNode;
1217 if (!finished) {
1218 // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
1219 // in the stack. We do not do this for the innermost block, because in that case the new
1220 // node is effectively no longer open.
1221 elem->next = maxElem;
1222 elem->node = prevMaxElem->node;
1223 elem->didRefNode = prevMaxElem->didRefNode;
1224 elem->strayTableContent = false;
1225 prevMaxElem->next = elem;
1226 ASSERT(newNodePtr);
1227 prevMaxElem->node = newNodePtr;
1228 prevMaxElem->didRefNode = false;
1229 } else
1230 delete elem;
1231 }
1232
1233 // FIXME: If we ever make a case like this work:
1234 // <table><b><i><form></b></form></i></table>
1235 // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
1236 if (strayTableContent)
1237 inStrayTableContent--;
1238
1239 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1240 // In the above example, Goo should stay italic.
1241 // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
1242
1243 HTMLStackElem* curr = blockStack;
1244 HTMLStackElem* residualStyleStack = 0;
1245 unsigned stackDepth = 1;
1246 unsigned redundantStyleCount = 0;
1247 while (curr && curr != maxElem) {
1248 // We will actually schedule this tag for reopening
1249 // after we complete the close of this entire block.
1250 if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
1251 // We've overloaded the use of stack elements and are just reusing the
1252 // struct with a slightly different meaning to the variables. Instead of chaining
1253 // from innermost to outermost, we build up a list of all the tags we need to reopen
1254 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1255 // to the outermost tag we need to reopen.
1256 // We also set curr->node to be the actual element that corresponds to the ID stored in
1257 // curr->id rather than the node that you should pop to when the element gets pulled off
1258 // the stack.
1259 if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
1260 redundantStyleCount++;
1261 else
1262 redundantStyleCount = 0;
1263
1264 if (redundantStyleCount < cMaxRedundantTagDepth)
1265 moveOneBlockToStack(residualStyleStack);
1266 else
1267 popOneBlock();
1268 } else
1269 popOneBlock();
1270
1271 curr = blockStack;
1272 }
1273
1274 reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.
1275
1276 m_handlingResidualStyleAcrossBlocks = false;
1277 }
1278
reopenResidualStyleTags(HTMLStackElem * elem,Node * malformedTableParent)1279 void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
1280 {
1281 // Loop for each tag that needs to be reopened.
1282 while (elem) {
1283 // Create a shallow clone of the DOM node for this element.
1284 RefPtr<Node> newNode = elem->node->cloneNode(false);
1285 reportError(ResidualStyleError, &newNode->localName());
1286
1287 // Append the new node. In the malformed table case, we need to insert before the table,
1288 // which will be the last child.
1289 ExceptionCode ec = 0;
1290 if (malformedTableParent)
1291 malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
1292 else
1293 current->appendChild(newNode, ec);
1294 // FIXME: Is it really OK to ignore the exceptions here?
1295
1296 // Now push a new stack element for this node we just created.
1297 pushBlock(elem->tagName, elem->level);
1298 newNode->beginParsingChildren();
1299
1300 // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1301 // that it is inside a malformed table.
1302 blockStack->strayTableContent = malformedTableParent != 0;
1303 if (blockStack->strayTableContent)
1304 inStrayTableContent++;
1305
1306 // Clear our malformed table parent variable.
1307 malformedTableParent = 0;
1308
1309 // Update |current| manually to point to the new node.
1310 setCurrent(newNode.get());
1311
1312 // Advance to the next tag that needs to be reopened.
1313 HTMLStackElem* next = elem->next;
1314 elem->derefNode();
1315 delete elem;
1316 elem = next;
1317 }
1318 }
1319
pushBlock(const AtomicString & tagName,int level)1320 void HTMLParser::pushBlock(const AtomicString& tagName, int level)
1321 {
1322 blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack);
1323 if (level >= minBlockLevelTagPriority)
1324 m_blocksInStack++;
1325 didRefCurrent = false;
1326 if (tagName == pTag)
1327 m_hasPElementInScope = InScope;
1328 else if (isScopingTag(tagName))
1329 m_hasPElementInScope = NotInScope;
1330 }
1331
popBlock(const AtomicString & tagName,bool reportErrors)1332 void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
1333 {
1334 HTMLStackElem* elem = blockStack;
1335
1336 int maxLevel = 0;
1337
1338 while (elem && (elem->tagName != tagName)) {
1339 if (maxLevel < elem->level)
1340 maxLevel = elem->level;
1341 elem = elem->next;
1342 }
1343
1344 if (!elem) {
1345 if (reportErrors)
1346 reportError(StrayCloseTagError, &tagName, 0, true);
1347 return;
1348 }
1349
1350 if (maxLevel > elem->level) {
1351 // We didn't match because the tag is in a different scope, e.g.,
1352 // <b><p>Foo</b>. Try to correct the problem.
1353 if (!isResidualStyleTag(tagName))
1354 return;
1355 return handleResidualStyleCloseTagAcrossBlocks(elem);
1356 }
1357
1358 bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
1359 HTMLStackElem* residualStyleStack = 0;
1360 Node* malformedTableParent = 0;
1361
1362 elem = blockStack;
1363 unsigned stackDepth = 1;
1364 unsigned redundantStyleCount = 0;
1365 while (elem) {
1366 if (elem->tagName == tagName) {
1367 int strayTable = inStrayTableContent;
1368 popOneBlock();
1369 elem = 0;
1370
1371 // This element was the root of some malformed content just inside an implicit or
1372 // explicit <tbody> or <tr>.
1373 // If we end up needing to reopen residual style tags, the root of the reopened chain
1374 // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1375 if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1376 Node* curr = current;
1377 while (curr && !curr->hasTagName(tableTag))
1378 curr = curr->parentNode();
1379 malformedTableParent = curr ? curr->parentNode() : 0;
1380 }
1381 }
1382 else {
1383 if (m_currentFormElement && elem->tagName == formTag)
1384 // A <form> is being closed prematurely (and this is
1385 // malformed HTML). Set an attribute on the form to clear out its
1386 // bottom margin.
1387 m_currentFormElement->setMalformed(true);
1388
1389 // Schedule this tag for reopening
1390 // after we complete the close of this entire block.
1391 if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
1392 // We've overloaded the use of stack elements and are just reusing the
1393 // struct with a slightly different meaning to the variables. Instead of chaining
1394 // from innermost to outermost, we build up a list of all the tags we need to reopen
1395 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1396 // to the outermost tag we need to reopen.
1397 // We also set elem->node to be the actual element that corresponds to the ID stored in
1398 // elem->id rather than the node that you should pop to when the element gets pulled off
1399 // the stack.
1400 if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
1401 redundantStyleCount++;
1402 else
1403 redundantStyleCount = 0;
1404
1405 if (redundantStyleCount < cMaxRedundantTagDepth)
1406 moveOneBlockToStack(residualStyleStack);
1407 else
1408 popOneBlock();
1409 } else
1410 popOneBlock();
1411 elem = blockStack;
1412 }
1413 }
1414
1415 reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1416 }
1417
popOneBlockCommon()1418 inline HTMLStackElem* HTMLParser::popOneBlockCommon()
1419 {
1420 HTMLStackElem* elem = blockStack;
1421
1422 // Form elements restore their state during the parsing process.
1423 // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
1424 if (current && elem->node != current)
1425 current->finishParsingChildren();
1426
1427 if (blockStack->level >= minBlockLevelTagPriority) {
1428 ASSERT(m_blocksInStack > 0);
1429 m_blocksInStack--;
1430 }
1431 blockStack = elem->next;
1432 current = elem->node;
1433 didRefCurrent = elem->didRefNode;
1434
1435 if (elem->strayTableContent)
1436 inStrayTableContent--;
1437
1438 if (elem->tagName == pTag)
1439 m_hasPElementInScope = NotInScope;
1440 else if (isScopingTag(elem->tagName))
1441 m_hasPElementInScope = Unknown;
1442
1443 return elem;
1444 }
1445
popOneBlock()1446 void HTMLParser::popOneBlock()
1447 {
1448 // Store the current node before popOneBlockCommon overwrites it.
1449 Node* lastCurrent = current;
1450 bool didRefLastCurrent = didRefCurrent;
1451
1452 delete popOneBlockCommon();
1453
1454 if (didRefLastCurrent)
1455 lastCurrent->deref();
1456 }
1457
moveOneBlockToStack(HTMLStackElem * & head)1458 void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
1459 {
1460 // We'll be using the stack element we're popping, but for the current node.
1461 // See the two callers for details.
1462
1463 // Store the current node before popOneBlockCommon overwrites it.
1464 Node* lastCurrent = current;
1465 bool didRefLastCurrent = didRefCurrent;
1466
1467 // Pop the block, but don't deref the current node as popOneBlock does because
1468 // we'll be using the pointer in the new stack element.
1469 HTMLStackElem* elem = popOneBlockCommon();
1470
1471 // Transfer the current node into the stack element.
1472 // No need to deref the old elem->node because popOneBlockCommon transferred
1473 // it into the current/didRefCurrent fields.
1474 elem->node = lastCurrent;
1475 elem->didRefNode = didRefLastCurrent;
1476 elem->next = head;
1477 head = elem;
1478 }
1479
checkIfHasPElementInScope()1480 void HTMLParser::checkIfHasPElementInScope()
1481 {
1482 m_hasPElementInScope = NotInScope;
1483 HTMLStackElem* elem = blockStack;
1484 while (elem) {
1485 const AtomicString& tagName = elem->tagName;
1486 if (tagName == pTag) {
1487 m_hasPElementInScope = InScope;
1488 return;
1489 } else if (isScopingTag(tagName))
1490 return;
1491 elem = elem->next;
1492 }
1493 }
1494
popInlineBlocks()1495 void HTMLParser::popInlineBlocks()
1496 {
1497 while (blockStack && isInline(current))
1498 popOneBlock();
1499 }
1500
freeBlock()1501 void HTMLParser::freeBlock()
1502 {
1503 while (blockStack)
1504 popOneBlock();
1505 ASSERT(!m_blocksInStack);
1506 }
1507
createHead()1508 void HTMLParser::createHead()
1509 {
1510 if (head || !document->documentElement())
1511 return;
1512
1513 head = new HTMLHeadElement(headTag, document);
1514 HTMLElement* body = document->body();
1515 ExceptionCode ec = 0;
1516 document->documentElement()->insertBefore(head, body, ec);
1517 if (ec)
1518 head = 0;
1519
1520 // If the body does not exist yet, then the <head> should be pushed as the current block.
1521 if (head && !body) {
1522 pushBlock(head->localName(), head->tagPriority());
1523 setCurrent(head);
1524 }
1525 }
1526
handleIsindex(Token * t)1527 PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
1528 {
1529 RefPtr<Node> n = new HTMLDivElement(divTag, document);
1530
1531 NamedMappedAttrMap* attrs = t->attrs.get();
1532
1533 RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, document, m_currentFormElement.get());
1534 isIndex->setAttributeMap(attrs);
1535 isIndex->setAttribute(typeAttr, "khtml_isindex");
1536
1537 String text = searchableIndexIntroduction();
1538 if (attrs) {
1539 if (Attribute* a = attrs->getAttributeItem(promptAttr))
1540 text = a->value().string() + " ";
1541 t->attrs = 0;
1542 }
1543
1544 n->addChild(new HTMLHRElement(hrTag, document));
1545 n->addChild(new Text(document, text));
1546 n->addChild(isIndex.release());
1547 n->addChild(new HTMLHRElement(hrTag, document));
1548
1549 return n.release();
1550 }
1551
startBody()1552 void HTMLParser::startBody()
1553 {
1554 if (inBody)
1555 return;
1556
1557 inBody = true;
1558
1559 if (m_isindexElement) {
1560 insertNode(m_isindexElement.get(), true /* don't descend into this node */);
1561 m_isindexElement = 0;
1562 }
1563 }
1564
finished()1565 void HTMLParser::finished()
1566 {
1567 // In the case of a completely empty document, here's the place to create the HTML element.
1568 if (current && current->isDocumentNode() && !document->documentElement())
1569 insertNode(new HTMLHtmlElement(htmlTag, document));
1570
1571 // This ensures that "current" is not left pointing to a node when the document is destroyed.
1572 freeBlock();
1573 setCurrent(0);
1574
1575 // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
1576 if (!m_isParsingFragment)
1577 document->finishedParsing();
1578 }
1579
reportErrorToConsole(HTMLParserErrorCode errorCode,const AtomicString * tagName1,const AtomicString * tagName2,bool closeTags)1580 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
1581 {
1582 Frame* frame = document->frame();
1583 if (!frame)
1584 return;
1585
1586 HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
1587 int lineNumber = htmlTokenizer->lineNumber() + 1;
1588
1589 AtomicString tag1;
1590 AtomicString tag2;
1591 if (tagName1) {
1592 if (*tagName1 == "#text")
1593 tag1 = "Text";
1594 else if (*tagName1 == "#comment")
1595 tag1 = "<!-- comment -->";
1596 else
1597 tag1 = (closeTags ? "</" : "<") + *tagName1 + ">";
1598 }
1599 if (tagName2) {
1600 if (*tagName2 == "#text")
1601 tag2 = "Text";
1602 else if (*tagName2 == "#comment")
1603 tag2 = "<!-- comment -->";
1604 else
1605 tag2 = (closeTags ? "</" : "<") + *tagName2 + ">";
1606 }
1607
1608 const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
1609 if (!errorMsg)
1610 return;
1611
1612 String message;
1613 if (htmlTokenizer->processingContentWrittenByScript())
1614 message += htmlParserDocumentWriteMessage();
1615 message += errorMsg;
1616 message.replace("%tag1", tag1);
1617 message.replace("%tag2", tag2);
1618
1619 frame->domWindow()->console()->addMessage(HTMLMessageSource,
1620 isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
1621 message, lineNumber, document->url().string());
1622 }
1623
1624 }
1625