• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     Copyright (C) 1997 Martin Jones (mjones@kde.org)
3               (C) 1997 Torben Weis (weis@kde.org)
4               (C) 1999,2001 Lars Knoll (knoll@kde.org)
5               (C) 2000,2001 Dirk Mueller (mueller@kde.org)
6     Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
7 
8     This library is free software; you can redistribute it and/or
9     modify it under the terms of the GNU Library General Public
10     License as published by the Free Software Foundation; either
11     version 2 of the License, or (at your option) any later version.
12 
13     This library is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16     Library General Public License for more details.
17 
18     You should have received a copy of the GNU Library General Public License
19     along with this library; see the file COPYING.LIB.  If not, write to
20     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21     Boston, MA 02110-1301, USA.
22 */
23 
24 #include "config.h"
25 #include "HTMLParser.h"
26 
27 #include "CharacterNames.h"
28 #include "CSSPropertyNames.h"
29 #include "CSSValueKeywords.h"
30 #include "Comment.h"
31 #include "Console.h"
32 #include "DOMWindow.h"
33 #include "DocumentFragment.h"
34 #include "DocumentType.h"
35 #include "Frame.h"
36 #include "HTMLBodyElement.h"
37 #include "HTMLDocument.h"
38 #include "HTMLDivElement.h"
39 #include "HTMLDListElement.h"
40 #include "HTMLElementFactory.h"
41 #include "HTMLFormElement.h"
42 #include "HTMLHeadElement.h"
43 #include "HTMLHRElement.h"
44 #include "HTMLHtmlElement.h"
45 #include "HTMLIsIndexElement.h"
46 #include "HTMLMapElement.h"
47 #include "HTMLNames.h"
48 #include "HTMLTableCellElement.h"
49 #include "HTMLTableRowElement.h"
50 #include "HTMLTableSectionElement.h"
51 #include "HTMLTokenizer.h"
52 #include "LocalizedStrings.h"
53 #include "Settings.h"
54 #include "Text.h"
55 #include <wtf/StdLibExtras.h>
56 
57 namespace WebCore {
58 
59 using namespace HTMLNames;
60 
// Depth limits used by the parser's error recovery.
// NOTE(review): the code that reads cMaxRedundantTagDepth and
// cResidualStyleMaxDepth is outside this part of the file -- confirm their
// exact meaning at the point of use.
static const unsigned cMaxRedundantTagDepth = 20;
static const unsigned cResidualStyleMaxDepth = 200;

// Elements whose tagPriority() is at least this value are subject to the
// cMaxBlockDepth cap when HTMLParser::insertNode() inserts them.
static const int minBlockLevelTagPriority = 3;

// A cap on the number of tags with priority minBlockLevelTagPriority or higher
// allowed in blockStack. The cap is enforced by adding such new elements as
// siblings instead of children once it is reached.
static const size_t cMaxBlockDepth = 4096;
70 
71 struct HTMLStackElem : Noncopyable {
HTMLStackElemWebCore::HTMLStackElem72     HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
73         : tagName(t)
74         , level(lvl)
75         , strayTableContent(false)
76         , node(n)
77         , didRefNode(r)
78         , next(nx)
79     {
80     }
81 
derefNodeWebCore::HTMLStackElem82     void derefNode()
83     {
84         if (didRefNode)
85             node->deref();
86     }
87 
88     AtomicString tagName;
89     int level;
90     bool strayTableContent;
91     Node* node;
92     bool didRefNode;
93     HTMLStackElem* next;
94 };
95 
/**
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is straightforward.
 *
 * Unfortunately, we have to handle many HTML documents that are not well-formed,
 * so the parser has to be tolerant of errors.
 *
 * We have to take care of at least the following error conditions:
 *
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one that forbids
 *    the element, and add it afterwards.
 *
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional). This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
 *
 * 3. We want to add a block element inside an inline element. Close all
 *    inline elements up to the next higher block element.
 *
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element, or ignore the tag.
 *
 */

121 
HTMLParser(HTMLDocument * doc,bool reportErrors)122 HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
123     : document(doc)
124     , current(doc)
125     , didRefCurrent(false)
126     , blockStack(0)
127     , m_blocksInStack(0)
128     , m_hasPElementInScope(NotInScope)
129     , head(0)
130     , inBody(false)
131     , haveContent(false)
132     , haveFrameSet(false)
133     , m_isParsingFragment(false)
134     , m_reportErrors(reportErrors)
135     , m_handlingResidualStyleAcrossBlocks(false)
136     , inStrayTableContent(0)
137 {
138 }
139 
HTMLParser(DocumentFragment * frag)140 HTMLParser::HTMLParser(DocumentFragment* frag)
141     : document(frag->document())
142     , current(frag)
143     , didRefCurrent(true)
144     , blockStack(0)
145     , m_blocksInStack(0)
146     , m_hasPElementInScope(NotInScope)
147     , head(0)
148     , inBody(true)
149     , haveContent(false)
150     , haveFrameSet(false)
151     , m_isParsingFragment(true)
152     , m_reportErrors(false)
153     , m_handlingResidualStyleAcrossBlocks(false)
154     , inStrayTableContent(0)
155 {
156     if (frag)
157         frag->ref();
158 }
159 
~HTMLParser()160 HTMLParser::~HTMLParser()
161 {
162     freeBlock();
163     if (didRefCurrent)
164         current->deref();
165 }
166 
reset()167 void HTMLParser::reset()
168 {
169     ASSERT(!m_isParsingFragment);
170 
171     setCurrent(document);
172 
173     freeBlock();
174 
175     inBody = false;
176     haveFrameSet = false;
177     haveContent = false;
178     inStrayTableContent = 0;
179 
180     m_currentFormElement = 0;
181     m_currentMapElement = 0;
182     head = 0;
183     m_isindexElement = 0;
184 
185     m_skipModeTag = nullAtom;
186 }
187 
setCurrent(Node * newCurrent)188 void HTMLParser::setCurrent(Node* newCurrent)
189 {
190     bool didRefNewCurrent = newCurrent && newCurrent != document;
191     if (didRefNewCurrent)
192         newCurrent->ref();
193     if (didRefCurrent)
194         current->deref();
195     current = newCurrent;
196     didRefCurrent = didRefNewCurrent;
197 }
198 
parseToken(Token * t)199 PassRefPtr<Node> HTMLParser::parseToken(Token* t)
200 {
201     if (!m_skipModeTag.isNull()) {
202         if (!t->beginTag && t->tagName == m_skipModeTag)
203             // Found the end tag for the current skip mode, so we're done skipping.
204             m_skipModeTag = nullAtom;
205         else if (current->localName() == t->tagName)
206             // Do not skip </iframe>.
207             // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
208             ;
209         else
210             return 0;
211     }
212 
213     // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
214     if (t->isCloseTag(brTag) && document->inCompatMode()) {
215         reportError(MalformedBRError);
216         t->beginTag = true;
217     }
218 
219     if (!t->beginTag) {
220         processCloseTag(t);
221         return 0;
222     }
223 
224     // Ignore spaces, if we're not inside a paragraph or other inline code.
225     // Do not alter the text if it is part of a scriptTag.
226     if (t->tagName == textAtom && t->text && current->localName() != scriptTag) {
227         if (inBody && !skipMode() && current->localName() != styleTag &&
228             current->localName() != titleTag && !t->text->containsOnlyWhitespace())
229             haveContent = true;
230 
231         RefPtr<Node> n;
232         String text = t->text.get();
233         unsigned charsLeft = text.length();
234         while (charsLeft) {
235             // split large blocks of text to nodes of manageable size
236             n = Text::createWithLengthLimit(document, text, charsLeft);
237             if (!insertNode(n.get(), t->selfClosingTag))
238                 return 0;
239         }
240         return n;
241     }
242 
243     RefPtr<Node> n = getNode(t);
244     // just to be sure, and to catch currently unimplemented stuff
245     if (!n)
246         return 0;
247 
248     // set attributes
249     if (n->isHTMLElement()) {
250         HTMLElement* e = static_cast<HTMLElement*>(n.get());
251         e->setAttributeMap(t->attrs.get());
252 
253         // take care of optional close tags
254         if (e->endTagRequirement() == TagStatusOptional)
255             popBlock(t->tagName);
256 
257         // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
258         // syntax was used, report an error.
259         if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {
260             if (t->tagName == scriptTag)
261                 reportError(IncorrectXMLCloseScriptWarning);
262             else
263                 reportError(IncorrectXMLSelfCloseError, &t->tagName);
264         }
265     }
266 
267     if (!insertNode(n.get(), t->selfClosingTag)) {
268         // we couldn't insert the node
269 
270         if (n->isElementNode()) {
271             Element* e = static_cast<Element*>(n.get());
272             e->setAttributeMap(0);
273         }
274 
275         if (m_currentMapElement == n)
276             m_currentMapElement = 0;
277 
278         if (m_currentFormElement == n)
279             m_currentFormElement = 0;
280 
281         if (head == n)
282             head = 0;
283 
284         return 0;
285     }
286     return n;
287 }
288 
parseDoctypeToken(DoctypeToken * t)289 void HTMLParser::parseDoctypeToken(DoctypeToken* t)
290 {
291     // Ignore any doctype after the first.  Ignore doctypes in fragments.
292     if (document->doctype() || m_isParsingFragment || current != document)
293         return;
294 
295     // Make a new doctype node and set it as our doctype.
296     document->addChild(DocumentType::create(document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
297 }
298 
isTableSection(Node * n)299 static bool isTableSection(Node* n)
300 {
301     return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
302 }
303 
isTablePart(Node * n)304 static bool isTablePart(Node* n)
305 {
306     return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
307            isTableSection(n);
308 }
309 
isTableRelated(Node * n)310 static bool isTableRelated(Node* n)
311 {
312     return n->hasTagName(tableTag) || isTablePart(n);
313 }
314 
isScopingTag(const AtomicString & tagName)315 static bool isScopingTag(const AtomicString& tagName)
316 {
317     return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag;
318 }
319 
insertNode(Node * n,bool flat)320 bool HTMLParser::insertNode(Node* n, bool flat)
321 {
322     RefPtr<Node> protectNode(n);
323 
324     const AtomicString& localName = n->localName();
325     int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0;
326 
327     // <table> is never allowed inside stray table content.  Always pop out of the stray table content
328     // and close up the first table, and then start the second table as a sibling.
329     if (inStrayTableContent && localName == tableTag)
330         popBlock(tableTag);
331 
332     if (tagPriority >= minBlockLevelTagPriority) {
333         while (m_blocksInStack >= cMaxBlockDepth)
334             popBlock(blockStack->tagName);
335     }
336 
337     // let's be stupid and just try to insert it.
338     // this should work if the document is well-formed
339     Node* newNode = current->addChild(n);
340     if (!newNode)
341         return handleError(n, flat, localName, tagPriority); // Try to handle the error.
342 
343     // don't push elements without end tags (e.g., <img>) on the stack
344     bool parentAttached = current->attached();
345     if (tagPriority > 0 && !flat) {
346         if (newNode == current) {
347             // This case should only be hit when a demoted <form> is placed inside a table.
348             ASSERT(localName == formTag);
349             reportError(FormInsideTablePartError, &current->localName());
350         } else {
351             // The pushBlock function transfers ownership of current to the block stack
352             // so we're guaranteed that didRefCurrent is false. The code below is an
353             // optimized version of setCurrent that takes advantage of that fact and also
354             // assumes that newNode is neither 0 nor a pointer to the document.
355             pushBlock(localName, tagPriority);
356             newNode->beginParsingChildren();
357             ASSERT(!didRefCurrent);
358             newNode->ref();
359             current = newNode;
360             didRefCurrent = true;
361         }
362         if (parentAttached && !n->attached() && !m_isParsingFragment)
363             n->attach();
364     } else {
365         if (parentAttached && !n->attached() && !m_isParsingFragment)
366             n->attach();
367         n->finishParsingChildren();
368     }
369 
370     return true;
371 }
372 
handleError(Node * n,bool flat,const AtomicString & localName,int tagPriority)373 bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
374 {
375     // Error handling code.  This is just ad hoc handling of specific parent/child combinations.
376     HTMLElement* e;
377     bool handled = false;
378 
379     // 1. Check out the element's tag name to decide how to deal with errors.
380     if (n->isHTMLElement()) {
381         HTMLElement* h = static_cast<HTMLElement*>(n);
382         if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
383             if (inStrayTableContent && !isTableRelated(current)) {
384                 reportError(MisplacedTablePartError, &localName, &current->localName());
385                 // pop out to the nearest enclosing table-related tag.
386                 while (blockStack && !isTableRelated(current))
387                     popOneBlock();
388                 return insertNode(n);
389             }
390         } else if (h->hasLocalName(headTag)) {
391             if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) {
392                 reportError(MisplacedHeadError);
393                 return false;
394             }
395         } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
396             bool createdHead = false;
397             if (!head) {
398                 createHead();
399                 createdHead = true;
400             }
401             if (head) {
402                 if (!createdHead)
403                     reportError(MisplacedHeadContentError, &localName, &current->localName());
404                 if (head->addChild(n)) {
405                     if (!n->attached() && !m_isParsingFragment)
406                         n->attach();
407                     return true;
408                 } else
409                     return false;
410             }
411         } else if (h->hasLocalName(htmlTag)) {
412             if (!current->isDocumentNode() ) {
413                 if (document->documentElement() && document->documentElement()->hasTagName(htmlTag)) {
414                     reportError(RedundantHTMLBodyError, &localName);
415                     // we have another <HTML> element.... apply attributes to existing one
416                     // make sure we don't overwrite already existing attributes
417                     NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
418                     Element* existingHTML = static_cast<Element*>(document->documentElement());
419                     NamedAttrMap* bmap = existingHTML->attributes(false);
420                     for (unsigned l = 0; map && l < map->length(); ++l) {
421                         Attribute* it = map->attributeItem(l);
422                         if (!bmap->getAttributeItem(it->name()))
423                             existingHTML->setAttribute(it->name(), it->value());
424                     }
425                 }
426                 return false;
427             }
428         } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) {
429             bool createdHead = false;
430             if (!head) {
431                 createHead();
432                 createdHead = true;
433             }
434             if (head) {
435                 Node* newNode = head->addChild(n);
436                 if (!newNode) {
437                     setSkipMode(h->tagQName());
438                     return false;
439                 }
440 
441                 if (!createdHead)
442                     reportError(MisplacedHeadContentError, &localName, &current->localName());
443 
444                 pushBlock(localName, tagPriority);
445                 newNode->beginParsingChildren();
446                 setCurrent(newNode);
447                 if (!n->attached() && !m_isParsingFragment)
448                     n->attach();
449                 return true;
450             }
451             if (inBody) {
452                 setSkipMode(h->tagQName());
453                 return false;
454             }
455         } else if (h->hasLocalName(bodyTag)) {
456             if (inBody && document->body()) {
457                 // we have another <BODY> element.... apply attributes to existing one
458                 // make sure we don't overwrite already existing attributes
459                 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
460                 reportError(RedundantHTMLBodyError, &localName);
461                 NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
462                 Element* existingBody = document->body();
463                 NamedAttrMap* bmap = existingBody->attributes(false);
464                 for (unsigned l = 0; map && l < map->length(); ++l) {
465                     Attribute* it = map->attributeItem(l);
466                     if (!bmap->getAttributeItem(it->name()))
467                         existingBody->setAttribute(it->name(), it->value());
468                 }
469                 return false;
470             }
471             else if (!current->isDocumentNode())
472                 return false;
473         } else if (h->hasLocalName(areaTag)) {
474             if (m_currentMapElement) {
475                 reportError(MisplacedAreaError, &current->localName());
476                 m_currentMapElement->addChild(n);
477                 if (!n->attached() && !m_isParsingFragment)
478                     n->attach();
479                 handled = true;
480                 return true;
481             }
482             return false;
483         } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
484             if (isTableRelated(current)) {
485                 while (blockStack && isTablePart(current))
486                     popOneBlock();
487                 return insertNode(n);
488             }
489         }
490     } else if (n->isCommentNode() && !head)
491         return false;
492 
493     // 2. Next we examine our currently active element to do some further error handling.
494     if (current->isHTMLElement()) {
495         HTMLElement* h = static_cast<HTMLElement*>(current);
496         const AtomicString& currentTagName = h->localName();
497         if (h->hasLocalName(htmlTag)) {
498             HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
499             if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
500                 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
501                 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
502                 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
503                 elt->hasLocalName(baseTag))) {
504                 if (!head) {
505                     head = new HTMLHeadElement(headTag, document);
506                     e = head;
507                     insertNode(e);
508                     handled = true;
509                 }
510             } else {
511                 if (n->isTextNode()) {
512                     Text* t = static_cast<Text*>(n);
513                     if (t->containsOnlyWhitespace())
514                         return false;
515                 }
516                 if (!haveFrameSet) {
517                     e = new HTMLBodyElement(bodyTag, document);
518                     startBody();
519                     insertNode(e);
520                     handled = true;
521                 } else
522                     reportError(MisplacedFramesetContentError, &localName);
523             }
524         } else if (h->hasLocalName(headTag)) {
525             if (n->hasTagName(htmlTag))
526                 return false;
527             else {
528                 // This means the body starts here...
529                 if (!haveFrameSet) {
530                     popBlock(currentTagName);
531                     e = new HTMLBodyElement(bodyTag, document);
532                     startBody();
533                     insertNode(e);
534                     handled = true;
535                 } else
536                     reportError(MisplacedFramesetContentError, &localName);
537             }
538         } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag)
539                    || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
540             reportError(MisplacedContentRetryError, &localName, &currentTagName);
541             popBlock(currentTagName);
542             handled = true;
543         } else if (h->hasLocalName(captionTag)) {
544             // Illegal content in a caption. Close the caption and try again.
545             reportError(MisplacedCaptionContentError, &localName);
546             popBlock(currentTagName);
547             if (isTablePart(n))
548                 return insertNode(n, flat);
549         } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
550             if (n->hasTagName(tableTag)) {
551                 reportError(MisplacedTableError, &currentTagName);
552                 if (m_isParsingFragment && !h->hasLocalName(tableTag))
553                     // fragment may contain table parts without <table> ancestor, pop them one by one
554                     popBlock(h->localName());
555                 popBlock(localName); // end the table
556                 handled = true;      // ...and start a new one
557             } else {
558                 ExceptionCode ec = 0;
559                 Node* node = current;
560                 Node* parent = node->parentNode();
561                 // A script may have removed the current node's parent from the DOM
562                 // http://bugs.webkit.org/show_bug.cgi?id=7137
563                 // FIXME: we should do real recovery here and re-parent with the correct node.
564                 if (!parent)
565                     return false;
566                 Node* grandparent = parent->parentNode();
567 
568                 if (n->isTextNode() ||
569                     (h->hasLocalName(trTag) &&
570                      isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
571                      ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
572                        !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
573                      parent->hasTagName(tableTag))) {
574                     node = (node->hasTagName(tableTag)) ? node :
575                             ((node->hasTagName(trTag)) ? grandparent : parent);
576                     // This can happen with fragments
577                     if (!node)
578                         return false;
579                     Node* parent = node->parentNode();
580                     if (!parent)
581                         return false;
582                     parent->insertBefore(n, node, ec);
583                     if (!ec) {
584                         reportError(StrayTableContentError, &localName, &currentTagName);
585                         if (n->isHTMLElement() && tagPriority > 0 &&
586                             !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
587                         {
588                             pushBlock(localName, tagPriority);
589                             n->beginParsingChildren();
590                             setCurrent(n);
591                             inStrayTableContent++;
592                             blockStack->strayTableContent = true;
593                         }
594                         return true;
595                     }
596                 }
597 
598                 if (!ec) {
599                     if (current->hasTagName(trTag)) {
600                         reportError(TablePartRequiredError, &localName, &tdTag.localName());
601                         e = new HTMLTableCellElement(tdTag, document);
602                     } else if (current->hasTagName(tableTag)) {
603                         // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
604                         // and it isn't really a parse error per se.
605                         e = new HTMLTableSectionElement(tbodyTag, document);
606                     } else {
607                         reportError(TablePartRequiredError, &localName, &trTag.localName());
608                         e = new HTMLTableRowElement(trTag, document);
609                     }
610 
611                     insertNode(e);
612                     handled = true;
613                 }
614             }
615         } else if (h->hasLocalName(objectTag)) {
616             reportError(MisplacedContentRetryError, &localName, &currentTagName);
617             popBlock(objectTag);
618             handled = true;
619         } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
620             if (!isInline(n)) {
621                 popBlock(currentTagName);
622                 handled = true;
623             }
624         } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
625             if (localName == optgroupTag) {
626                 popBlock(currentTagName);
627                 handled = true;
628             } else if (localName == selectTag) {
629                 // IE treats a nested select as </select>. Let's do the same
630                 popBlock(localName);
631             }
632         } else if (h->hasLocalName(selectTag)) {
633             if (localName == inputTag || localName == textareaTag) {
634                 reportError(MisplacedContentRetryError, &localName, &currentTagName);
635                 popBlock(currentTagName);
636                 handled = true;
637             }
638         } else if (h->hasLocalName(colgroupTag)) {
639             popBlock(currentTagName);
640             handled = true;
641         } else if (!h->hasLocalName(bodyTag)) {
642             if (isInline(current)) {
643                 popInlineBlocks();
644                 handled = true;
645             }
646         }
647     } else if (current->isDocumentNode()) {
648         if (n->isTextNode()) {
649             Text* t = static_cast<Text*>(n);
650             if (t->containsOnlyWhitespace())
651                 return false;
652         }
653 
654         if (!document->documentElement()) {
655             e = new HTMLHtmlElement(htmlTag, document);
656             insertNode(e);
657             handled = true;
658         }
659     }
660 
661     // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
662     if (!handled) {
663         reportError(IgnoredContentError, &localName, &current->localName());
664         return false;
665     }
666     return insertNode(n);
667 }
668 
669 typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
670 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
671 
textCreateErrorCheck(Token * t,RefPtr<Node> & result)672 bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
673 {
674     result = new Text(document, t->text.get());
675     return false;
676 }
677 
commentCreateErrorCheck(Token * t,RefPtr<Node> & result)678 bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
679 {
680     result = new Comment(document, t->text.get());
681     return false;
682 }
683 
headCreateErrorCheck(Token *,RefPtr<Node> & result)684 bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
685 {
686     if (!head || current->localName() == htmlTag) {
687         head = new HTMLHeadElement(headTag, document);
688         result = head;
689     } else
690         reportError(MisplacedHeadError);
691     return false;
692 }
693 
bodyCreateErrorCheck(Token *,RefPtr<Node> &)694 bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
695 {
696     // body no longer allowed if we have a frameset
697     if (haveFrameSet)
698         return false;
699     popBlock(headTag);
700     startBody();
701     return true;
702 }
703 
framesetCreateErrorCheck(Token *,RefPtr<Node> &)704 bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
705 {
706     popBlock(headTag);
707     if (inBody && !haveFrameSet && !haveContent) {
708         popBlock(bodyTag);
709         // ### actually for IE document.body returns the now hidden "body" element
710         // we can't implement that behaviour now because it could cause too many
711         // regressions and the headaches are not worth the work as long as there is
712         // no site actually relying on that detail (Dirk)
713         if (document->body())
714             document->body()->setAttribute(styleAttr, "display:none");
715         inBody = false;
716     }
717     if ((haveContent || haveFrameSet) && current->localName() == htmlTag)
718         return false;
719     haveFrameSet = true;
720     startBody();
721     return true;
722 }
723 
formCreateErrorCheck(Token * t,RefPtr<Node> & result)724 bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
725 {
726     // Only create a new form if we're not already inside one.
727     // This is consistent with other browsers' behavior.
728     if (!m_currentFormElement) {
729         m_currentFormElement = new HTMLFormElement(formTag, document);
730         result = m_currentFormElement;
731         pCloserCreateErrorCheck(t, result);
732     }
733     return false;
734 }
735 
isindexCreateErrorCheck(Token * t,RefPtr<Node> & result)736 bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
737 {
738     RefPtr<Node> n = handleIsindex(t);
739     if (!inBody)
740         m_isindexElement = n.release();
741     else {
742         t->selfClosingTag = true;
743         result = n.release();
744     }
745     return false;
746 }
747 
selectCreateErrorCheck(Token *,RefPtr<Node> &)748 bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&)
749 {
750     return true;
751 }
752 
ddCreateErrorCheck(Token * t,RefPtr<Node> & result)753 bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
754 {
755     pCloserCreateErrorCheck(t, result);
756     popBlock(dtTag);
757     popBlock(ddTag);
758     return true;
759 }
760 
dtCreateErrorCheck(Token * t,RefPtr<Node> & result)761 bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
762 {
763     pCloserCreateErrorCheck(t, result);
764     popBlock(ddTag);
765     popBlock(dtTag);
766     return true;
767 }
768 
nestedCreateErrorCheck(Token * t,RefPtr<Node> &)769 bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
770 {
771     popBlock(t->tagName);
772     return true;
773 }
774 
nestedPCloserCreateErrorCheck(Token * t,RefPtr<Node> & result)775 bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result)
776 {
777     pCloserCreateErrorCheck(t, result);
778     popBlock(t->tagName);
779     return true;
780 }
781 
nestedStyleCreateErrorCheck(Token * t,RefPtr<Node> &)782 bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&)
783 {
784     return allowNestedRedundantTag(t->tagName);
785 }
786 
tableCellCreateErrorCheck(Token *,RefPtr<Node> &)787 bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&)
788 {
789     popBlock(tdTag);
790     popBlock(thTag);
791     return true;
792 }
793 
tableSectionCreateErrorCheck(Token *,RefPtr<Node> &)794 bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&)
795 {
796     popBlock(theadTag);
797     popBlock(tbodyTag);
798     popBlock(tfootTag);
799     return true;
800 }
801 
noembedCreateErrorCheck(Token *,RefPtr<Node> &)802 bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&)
803 {
804     setSkipMode(noembedTag);
805     return true;
806 }
807 
noframesCreateErrorCheck(Token *,RefPtr<Node> &)808 bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&)
809 {
810     setSkipMode(noframesTag);
811     return true;
812 }
813 
noscriptCreateErrorCheck(Token *,RefPtr<Node> &)814 bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
815 {
816     if (!m_isParsingFragment) {
817         Settings* settings = document->settings();
818         if (settings && settings->isJavaScriptEnabled())
819             setSkipMode(noscriptTag);
820     }
821     return true;
822 }
823 
pCloserCreateErrorCheck(Token *,RefPtr<Node> &)824 bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&)
825 {
826     if (hasPElementInScope())
827         popBlock(pTag);
828     return true;
829 }
830 
pCloserStrictCreateErrorCheck(Token *,RefPtr<Node> &)831 bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
832 {
833     if (document->inCompatMode())
834         return true;
835     if (hasPElementInScope())
836         popBlock(pTag);
837     return true;
838 }
839 
mapCreateErrorCheck(Token *,RefPtr<Node> & result)840 bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
841 {
842     m_currentMapElement = new HTMLMapElement(mapTag, document);
843     result = m_currentMapElement;
844     return false;
845 }
846 
getNode(Token * t)847 PassRefPtr<Node> HTMLParser::getNode(Token* t)
848 {
849     // Init our error handling table.
850     DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ());
851     if (gFunctionMap.isEmpty()) {
852         gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
853         gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
854         gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
855         gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
856         gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
857         gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
858         gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
859         gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
860         gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
861         gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
862         gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
863         gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
864         gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
865         gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
866         gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
867         gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
868         gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
869         gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
870         gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
871         gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
872         gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
873         gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
874         gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
875         gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
876         gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
877         gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
878         gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
879         gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck);
880         gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
881         gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
882         gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
883         gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
884         gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
885         gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
886         gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
887         gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
888         gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
889         gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
890         gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
891         gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
892         gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
893         gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
894         gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
895         gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck);
896         gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
897         gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
898         gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
899         gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
900         gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
901         gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
902         gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
903         gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
904         gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
905         gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
906     }
907 
908     bool proceed = true;
909     RefPtr<Node> result;
910     if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
911         proceed = (this->*errorCheckFunc)(t, result);
912     if (proceed)
913         result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), document, m_currentFormElement.get());
914     return result.release();
915 }
916 
allowNestedRedundantTag(const AtomicString & tagName)917 bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName)
918 {
919     // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
920     // about 1500 tags, all from a bunch of <b>s.  We will only allow at most 20
921     // nested tags of the same type before just ignoring them all together.
922     unsigned i = 0;
923     for (HTMLStackElem* curr = blockStack;
924          i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
925          curr = curr->next, i++) { }
926     return i != cMaxRedundantTagDepth;
927 }
928 
processCloseTag(Token * t)929 void HTMLParser::processCloseTag(Token* t)
930 {
931     // Support for really broken html.
932     // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
933     // let's rely on the end() call to close things.
934     if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
935         return;
936 
937     bool checkForCloseTagErrors = true;
938     if (t->tagName == formTag && m_currentFormElement) {
939         m_currentFormElement = 0;
940         checkForCloseTagErrors = false;
941     } else if (t->tagName == mapTag)
942         m_currentMapElement = 0;
943     else if (t->tagName == pTag)
944         checkForCloseTagErrors = false;
945 
946     HTMLStackElem* oldElem = blockStack;
947     popBlock(t->tagName, checkForCloseTagErrors);
948     if (oldElem == blockStack && t->tagName == pTag) {
949         // We encountered a stray </p>.  Amazingly Gecko, WinIE, and MacIE all treat
950         // this as a valid break, i.e., <p></p>.  So go ahead and make the empty
951         // paragraph.
952         t->beginTag = true;
953         parseToken(t);
954         popBlock(t->tagName);
955         reportError(StrayParagraphCloseError);
956     }
957 }
958 
isHeaderTag(const AtomicString & tagName)959 bool HTMLParser::isHeaderTag(const AtomicString& tagName)
960 {
961     DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headerTags, ());
962     if (headerTags.isEmpty()) {
963         headerTags.add(h1Tag.localName().impl());
964         headerTags.add(h2Tag.localName().impl());
965         headerTags.add(h3Tag.localName().impl());
966         headerTags.add(h4Tag.localName().impl());
967         headerTags.add(h5Tag.localName().impl());
968         headerTags.add(h6Tag.localName().impl());
969     }
970 
971     return headerTags.contains(tagName.impl());
972 }
973 
isInline(Node * node) const974 bool HTMLParser::isInline(Node* node) const
975 {
976     if (node->isTextNode())
977         return true;
978 
979     if (node->isHTMLElement()) {
980         HTMLElement* e = static_cast<HTMLElement*>(node);
981         if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) ||
982             e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) ||
983             e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) ||
984             e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) ||
985             e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) ||
986             e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) ||
987             e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) ||
988             e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) ||
989             e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||
990             e->hasLocalName(noembedTag))
991             return true;
992         if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
993             Settings* settings = document->settings();
994             if (settings && settings->isJavaScriptEnabled())
995                 return true;
996         }
997     }
998 
999     return false;
1000 }
1001 
isResidualStyleTag(const AtomicString & tagName)1002 bool HTMLParser::isResidualStyleTag(const AtomicString& tagName)
1003 {
1004     DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ());
1005     if (residualStyleTags.isEmpty()) {
1006         residualStyleTags.add(aTag.localName().impl());
1007         residualStyleTags.add(fontTag.localName().impl());
1008         residualStyleTags.add(ttTag.localName().impl());
1009         residualStyleTags.add(uTag.localName().impl());
1010         residualStyleTags.add(bTag.localName().impl());
1011         residualStyleTags.add(iTag.localName().impl());
1012         residualStyleTags.add(sTag.localName().impl());
1013         residualStyleTags.add(strikeTag.localName().impl());
1014         residualStyleTags.add(bigTag.localName().impl());
1015         residualStyleTags.add(smallTag.localName().impl());
1016         residualStyleTags.add(emTag.localName().impl());
1017         residualStyleTags.add(strongTag.localName().impl());
1018         residualStyleTags.add(dfnTag.localName().impl());
1019         residualStyleTags.add(codeTag.localName().impl());
1020         residualStyleTags.add(sampTag.localName().impl());
1021         residualStyleTags.add(kbdTag.localName().impl());
1022         residualStyleTags.add(varTag.localName().impl());
1023         residualStyleTags.add(nobrTag.localName().impl());
1024     }
1025 
1026     return residualStyleTags.contains(tagName.impl());
1027 }
1028 
isAffectedByResidualStyle(const AtomicString & tagName)1029 bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName)
1030 {
1031     DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ());
1032     if (unaffectedTags.isEmpty()) {
1033         unaffectedTags.add(bodyTag.localName().impl());
1034         unaffectedTags.add(tableTag.localName().impl());
1035         unaffectedTags.add(theadTag.localName().impl());
1036         unaffectedTags.add(tbodyTag.localName().impl());
1037         unaffectedTags.add(tfootTag.localName().impl());
1038         unaffectedTags.add(trTag.localName().impl());
1039         unaffectedTags.add(thTag.localName().impl());
1040         unaffectedTags.add(tdTag.localName().impl());
1041         unaffectedTags.add(captionTag.localName().impl());
1042         unaffectedTags.add(colgroupTag.localName().impl());
1043         unaffectedTags.add(colTag.localName().impl());
1044         unaffectedTags.add(optionTag.localName().impl());
1045         unaffectedTags.add(optgroupTag.localName().impl());
1046         unaffectedTags.add(selectTag.localName().impl());
1047         unaffectedTags.add(objectTag.localName().impl());
1048     }
1049 
1050     return !unaffectedTags.contains(tagName.impl());
1051 }
1052 
handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem * elem)1053 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
1054 {
1055     HTMLStackElem* maxElem = 0;
1056     bool finished = false;
1057     bool strayTableContent = elem->strayTableContent;
1058 
1059     m_handlingResidualStyleAcrossBlocks = true;
1060     while (!finished) {
1061         // Find the outermost element that crosses over to a higher level. If there exists another higher-level
1062         // element, we will do another pass, until we have corrected the innermost one.
1063         ExceptionCode ec = 0;
1064         HTMLStackElem* curr = blockStack;
1065         HTMLStackElem* prev = 0;
1066         HTMLStackElem* prevMaxElem = 0;
1067         maxElem = 0;
1068         finished = true;
1069         while (curr && curr != elem) {
1070             if (curr->level > elem->level) {
1071                 if (!isAffectedByResidualStyle(curr->tagName))
1072                     return;
1073                 if (maxElem)
1074                     // We will need another pass.
1075                     finished = false;
1076                 maxElem = curr;
1077                 prevMaxElem = prev;
1078             }
1079 
1080             prev = curr;
1081             curr = curr->next;
1082         }
1083 
1084         if (!curr || !maxElem)
1085             return;
1086 
1087         Node* residualElem = prev->node;
1088         Node* blockElem = prevMaxElem ? prevMaxElem->node : current;
1089         Node* parentElem = elem->node;
1090 
1091         // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1092         // FIXME: We should either always allow it or perform an additional fixup instead of
1093         // just bailing here.
1094         // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1095         if (!parentElem->childAllowed(blockElem))
1096             return;
1097 
1098         m_hasPElementInScope = Unknown;
1099 
1100         if (maxElem->node->parentNode() != elem->node) {
1101             // Walk the stack and remove any elements that aren't residual style tags.  These
1102             // are basically just being closed up.  Example:
1103             // <font><span>Moo<p>Goo</font></p>.
1104             // In the above example, the <span> doesn't need to be reopened.  It can just close.
1105             HTMLStackElem* currElem = maxElem->next;
1106             HTMLStackElem* prevElem = maxElem;
1107             while (currElem != elem) {
1108                 HTMLStackElem* nextElem = currElem->next;
1109                 if (!isResidualStyleTag(currElem->tagName)) {
1110                     prevElem->next = nextElem;
1111                     prevElem->derefNode();
1112                     prevElem->node = currElem->node;
1113                     prevElem->didRefNode = currElem->didRefNode;
1114                     delete currElem;
1115                 }
1116                 else
1117                     prevElem = currElem;
1118                 currElem = nextElem;
1119             }
1120 
1121             // We have to reopen residual tags in between maxElem and elem.  An example of this case is:
1122             // <font><i>Moo<p>Foo</font>.
1123             // In this case, we need to transform the part before the <p> into:
1124             // <font><i>Moo</i></font><i>
1125             // so that the <i> will remain open.  This involves the modification of elements
1126             // in the block stack.
1127             // This will also affect how we ultimately reparent the block, since we want it to end up
1128             // under the reopened residual tags (e.g., the <i> in the above example.)
1129             RefPtr<Node> prevNode = 0;
1130             currElem = maxElem;
1131             while (currElem->node != residualElem) {
1132                 if (isResidualStyleTag(currElem->node->localName())) {
1133                     // Create a clone of this element.
1134                     // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
1135                     Node* currNode = currElem->node->cloneNode(false).releaseRef();
1136                     reportError(ResidualStyleError, &currNode->localName());
1137 
1138                     // Change the stack element's node to point to the clone.
1139                     // The stack element adopts the reference we obtained above by calling release().
1140                     currElem->derefNode();
1141                     currElem->node = currNode;
1142                     currElem->didRefNode = true;
1143 
1144                     // Attach the previous node as a child of this new node.
1145                     if (prevNode)
1146                         currNode->appendChild(prevNode, ec);
1147                     else // The new parent for the block element is going to be the innermost clone.
1148                         parentElem = currNode;  // FIXME: We shifted parentElem to be a residual inline.  We never checked to see if blockElem could be legally placed inside the inline though.
1149 
1150                     prevNode = currNode;
1151                 }
1152 
1153                 currElem = currElem->next;
1154             }
1155 
1156             // Now append the chain of new residual style elements if one exists.
1157             if (prevNode)
1158                 elem->node->appendChild(prevNode, ec);  // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
1159         }
1160 
1161         // Check if the block is still in the tree. If it isn't, then we don't
1162         // want to remove it from its parent (that would crash) or insert it into
1163         // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
1164         bool isBlockStillInTree = blockElem->parentNode();
1165 
1166         // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1167         // All content of |blockElem| is reparented to be under this clone.  We then
1168         // reparent |blockElem| using real DOM calls so that attachment/detachment will
1169         // be performed to fix up the rendering tree.
1170         // So for this example: <b>...<p>Foo</b>Goo</p>
1171         // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1172         //
1173         // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
1174         if (isBlockStillInTree)
1175             blockElem->parentNode()->removeChild(blockElem, ec);
1176 
1177         Node* newNodePtr = 0;
1178         if (blockElem->firstChild()) {
1179             // Step 2: Clone |residualElem|.
1180             RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1181             newNodePtr = newNode.get();
1182             reportError(ResidualStyleError, &newNode->localName());
1183 
1184             // Step 3: Place |blockElem|'s children under |newNode|.  Remove all of the children of |blockElem|
1185             // before we've put |newElem| into the document.  That way we'll only do one attachment of all
1186             // the new content (instead of a bunch of individual attachments).
1187             Node* currNode = blockElem->firstChild();
1188             while (currNode) {
1189                 Node* nextNode = currNode->nextSibling();
1190                 newNode->appendChild(currNode, ec);
1191                 currNode = nextNode;
1192             }
1193 
1194             // Step 4: Place |newNode| under |blockElem|.  |blockElem| is still out of the document, so no
1195             // attachment can occur yet.
1196             blockElem->appendChild(newNode.release(), ec);
1197         } else
1198             finished = true;
1199 
1200         // Step 5: Reparent |blockElem|.  Now the full attachment of the fixed up tree takes place.
1201         if (isBlockStillInTree)
1202             parentElem->appendChild(blockElem, ec);
1203 
1204         // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us.  Also update
1205         // the node associated with the previous stack element so that when it gets popped,
1206         // it doesn't make the residual element the next current node.
1207         HTMLStackElem* currElem = maxElem;
1208         HTMLStackElem* prevElem = 0;
1209         while (currElem != elem) {
1210             prevElem = currElem;
1211             currElem = currElem->next;
1212         }
1213         prevElem->next = elem->next;
1214         prevElem->derefNode();
1215         prevElem->node = elem->node;
1216         prevElem->didRefNode = elem->didRefNode;
1217         if (!finished) {
1218             // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
1219             // in the stack. We do not do this for the innermost block, because in that case the new
1220             // node is effectively no longer open.
1221             elem->next = maxElem;
1222             elem->node = prevMaxElem->node;
1223             elem->didRefNode = prevMaxElem->didRefNode;
1224             elem->strayTableContent = false;
1225             prevMaxElem->next = elem;
1226             ASSERT(newNodePtr);
1227             prevMaxElem->node = newNodePtr;
1228             prevMaxElem->didRefNode = false;
1229         } else
1230             delete elem;
1231     }
1232 
1233     // FIXME: If we ever make a case like this work:
1234     // <table><b><i><form></b></form></i></table>
1235     // Then this check will be too simplistic.  Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
1236     if (strayTableContent)
1237         inStrayTableContent--;
1238 
1239     // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1240     // In the above example, Goo should stay italic.
1241     // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
1242 
1243     HTMLStackElem* curr = blockStack;
1244     HTMLStackElem* residualStyleStack = 0;
1245     unsigned stackDepth = 1;
1246     unsigned redundantStyleCount = 0;
1247     while (curr && curr != maxElem) {
1248         // We will actually schedule this tag for reopening
1249         // after we complete the close of this entire block.
1250         if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
1251             // We've overloaded the use of stack elements and are just reusing the
1252             // struct with a slightly different meaning to the variables.  Instead of chaining
1253             // from innermost to outermost, we build up a list of all the tags we need to reopen
1254             // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1255             // to the outermost tag we need to reopen.
1256             // We also set curr->node to be the actual element that corresponds to the ID stored in
1257             // curr->id rather than the node that you should pop to when the element gets pulled off
1258             // the stack.
1259             if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
1260                 redundantStyleCount++;
1261             else
1262                 redundantStyleCount = 0;
1263 
1264             if (redundantStyleCount < cMaxRedundantTagDepth)
1265                 moveOneBlockToStack(residualStyleStack);
1266             else
1267                 popOneBlock();
1268         } else
1269             popOneBlock();
1270 
1271         curr = blockStack;
1272     }
1273 
1274     reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.
1275 
1276     m_handlingResidualStyleAcrossBlocks = false;
1277 }
1278 
reopenResidualStyleTags(HTMLStackElem * elem,Node * malformedTableParent)1279 void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
1280 {
1281     // Loop for each tag that needs to be reopened.
1282     while (elem) {
1283         // Create a shallow clone of the DOM node for this element.
1284         RefPtr<Node> newNode = elem->node->cloneNode(false);
1285         reportError(ResidualStyleError, &newNode->localName());
1286 
1287         // Append the new node. In the malformed table case, we need to insert before the table,
1288         // which will be the last child.
1289         ExceptionCode ec = 0;
1290         if (malformedTableParent)
1291             malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
1292         else
1293             current->appendChild(newNode, ec);
1294         // FIXME: Is it really OK to ignore the exceptions here?
1295 
1296         // Now push a new stack element for this node we just created.
1297         pushBlock(elem->tagName, elem->level);
1298         newNode->beginParsingChildren();
1299 
1300         // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1301         // that it is inside a malformed table.
1302         blockStack->strayTableContent = malformedTableParent != 0;
1303         if (blockStack->strayTableContent)
1304             inStrayTableContent++;
1305 
1306         // Clear our malformed table parent variable.
1307         malformedTableParent = 0;
1308 
1309         // Update |current| manually to point to the new node.
1310         setCurrent(newNode.get());
1311 
1312         // Advance to the next tag that needs to be reopened.
1313         HTMLStackElem* next = elem->next;
1314         elem->derefNode();
1315         delete elem;
1316         elem = next;
1317     }
1318 }
1319 
pushBlock(const AtomicString & tagName,int level)1320 void HTMLParser::pushBlock(const AtomicString& tagName, int level)
1321 {
1322     blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack);
1323     if (level >= minBlockLevelTagPriority)
1324         m_blocksInStack++;
1325     didRefCurrent = false;
1326     if (tagName == pTag)
1327         m_hasPElementInScope = InScope;
1328     else if (isScopingTag(tagName))
1329         m_hasPElementInScope = NotInScope;
1330 }
1331 
popBlock(const AtomicString & tagName,bool reportErrors)1332 void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
1333 {
1334     HTMLStackElem* elem = blockStack;
1335 
1336     int maxLevel = 0;
1337 
1338     while (elem && (elem->tagName != tagName)) {
1339         if (maxLevel < elem->level)
1340             maxLevel = elem->level;
1341         elem = elem->next;
1342     }
1343 
1344     if (!elem) {
1345         if (reportErrors)
1346             reportError(StrayCloseTagError, &tagName, 0, true);
1347         return;
1348     }
1349 
1350     if (maxLevel > elem->level) {
1351         // We didn't match because the tag is in a different scope, e.g.,
1352         // <b><p>Foo</b>.  Try to correct the problem.
1353         if (!isResidualStyleTag(tagName))
1354             return;
1355         return handleResidualStyleCloseTagAcrossBlocks(elem);
1356     }
1357 
1358     bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
1359     HTMLStackElem* residualStyleStack = 0;
1360     Node* malformedTableParent = 0;
1361 
1362     elem = blockStack;
1363     unsigned stackDepth = 1;
1364     unsigned redundantStyleCount = 0;
1365     while (elem) {
1366         if (elem->tagName == tagName) {
1367             int strayTable = inStrayTableContent;
1368             popOneBlock();
1369             elem = 0;
1370 
1371             // This element was the root of some malformed content just inside an implicit or
1372             // explicit <tbody> or <tr>.
1373             // If we end up needing to reopen residual style tags, the root of the reopened chain
1374             // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1375             if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1376                 Node* curr = current;
1377                 while (curr && !curr->hasTagName(tableTag))
1378                     curr = curr->parentNode();
1379                 malformedTableParent = curr ? curr->parentNode() : 0;
1380             }
1381         }
1382         else {
1383             if (m_currentFormElement && elem->tagName == formTag)
1384                 // A <form> is being closed prematurely (and this is
1385                 // malformed HTML).  Set an attribute on the form to clear out its
1386                 // bottom margin.
1387                 m_currentFormElement->setMalformed(true);
1388 
1389             // Schedule this tag for reopening
1390             // after we complete the close of this entire block.
1391             if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
1392                 // We've overloaded the use of stack elements and are just reusing the
1393                 // struct with a slightly different meaning to the variables.  Instead of chaining
1394                 // from innermost to outermost, we build up a list of all the tags we need to reopen
1395                 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1396                 // to the outermost tag we need to reopen.
1397                 // We also set elem->node to be the actual element that corresponds to the ID stored in
1398                 // elem->id rather than the node that you should pop to when the element gets pulled off
1399                 // the stack.
1400                 if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
1401                     redundantStyleCount++;
1402                 else
1403                     redundantStyleCount = 0;
1404 
1405                 if (redundantStyleCount < cMaxRedundantTagDepth)
1406                     moveOneBlockToStack(residualStyleStack);
1407                 else
1408                     popOneBlock();
1409             } else
1410                 popOneBlock();
1411             elem = blockStack;
1412         }
1413     }
1414 
1415     reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1416 }
1417 
popOneBlockCommon()1418 inline HTMLStackElem* HTMLParser::popOneBlockCommon()
1419 {
1420     HTMLStackElem* elem = blockStack;
1421 
1422     // Form elements restore their state during the parsing process.
1423     // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
1424     if (current && elem->node != current)
1425         current->finishParsingChildren();
1426 
1427     if (blockStack->level >= minBlockLevelTagPriority) {
1428         ASSERT(m_blocksInStack > 0);
1429         m_blocksInStack--;
1430     }
1431     blockStack = elem->next;
1432     current = elem->node;
1433     didRefCurrent = elem->didRefNode;
1434 
1435     if (elem->strayTableContent)
1436         inStrayTableContent--;
1437 
1438     if (elem->tagName == pTag)
1439         m_hasPElementInScope = NotInScope;
1440     else if (isScopingTag(elem->tagName))
1441         m_hasPElementInScope = Unknown;
1442 
1443     return elem;
1444 }
1445 
popOneBlock()1446 void HTMLParser::popOneBlock()
1447 {
1448     // Store the current node before popOneBlockCommon overwrites it.
1449     Node* lastCurrent = current;
1450     bool didRefLastCurrent = didRefCurrent;
1451 
1452     delete popOneBlockCommon();
1453 
1454     if (didRefLastCurrent)
1455         lastCurrent->deref();
1456 }
1457 
moveOneBlockToStack(HTMLStackElem * & head)1458 void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
1459 {
1460     // We'll be using the stack element we're popping, but for the current node.
1461     // See the two callers for details.
1462 
1463     // Store the current node before popOneBlockCommon overwrites it.
1464     Node* lastCurrent = current;
1465     bool didRefLastCurrent = didRefCurrent;
1466 
1467     // Pop the block, but don't deref the current node as popOneBlock does because
1468     // we'll be using the pointer in the new stack element.
1469     HTMLStackElem* elem = popOneBlockCommon();
1470 
1471     // Transfer the current node into the stack element.
1472     // No need to deref the old elem->node because popOneBlockCommon transferred
1473     // it into the current/didRefCurrent fields.
1474     elem->node = lastCurrent;
1475     elem->didRefNode = didRefLastCurrent;
1476     elem->next = head;
1477     head = elem;
1478 }
1479 
checkIfHasPElementInScope()1480 void HTMLParser::checkIfHasPElementInScope()
1481 {
1482     m_hasPElementInScope = NotInScope;
1483     HTMLStackElem* elem = blockStack;
1484     while (elem) {
1485         const AtomicString& tagName = elem->tagName;
1486         if (tagName == pTag) {
1487             m_hasPElementInScope = InScope;
1488             return;
1489         } else if (isScopingTag(tagName))
1490             return;
1491         elem = elem->next;
1492     }
1493 }
1494 
popInlineBlocks()1495 void HTMLParser::popInlineBlocks()
1496 {
1497     while (blockStack && isInline(current))
1498         popOneBlock();
1499 }
1500 
freeBlock()1501 void HTMLParser::freeBlock()
1502 {
1503     while (blockStack)
1504         popOneBlock();
1505     ASSERT(!m_blocksInStack);
1506 }
1507 
createHead()1508 void HTMLParser::createHead()
1509 {
1510     if (head || !document->documentElement())
1511         return;
1512 
1513     head = new HTMLHeadElement(headTag, document);
1514     HTMLElement* body = document->body();
1515     ExceptionCode ec = 0;
1516     document->documentElement()->insertBefore(head, body, ec);
1517     if (ec)
1518         head = 0;
1519 
1520     // If the body does not exist yet, then the <head> should be pushed as the current block.
1521     if (head && !body) {
1522         pushBlock(head->localName(), head->tagPriority());
1523         setCurrent(head);
1524     }
1525 }
1526 
handleIsindex(Token * t)1527 PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
1528 {
1529     RefPtr<Node> n = new HTMLDivElement(divTag, document);
1530 
1531     NamedMappedAttrMap* attrs = t->attrs.get();
1532 
1533     RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, document, m_currentFormElement.get());
1534     isIndex->setAttributeMap(attrs);
1535     isIndex->setAttribute(typeAttr, "khtml_isindex");
1536 
1537     String text = searchableIndexIntroduction();
1538     if (attrs) {
1539         if (Attribute* a = attrs->getAttributeItem(promptAttr))
1540             text = a->value().string() + " ";
1541         t->attrs = 0;
1542     }
1543 
1544     n->addChild(new HTMLHRElement(hrTag, document));
1545     n->addChild(new Text(document, text));
1546     n->addChild(isIndex.release());
1547     n->addChild(new HTMLHRElement(hrTag, document));
1548 
1549     return n.release();
1550 }
1551 
startBody()1552 void HTMLParser::startBody()
1553 {
1554     if (inBody)
1555         return;
1556 
1557     inBody = true;
1558 
1559     if (m_isindexElement) {
1560         insertNode(m_isindexElement.get(), true /* don't descend into this node */);
1561         m_isindexElement = 0;
1562     }
1563 }
1564 
finished()1565 void HTMLParser::finished()
1566 {
1567     // In the case of a completely empty document, here's the place to create the HTML element.
1568     if (current && current->isDocumentNode() && !document->documentElement())
1569         insertNode(new HTMLHtmlElement(htmlTag, document));
1570 
1571     // This ensures that "current" is not left pointing to a node when the document is destroyed.
1572     freeBlock();
1573     setCurrent(0);
1574 
1575     // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
1576     if (!m_isParsingFragment)
1577         document->finishedParsing();
1578 }
1579 
reportErrorToConsole(HTMLParserErrorCode errorCode,const AtomicString * tagName1,const AtomicString * tagName2,bool closeTags)1580 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
1581 {
1582     Frame* frame = document->frame();
1583     if (!frame)
1584         return;
1585 
1586     HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
1587     int lineNumber = htmlTokenizer->lineNumber() + 1;
1588 
1589     AtomicString tag1;
1590     AtomicString tag2;
1591     if (tagName1) {
1592         if (*tagName1 == "#text")
1593             tag1 = "Text";
1594         else if (*tagName1 == "#comment")
1595             tag1 = "<!-- comment -->";
1596         else
1597             tag1 = (closeTags ? "</" : "<") + *tagName1 + ">";
1598     }
1599     if (tagName2) {
1600         if (*tagName2 == "#text")
1601             tag2 = "Text";
1602         else if (*tagName2 == "#comment")
1603             tag2 = "<!-- comment -->";
1604         else
1605             tag2 = (closeTags ? "</" : "<") + *tagName2 + ">";
1606     }
1607 
1608     const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
1609     if (!errorMsg)
1610         return;
1611 
1612     String message;
1613     if (htmlTokenizer->processingContentWrittenByScript())
1614         message += htmlParserDocumentWriteMessage();
1615     message += errorMsg;
1616     message.replace("%tag1", tag1);
1617     message.replace("%tag2", tag2);
1618 
1619     frame->domWindow()->console()->addMessage(HTMLMessageSource,
1620         isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
1621         message, lineNumber, document->url().string());
1622 }
1623 
1624 }
1625