• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29 
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "ScriptController.h"
50 #include "Text.h"
51 #include "XLinkNames.h"
52 #include "XMLNSNames.h"
53 #include "XMLNames.h"
54 #include <wtf/unicode/CharacterNames.h>
55 
56 namespace WebCore {
57 
58 using namespace HTMLNames;
59 
60 static const int uninitializedLineNumberValue = -1;
61 
uninitializedPositionValue1()62 static TextPosition1 uninitializedPositionValue1()
63 {
64     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
65 }
66 
67 namespace {
68 
isHTMLSpaceOrReplacementCharacter(UChar character)69 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
70 {
71     return isHTMLSpace(character) || character == replacementCharacter;
72 }
73 
isAllWhitespace(const String & string)74 inline bool isAllWhitespace(const String& string)
75 {
76     return string.isAllSpecialCharacters<isHTMLSpace>();
77 }
78 
isAllWhitespaceOrReplacementCharacters(const String & string)79 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
80 {
81     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
82 }
83 
isNumberedHeaderTag(const AtomicString & tagName)84 bool isNumberedHeaderTag(const AtomicString& tagName)
85 {
86     return tagName == h1Tag
87         || tagName == h2Tag
88         || tagName == h3Tag
89         || tagName == h4Tag
90         || tagName == h5Tag
91         || tagName == h6Tag;
92 }
93 
isCaptionColOrColgroupTag(const AtomicString & tagName)94 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
95 {
96     return tagName == captionTag
97         || tagName == colTag
98         || tagName == colgroupTag;
99 }
100 
isTableCellContextTag(const AtomicString & tagName)101 bool isTableCellContextTag(const AtomicString& tagName)
102 {
103     return tagName == thTag || tagName == tdTag;
104 }
105 
isTableBodyContextTag(const AtomicString & tagName)106 bool isTableBodyContextTag(const AtomicString& tagName)
107 {
108     return tagName == tbodyTag
109         || tagName == tfootTag
110         || tagName == theadTag;
111 }
112 
113 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
isSpecialNode(Node * node)114 bool isSpecialNode(Node* node)
115 {
116     if (node->hasTagName(MathMLNames::miTag)
117         || node->hasTagName(MathMLNames::moTag)
118         || node->hasTagName(MathMLNames::mnTag)
119         || node->hasTagName(MathMLNames::msTag)
120         || node->hasTagName(MathMLNames::mtextTag)
121         || node->hasTagName(MathMLNames::annotation_xmlTag)
122         || node->hasTagName(SVGNames::foreignObjectTag)
123         || node->hasTagName(SVGNames::descTag)
124         || node->hasTagName(SVGNames::titleTag))
125         return true;
126     if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
127         return true;
128     if (!isInHTMLNamespace(node))
129         return false;
130     const AtomicString& tagName = node->localName();
131     return tagName == addressTag
132         || tagName == appletTag
133         || tagName == areaTag
134         || tagName == articleTag
135         || tagName == asideTag
136         || tagName == baseTag
137         || tagName == basefontTag
138         || tagName == bgsoundTag
139         || tagName == blockquoteTag
140         || tagName == bodyTag
141         || tagName == brTag
142         || tagName == buttonTag
143         || tagName == captionTag
144         || tagName == centerTag
145         || tagName == colTag
146         || tagName == colgroupTag
147         || tagName == commandTag
148         || tagName == ddTag
149         || tagName == detailsTag
150         || tagName == dirTag
151         || tagName == divTag
152         || tagName == dlTag
153         || tagName == dtTag
154         || tagName == embedTag
155         || tagName == fieldsetTag
156         || tagName == figcaptionTag
157         || tagName == figureTag
158         || tagName == footerTag
159         || tagName == formTag
160         || tagName == frameTag
161         || tagName == framesetTag
162         || isNumberedHeaderTag(tagName)
163         || tagName == headTag
164         || tagName == headerTag
165         || tagName == hgroupTag
166         || tagName == hrTag
167         || tagName == htmlTag
168         || tagName == iframeTag
169         || tagName == imgTag
170         || tagName == inputTag
171         || tagName == isindexTag
172         || tagName == liTag
173         || tagName == linkTag
174         || tagName == listingTag
175         || tagName == marqueeTag
176         || tagName == menuTag
177         || tagName == metaTag
178         || tagName == navTag
179         || tagName == noembedTag
180         || tagName == noframesTag
181         || tagName == noscriptTag
182         || tagName == objectTag
183         || tagName == olTag
184         || tagName == pTag
185         || tagName == paramTag
186         || tagName == plaintextTag
187         || tagName == preTag
188         || tagName == scriptTag
189         || tagName == sectionTag
190         || tagName == selectTag
191         || tagName == styleTag
192         || tagName == summaryTag
193         || tagName == tableTag
194         || isTableBodyContextTag(tagName)
195         || tagName == tdTag
196         || tagName == textareaTag
197         || tagName == thTag
198         || tagName == titleTag
199         || tagName == trTag
200         || tagName == ulTag
201         || tagName == wbrTag
202         || tagName == xmpTag;
203 }
204 
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)205 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
206 {
207     return tagName == bTag
208         || tagName == bigTag
209         || tagName == codeTag
210         || tagName == emTag
211         || tagName == fontTag
212         || tagName == iTag
213         || tagName == sTag
214         || tagName == smallTag
215         || tagName == strikeTag
216         || tagName == strongTag
217         || tagName == ttTag
218         || tagName == uTag;
219 }
220 
isNonAnchorFormattingTag(const AtomicString & tagName)221 bool isNonAnchorFormattingTag(const AtomicString& tagName)
222 {
223     return tagName == nobrTag
224         || isNonAnchorNonNobrFormattingTag(tagName);
225 }
226 
227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)228 bool isFormattingTag(const AtomicString& tagName)
229 {
230     return tagName == aTag || isNonAnchorFormattingTag(tagName);
231 }
232 
closestFormAncestor(Element * element)233 HTMLFormElement* closestFormAncestor(Element* element)
234 {
235     while (element) {
236         if (element->hasTagName(formTag))
237             return static_cast<HTMLFormElement*>(element);
238         ContainerNode* parent = element->parentNode();
239         if (!parent || !parent->isElementNode())
240             return 0;
241         element = static_cast<Element*>(parent);
242     }
243     return 0;
244 }
245 
246 } // namespace
247 
248 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
249     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
250 public:
ExternalCharacterTokenBuffer(AtomicHTMLToken & token)251     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
252         : m_current(token.characters().data())
253         , m_end(m_current + token.characters().size())
254     {
255         ASSERT(!isEmpty());
256     }
257 
ExternalCharacterTokenBuffer(const String & string)258     explicit ExternalCharacterTokenBuffer(const String& string)
259         : m_current(string.characters())
260         , m_end(m_current + string.length())
261     {
262         ASSERT(!isEmpty());
263     }
264 
~ExternalCharacterTokenBuffer()265     ~ExternalCharacterTokenBuffer()
266     {
267         ASSERT(isEmpty());
268     }
269 
isEmpty() const270     bool isEmpty() const { return m_current == m_end; }
271 
skipLeadingWhitespace()272     void skipLeadingWhitespace()
273     {
274         skipLeading<isHTMLSpace>();
275     }
276 
takeLeadingWhitespace()277     String takeLeadingWhitespace()
278     {
279         return takeLeading<isHTMLSpace>();
280     }
281 
takeLeadingNonWhitespace()282     String takeLeadingNonWhitespace()
283     {
284         return takeLeading<isNotHTMLSpace>();
285     }
286 
takeRemaining()287     String takeRemaining()
288     {
289         ASSERT(!isEmpty());
290         const UChar* start = m_current;
291         m_current = m_end;
292         return String(start, m_current - start);
293     }
294 
giveRemainingTo(Vector<UChar> & recipient)295     void giveRemainingTo(Vector<UChar>& recipient)
296     {
297         recipient.append(m_current, m_end - m_current);
298         m_current = m_end;
299     }
300 
takeRemainingWhitespace()301     String takeRemainingWhitespace()
302     {
303         ASSERT(!isEmpty());
304         Vector<UChar> whitespace;
305         do {
306             UChar cc = *m_current++;
307             if (isHTMLSpace(cc))
308                 whitespace.append(cc);
309         } while (m_current < m_end);
310         // Returning the null string when there aren't any whitespace
311         // characters is slightly cleaner semantically because we don't want
312         // to insert a text node (as opposed to inserting an empty text node).
313         if (whitespace.isEmpty())
314             return String();
315         return String::adopt(whitespace);
316     }
317 
318 private:
319     template<bool characterPredicate(UChar)>
skipLeading()320     void skipLeading()
321     {
322         ASSERT(!isEmpty());
323         while (characterPredicate(*m_current)) {
324             if (++m_current == m_end)
325                 return;
326         }
327     }
328 
329     template<bool characterPredicate(UChar)>
takeLeading()330     String takeLeading()
331     {
332         ASSERT(!isEmpty());
333         const UChar* start = m_current;
334         skipLeading<characterPredicate>();
335         if (start == m_current)
336             return String();
337         return String(start, m_current - start);
338     }
339 
340     const UChar* m_current;
341     const UChar* m_end;
342 };
343 
344 
HTMLTreeBuilder(HTMLDocumentParser * parser,HTMLDocument * document,bool reportErrors,bool usePreHTML5ParserQuirks)345 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
346     : m_framesetOk(true)
347     , m_document(document)
348     , m_tree(document)
349     , m_reportErrors(reportErrors)
350     , m_isPaused(false)
351     , m_insertionMode(InitialMode)
352     , m_originalInsertionMode(InitialMode)
353     , m_parser(parser)
354     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
355     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
356     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
357     , m_hasPendingForeignInsertionModeSteps(false)
358 {
359 }
360 
361 // FIXME: Member variables should be grouped into self-initializing structs to
362 // minimize code duplication between these constructors.
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission,bool usePreHTML5ParserQuirks)363 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
364     : m_framesetOk(true)
365     , m_fragmentContext(fragment, contextElement, scriptingPermission)
366     , m_document(fragment->document())
367     , m_tree(fragment, scriptingPermission)
368     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
369     , m_isPaused(false)
370     , m_insertionMode(InitialMode)
371     , m_originalInsertionMode(InitialMode)
372     , m_parser(parser)
373     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
374     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
375     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
376     , m_hasPendingForeignInsertionModeSteps(false)
377 {
378     if (contextElement) {
379         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
380         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
381         // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
382         // and instead use the DocumentFragment as a root node.
383         m_tree.openElements()->pushRootNode(fragment);
384         resetInsertionModeAppropriately();
385         m_tree.setForm(closestFormAncestor(contextElement));
386     }
387 }
388 
~HTMLTreeBuilder()389 HTMLTreeBuilder::~HTMLTreeBuilder()
390 {
391 }
392 
detach()393 void HTMLTreeBuilder::detach()
394 {
395     // This call makes little sense in fragment mode, but for consistency
396     // DocumentParser expects detach() to always be called before it's destroyed.
397     m_document = 0;
398     // HTMLConstructionSite might be on the callstack when detach() is called
399     // otherwise we'd just call m_tree.clear() here instead.
400     m_tree.detach();
401 }
402 
FragmentParsingContext()403 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
404     : m_fragment(0)
405     , m_contextElement(0)
406     , m_scriptingPermission(FragmentScriptingAllowed)
407 {
408 }
409 
FragmentParsingContext(DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission)410 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
411     : m_fragment(fragment)
412     , m_contextElement(contextElement)
413     , m_scriptingPermission(scriptingPermission)
414 {
415     ASSERT(!fragment->hasChildNodes());
416 }
417 
~FragmentParsingContext()418 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
419 {
420 }
421 
takeScriptToProcess(TextPosition1 & scriptStartPosition)422 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
423 {
424     // Unpause ourselves, callers may pause us again when processing the script.
425     // The HTML5 spec is written as though scripts are executed inside the tree
426     // builder.  We pause the parser to exit the tree builder, and then resume
427     // before running scripts.
428     m_isPaused = false;
429     scriptStartPosition = m_scriptToProcessStartPosition;
430     m_scriptToProcessStartPosition = uninitializedPositionValue1();
431     return m_scriptToProcess.release();
432 }
433 
constructTreeFromToken(HTMLToken & rawToken)434 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
435 {
436     AtomicHTMLToken token(rawToken);
437 
438     // We clear the rawToken in case constructTreeFromAtomicToken
439     // synchronously re-enters the parser. We don't clear the token immedately
440     // for Character tokens because the AtomicHTMLToken avoids copying the
441     // characters by keeping a pointer to the underlying buffer in the
442     // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
443     // the parser.
444     //
445     // FIXME: Top clearing the rawToken once we start running the parser off
446     // the main thread or once we stop allowing synchronous JavaScript
447     // execution from parseMappedAttribute.
448     if (rawToken.type() != HTMLToken::Character)
449         rawToken.clear();
450 
451     constructTreeFromAtomicToken(token);
452 
453     if (!rawToken.isUninitialized()) {
454         ASSERT(rawToken.type() == HTMLToken::Character);
455         rawToken.clear();
456     }
457 }
458 
constructTreeFromAtomicToken(AtomicHTMLToken & token)459 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
460 {
461     processToken(token);
462 
463     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
464     // the U+0000 characters into replacement characters has compatibility
465     // problems.
466     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
467     m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && !isInHTMLNamespace(m_tree.currentNode()));
468 }
469 
processToken(AtomicHTMLToken & token)470 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
471 {
472     switch (token.type()) {
473     case HTMLToken::Uninitialized:
474         ASSERT_NOT_REACHED();
475         break;
476     case HTMLToken::DOCTYPE:
477         processDoctypeToken(token);
478         break;
479     case HTMLToken::StartTag:
480         processStartTag(token);
481         break;
482     case HTMLToken::EndTag:
483         processEndTag(token);
484         break;
485     case HTMLToken::Comment:
486         processComment(token);
487         return;
488     case HTMLToken::Character:
489         processCharacter(token);
490         break;
491     case HTMLToken::EndOfFile:
492         processEndOfFile(token);
493         break;
494     }
495 }
496 
processDoctypeToken(AtomicHTMLToken & token)497 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
498 {
499     ASSERT(token.type() == HTMLToken::DOCTYPE);
500     if (m_insertionMode == InitialMode) {
501         m_tree.insertDoctype(token);
502         setInsertionMode(BeforeHTMLMode);
503         return;
504     }
505     if (m_insertionMode == InTableTextMode) {
506         defaultForInTableText();
507         processDoctypeToken(token);
508         return;
509     }
510     parseError(token);
511 }
512 
processFakeStartTag(const QualifiedName & tagName,PassRefPtr<NamedNodeMap> attributes)513 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
514 {
515     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
516     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
517     processStartTag(fakeToken);
518 }
519 
processFakeEndTag(const QualifiedName & tagName)520 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
521 {
522     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
523     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
524     processEndTag(fakeToken);
525 }
526 
processFakeCharacters(const String & characters)527 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
528 {
529     ASSERT(!characters.isEmpty());
530     ExternalCharacterTokenBuffer buffer(characters);
531     processCharacterBuffer(buffer);
532 }
533 
processFakePEndTagIfPInButtonScope()534 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
535 {
536     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
537         return;
538     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
539     processEndTag(endP);
540 }
541 
attributesForIsindexInput(AtomicHTMLToken & token)542 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
543 {
544     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
545     if (!attributes)
546         attributes = NamedNodeMap::create();
547     else {
548         attributes->removeAttribute(nameAttr);
549         attributes->removeAttribute(actionAttr);
550         attributes->removeAttribute(promptAttr);
551     }
552 
553     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
554     attributes->insertAttribute(mappedAttribute.release(), false);
555     return attributes.release();
556 }
557 
processIsindexStartTagForInBody(AtomicHTMLToken & token)558 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
559 {
560     ASSERT(token.type() == HTMLToken::StartTag);
561     ASSERT(token.name() == isindexTag);
562     parseError(token);
563     if (m_tree.form())
564         return;
565     notImplemented(); // Acknowledge self-closing flag
566     processFakeStartTag(formTag);
567     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
568     if (actionAttribute) {
569         ASSERT(m_tree.currentElement()->hasTagName(formTag));
570         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
571     }
572     processFakeStartTag(hrTag);
573     processFakeStartTag(labelTag);
574     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
575     if (promptAttribute)
576         processFakeCharacters(promptAttribute->value());
577     else
578         processFakeCharacters(searchableIndexIntroduction());
579     processFakeStartTag(inputTag, attributesForIsindexInput(token));
580     notImplemented(); // This second set of characters may be needed by non-english locales.
581     processFakeEndTag(labelTag);
582     processFakeStartTag(hrTag);
583     processFakeEndTag(formTag);
584 }
585 
586 namespace {
587 
isLi(const ContainerNode * element)588 bool isLi(const ContainerNode* element)
589 {
590     return element->hasTagName(liTag);
591 }
592 
isDdOrDt(const ContainerNode * element)593 bool isDdOrDt(const ContainerNode* element)
594 {
595     return element->hasTagName(ddTag)
596         || element->hasTagName(dtTag);
597 }
598 
599 }
600 
601 template <bool shouldClose(const ContainerNode*)>
processCloseWhenNestedTag(AtomicHTMLToken & token)602 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
603 {
604     m_framesetOk = false;
605     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
606     while (1) {
607         ContainerNode* node = nodeRecord->node();
608         if (shouldClose(node)) {
609             ASSERT(node->isElementNode());
610             processFakeEndTag(toElement(node)->tagQName());
611             break;
612         }
613         if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
614             break;
615         nodeRecord = nodeRecord->next();
616     }
617     processFakePEndTagIfPInButtonScope();
618     m_tree.insertHTMLElement(token);
619 }
620 
621 namespace {
622 
623 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
624 
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,QualifiedName ** names,size_t length)625 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
626 {
627     for (size_t i = 0; i < length; ++i) {
628         const QualifiedName& name = *names[i];
629         const AtomicString& localName = name.localName();
630         AtomicString loweredLocalName = localName.lower();
631         if (loweredLocalName != localName)
632             map->add(loweredLocalName, name);
633     }
634 }
635 
adjustSVGTagNameCase(AtomicHTMLToken & token)636 void adjustSVGTagNameCase(AtomicHTMLToken& token)
637 {
638     static PrefixedNameToQualifiedNameMap* caseMap = 0;
639     if (!caseMap) {
640         caseMap = new PrefixedNameToQualifiedNameMap;
641         size_t length = 0;
642         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
643         mapLoweredLocalNameToName(caseMap, svgTags, length);
644     }
645 
646     const QualifiedName& casedName = caseMap->get(token.name());
647     if (casedName.localName().isNull())
648         return;
649     token.setName(casedName.localName());
650 }
651 
652 template<QualifiedName** getAttrs(size_t* length)>
adjustAttributes(AtomicHTMLToken & token)653 void adjustAttributes(AtomicHTMLToken& token)
654 {
655     static PrefixedNameToQualifiedNameMap* caseMap = 0;
656     if (!caseMap) {
657         caseMap = new PrefixedNameToQualifiedNameMap;
658         size_t length = 0;
659         QualifiedName** attrs = getAttrs(&length);
660         mapLoweredLocalNameToName(caseMap, attrs, length);
661     }
662 
663     NamedNodeMap* attributes = token.attributes();
664     if (!attributes)
665         return;
666 
667     for (unsigned x = 0; x < attributes->length(); ++x) {
668         Attribute* attribute = attributes->attributeItem(x);
669         const QualifiedName& casedName = caseMap->get(attribute->localName());
670         if (!casedName.localName().isNull())
671             attribute->parserSetName(casedName);
672     }
673 }
674 
adjustSVGAttributes(AtomicHTMLToken & token)675 void adjustSVGAttributes(AtomicHTMLToken& token)
676 {
677     adjustAttributes<SVGNames::getSVGAttrs>(token);
678 }
679 
adjustMathMLAttributes(AtomicHTMLToken & token)680 void adjustMathMLAttributes(AtomicHTMLToken& token)
681 {
682     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
683 }
684 
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,QualifiedName ** names,size_t length)685 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
686 {
687     for (size_t i = 0; i < length; ++i) {
688         QualifiedName* name = names[i];
689         const AtomicString& localName = name->localName();
690         AtomicString prefixColonLocalName(prefix + ":" + localName);
691         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
692         map->add(prefixColonLocalName, nameWithPrefix);
693     }
694 }
695 
adjustForeignAttributes(AtomicHTMLToken & token)696 void adjustForeignAttributes(AtomicHTMLToken& token)
697 {
698     static PrefixedNameToQualifiedNameMap* map = 0;
699     if (!map) {
700         map = new PrefixedNameToQualifiedNameMap;
701         size_t length = 0;
702         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
703         addNamesWithPrefix(map, "xlink", attrs, length);
704 
705         attrs = XMLNames::getXMLAttrs(&length);
706         addNamesWithPrefix(map, "xml", attrs, length);
707 
708         map->add("xmlns", XMLNSNames::xmlnsAttr);
709         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
710     }
711 
712     NamedNodeMap* attributes = token.attributes();
713     if (!attributes)
714         return;
715 
716     for (unsigned x = 0; x < attributes->length(); ++x) {
717         Attribute* attribute = attributes->attributeItem(x);
718         const QualifiedName& name = map->get(attribute->localName());
719         if (!name.localName().isNull())
720             attribute->parserSetName(name);
721     }
722 }
723 
724 }
725 
processStartTagForInBody(AtomicHTMLToken & token)726 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
727 {
728     ASSERT(token.type() == HTMLToken::StartTag);
729     if (token.name() == htmlTag) {
730         m_tree.insertHTMLHtmlStartTagInBody(token);
731         return;
732     }
733     if (token.name() == baseTag
734         || token.name() == basefontTag
735         || token.name() == bgsoundTag
736         || token.name() == commandTag
737         || token.name() == linkTag
738         || token.name() == metaTag
739         || token.name() == noframesTag
740         || token.name() == scriptTag
741         || token.name() == styleTag
742         || token.name() == titleTag) {
743         bool didProcess = processStartTagForInHead(token);
744         ASSERT_UNUSED(didProcess, didProcess);
745         return;
746     }
747     if (token.name() == bodyTag) {
748         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
749             ASSERT(isParsingFragment());
750             return;
751         }
752         m_tree.insertHTMLBodyStartTagInBody(token);
753         return;
754     }
755     if (token.name() == framesetTag) {
756         parseError(token);
757         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
758             ASSERT(isParsingFragment());
759             return;
760         }
761         if (!m_framesetOk)
762             return;
763         ExceptionCode ec = 0;
764         m_tree.openElements()->bodyElement()->remove(ec);
765         ASSERT(!ec);
766         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
767         m_tree.openElements()->popHTMLBodyElement();
768         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
769         m_tree.insertHTMLElement(token);
770         setInsertionMode(InFramesetMode);
771         return;
772     }
773     if (token.name() == addressTag
774         || token.name() == articleTag
775         || token.name() == asideTag
776         || token.name() == blockquoteTag
777         || token.name() == centerTag
778         || token.name() == detailsTag
779         || token.name() == dirTag
780         || token.name() == divTag
781         || token.name() == dlTag
782         || token.name() == fieldsetTag
783         || token.name() == figcaptionTag
784         || token.name() == figureTag
785         || token.name() == footerTag
786         || token.name() == headerTag
787         || token.name() == hgroupTag
788         || token.name() == menuTag
789         || token.name() == navTag
790         || token.name() == olTag
791         || token.name() == pTag
792         || token.name() == sectionTag
793         || token.name() == summaryTag
794         || token.name() == ulTag) {
795         processFakePEndTagIfPInButtonScope();
796         m_tree.insertHTMLElement(token);
797         return;
798     }
799     if (isNumberedHeaderTag(token.name())) {
800         processFakePEndTagIfPInButtonScope();
801         if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
802             parseError(token);
803             m_tree.openElements()->pop();
804         }
805         m_tree.insertHTMLElement(token);
806         return;
807     }
808     if (token.name() == preTag || token.name() == listingTag) {
809         processFakePEndTagIfPInButtonScope();
810         m_tree.insertHTMLElement(token);
811         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
812         m_framesetOk = false;
813         return;
814     }
815     if (token.name() == formTag) {
816         if (m_tree.form()) {
817             parseError(token);
818             return;
819         }
820         processFakePEndTagIfPInButtonScope();
821         m_tree.insertHTMLFormElement(token);
822         return;
823     }
824     if (token.name() == liTag) {
825         processCloseWhenNestedTag<isLi>(token);
826         return;
827     }
828     if (token.name() == ddTag || token.name() == dtTag) {
829         processCloseWhenNestedTag<isDdOrDt>(token);
830         return;
831     }
832     if (token.name() == plaintextTag) {
833         processFakePEndTagIfPInButtonScope();
834         m_tree.insertHTMLElement(token);
835         m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
836         return;
837     }
838     if (token.name() == buttonTag) {
839         if (m_tree.openElements()->inScope(buttonTag)) {
840             parseError(token);
841             processFakeEndTag(buttonTag);
842             reprocessStartTag(token); // FIXME: Could we just fall through here?
843             return;
844         }
845         m_tree.reconstructTheActiveFormattingElements();
846         m_tree.insertHTMLElement(token);
847         m_framesetOk = false;
848         return;
849     }
850     if (token.name() == aTag) {
851         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
852         if (activeATag) {
853             parseError(token);
854             processFakeEndTag(aTag);
855             m_tree.activeFormattingElements()->remove(activeATag);
856             if (m_tree.openElements()->contains(activeATag))
857                 m_tree.openElements()->remove(activeATag);
858         }
859         m_tree.reconstructTheActiveFormattingElements();
860         m_tree.insertFormattingElement(token);
861         return;
862     }
863     if (isNonAnchorNonNobrFormattingTag(token.name())) {
864         m_tree.reconstructTheActiveFormattingElements();
865         m_tree.insertFormattingElement(token);
866         return;
867     }
868     if (token.name() == nobrTag) {
869         m_tree.reconstructTheActiveFormattingElements();
870         if (m_tree.openElements()->inScope(nobrTag)) {
871             parseError(token);
872             processFakeEndTag(nobrTag);
873             m_tree.reconstructTheActiveFormattingElements();
874         }
875         m_tree.insertFormattingElement(token);
876         return;
877     }
878     if (token.name() == appletTag
879         || token.name() == marqueeTag
880         || token.name() == objectTag) {
881         m_tree.reconstructTheActiveFormattingElements();
882         m_tree.insertHTMLElement(token);
883         m_tree.activeFormattingElements()->appendMarker();
884         m_framesetOk = false;
885         return;
886     }
887     if (token.name() == tableTag) {
888         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
889             processFakeEndTag(pTag);
890         m_tree.insertHTMLElement(token);
891         m_framesetOk = false;
892         setInsertionMode(InTableMode);
893         return;
894     }
895     if (token.name() == imageTag) {
896         parseError(token);
897         // Apparently we're not supposed to ask.
898         token.setName(imgTag.localName());
899         prepareToReprocessToken();
900         // Note the fall through to the imgTag handling below!
901     }
902     if (token.name() == areaTag
903         || token.name() == brTag
904         || token.name() == embedTag
905         || token.name() == imgTag
906         || token.name() == keygenTag
907         || token.name() == wbrTag) {
908         m_tree.reconstructTheActiveFormattingElements();
909         m_tree.insertSelfClosingHTMLElement(token);
910         m_framesetOk = false;
911         return;
912     }
913     if (token.name() == inputTag) {
914         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
915         m_tree.reconstructTheActiveFormattingElements();
916         m_tree.insertSelfClosingHTMLElement(token);
917         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
918             m_framesetOk = false;
919         return;
920     }
921     if (token.name() == paramTag
922         || token.name() == sourceTag
923         || token.name() == trackTag) {
924         m_tree.insertSelfClosingHTMLElement(token);
925         return;
926     }
927     if (token.name() == hrTag) {
928         processFakePEndTagIfPInButtonScope();
929         m_tree.insertSelfClosingHTMLElement(token);
930         m_framesetOk = false;
931         return;
932     }
933     if (token.name() == isindexTag) {
934         processIsindexStartTagForInBody(token);
935         return;
936     }
937     if (token.name() == textareaTag) {
938         m_tree.insertHTMLElement(token);
939         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
940         m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
941         m_originalInsertionMode = m_insertionMode;
942         m_framesetOk = false;
943         setInsertionMode(TextMode);
944         return;
945     }
946     if (token.name() == xmpTag) {
947         processFakePEndTagIfPInButtonScope();
948         m_tree.reconstructTheActiveFormattingElements();
949         m_framesetOk = false;
950         processGenericRawTextStartTag(token);
951         return;
952     }
953     if (token.name() == iframeTag) {
954         m_framesetOk = false;
955         processGenericRawTextStartTag(token);
956         return;
957     }
958     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
959         processGenericRawTextStartTag(token);
960         return;
961     }
962     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
963         processGenericRawTextStartTag(token);
964         return;
965     }
966     if (token.name() == selectTag) {
967         m_tree.reconstructTheActiveFormattingElements();
968         m_tree.insertHTMLElement(token);
969         m_framesetOk = false;
970         if (m_insertionMode == InTableMode
971              || m_insertionMode == InCaptionMode
972              || m_insertionMode == InColumnGroupMode
973              || m_insertionMode == InTableBodyMode
974              || m_insertionMode == InRowMode
975              || m_insertionMode == InCellMode)
976             setInsertionMode(InSelectInTableMode);
977         else
978             setInsertionMode(InSelectMode);
979         return;
980     }
981     if (token.name() == optgroupTag || token.name() == optionTag) {
982         if (m_tree.openElements()->inScope(optionTag.localName())) {
983             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
984             processEndTag(endOption);
985         }
986         m_tree.reconstructTheActiveFormattingElements();
987         m_tree.insertHTMLElement(token);
988         return;
989     }
990     if (token.name() == rpTag || token.name() == rtTag) {
991         if (m_tree.openElements()->inScope(rubyTag.localName())) {
992             m_tree.generateImpliedEndTags();
993             if (!m_tree.currentNode()->hasTagName(rubyTag)) {
994                 parseError(token);
995                 m_tree.openElements()->popUntil(rubyTag.localName());
996             }
997         }
998         m_tree.insertHTMLElement(token);
999         return;
1000     }
1001     if (token.name() == MathMLNames::mathTag.localName()) {
1002         m_tree.reconstructTheActiveFormattingElements();
1003         adjustMathMLAttributes(token);
1004         adjustForeignAttributes(token);
1005         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1006         if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1007             setInsertionMode(InForeignContentMode);
1008         return;
1009     }
1010     if (token.name() == SVGNames::svgTag.localName()) {
1011         m_tree.reconstructTheActiveFormattingElements();
1012         adjustSVGAttributes(token);
1013         adjustForeignAttributes(token);
1014         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1015         if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1016             setInsertionMode(InForeignContentMode);
1017         return;
1018     }
1019     if (isCaptionColOrColgroupTag(token.name())
1020         || token.name() == frameTag
1021         || token.name() == headTag
1022         || isTableBodyContextTag(token.name())
1023         || isTableCellContextTag(token.name())
1024         || token.name() == trTag) {
1025         parseError(token);
1026         return;
1027     }
1028     m_tree.reconstructTheActiveFormattingElements();
1029     m_tree.insertHTMLElement(token);
1030 }
1031 
processColgroupEndTagForInColumnGroup()1032 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1033 {
1034     if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1035         ASSERT(isParsingFragment());
1036         // FIXME: parse error
1037         return false;
1038     }
1039     m_tree.openElements()->pop();
1040     setInsertionMode(InTableMode);
1041     return true;
1042 }
1043 
1044 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()1045 void HTMLTreeBuilder::closeTheCell()
1046 {
1047     ASSERT(insertionMode() == InCellMode);
1048     if (m_tree.openElements()->inTableScope(tdTag)) {
1049         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1050         processFakeEndTag(tdTag);
1051         return;
1052     }
1053     ASSERT(m_tree.openElements()->inTableScope(thTag));
1054     processFakeEndTag(thTag);
1055     ASSERT(insertionMode() == InRowMode);
1056 }
1057 
processStartTagForInTable(AtomicHTMLToken & token)1058 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1059 {
1060     ASSERT(token.type() == HTMLToken::StartTag);
1061     if (token.name() == captionTag) {
1062         m_tree.openElements()->popUntilTableScopeMarker();
1063         m_tree.activeFormattingElements()->appendMarker();
1064         m_tree.insertHTMLElement(token);
1065         setInsertionMode(InCaptionMode);
1066         return;
1067     }
1068     if (token.name() == colgroupTag) {
1069         m_tree.openElements()->popUntilTableScopeMarker();
1070         m_tree.insertHTMLElement(token);
1071         setInsertionMode(InColumnGroupMode);
1072         return;
1073     }
1074     if (token.name() == colTag) {
1075         processFakeStartTag(colgroupTag);
1076         ASSERT(InColumnGroupMode);
1077         reprocessStartTag(token);
1078         return;
1079     }
1080     if (isTableBodyContextTag(token.name())) {
1081         m_tree.openElements()->popUntilTableScopeMarker();
1082         m_tree.insertHTMLElement(token);
1083         setInsertionMode(InTableBodyMode);
1084         return;
1085     }
1086     if (isTableCellContextTag(token.name())
1087         || token.name() == trTag) {
1088         processFakeStartTag(tbodyTag);
1089         ASSERT(insertionMode() == InTableBodyMode);
1090         reprocessStartTag(token);
1091         return;
1092     }
1093     if (token.name() == tableTag) {
1094         parseError(token);
1095         if (!processTableEndTagForInTable()) {
1096             ASSERT(isParsingFragment());
1097             return;
1098         }
1099         reprocessStartTag(token);
1100         return;
1101     }
1102     if (token.name() == styleTag || token.name() == scriptTag) {
1103         processStartTagForInHead(token);
1104         return;
1105     }
1106     if (token.name() == inputTag) {
1107         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1108         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1109             parseError(token);
1110             m_tree.insertSelfClosingHTMLElement(token);
1111             return;
1112         }
1113         // Fall through to "anything else" case.
1114     }
1115     if (token.name() == formTag) {
1116         parseError(token);
1117         if (m_tree.form())
1118             return;
1119         m_tree.insertHTMLFormElement(token, true);
1120         m_tree.openElements()->pop();
1121         return;
1122     }
1123     parseError(token);
1124     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1125     processStartTagForInBody(token);
1126 }
1127 
1128 namespace {
1129 
shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken & token,ContainerNode * currentElement)1130 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement)
1131 {
1132     ASSERT(token.type() == HTMLToken::StartTag);
1133     if (currentElement->hasTagName(MathMLNames::miTag)
1134         || currentElement->hasTagName(MathMLNames::moTag)
1135         || currentElement->hasTagName(MathMLNames::mnTag)
1136         || currentElement->hasTagName(MathMLNames::msTag)
1137         || currentElement->hasTagName(MathMLNames::mtextTag)) {
1138         return token.name() != MathMLNames::mglyphTag
1139             && token.name() != MathMLNames::malignmarkTag;
1140     }
1141     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1142         return token.name() == SVGNames::svgTag;
1143     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1144         || currentElement->hasTagName(SVGNames::descTag)
1145         || currentElement->hasTagName(SVGNames::titleTag))
1146         return true;
1147     return isInHTMLNamespace(currentElement);
1148 }
1149 
1150 }
1151 
processStartTag(AtomicHTMLToken & token)1152 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1153 {
1154     ASSERT(token.type() == HTMLToken::StartTag);
1155     switch (insertionMode()) {
1156     case InitialMode:
1157         ASSERT(insertionMode() == InitialMode);
1158         defaultForInitial();
1159         // Fall through.
1160     case BeforeHTMLMode:
1161         ASSERT(insertionMode() == BeforeHTMLMode);
1162         if (token.name() == htmlTag) {
1163             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1164             setInsertionMode(BeforeHeadMode);
1165             return;
1166         }
1167         defaultForBeforeHTML();
1168         // Fall through.
1169     case BeforeHeadMode:
1170         ASSERT(insertionMode() == BeforeHeadMode);
1171         if (token.name() == htmlTag) {
1172             m_tree.insertHTMLHtmlStartTagInBody(token);
1173             return;
1174         }
1175         if (token.name() == headTag) {
1176             m_tree.insertHTMLHeadElement(token);
1177             setInsertionMode(InHeadMode);
1178             return;
1179         }
1180         defaultForBeforeHead();
1181         // Fall through.
1182     case InHeadMode:
1183         ASSERT(insertionMode() == InHeadMode);
1184         if (processStartTagForInHead(token))
1185             return;
1186         defaultForInHead();
1187         // Fall through.
1188     case AfterHeadMode:
1189         ASSERT(insertionMode() == AfterHeadMode);
1190         if (token.name() == htmlTag) {
1191             m_tree.insertHTMLHtmlStartTagInBody(token);
1192             return;
1193         }
1194         if (token.name() == bodyTag) {
1195             m_framesetOk = false;
1196             m_tree.insertHTMLBodyElement(token);
1197             setInsertionMode(InBodyMode);
1198             return;
1199         }
1200         if (token.name() == framesetTag) {
1201             m_tree.insertHTMLElement(token);
1202             setInsertionMode(InFramesetMode);
1203             return;
1204         }
1205         if (token.name() == baseTag
1206             || token.name() == basefontTag
1207             || token.name() == bgsoundTag
1208             || token.name() == linkTag
1209             || token.name() == metaTag
1210             || token.name() == noframesTag
1211             || token.name() == scriptTag
1212             || token.name() == styleTag
1213             || token.name() == titleTag) {
1214             parseError(token);
1215             ASSERT(m_tree.head());
1216             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1217             processStartTagForInHead(token);
1218             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1219             return;
1220         }
1221         if (token.name() == headTag) {
1222             parseError(token);
1223             return;
1224         }
1225         defaultForAfterHead();
1226         // Fall through
1227     case InBodyMode:
1228         ASSERT(insertionMode() == InBodyMode);
1229         processStartTagForInBody(token);
1230         break;
1231     case InTableMode:
1232         ASSERT(insertionMode() == InTableMode);
1233         processStartTagForInTable(token);
1234         break;
1235     case InCaptionMode:
1236         ASSERT(insertionMode() == InCaptionMode);
1237         if (isCaptionColOrColgroupTag(token.name())
1238             || isTableBodyContextTag(token.name())
1239             || isTableCellContextTag(token.name())
1240             || token.name() == trTag) {
1241             parseError(token);
1242             if (!processCaptionEndTagForInCaption()) {
1243                 ASSERT(isParsingFragment());
1244                 return;
1245             }
1246             reprocessStartTag(token);
1247             return;
1248         }
1249         processStartTagForInBody(token);
1250         break;
1251     case InColumnGroupMode:
1252         ASSERT(insertionMode() == InColumnGroupMode);
1253         if (token.name() == htmlTag) {
1254             m_tree.insertHTMLHtmlStartTagInBody(token);
1255             return;
1256         }
1257         if (token.name() == colTag) {
1258             m_tree.insertSelfClosingHTMLElement(token);
1259             return;
1260         }
1261         if (!processColgroupEndTagForInColumnGroup()) {
1262             ASSERT(isParsingFragment());
1263             return;
1264         }
1265         reprocessStartTag(token);
1266         break;
1267     case InTableBodyMode:
1268         ASSERT(insertionMode() == InTableBodyMode);
1269         if (token.name() == trTag) {
1270             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1271             m_tree.insertHTMLElement(token);
1272             setInsertionMode(InRowMode);
1273             return;
1274         }
1275         if (isTableCellContextTag(token.name())) {
1276             parseError(token);
1277             processFakeStartTag(trTag);
1278             ASSERT(insertionMode() == InRowMode);
1279             reprocessStartTag(token);
1280             return;
1281         }
1282         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1283             // FIXME: This is slow.
1284             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1285                 ASSERT(isParsingFragment());
1286                 parseError(token);
1287                 return;
1288             }
1289             m_tree.openElements()->popUntilTableBodyScopeMarker();
1290             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1291             processFakeEndTag(m_tree.currentElement()->tagQName());
1292             reprocessStartTag(token);
1293             return;
1294         }
1295         processStartTagForInTable(token);
1296         break;
1297     case InRowMode:
1298         ASSERT(insertionMode() == InRowMode);
1299         if (isTableCellContextTag(token.name())) {
1300             m_tree.openElements()->popUntilTableRowScopeMarker();
1301             m_tree.insertHTMLElement(token);
1302             setInsertionMode(InCellMode);
1303             m_tree.activeFormattingElements()->appendMarker();
1304             return;
1305         }
1306         if (token.name() == trTag
1307             || isCaptionColOrColgroupTag(token.name())
1308             || isTableBodyContextTag(token.name())) {
1309             if (!processTrEndTagForInRow()) {
1310                 ASSERT(isParsingFragment());
1311                 return;
1312             }
1313             ASSERT(insertionMode() == InTableBodyMode);
1314             reprocessStartTag(token);
1315             return;
1316         }
1317         processStartTagForInTable(token);
1318         break;
1319     case InCellMode:
1320         ASSERT(insertionMode() == InCellMode);
1321         if (isCaptionColOrColgroupTag(token.name())
1322             || isTableCellContextTag(token.name())
1323             || token.name() == trTag
1324             || isTableBodyContextTag(token.name())) {
1325             // FIXME: This could be more efficient.
1326             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1327                 ASSERT(isParsingFragment());
1328                 parseError(token);
1329                 return;
1330             }
1331             closeTheCell();
1332             reprocessStartTag(token);
1333             return;
1334         }
1335         processStartTagForInBody(token);
1336         break;
1337     case AfterBodyMode:
1338     case AfterAfterBodyMode:
1339         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1340         if (token.name() == htmlTag) {
1341             m_tree.insertHTMLHtmlStartTagInBody(token);
1342             return;
1343         }
1344         setInsertionMode(InBodyMode);
1345         reprocessStartTag(token);
1346         break;
1347     case InHeadNoscriptMode:
1348         ASSERT(insertionMode() == InHeadNoscriptMode);
1349         if (token.name() == htmlTag) {
1350             m_tree.insertHTMLHtmlStartTagInBody(token);
1351             return;
1352         }
1353         if (token.name() == basefontTag
1354             || token.name() == bgsoundTag
1355             || token.name() == linkTag
1356             || token.name() == metaTag
1357             || token.name() == noframesTag
1358             || token.name() == styleTag) {
1359             bool didProcess = processStartTagForInHead(token);
1360             ASSERT_UNUSED(didProcess, didProcess);
1361             return;
1362         }
1363         if (token.name() == htmlTag || token.name() == noscriptTag) {
1364             parseError(token);
1365             return;
1366         }
1367         defaultForInHeadNoscript();
1368         processToken(token);
1369         break;
1370     case InFramesetMode:
1371         ASSERT(insertionMode() == InFramesetMode);
1372         if (token.name() == htmlTag) {
1373             m_tree.insertHTMLHtmlStartTagInBody(token);
1374             return;
1375         }
1376         if (token.name() == framesetTag) {
1377             m_tree.insertHTMLElement(token);
1378             return;
1379         }
1380         if (token.name() == frameTag) {
1381             m_tree.insertSelfClosingHTMLElement(token);
1382             return;
1383         }
1384         if (token.name() == noframesTag) {
1385             processStartTagForInHead(token);
1386             return;
1387         }
1388         parseError(token);
1389         break;
1390     case AfterFramesetMode:
1391     case AfterAfterFramesetMode:
1392         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1393         if (token.name() == htmlTag) {
1394             m_tree.insertHTMLHtmlStartTagInBody(token);
1395             return;
1396         }
1397         if (token.name() == noframesTag) {
1398             processStartTagForInHead(token);
1399             return;
1400         }
1401         parseError(token);
1402         break;
1403     case InSelectInTableMode:
1404         ASSERT(insertionMode() == InSelectInTableMode);
1405         if (token.name() == captionTag
1406             || token.name() == tableTag
1407             || isTableBodyContextTag(token.name())
1408             || token.name() == trTag
1409             || isTableCellContextTag(token.name())) {
1410             parseError(token);
1411             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1412             processEndTag(endSelect);
1413             reprocessStartTag(token);
1414             return;
1415         }
1416         // Fall through
1417     case InSelectMode:
1418         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1419         if (token.name() == htmlTag) {
1420             m_tree.insertHTMLHtmlStartTagInBody(token);
1421             return;
1422         }
1423         if (token.name() == optionTag) {
1424             if (m_tree.currentNode()->hasTagName(optionTag)) {
1425                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1426                 processEndTag(endOption);
1427             }
1428             m_tree.insertHTMLElement(token);
1429             return;
1430         }
1431         if (token.name() == optgroupTag) {
1432             if (m_tree.currentNode()->hasTagName(optionTag)) {
1433                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1434                 processEndTag(endOption);
1435             }
1436             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1437                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1438                 processEndTag(endOptgroup);
1439             }
1440             m_tree.insertHTMLElement(token);
1441             return;
1442         }
1443         if (token.name() == selectTag) {
1444             parseError(token);
1445             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1446             processEndTag(endSelect);
1447             return;
1448         }
1449         if (token.name() == inputTag
1450             || token.name() == keygenTag
1451             || token.name() == textareaTag) {
1452             parseError(token);
1453             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1454                 ASSERT(isParsingFragment());
1455                 return;
1456             }
1457             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1458             processEndTag(endSelect);
1459             reprocessStartTag(token);
1460             return;
1461         }
1462         if (token.name() == scriptTag) {
1463             bool didProcess = processStartTagForInHead(token);
1464             ASSERT_UNUSED(didProcess, didProcess);
1465             return;
1466         }
1467         break;
1468     case InTableTextMode:
1469         defaultForInTableText();
1470         processStartTag(token);
1471         break;
1472     case InForeignContentMode: {
1473         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentNode())) {
1474             processForeignContentUsingInBodyModeAndResetMode(token);
1475             return;
1476         }
1477         if (token.name() == bTag
1478             || token.name() == bigTag
1479             || token.name() == blockquoteTag
1480             || token.name() == bodyTag
1481             || token.name() == brTag
1482             || token.name() == centerTag
1483             || token.name() == codeTag
1484             || token.name() == ddTag
1485             || token.name() == divTag
1486             || token.name() == dlTag
1487             || token.name() == dtTag
1488             || token.name() == emTag
1489             || token.name() == embedTag
1490             || isNumberedHeaderTag(token.name())
1491             || token.name() == headTag
1492             || token.name() == hrTag
1493             || token.name() == iTag
1494             || token.name() == imgTag
1495             || token.name() == liTag
1496             || token.name() == listingTag
1497             || token.name() == menuTag
1498             || token.name() == metaTag
1499             || token.name() == nobrTag
1500             || token.name() == olTag
1501             || token.name() == pTag
1502             || token.name() == preTag
1503             || token.name() == rubyTag
1504             || token.name() == sTag
1505             || token.name() == smallTag
1506             || token.name() == spanTag
1507             || token.name() == strongTag
1508             || token.name() == strikeTag
1509             || token.name() == subTag
1510             || token.name() == supTag
1511             || token.name() == tableTag
1512             || token.name() == ttTag
1513             || token.name() == uTag
1514             || token.name() == ulTag
1515             || token.name() == varTag
1516             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1517             parseError(token);
1518             m_tree.openElements()->popUntilForeignContentScopeMarker();
1519             resetInsertionModeAppropriately();
1520             reprocessStartTag(token);
1521             return;
1522         }
1523         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1524         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1525             adjustMathMLAttributes(token);
1526         if (currentNamespace == SVGNames::svgNamespaceURI) {
1527             adjustSVGTagNameCase(token);
1528             adjustSVGAttributes(token);
1529         }
1530         adjustForeignAttributes(token);
1531         m_tree.insertForeignElement(token, currentNamespace);
1532         break;
1533     }
1534     case TextMode:
1535         ASSERT_NOT_REACHED();
1536         break;
1537     }
1538 }
1539 
processBodyEndTagForInBody(AtomicHTMLToken & token)1540 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1541 {
1542     ASSERT(token.type() == HTMLToken::EndTag);
1543     ASSERT(token.name() == bodyTag);
1544     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1545         parseError(token);
1546         return false;
1547     }
1548     notImplemented(); // Emit a more specific parse error based on stack contents.
1549     setInsertionMode(AfterBodyMode);
1550     return true;
1551 }
1552 
processAnyOtherEndTagForInBody(AtomicHTMLToken & token)1553 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1554 {
1555     ASSERT(token.type() == HTMLToken::EndTag);
1556     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1557     while (1) {
1558         ContainerNode* node = record->node();
1559         if (node->hasLocalName(token.name())) {
1560             m_tree.generateImpliedEndTags();
1561             // FIXME: The ElementRecord pointed to by record might be deleted by
1562             // the preceding call. Perhaps we should hold a RefPtr so that it
1563             // stays alive for the duration of record's scope.
1564             record = 0;
1565             if (!m_tree.currentNode()->hasLocalName(token.name())) {
1566                 parseError(token);
1567                 // FIXME: This is either a bug in the spec, or a bug in our
1568                 // implementation.  Filed a bug with HTML5:
1569                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1570                 // We might have already popped the node for the token in
1571                 // generateImpliedEndTags, just abort.
1572                 if (!m_tree.openElements()->contains(toElement(node)))
1573                     return;
1574             }
1575             m_tree.openElements()->popUntilPopped(toElement(node));
1576             return;
1577         }
1578         if (isSpecialNode(node)) {
1579             parseError(token);
1580             return;
1581         }
1582         record = record->next();
1583     }
1584 }
1585 
1586 // FIXME: This probably belongs on HTMLElementStack.
furthestBlockForFormattingElement(Element * formattingElement)1587 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1588 {
1589     HTMLElementStack::ElementRecord* furthestBlock = 0;
1590     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1591     for (; record; record = record->next()) {
1592         if (record->element() == formattingElement)
1593             return furthestBlock;
1594         if (isSpecialNode(record->element()))
1595             furthestBlock = record;
1596     }
1597     ASSERT_NOT_REACHED();
1598     return 0;
1599 }
1600 
1601 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
callTheAdoptionAgency(AtomicHTMLToken & token)1602 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1603 {
1604     // The adoption agency algorithm is N^2.  We limit the number of iterations
1605     // to stop from hanging the whole browser.  This limit is copied from the
1606     // legacy tree builder and might need to be tweaked in the future.
1607     static const int adoptionAgencyIterationLimit = 10;
1608 
1609     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1610         // 1.
1611         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1612         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1613             parseError(token);
1614             notImplemented(); // Check the stack of open elements for a more specific parse error.
1615             return;
1616         }
1617         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1618         if (!formattingElementRecord) {
1619             parseError(token);
1620             m_tree.activeFormattingElements()->remove(formattingElement);
1621             return;
1622         }
1623         if (formattingElement != m_tree.currentElement())
1624             parseError(token);
1625         // 2.
1626         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1627         // 3.
1628         if (!furthestBlock) {
1629             m_tree.openElements()->popUntilPopped(formattingElement);
1630             m_tree.activeFormattingElements()->remove(formattingElement);
1631             return;
1632         }
1633         // 4.
1634         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1635         ContainerNode* commonAncestor = formattingElementRecord->next()->node();
1636         // 5.
1637         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1638         // 6.
1639         HTMLElementStack::ElementRecord* node = furthestBlock;
1640         HTMLElementStack::ElementRecord* nextNode = node->next();
1641         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1642         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1643             // 6.1
1644             node = nextNode;
1645             ASSERT(node);
1646             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1647             // 6.2
1648             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1649                 m_tree.openElements()->remove(node->element());
1650                 node = 0;
1651                 continue;
1652             }
1653             // 6.3
1654             if (node == formattingElementRecord)
1655                 break;
1656             // 6.5
1657             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1658             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1659             nodeEntry->replaceElement(newElement.get());
1660             node->replaceElement(newElement.release());
1661             // 6.4 -- Intentionally out of order to handle the case where node
1662             // was replaced in 6.5.
1663             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1664             if (lastNode == furthestBlock)
1665                 bookmark.moveToAfter(nodeEntry);
1666             // 6.6
1667             if (Element* parent = lastNode->element()->parentElement())
1668                 parent->parserRemoveChild(lastNode->element());
1669             node->element()->parserAddChild(lastNode->element());
1670             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1671                 lastNode->element()->lazyAttach();
1672             // 6.7
1673             lastNode = node;
1674         }
1675         // 7
1676         const AtomicString& commonAncestorTag = commonAncestor->localName();
1677         if (Element* parent = lastNode->element()->parentElement())
1678             parent->parserRemoveChild(lastNode->element());
1679         // FIXME: If this moves to HTMLConstructionSite, this check should use
1680         // causesFosterParenting(tagName) instead.
1681         if (commonAncestorTag == tableTag
1682             || commonAncestorTag == trTag
1683             || isTableBodyContextTag(commonAncestorTag))
1684             m_tree.fosterParent(lastNode->element());
1685         else {
1686             commonAncestor->parserAddChild(lastNode->element());
1687             ASSERT(lastNode->node()->isElementNode());
1688             ASSERT(lastNode->element()->parentNode());
1689             if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1690                 lastNode->element()->lazyAttach();
1691         }
1692         // 8
1693         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1694         // 9
1695         newElement->takeAllChildrenFrom(furthestBlock->element());
1696         // 10
1697         Element* furthestBlockElement = furthestBlock->element();
1698         // FIXME: All this creation / parserAddChild / attach business should
1699         //        be in HTMLConstructionSite.  My guess is that steps 8--12
1700         //        should all be in some HTMLConstructionSite function.
1701         furthestBlockElement->parserAddChild(newElement);
1702         if (furthestBlockElement->attached() && !newElement->attached()) {
1703             // Notice that newElement might already be attached if, for example, one of the reparented
1704             // children is a style element, which attaches itself automatically.
1705             newElement->attach();
1706         }
1707         // 11
1708         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1709         // 12
1710         m_tree.openElements()->remove(formattingElement);
1711         m_tree.openElements()->insertAbove(newElement, furthestBlock);
1712     }
1713 }
1714 
resetInsertionModeAppropriately()1715 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1716 {
1717     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1718     bool last = false;
1719     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1720     while (1) {
1721         ContainerNode* node = nodeRecord->node();
1722         if (node == m_tree.openElements()->rootNode()) {
1723             ASSERT(isParsingFragment());
1724             last = true;
1725             node = m_fragmentContext.contextElement();
1726         }
1727         if (node->hasTagName(selectTag)) {
1728             ASSERT(isParsingFragment());
1729             return setInsertionMode(InSelectMode);
1730         }
1731         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1732             return setInsertionMode(InCellMode);
1733         if (node->hasTagName(trTag))
1734             return setInsertionMode(InRowMode);
1735         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1736             return setInsertionMode(InTableBodyMode);
1737         if (node->hasTagName(captionTag))
1738             return setInsertionMode(InCaptionMode);
1739         if (node->hasTagName(colgroupTag)) {
1740             ASSERT(isParsingFragment());
1741             return setInsertionMode(InColumnGroupMode);
1742         }
1743         if (node->hasTagName(tableTag))
1744             return setInsertionMode(InTableMode);
1745         if (node->hasTagName(headTag)) {
1746             ASSERT(isParsingFragment());
1747             return setInsertionMode(InBodyMode);
1748         }
1749         if (node->hasTagName(bodyTag))
1750             return setInsertionMode(InBodyMode);
1751         if (node->hasTagName(framesetTag)) {
1752             ASSERT(isParsingFragment());
1753             return setInsertionMode(InFramesetMode);
1754         }
1755         if (node->hasTagName(htmlTag)) {
1756             ASSERT(isParsingFragment());
1757             return setInsertionMode(BeforeHeadMode);
1758         }
1759         if (node->namespaceURI() == SVGNames::svgNamespaceURI
1760             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1761             return setInsertionMode(InForeignContentMode);
1762         if (last) {
1763             ASSERT(isParsingFragment());
1764             return setInsertionMode(InBodyMode);
1765         }
1766         nodeRecord = nodeRecord->next();
1767     }
1768 }
1769 
processEndTagForInTableBody(AtomicHTMLToken & token)1770 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1771 {
1772     ASSERT(token.type() == HTMLToken::EndTag);
1773     if (isTableBodyContextTag(token.name())) {
1774         if (!m_tree.openElements()->inTableScope(token.name())) {
1775             parseError(token);
1776             return;
1777         }
1778         m_tree.openElements()->popUntilTableBodyScopeMarker();
1779         m_tree.openElements()->pop();
1780         setInsertionMode(InTableMode);
1781         return;
1782     }
1783     if (token.name() == tableTag) {
1784         // FIXME: This is slow.
1785         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1786             ASSERT(isParsingFragment());
1787             parseError(token);
1788             return;
1789         }
1790         m_tree.openElements()->popUntilTableBodyScopeMarker();
1791         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1792         processFakeEndTag(m_tree.currentElement()->tagQName());
1793         reprocessEndTag(token);
1794         return;
1795     }
1796     if (token.name() == bodyTag
1797         || isCaptionColOrColgroupTag(token.name())
1798         || token.name() == htmlTag
1799         || isTableCellContextTag(token.name())
1800         || token.name() == trTag) {
1801         parseError(token);
1802         return;
1803     }
1804     processEndTagForInTable(token);
1805 }
1806 
processEndTagForInRow(AtomicHTMLToken & token)1807 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1808 {
1809     ASSERT(token.type() == HTMLToken::EndTag);
1810     if (token.name() == trTag) {
1811         processTrEndTagForInRow();
1812         return;
1813     }
1814     if (token.name() == tableTag) {
1815         if (!processTrEndTagForInRow()) {
1816             ASSERT(isParsingFragment());
1817             return;
1818         }
1819         ASSERT(insertionMode() == InTableBodyMode);
1820         reprocessEndTag(token);
1821         return;
1822     }
1823     if (isTableBodyContextTag(token.name())) {
1824         if (!m_tree.openElements()->inTableScope(token.name())) {
1825             parseError(token);
1826             return;
1827         }
1828         processFakeEndTag(trTag);
1829         ASSERT(insertionMode() == InTableBodyMode);
1830         reprocessEndTag(token);
1831         return;
1832     }
1833     if (token.name() == bodyTag
1834         || isCaptionColOrColgroupTag(token.name())
1835         || token.name() == htmlTag
1836         || isTableCellContextTag(token.name())) {
1837         parseError(token);
1838         return;
1839     }
1840     processEndTagForInTable(token);
1841 }
1842 
processEndTagForInCell(AtomicHTMLToken & token)1843 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1844 {
1845     ASSERT(token.type() == HTMLToken::EndTag);
1846     if (isTableCellContextTag(token.name())) {
1847         if (!m_tree.openElements()->inTableScope(token.name())) {
1848             parseError(token);
1849             return;
1850         }
1851         m_tree.generateImpliedEndTags();
1852         if (!m_tree.currentNode()->hasLocalName(token.name()))
1853             parseError(token);
1854         m_tree.openElements()->popUntilPopped(token.name());
1855         m_tree.activeFormattingElements()->clearToLastMarker();
1856         setInsertionMode(InRowMode);
1857         return;
1858     }
1859     if (token.name() == bodyTag
1860         || isCaptionColOrColgroupTag(token.name())
1861         || token.name() == htmlTag) {
1862         parseError(token);
1863         return;
1864     }
1865     if (token.name() == tableTag
1866         || token.name() == trTag
1867         || isTableBodyContextTag(token.name())) {
1868         if (!m_tree.openElements()->inTableScope(token.name())) {
1869             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1870             parseError(token);
1871             return;
1872         }
1873         closeTheCell();
1874         reprocessEndTag(token);
1875         return;
1876     }
1877     processEndTagForInBody(token);
1878 }
1879 
processEndTagForInBody(AtomicHTMLToken & token)1880 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1881 {
1882     ASSERT(token.type() == HTMLToken::EndTag);
1883     if (token.name() == bodyTag) {
1884         processBodyEndTagForInBody(token);
1885         return;
1886     }
1887     if (token.name() == htmlTag) {
1888         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1889         if (processBodyEndTagForInBody(endBody))
1890             reprocessEndTag(token);
1891         return;
1892     }
1893     if (token.name() == addressTag
1894         || token.name() == articleTag
1895         || token.name() == asideTag
1896         || token.name() == blockquoteTag
1897         || token.name() == buttonTag
1898         || token.name() == centerTag
1899         || token.name() == detailsTag
1900         || token.name() == dirTag
1901         || token.name() == divTag
1902         || token.name() == dlTag
1903         || token.name() == fieldsetTag
1904         || token.name() == figcaptionTag
1905         || token.name() == figureTag
1906         || token.name() == footerTag
1907         || token.name() == headerTag
1908         || token.name() == hgroupTag
1909         || token.name() == listingTag
1910         || token.name() == menuTag
1911         || token.name() == navTag
1912         || token.name() == olTag
1913         || token.name() == preTag
1914         || token.name() == sectionTag
1915         || token.name() == summaryTag
1916         || token.name() == ulTag) {
1917         if (!m_tree.openElements()->inScope(token.name())) {
1918             parseError(token);
1919             return;
1920         }
1921         m_tree.generateImpliedEndTags();
1922         if (!m_tree.currentNode()->hasLocalName(token.name()))
1923             parseError(token);
1924         m_tree.openElements()->popUntilPopped(token.name());
1925         return;
1926     }
1927     if (token.name() == formTag) {
1928         RefPtr<Element> node = m_tree.takeForm();
1929         if (!node || !m_tree.openElements()->inScope(node.get())) {
1930             parseError(token);
1931             return;
1932         }
1933         m_tree.generateImpliedEndTags();
1934         if (m_tree.currentElement() != node.get())
1935             parseError(token);
1936         m_tree.openElements()->remove(node.get());
1937     }
1938     if (token.name() == pTag) {
1939         if (!m_tree.openElements()->inButtonScope(token.name())) {
1940             parseError(token);
1941             processFakeStartTag(pTag);
1942             ASSERT(m_tree.openElements()->inScope(token.name()));
1943             reprocessEndTag(token);
1944             return;
1945         }
1946         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1947         if (!m_tree.currentNode()->hasLocalName(token.name()))
1948             parseError(token);
1949         m_tree.openElements()->popUntilPopped(token.name());
1950         return;
1951     }
1952     if (token.name() == liTag) {
1953         if (!m_tree.openElements()->inListItemScope(token.name())) {
1954             parseError(token);
1955             return;
1956         }
1957         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1958         if (!m_tree.currentNode()->hasLocalName(token.name()))
1959             parseError(token);
1960         m_tree.openElements()->popUntilPopped(token.name());
1961         return;
1962     }
1963     if (token.name() == ddTag
1964         || token.name() == dtTag) {
1965         if (!m_tree.openElements()->inScope(token.name())) {
1966             parseError(token);
1967             return;
1968         }
1969         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1970         if (!m_tree.currentNode()->hasLocalName(token.name()))
1971             parseError(token);
1972         m_tree.openElements()->popUntilPopped(token.name());
1973         return;
1974     }
1975     if (isNumberedHeaderTag(token.name())) {
1976         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1977             parseError(token);
1978             return;
1979         }
1980         m_tree.generateImpliedEndTags();
1981         if (!m_tree.currentNode()->hasLocalName(token.name()))
1982             parseError(token);
1983         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1984         return;
1985     }
1986     if (isFormattingTag(token.name())) {
1987         callTheAdoptionAgency(token);
1988         return;
1989     }
1990     if (token.name() == appletTag
1991         || token.name() == marqueeTag
1992         || token.name() == objectTag) {
1993         if (!m_tree.openElements()->inScope(token.name())) {
1994             parseError(token);
1995             return;
1996         }
1997         m_tree.generateImpliedEndTags();
1998         if (!m_tree.currentNode()->hasLocalName(token.name()))
1999             parseError(token);
2000         m_tree.openElements()->popUntilPopped(token.name());
2001         m_tree.activeFormattingElements()->clearToLastMarker();
2002         return;
2003     }
2004     if (token.name() == brTag) {
2005         parseError(token);
2006         processFakeStartTag(brTag);
2007         return;
2008     }
2009     processAnyOtherEndTagForInBody(token);
2010 }
2011 
processCaptionEndTagForInCaption()2012 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2013 {
2014     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2015         ASSERT(isParsingFragment());
2016         // FIXME: parse error
2017         return false;
2018     }
2019     m_tree.generateImpliedEndTags();
2020     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2021     m_tree.openElements()->popUntilPopped(captionTag.localName());
2022     m_tree.activeFormattingElements()->clearToLastMarker();
2023     setInsertionMode(InTableMode);
2024     return true;
2025 }
2026 
processTrEndTagForInRow()2027 bool HTMLTreeBuilder::processTrEndTagForInRow()
2028 {
2029     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2030         ASSERT(isParsingFragment());
2031         // FIXME: parse error
2032         return false;
2033     }
2034     m_tree.openElements()->popUntilTableRowScopeMarker();
2035     ASSERT(m_tree.currentElement()->hasTagName(trTag));
2036     m_tree.openElements()->pop();
2037     setInsertionMode(InTableBodyMode);
2038     return true;
2039 }
2040 
processTableEndTagForInTable()2041 bool HTMLTreeBuilder::processTableEndTagForInTable()
2042 {
2043     if (!m_tree.openElements()->inTableScope(tableTag)) {
2044         ASSERT(isParsingFragment());
2045         // FIXME: parse error.
2046         return false;
2047     }
2048     m_tree.openElements()->popUntilPopped(tableTag.localName());
2049     resetInsertionModeAppropriately();
2050     return true;
2051 }
2052 
processEndTagForInTable(AtomicHTMLToken & token)2053 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2054 {
2055     ASSERT(token.type() == HTMLToken::EndTag);
2056     if (token.name() == tableTag) {
2057         processTableEndTagForInTable();
2058         return;
2059     }
2060     if (token.name() == bodyTag
2061         || isCaptionColOrColgroupTag(token.name())
2062         || token.name() == htmlTag
2063         || isTableBodyContextTag(token.name())
2064         || isTableCellContextTag(token.name())
2065         || token.name() == trTag) {
2066         parseError(token);
2067         return;
2068     }
2069     // Is this redirection necessary here?
2070     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2071     processEndTagForInBody(token);
2072 }
2073 
// Dispatches an end tag token according to the current insertion mode.
// The early modes (Initial, BeforeHTML, BeforeHead, InHead, AfterHead) run
// their defaultFor*() step and deliberately fall through into the next case,
// so a token that is not rejected or consumed along the way ends up being
// handled by processEndTagForInBody(). Most table-related modes delegate to
// a dedicated processEndTagFor*() helper.
processEndTag(AtomicHTMLToken & token)2074 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2075 {
2076     ASSERT(token.type() == HTMLToken::EndTag);
2077     switch (insertionMode()) {
2078     case InitialMode:
2079         ASSERT(insertionMode() == InitialMode);
2080         defaultForInitial();
2081         // Fall through.
2082     case BeforeHTMLMode:
2083         ASSERT(insertionMode() == BeforeHTMLMode);
             // Only </head>, </body>, </html> and </br> proceed; anything else is dropped.
2084         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2085             parseError(token);
2086             return;
2087         }
2088         defaultForBeforeHTML();
2089         // Fall through.
2090     case BeforeHeadMode:
2091         ASSERT(insertionMode() == BeforeHeadMode);
2092         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2093             parseError(token);
2094             return;
2095         }
2096         defaultForBeforeHead();
2097         // Fall through.
2098     case InHeadMode:
2099         ASSERT(insertionMode() == InHeadMode);
2100         if (token.name() == headTag) {
2101             m_tree.openElements()->popHTMLHeadElement();
2102             setInsertionMode(AfterHeadMode);
2103             return;
2104         }
2105         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2106             parseError(token);
2107             return;
2108         }
2109         defaultForInHead();
2110         // Fall through.
2111     case AfterHeadMode:
2112         ASSERT(insertionMode() == AfterHeadMode);
2113         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2114             parseError(token);
2115             return;
2116         }
2117         defaultForAfterHead();
2118         // Fall through
2119     case InBodyMode:
2120         ASSERT(insertionMode() == InBodyMode);
2121         processEndTagForInBody(token);
2122         break;
2123     case InTableMode:
2124         ASSERT(insertionMode() == InTableMode);
2125         processEndTagForInTable(token);
2126         break;
2127     case InCaptionMode:
2128         ASSERT(insertionMode() == InCaptionMode);
2129         if (token.name() == captionTag) {
2130             processCaptionEndTagForInCaption();
2131             return;
2132         }
2133         if (token.name() == tableTag) {
                 // Close the caption first; reprocess </table> only if that succeeded.
2134             parseError(token);
2135             if (!processCaptionEndTagForInCaption()) {
2136                 ASSERT(isParsingFragment());
2137                 return;
2138             }
2139             reprocessEndTag(token);
2140             return;
2141         }
2142         if (token.name() == bodyTag
2143             || token.name() == colTag
2144             || token.name() == colgroupTag
2145             || token.name() == htmlTag
2146             || isTableBodyContextTag(token.name())
2147             || isTableCellContextTag(token.name())
2148             || token.name() == trTag) {
2149             parseError(token);
2150             return;
2151         }
2152         processEndTagForInBody(token);
2153         break;
2154     case InColumnGroupMode:
2155         ASSERT(insertionMode() == InColumnGroupMode);
2156         if (token.name() == colgroupTag) {
2157             processColgroupEndTagForInColumnGroup();
2158             return;
2159         }
2160         if (token.name() == colTag) {
2161             parseError(token);
2162             return;
2163         }
             // Any other end tag: run the colgroup end-tag handling first and,
             // if it succeeded, reprocess this token.
2164         if (!processColgroupEndTagForInColumnGroup()) {
2165             ASSERT(isParsingFragment());
2166             return;
2167         }
2168         reprocessEndTag(token);
2169         break;
2170     case InRowMode:
2171         ASSERT(insertionMode() == InRowMode);
2172         processEndTagForInRow(token);
2173         break;
2174     case InCellMode:
2175         ASSERT(insertionMode() == InCellMode);
2176         processEndTagForInCell(token);
2177         break;
2178     case InTableBodyMode:
2179         ASSERT(insertionMode() == InTableBodyMode);
2180         processEndTagForInTableBody(token);
2181         break;
2182     case AfterBodyMode:
2183         ASSERT(insertionMode() == AfterBodyMode);
2184         if (token.name() == htmlTag) {
2185             if (isParsingFragment()) {
2186                 parseError(token);
2187                 return;
2188             }
2189             setInsertionMode(AfterAfterBodyMode);
2190             return;
2191         }
2192         prepareToReprocessToken();
2193         // Fall through.
2194     case AfterAfterBodyMode:
2195         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
             // Parse error: switch back to InBodyMode and reprocess the token there.
2196         parseError(token);
2197         setInsertionMode(InBodyMode);
2198         reprocessEndTag(token);
2199         break;
2200     case InHeadNoscriptMode:
2201         ASSERT(insertionMode() == InHeadNoscriptMode);
2202         if (token.name() == noscriptTag) {
2203             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2204             m_tree.openElements()->pop();
2205             ASSERT(m_tree.currentElement()->hasTagName(headTag));
2206             setInsertionMode(InHeadMode);
2207             return;
2208         }
2209         if (token.name() != brTag) {
2210             parseError(token);
2211             return;
2212         }
2213         defaultForInHeadNoscript();
2214         processToken(token);
2215         break;
2216     case TextMode:
2217         if (token.name() == scriptTag) {
2218             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2219             m_isPaused = true;
2220             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2221             m_scriptToProcess = m_tree.currentElement();
2222             m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
2223             m_tree.openElements()->pop();
2224             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2225                 m_scriptToProcess->removeAllChildren();
2226             setInsertionMode(m_originalInsertionMode);
2227 
2228             // This token will not have been created by the tokenizer if a
2229             // self-closing script tag was encountered and pre-HTML5 parser
2230             // quirks are enabled. We must set the tokenizer's state to
2231             // DataState explicitly if the tokenizer didn't have a chance to.
2232             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
2233             m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
2234             return;
2235         }
             // Non-script end tag: pop the current element and restore the saved insertion mode.
2236         m_tree.openElements()->pop();
2237         setInsertionMode(m_originalInsertionMode);
2238         break;
2239     case InFramesetMode:
2240         ASSERT(insertionMode() == InFramesetMode);
2241         if (token.name() == framesetTag) {
2242             if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2243                 parseError(token);
2244                 return;
2245             }
2246             m_tree.openElements()->pop();
2247             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2248                 setInsertionMode(AfterFramesetMode);
2249             return;
2250         }
2251         break;
2252     case AfterFramesetMode:
2253         ASSERT(insertionMode() == AfterFramesetMode);
2254         if (token.name() == htmlTag) {
2255             setInsertionMode(AfterAfterFramesetMode);
2256             return;
2257         }
2258         // Fall through.
2259     case AfterAfterFramesetMode:
2260         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2261         parseError(token);
2262         break;
2263     case InSelectInTableMode:
2264         ASSERT(insertionMode() == InSelectInTableMode);
2265         if (token.name() == captionTag
2266             || token.name() == tableTag
2267             || isTableBodyContextTag(token.name())
2268             || token.name() == trTag
2269             || isTableCellContextTag(token.name())) {
2270             parseError(token);
                 // If the named element is in table scope, close the select with a
                 // synthetic </select> and then reprocess this token.
2271             if (m_tree.openElements()->inTableScope(token.name())) {
2272                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2273                 processEndTag(endSelect);
2274                 reprocessEndTag(token);
2275             }
2276             return;
2277         }
2278         // Fall through.
2279     case InSelectMode:
2280         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2281         if (token.name() == optgroupTag) {
2282             if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2283                 processFakeEndTag(optionTag);
2284             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2285                 m_tree.openElements()->pop();
2286                 return;
2287             }
2288             parseError(token);
2289             return;
2290         }
2291         if (token.name() == optionTag) {
2292             if (m_tree.currentNode()->hasTagName(optionTag)) {
2293                 m_tree.openElements()->pop();
2294                 return;
2295             }
2296             parseError(token);
2297             return;
2298         }
2299         if (token.name() == selectTag) {
2300             if (!m_tree.openElements()->inSelectScope(token.name())) {
2301                 ASSERT(isParsingFragment());
2302                 parseError(token);
2303                 return;
2304             }
2305             m_tree.openElements()->popUntilPopped(selectTag.localName());
2306             resetInsertionModeAppropriately();
2307             return;
2308         }
2309         break;
2310     case InTableTextMode:
2311         defaultForInTableText();
2312         processEndTag(token);
2313         break;
2314     case InForeignContentMode:
2315         if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2316             notImplemented();
2317             return;
2318         }
2319         if (!isInHTMLNamespace(m_tree.currentNode())) {
2320             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2321             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2322             if (!nodeRecord->node()->hasLocalName(token.name()))
2323                 parseError(token);
                 // Walk the stack via next() until a record whose node has the token's
                 // local name is found (pop through it and stop), or until a node in
                 // the HTML namespace is reached (fall out to the in-body handling).
2324             while (1) {
2325                 if (nodeRecord->node()->hasLocalName(token.name())) {
2326                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2327                     resetForeignInsertionMode();
2328                     return;
2329                 }
2330                 nodeRecord = nodeRecord->next();
2331 
2332                 if (isInHTMLNamespace(nodeRecord->node()))
2333                     break;
2334             }
2335         }
2336         // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
2337         processForeignContentUsingInBodyModeAndResetMode(token);
2338         break;
2339     }
2340 }
2341 
prepareToReprocessToken()2342 void HTMLTreeBuilder::prepareToReprocessToken()
2343 {
2344     if (m_hasPendingForeignInsertionModeSteps) {
2345         resetForeignInsertionMode();
2346         m_hasPendingForeignInsertionModeSteps = false;
2347     }
2348 }
2349 
reprocessStartTag(AtomicHTMLToken & token)2350 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
2351 {
2352     prepareToReprocessToken();
2353     processStartTag(token);
2354 }
2355 
reprocessEndTag(AtomicHTMLToken & token)2356 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
2357 {
2358     prepareToReprocessToken();
2359     processEndTag(token);
2360 }
2361 
2362 class HTMLTreeBuilder::FakeInsertionMode {
2363     WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2364 public:
FakeInsertionMode(HTMLTreeBuilder * treeBuilder,InsertionMode mode)2365     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2366         : m_treeBuilder(treeBuilder)
2367         , m_originalMode(treeBuilder->insertionMode())
2368     {
2369         m_treeBuilder->setFakeInsertionMode(mode);
2370     }
2371 
~FakeInsertionMode()2372     ~FakeInsertionMode()
2373     {
2374         if (m_treeBuilder->isFakeInsertionMode())
2375             m_treeBuilder->setInsertionMode(m_originalMode);
2376     }
2377 
2378 private:
2379     HTMLTreeBuilder* m_treeBuilder;
2380     InsertionMode m_originalMode;
2381 };
2382 
processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken & token)2383 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2384 {
2385     m_hasPendingForeignInsertionModeSteps = true;
2386     {
2387         FakeInsertionMode fakeMode(this, InBodyMode);
2388         processToken(token);
2389     }
2390     if (m_hasPendingForeignInsertionModeSteps)
2391         resetForeignInsertionMode();
2392 }
2393 
resetForeignInsertionMode()2394 void HTMLTreeBuilder::resetForeignInsertionMode()
2395 {
2396     if (insertionMode() == InForeignContentMode)
2397         resetInsertionModeAppropriately();
2398 }
2399 
processComment(AtomicHTMLToken & token)2400 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2401 {
2402     ASSERT(token.type() == HTMLToken::Comment);
2403     if (m_insertionMode == InitialMode
2404         || m_insertionMode == BeforeHTMLMode
2405         || m_insertionMode == AfterAfterBodyMode
2406         || m_insertionMode == AfterAfterFramesetMode) {
2407         m_tree.insertCommentOnDocument(token);
2408         return;
2409     }
2410     if (m_insertionMode == AfterBodyMode) {
2411         m_tree.insertCommentOnHTMLHtmlElement(token);
2412         return;
2413     }
2414     if (m_insertionMode == InTableTextMode) {
2415         defaultForInTableText();
2416         processComment(token);
2417         return;
2418     }
2419     m_tree.insertComment(token);
2420 }
2421 
processCharacter(AtomicHTMLToken & token)2422 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2423 {
2424     ASSERT(token.type() == HTMLToken::Character);
2425     ExternalCharacterTokenBuffer buffer(token);
2426     processCharacterBuffer(buffer);
2427 }
2428 
processCharacterBuffer(ExternalCharacterTokenBuffer & buffer)2429 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2430 {
2431 ReprocessBuffer:
2432     switch (insertionMode()) {
2433     case InitialMode: {
2434         ASSERT(insertionMode() == InitialMode);
2435         buffer.skipLeadingWhitespace();
2436         if (buffer.isEmpty())
2437             return;
2438         defaultForInitial();
2439         // Fall through.
2440     }
2441     case BeforeHTMLMode: {
2442         ASSERT(insertionMode() == BeforeHTMLMode);
2443         buffer.skipLeadingWhitespace();
2444         if (buffer.isEmpty())
2445             return;
2446         defaultForBeforeHTML();
2447         // Fall through.
2448     }
2449     case BeforeHeadMode: {
2450         ASSERT(insertionMode() == BeforeHeadMode);
2451         buffer.skipLeadingWhitespace();
2452         if (buffer.isEmpty())
2453             return;
2454         defaultForBeforeHead();
2455         // Fall through.
2456     }
2457     case InHeadMode: {
2458         ASSERT(insertionMode() == InHeadMode);
2459         String leadingWhitespace = buffer.takeLeadingWhitespace();
2460         if (!leadingWhitespace.isEmpty())
2461             m_tree.insertTextNode(leadingWhitespace);
2462         if (buffer.isEmpty())
2463             return;
2464         defaultForInHead();
2465         // Fall through.
2466     }
2467     case AfterHeadMode: {
2468         ASSERT(insertionMode() == AfterHeadMode);
2469         String leadingWhitespace = buffer.takeLeadingWhitespace();
2470         if (!leadingWhitespace.isEmpty())
2471             m_tree.insertTextNode(leadingWhitespace);
2472         if (buffer.isEmpty())
2473             return;
2474         defaultForAfterHead();
2475         // Fall through.
2476     }
2477     case InBodyMode:
2478     case InCaptionMode:
2479     case InCellMode: {
2480         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2481         m_tree.reconstructTheActiveFormattingElements();
2482         String characters = buffer.takeRemaining();
2483         m_tree.insertTextNode(characters);
2484         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2485             m_framesetOk = false;
2486         break;
2487     }
2488     case InTableMode:
2489     case InTableBodyMode:
2490     case InRowMode: {
2491         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2492         ASSERT(m_pendingTableCharacters.isEmpty());
2493         m_originalInsertionMode = m_insertionMode;
2494         setInsertionMode(InTableTextMode);
2495         prepareToReprocessToken();
2496         // Fall through.
2497     }
2498     case InTableTextMode: {
2499         buffer.giveRemainingTo(m_pendingTableCharacters);
2500         break;
2501     }
2502     case InColumnGroupMode: {
2503         ASSERT(insertionMode() == InColumnGroupMode);
2504         String leadingWhitespace = buffer.takeLeadingWhitespace();
2505         if (!leadingWhitespace.isEmpty())
2506             m_tree.insertTextNode(leadingWhitespace);
2507         if (buffer.isEmpty())
2508             return;
2509         if (!processColgroupEndTagForInColumnGroup()) {
2510             ASSERT(isParsingFragment());
2511             // The spec tells us to drop these characters on the floor.
2512             buffer.takeLeadingNonWhitespace();
2513             if (buffer.isEmpty())
2514                 return;
2515         }
2516         prepareToReprocessToken();
2517         goto ReprocessBuffer;
2518     }
2519     case AfterBodyMode:
2520     case AfterAfterBodyMode: {
2521         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2522         // FIXME: parse error
2523         setInsertionMode(InBodyMode);
2524         prepareToReprocessToken();
2525         goto ReprocessBuffer;
2526         break;
2527     }
2528     case TextMode: {
2529         ASSERT(insertionMode() == TextMode);
2530         m_tree.insertTextNode(buffer.takeRemaining());
2531         break;
2532     }
2533     case InHeadNoscriptMode: {
2534         ASSERT(insertionMode() == InHeadNoscriptMode);
2535         String leadingWhitespace = buffer.takeLeadingWhitespace();
2536         if (!leadingWhitespace.isEmpty())
2537             m_tree.insertTextNode(leadingWhitespace);
2538         if (buffer.isEmpty())
2539             return;
2540         defaultForInHeadNoscript();
2541         goto ReprocessBuffer;
2542         break;
2543     }
2544     case InFramesetMode:
2545     case AfterFramesetMode: {
2546         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2547         String leadingWhitespace = buffer.takeRemainingWhitespace();
2548         if (!leadingWhitespace.isEmpty())
2549             m_tree.insertTextNode(leadingWhitespace);
2550         // FIXME: We should generate a parse error if we skipped over any
2551         // non-whitespace characters.
2552         break;
2553     }
2554     case InSelectInTableMode:
2555     case InSelectMode: {
2556         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2557         m_tree.insertTextNode(buffer.takeRemaining());
2558         break;
2559     }
2560     case InForeignContentMode: {
2561         ASSERT(insertionMode() == InForeignContentMode);
2562         String characters = buffer.takeRemaining();
2563         m_tree.insertTextNode(characters);
2564         if (m_framesetOk && !isAllWhitespace(characters))
2565             m_framesetOk = false;
2566         break;
2567     }
2568     case AfterAfterFramesetMode: {
2569         String leadingWhitespace = buffer.takeRemainingWhitespace();
2570         if (!leadingWhitespace.isEmpty()) {
2571             m_tree.reconstructTheActiveFormattingElements();
2572             m_tree.insertTextNode(leadingWhitespace);
2573         }
2574         // FIXME: We should generate a parse error if we skipped over any
2575         // non-whitespace characters.
2576         break;
2577     }
2578     }
2579 }
2580 
processEndOfFile(AtomicHTMLToken & token)2581 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2582 {
2583     ASSERT(token.type() == HTMLToken::EndOfFile);
2584     switch (insertionMode()) {
2585     case InitialMode:
2586         ASSERT(insertionMode() == InitialMode);
2587         defaultForInitial();
2588         // Fall through.
2589     case BeforeHTMLMode:
2590         ASSERT(insertionMode() == BeforeHTMLMode);
2591         defaultForBeforeHTML();
2592         // Fall through.
2593     case BeforeHeadMode:
2594         ASSERT(insertionMode() == BeforeHeadMode);
2595         defaultForBeforeHead();
2596         // Fall through.
2597     case InHeadMode:
2598         ASSERT(insertionMode() == InHeadMode);
2599         defaultForInHead();
2600         // Fall through.
2601     case AfterHeadMode:
2602         ASSERT(insertionMode() == AfterHeadMode);
2603         defaultForAfterHead();
2604         // Fall through
2605     case InBodyMode:
2606     case InCellMode:
2607     case InCaptionMode:
2608     case InRowMode:
2609         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2610         notImplemented(); // Emit parse error based on what elements are still open.
2611         break;
2612     case AfterBodyMode:
2613     case AfterAfterBodyMode:
2614         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2615         break;
2616     case InHeadNoscriptMode:
2617         ASSERT(insertionMode() == InHeadNoscriptMode);
2618         defaultForInHeadNoscript();
2619         processEndOfFile(token);
2620         return;
2621     case AfterFramesetMode:
2622     case AfterAfterFramesetMode:
2623         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2624         break;
2625     case InFramesetMode:
2626     case InTableMode:
2627     case InTableBodyMode:
2628     case InSelectInTableMode:
2629     case InSelectMode:
2630         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2631         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2632             parseError(token);
2633         break;
2634     case InColumnGroupMode:
2635         if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2636             ASSERT(isParsingFragment());
2637             return; // FIXME: Should we break here instead of returning?
2638         }
2639         if (!processColgroupEndTagForInColumnGroup()) {
2640             ASSERT(isParsingFragment());
2641             return; // FIXME: Should we break here instead of returning?
2642         }
2643         prepareToReprocessToken();
2644         processEndOfFile(token);
2645         return;
2646     case InForeignContentMode:
2647         setInsertionMode(InBodyMode);
2648         processEndOfFile(token);
2649         return;
2650     case InTableTextMode:
2651         defaultForInTableText();
2652         processEndOfFile(token);
2653         return;
2654     case TextMode:
2655         parseError(token);
2656         if (m_tree.currentNode()->hasTagName(scriptTag))
2657             notImplemented(); // mark the script element as "already started".
2658         m_tree.openElements()->pop();
2659         setInsertionMode(m_originalInsertionMode);
2660         prepareToReprocessToken();
2661         processEndOfFile(token);
2662         return;
2663     }
2664     ASSERT(m_tree.currentNode());
2665     m_tree.openElements()->popAll();
2666 }
2667 
defaultForInitial()2668 void HTMLTreeBuilder::defaultForInitial()
2669 {
2670     notImplemented();
2671     if (!m_fragmentContext.fragment())
2672         m_document->setCompatibilityMode(Document::QuirksMode);
2673     // FIXME: parse error
2674     setInsertionMode(BeforeHTMLMode);
2675     prepareToReprocessToken();
2676 }
2677 
defaultForBeforeHTML()2678 void HTMLTreeBuilder::defaultForBeforeHTML()
2679 {
2680     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2681     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2682     setInsertionMode(BeforeHeadMode);
2683     prepareToReprocessToken();
2684 }
2685 
defaultForBeforeHead()2686 void HTMLTreeBuilder::defaultForBeforeHead()
2687 {
2688     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2689     processStartTag(startHead);
2690     prepareToReprocessToken();
2691 }
2692 
defaultForInHead()2693 void HTMLTreeBuilder::defaultForInHead()
2694 {
2695     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2696     processEndTag(endHead);
2697     prepareToReprocessToken();
2698 }
2699 
defaultForInHeadNoscript()2700 void HTMLTreeBuilder::defaultForInHeadNoscript()
2701 {
2702     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2703     processEndTag(endNoscript);
2704     prepareToReprocessToken();
2705 }
2706 
defaultForAfterHead()2707 void HTMLTreeBuilder::defaultForAfterHead()
2708 {
2709     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2710     processStartTag(startBody);
2711     m_framesetOk = true;
2712     prepareToReprocessToken();
2713 }
2714 
defaultForInTableText()2715 void HTMLTreeBuilder::defaultForInTableText()
2716 {
2717     String characters = String::adopt(m_pendingTableCharacters);
2718     if (!isAllWhitespace(characters)) {
2719         // FIXME: parse error
2720         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2721         m_tree.reconstructTheActiveFormattingElements();
2722         m_tree.insertTextNode(characters);
2723         m_framesetOk = false;
2724         setInsertionMode(m_originalInsertionMode);
2725         prepareToReprocessToken();
2726         return;
2727     }
2728     m_tree.insertTextNode(characters);
2729     setInsertionMode(m_originalInsertionMode);
2730     prepareToReprocessToken();
2731 }
2732 
processStartTagForInHead(AtomicHTMLToken & token)2733 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2734 {
2735     ASSERT(token.type() == HTMLToken::StartTag);
2736     if (token.name() == htmlTag) {
2737         m_tree.insertHTMLHtmlStartTagInBody(token);
2738         return true;
2739     }
2740     if (token.name() == baseTag
2741         || token.name() == basefontTag
2742         || token.name() == bgsoundTag
2743         || token.name() == commandTag
2744         || token.name() == linkTag
2745         || token.name() == metaTag) {
2746         m_tree.insertSelfClosingHTMLElement(token);
2747         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2748         return true;
2749     }
2750     if (token.name() == titleTag) {
2751         processGenericRCDATAStartTag(token);
2752         return true;
2753     }
2754     if (token.name() == noscriptTag) {
2755         if (scriptEnabled(m_document->frame())) {
2756             processGenericRawTextStartTag(token);
2757             return true;
2758         }
2759         m_tree.insertHTMLElement(token);
2760         setInsertionMode(InHeadNoscriptMode);
2761         return true;
2762     }
2763     if (token.name() == noframesTag || token.name() == styleTag) {
2764         processGenericRawTextStartTag(token);
2765         return true;
2766     }
2767     if (token.name() == scriptTag) {
2768         processScriptStartTag(token);
2769         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2770             processFakeEndTag(scriptTag);
2771         return true;
2772     }
2773     if (token.name() == headTag) {
2774         parseError(token);
2775         return true;
2776     }
2777     return false;
2778 }
2779 
processGenericRCDATAStartTag(AtomicHTMLToken & token)2780 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2781 {
2782     ASSERT(token.type() == HTMLToken::StartTag);
2783     m_tree.insertHTMLElement(token);
2784     m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2785     m_originalInsertionMode = m_insertionMode;
2786     setInsertionMode(TextMode);
2787 }
2788 
processGenericRawTextStartTag(AtomicHTMLToken & token)2789 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2790 {
2791     ASSERT(token.type() == HTMLToken::StartTag);
2792     m_tree.insertHTMLElement(token);
2793     m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2794     m_originalInsertionMode = m_insertionMode;
2795     setInsertionMode(TextMode);
2796 }
2797 
processScriptStartTag(AtomicHTMLToken & token)2798 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2799 {
2800     ASSERT(token.type() == HTMLToken::StartTag);
2801     m_tree.insertScriptElement(token);
2802     m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2803     m_originalInsertionMode = m_insertionMode;
2804 
2805     TextPosition0 position = m_parser->textPosition();
2806 
2807     ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2808 
2809     m_lastScriptElementStartPosition = position;
2810 
2811     setInsertionMode(TextMode);
2812 }
2813 
finished()2814 void HTMLTreeBuilder::finished()
2815 {
2816     if (isParsingFragment())
2817         return;
2818 
2819     ASSERT(m_document);
2820     // Warning, this may detach the parser. Do not do anything else after this.
2821     m_document->finishedParsing();
2822 }
2823 
parseError(AtomicHTMLToken &)2824 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2825 {
2826 }
2827 
scriptEnabled(Frame * frame)2828 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2829 {
2830     if (!frame)
2831         return false;
2832     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2833 }
2834 
pluginsEnabled(Frame * frame)2835 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2836 {
2837     if (!frame)
2838         return false;
2839     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2840 }
2841 
2842 }
2843