• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "core/html/parser/HTMLTreeBuilder.h"
29 
30 #include "bindings/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/MathMLNames.h"
33 #include "core/SVGNames.h"
34 #include "core/XLinkNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/XMLNames.h"
37 #include "core/dom/DocumentFragment.h"
38 #include "core/dom/ElementTraversal.h"
39 #include "core/html/HTMLDocument.h"
40 #include "core/html/HTMLFormElement.h"
41 #include "core/html/parser/AtomicHTMLToken.h"
42 #include "core/html/parser/HTMLDocumentParser.h"
43 #include "core/html/parser/HTMLParserIdioms.h"
44 #include "core/html/parser/HTMLStackItem.h"
45 #include "core/html/parser/HTMLToken.h"
46 #include "core/html/parser/HTMLTokenizer.h"
47 #include "platform/NotImplemented.h"
48 #include "platform/text/PlatformLocale.h"
49 #include "wtf/MainThread.h"
50 #include "wtf/unicode/CharacterNames.h"
51 
52 namespace WebCore {
53 
54 using namespace HTMLNames;
55 
56 namespace {
57 
isHTMLSpaceOrReplacementCharacter(UChar character)58 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
59 {
60     return isHTMLSpace<UChar>(character) || character == replacementCharacter;
61 }
62 
63 }
64 
uninitializedPositionValue1()65 static TextPosition uninitializedPositionValue1()
66 {
67     return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
68 }
69 
isAllWhitespace(const String & string)70 static inline bool isAllWhitespace(const String& string)
71 {
72     return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
73 }
74 
isAllWhitespaceOrReplacementCharacters(const String & string)75 static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
76 {
77     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
78 }
79 
isNumberedHeaderTag(const AtomicString & tagName)80 static bool isNumberedHeaderTag(const AtomicString& tagName)
81 {
82     return tagName == h1Tag
83         || tagName == h2Tag
84         || tagName == h3Tag
85         || tagName == h4Tag
86         || tagName == h5Tag
87         || tagName == h6Tag;
88 }
89 
isCaptionColOrColgroupTag(const AtomicString & tagName)90 static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
91 {
92     return tagName == captionTag
93         || tagName == colTag
94         || tagName == colgroupTag;
95 }
96 
isTableCellContextTag(const AtomicString & tagName)97 static bool isTableCellContextTag(const AtomicString& tagName)
98 {
99     return tagName == thTag || tagName == tdTag;
100 }
101 
isTableBodyContextTag(const AtomicString & tagName)102 static bool isTableBodyContextTag(const AtomicString& tagName)
103 {
104     return tagName == tbodyTag
105         || tagName == tfootTag
106         || tagName == theadTag;
107 }
108 
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)109 static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
110 {
111     return tagName == bTag
112         || tagName == bigTag
113         || tagName == codeTag
114         || tagName == emTag
115         || tagName == fontTag
116         || tagName == iTag
117         || tagName == sTag
118         || tagName == smallTag
119         || tagName == strikeTag
120         || tagName == strongTag
121         || tagName == ttTag
122         || tagName == uTag;
123 }
124 
isNonAnchorFormattingTag(const AtomicString & tagName)125 static bool isNonAnchorFormattingTag(const AtomicString& tagName)
126 {
127     return tagName == nobrTag
128         || isNonAnchorNonNobrFormattingTag(tagName);
129 }
130 
131 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)132 static bool isFormattingTag(const AtomicString& tagName)
133 {
134     return tagName == aTag || isNonAnchorFormattingTag(tagName);
135 }
136 
closestFormAncestor(Element & element)137 static HTMLFormElement* closestFormAncestor(Element& element)
138 {
139     ASSERT(isMainThread());
140     return Traversal<HTMLFormElement>::firstAncestorOrSelf(element);
141 }
142 
143 class HTMLTreeBuilder::CharacterTokenBuffer {
144     WTF_MAKE_NONCOPYABLE(CharacterTokenBuffer);
145 public:
CharacterTokenBuffer(AtomicHTMLToken * token)146     explicit CharacterTokenBuffer(AtomicHTMLToken* token)
147         : m_characters(token->characters().impl())
148         , m_current(0)
149         , m_end(token->characters().length())
150     {
151         ASSERT(!isEmpty());
152     }
153 
CharacterTokenBuffer(const String & characters)154     explicit CharacterTokenBuffer(const String& characters)
155         : m_characters(characters.impl())
156         , m_current(0)
157         , m_end(characters.length())
158     {
159         ASSERT(!isEmpty());
160     }
161 
~CharacterTokenBuffer()162     ~CharacterTokenBuffer()
163     {
164         ASSERT(isEmpty());
165     }
166 
isEmpty() const167     bool isEmpty() const { return m_current == m_end; }
168 
skipAtMostOneLeadingNewline()169     void skipAtMostOneLeadingNewline()
170     {
171         ASSERT(!isEmpty());
172         if ((*m_characters)[m_current] == '\n')
173             ++m_current;
174     }
175 
skipLeadingWhitespace()176     void skipLeadingWhitespace()
177     {
178         skipLeading<isHTMLSpace<UChar> >();
179     }
180 
takeLeadingWhitespace()181     String takeLeadingWhitespace()
182     {
183         return takeLeading<isHTMLSpace<UChar> >();
184     }
185 
skipLeadingNonWhitespace()186     void skipLeadingNonWhitespace()
187     {
188         skipLeading<isNotHTMLSpace<UChar> >();
189     }
190 
takeRemaining()191     String takeRemaining()
192     {
193         ASSERT(!isEmpty());
194         unsigned start = m_current;
195         m_current = m_end;
196         // Notice that substring is smart enough to return *this when start == 0.
197         return String(m_characters->substring(start, m_end - start));
198     }
199 
giveRemainingTo(StringBuilder & recipient)200     void giveRemainingTo(StringBuilder& recipient)
201     {
202         if (m_characters->is8Bit())
203             recipient.append(m_characters->characters8() + m_current, m_end - m_current);
204         else
205             recipient.append(m_characters->characters16() + m_current, m_end - m_current);
206         m_current = m_end;
207     }
208 
takeRemainingWhitespace()209     String takeRemainingWhitespace()
210     {
211         ASSERT(!isEmpty());
212         const unsigned start = m_current;
213         m_current = m_end; // One way or another, we're taking everything!
214 
215         unsigned length = 0;
216         for (unsigned i = start; i < m_end; ++i) {
217             if (isHTMLSpace<UChar>((*m_characters)[i]))
218                 ++length;
219         }
220         // Returning the null string when there aren't any whitespace
221         // characters is slightly cleaner semantically because we don't want
222         // to insert a text node (as opposed to inserting an empty text node).
223         if (!length)
224             return String();
225         if (length == start - m_end) // It's all whitespace.
226             return String(m_characters->substring(start, start - m_end));
227 
228         StringBuilder result;
229         result.reserveCapacity(length);
230         for (unsigned i = start; i < m_end; ++i) {
231             UChar c = (*m_characters)[i];
232             if (isHTMLSpace<UChar>(c))
233                 result.append(c);
234         }
235 
236         return result.toString();
237     }
238 
239 private:
240     template<bool characterPredicate(UChar)>
skipLeading()241     void skipLeading()
242     {
243         ASSERT(!isEmpty());
244         while (characterPredicate((*m_characters)[m_current])) {
245             if (++m_current == m_end)
246                 return;
247         }
248     }
249 
250     template<bool characterPredicate(UChar)>
takeLeading()251     String takeLeading()
252     {
253         ASSERT(!isEmpty());
254         const unsigned start = m_current;
255         skipLeading<characterPredicate>();
256         if (start == m_current)
257             return String();
258         return String(m_characters->substring(start, m_current - start));
259     }
260 
261     RefPtr<StringImpl> m_characters;
262     unsigned m_current;
263     unsigned m_end;
264 };
265 
HTMLTreeBuilder(HTMLDocumentParser * parser,HTMLDocument * document,ParserContentPolicy parserContentPolicy,bool,const HTMLParserOptions & options)266 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool, const HTMLParserOptions& options)
267     : m_framesetOk(true)
268 #ifndef NDEBUG
269     , m_isAttached(true)
270 #endif
271     , m_tree(document, parserContentPolicy)
272     , m_insertionMode(InitialMode)
273     , m_originalInsertionMode(InitialMode)
274     , m_shouldSkipLeadingNewline(false)
275     , m_parser(parser)
276     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
277     , m_options(options)
278 {
279 }
280 
281 // FIXME: Member variables should be grouped into self-initializing structs to
282 // minimize code duplication between these constructors.
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * contextElement,ParserContentPolicy parserContentPolicy,const HTMLParserOptions & options)283 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
284     : m_framesetOk(true)
285 #ifndef NDEBUG
286     , m_isAttached(true)
287 #endif
288     , m_fragmentContext(fragment, contextElement)
289     , m_tree(fragment, parserContentPolicy)
290     , m_insertionMode(InitialMode)
291     , m_originalInsertionMode(InitialMode)
292     , m_shouldSkipLeadingNewline(false)
293     , m_parser(parser)
294     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
295     , m_options(options)
296 {
297     ASSERT(isMainThread());
298     ASSERT(contextElement);
299 
300     // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
301     // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
302     // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
303     // and instead use the DocumentFragment as a root node.
304     m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment, HTMLStackItem::ItemForDocumentFragmentNode));
305 
306     if (isHTMLTemplateElement(*contextElement))
307         m_templateInsertionModes.append(TemplateContentsMode);
308 
309     resetInsertionModeAppropriately();
310     m_tree.setForm(closestFormAncestor(*contextElement));
311 }
312 
~HTMLTreeBuilder()313 HTMLTreeBuilder::~HTMLTreeBuilder()
314 {
315 }
316 
trace(Visitor * visitor)317 void HTMLTreeBuilder::trace(Visitor* visitor)
318 {
319     visitor->trace(m_fragmentContext);
320     visitor->trace(m_tree);
321     visitor->trace(m_parser);
322     visitor->trace(m_scriptToProcess);
323 }
324 
detach()325 void HTMLTreeBuilder::detach()
326 {
327 #ifndef NDEBUG
328     // This call makes little sense in fragment mode, but for consistency
329     // DocumentParser expects detach() to always be called before it's destroyed.
330     m_isAttached = false;
331 #endif
332     // HTMLConstructionSite might be on the callstack when detach() is called
333     // otherwise we'd just call m_tree.clear() here instead.
334     m_tree.detach();
335 }
336 
FragmentParsingContext()337 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
338     : m_fragment(nullptr)
339 {
340 }
341 
FragmentParsingContext(DocumentFragment * fragment,Element * contextElement)342 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement)
343     : m_fragment(fragment)
344 {
345     ASSERT(!fragment->hasChildren());
346     m_contextElementStackItem = HTMLStackItem::create(contextElement, HTMLStackItem::ItemForContextElement);
347 }
348 
~FragmentParsingContext()349 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
350 {
351 }
352 
trace(Visitor * visitor)353 void HTMLTreeBuilder::FragmentParsingContext::trace(Visitor* visitor)
354 {
355     visitor->trace(m_fragment);
356     visitor->trace(m_contextElementStackItem);
357 }
358 
takeScriptToProcess(TextPosition & scriptStartPosition)359 PassRefPtrWillBeRawPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
360 {
361     ASSERT(m_scriptToProcess);
362     ASSERT(!m_tree.hasPendingTasks());
363     // Unpause ourselves, callers may pause us again when processing the script.
364     // The HTML5 spec is written as though scripts are executed inside the tree
365     // builder.  We pause the parser to exit the tree builder, and then resume
366     // before running scripts.
367     scriptStartPosition = m_scriptToProcessStartPosition;
368     m_scriptToProcessStartPosition = uninitializedPositionValue1();
369     return m_scriptToProcess.release();
370 }
371 
constructTree(AtomicHTMLToken * token)372 void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
373 {
374     if (shouldProcessTokenInForeignContent(token))
375         processTokenInForeignContent(token);
376     else
377         processToken(token);
378 
379     if (m_parser->tokenizer()) {
380         bool inForeignContent = false;
381         if (!m_tree.isEmpty()) {
382             HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
383             inForeignContent = !adjustedCurrentNode->isInHTMLNamespace()
384                 && !HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)
385                 && !HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode);
386         }
387 
388         m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
389         m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
390     }
391 
392     m_tree.executeQueuedTasks();
393     // We might be detached now.
394 }
395 
processToken(AtomicHTMLToken * token)396 void HTMLTreeBuilder::processToken(AtomicHTMLToken* token)
397 {
398     if (token->type() == HTMLToken::Character) {
399         processCharacter(token);
400         return;
401     }
402 
403     // Any non-character token needs to cause us to flush any pending text immediately.
404     // NOTE: flush() can cause any queued tasks to execute, possibly re-entering the parser.
405     m_tree.flush();
406     m_shouldSkipLeadingNewline = false;
407 
408     switch (token->type()) {
409     case HTMLToken::Uninitialized:
410     case HTMLToken::Character:
411         ASSERT_NOT_REACHED();
412         break;
413     case HTMLToken::DOCTYPE:
414         processDoctypeToken(token);
415         break;
416     case HTMLToken::StartTag:
417         processStartTag(token);
418         break;
419     case HTMLToken::EndTag:
420         processEndTag(token);
421         break;
422     case HTMLToken::Comment:
423         processComment(token);
424         break;
425     case HTMLToken::EndOfFile:
426         processEndOfFile(token);
427         break;
428     }
429 }
430 
processDoctypeToken(AtomicHTMLToken * token)431 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken* token)
432 {
433     ASSERT(token->type() == HTMLToken::DOCTYPE);
434     if (m_insertionMode == InitialMode) {
435         m_tree.insertDoctype(token);
436         setInsertionMode(BeforeHTMLMode);
437         return;
438     }
439     if (m_insertionMode == InTableTextMode) {
440         defaultForInTableText();
441         processDoctypeToken(token);
442         return;
443     }
444     parseError(token);
445 }
446 
processFakeStartTag(const QualifiedName & tagName,const Vector<Attribute> & attributes)447 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
448 {
449     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
450     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
451     processStartTag(&fakeToken);
452 }
453 
processFakeEndTag(const AtomicString & tagName)454 void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
455 {
456     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
457     processEndTag(&fakeToken);
458 }
459 
processFakeEndTag(const QualifiedName & tagName)460 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
461 {
462     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
463     processFakeEndTag(tagName.localName());
464 }
465 
processFakePEndTagIfPInButtonScope()466 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
467 {
468     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
469         return;
470     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
471     processEndTag(&endP);
472 }
473 
474 namespace {
475 
isLi(const HTMLStackItem * item)476 bool isLi(const HTMLStackItem* item)
477 {
478     return item->hasTagName(liTag);
479 }
480 
isDdOrDt(const HTMLStackItem * item)481 bool isDdOrDt(const HTMLStackItem* item)
482 {
483     return item->hasTagName(ddTag)
484         || item->hasTagName(dtTag);
485 }
486 
487 }
488 
489 template <bool shouldClose(const HTMLStackItem*)>
processCloseWhenNestedTag(AtomicHTMLToken * token)490 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token)
491 {
492     m_framesetOk = false;
493     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
494     while (1) {
495         RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
496         if (shouldClose(item.get())) {
497             ASSERT(item->isElementNode());
498             processFakeEndTag(item->localName());
499             break;
500         }
501         if (item->isSpecialNode() && !item->hasTagName(addressTag) && !item->hasTagName(divTag) && !item->hasTagName(pTag))
502             break;
503         nodeRecord = nodeRecord->next();
504     }
505     processFakePEndTagIfPInButtonScope();
506     m_tree.insertHTMLElement(token);
507 }
508 
509 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
510 
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,const QualifiedName * const * names,size_t length)511 static void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, const QualifiedName* const* names, size_t length)
512 {
513     for (size_t i = 0; i < length; ++i) {
514         const QualifiedName& name = *names[i];
515         const AtomicString& localName = name.localName();
516         AtomicString loweredLocalName = localName.lower();
517         if (loweredLocalName != localName)
518             map->add(loweredLocalName, name);
519     }
520 }
521 
adjustSVGTagNameCase(AtomicHTMLToken * token)522 static void adjustSVGTagNameCase(AtomicHTMLToken* token)
523 {
524     static PrefixedNameToQualifiedNameMap* caseMap = 0;
525     if (!caseMap) {
526         caseMap = new PrefixedNameToQualifiedNameMap;
527         OwnPtr<const QualifiedName*[]> svgTags = SVGNames::getSVGTags();
528         mapLoweredLocalNameToName(caseMap, svgTags.get(), SVGNames::SVGTagsCount);
529     }
530 
531     const QualifiedName& casedName = caseMap->get(token->name());
532     if (casedName.localName().isNull())
533         return;
534     token->setName(casedName.localName());
535 }
536 
537 template<PassOwnPtr<const QualifiedName*[]> getAttrs(), unsigned length>
adjustAttributes(AtomicHTMLToken * token)538 static void adjustAttributes(AtomicHTMLToken* token)
539 {
540     static PrefixedNameToQualifiedNameMap* caseMap = 0;
541     if (!caseMap) {
542         caseMap = new PrefixedNameToQualifiedNameMap;
543         OwnPtr<const QualifiedName*[]> attrs = getAttrs();
544         mapLoweredLocalNameToName(caseMap, attrs.get(), length);
545     }
546 
547     for (unsigned i = 0; i < token->attributes().size(); ++i) {
548         Attribute& tokenAttribute = token->attributes().at(i);
549         const QualifiedName& casedName = caseMap->get(tokenAttribute.localName());
550         if (!casedName.localName().isNull())
551             tokenAttribute.parserSetName(casedName);
552     }
553 }
554 
adjustSVGAttributes(AtomicHTMLToken * token)555 static void adjustSVGAttributes(AtomicHTMLToken* token)
556 {
557     adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
558 }
559 
adjustMathMLAttributes(AtomicHTMLToken * token)560 static void adjustMathMLAttributes(AtomicHTMLToken* token)
561 {
562     adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
563 }
564 
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,const QualifiedName * const * names,size_t length)565 static void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, const QualifiedName* const* names, size_t length)
566 {
567     for (size_t i = 0; i < length; ++i) {
568         const QualifiedName* name = names[i];
569         const AtomicString& localName = name->localName();
570         AtomicString prefixColonLocalName = prefix + ':' + localName;
571         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
572         map->add(prefixColonLocalName, nameWithPrefix);
573     }
574 }
575 
adjustForeignAttributes(AtomicHTMLToken * token)576 static void adjustForeignAttributes(AtomicHTMLToken* token)
577 {
578     static PrefixedNameToQualifiedNameMap* map = 0;
579     if (!map) {
580         map = new PrefixedNameToQualifiedNameMap;
581 
582         OwnPtr<const QualifiedName*[]> attrs = XLinkNames::getXLinkAttrs();
583         addNamesWithPrefix(map, xlinkAtom, attrs.get(), XLinkNames::XLinkAttrsCount);
584 
585         OwnPtr<const QualifiedName*[]> xmlAttrs = XMLNames::getXMLAttrs();
586         addNamesWithPrefix(map, xmlAtom, xmlAttrs.get(), XMLNames::XMLAttrsCount);
587 
588         map->add(WTF::xmlnsAtom, XMLNSNames::xmlnsAttr);
589         map->add("xmlns:xlink", QualifiedName(xmlnsAtom, xlinkAtom, XMLNSNames::xmlnsNamespaceURI));
590     }
591 
592     for (unsigned i = 0; i < token->attributes().size(); ++i) {
593         Attribute& tokenAttribute = token->attributes().at(i);
594         const QualifiedName& name = map->get(tokenAttribute.localName());
595         if (!name.localName().isNull())
596             tokenAttribute.parserSetName(name);
597     }
598 }
599 
processStartTagForInBody(AtomicHTMLToken * token)600 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
601 {
602     ASSERT(token->type() == HTMLToken::StartTag);
603     if (token->name() == htmlTag) {
604         processHtmlStartTagForInBody(token);
605         return;
606     }
607     if (token->name() == baseTag
608         || token->name() == basefontTag
609         || token->name() == bgsoundTag
610         || token->name() == commandTag
611         || token->name() == linkTag
612         || token->name() == metaTag
613         || token->name() == noframesTag
614         || token->name() == scriptTag
615         || token->name() == styleTag
616         || token->name() == titleTag) {
617         bool didProcess = processStartTagForInHead(token);
618         ASSERT_UNUSED(didProcess, didProcess);
619         return;
620     }
621     if (token->name() == bodyTag) {
622         parseError(token);
623         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement() || m_tree.openElements()->hasTemplateInHTMLScope()) {
624             ASSERT(isParsingFragmentOrTemplateContents());
625             return;
626         }
627         m_framesetOk = false;
628         m_tree.insertHTMLBodyStartTagInBody(token);
629         return;
630     }
631     if (token->name() == framesetTag) {
632         parseError(token);
633         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
634             ASSERT(isParsingFragmentOrTemplateContents());
635             return;
636         }
637         if (!m_framesetOk)
638             return;
639         m_tree.openElements()->bodyElement()->remove(ASSERT_NO_EXCEPTION);
640         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
641         m_tree.openElements()->popHTMLBodyElement();
642         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
643         m_tree.insertHTMLElement(token);
644         setInsertionMode(InFramesetMode);
645         return;
646     }
647     if (token->name() == addressTag
648         || token->name() == articleTag
649         || token->name() == asideTag
650         || token->name() == blockquoteTag
651         || token->name() == centerTag
652         || token->name() == detailsTag
653         || token->name() == dirTag
654         || token->name() == divTag
655         || token->name() == dlTag
656         || token->name() == fieldsetTag
657         || token->name() == figcaptionTag
658         || token->name() == figureTag
659         || token->name() == footerTag
660         || token->name() == headerTag
661         || token->name() == hgroupTag
662         || token->name() == mainTag
663         || token->name() == menuTag
664         || token->name() == navTag
665         || token->name() == olTag
666         || token->name() == pTag
667         || token->name() == sectionTag
668         || token->name() == summaryTag
669         || token->name() == ulTag) {
670         processFakePEndTagIfPInButtonScope();
671         m_tree.insertHTMLElement(token);
672         return;
673     }
674     if (isNumberedHeaderTag(token->name())) {
675         processFakePEndTagIfPInButtonScope();
676         if (m_tree.currentStackItem()->isNumberedHeaderElement()) {
677             parseError(token);
678             m_tree.openElements()->pop();
679         }
680         m_tree.insertHTMLElement(token);
681         return;
682     }
683     if (token->name() == preTag || token->name() == listingTag) {
684         processFakePEndTagIfPInButtonScope();
685         m_tree.insertHTMLElement(token);
686         m_shouldSkipLeadingNewline = true;
687         m_framesetOk = false;
688         return;
689     }
690     if (token->name() == formTag) {
691         if (m_tree.form()) {
692             parseError(token);
693             return;
694         }
695         processFakePEndTagIfPInButtonScope();
696         m_tree.insertHTMLFormElement(token);
697         return;
698     }
699     if (token->name() == liTag) {
700         processCloseWhenNestedTag<isLi>(token);
701         return;
702     }
703     if (token->name() == ddTag || token->name() == dtTag) {
704         processCloseWhenNestedTag<isDdOrDt>(token);
705         return;
706     }
707     if (token->name() == plaintextTag) {
708         processFakePEndTagIfPInButtonScope();
709         m_tree.insertHTMLElement(token);
710         if (m_parser->tokenizer())
711             m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
712         return;
713     }
714     if (token->name() == buttonTag) {
715         if (m_tree.openElements()->inScope(buttonTag)) {
716             parseError(token);
717             processFakeEndTag(buttonTag);
718             processStartTag(token); // FIXME: Could we just fall through here?
719             return;
720         }
721         m_tree.reconstructTheActiveFormattingElements();
722         m_tree.insertHTMLElement(token);
723         m_framesetOk = false;
724         return;
725     }
726     if (token->name() == aTag) {
727         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
728         if (activeATag) {
729             parseError(token);
730             processFakeEndTag(aTag);
731             m_tree.activeFormattingElements()->remove(activeATag);
732             if (m_tree.openElements()->contains(activeATag))
733                 m_tree.openElements()->remove(activeATag);
734         }
735         m_tree.reconstructTheActiveFormattingElements();
736         m_tree.insertFormattingElement(token);
737         return;
738     }
739     if (isNonAnchorNonNobrFormattingTag(token->name())) {
740         m_tree.reconstructTheActiveFormattingElements();
741         m_tree.insertFormattingElement(token);
742         return;
743     }
744     if (token->name() == nobrTag) {
745         m_tree.reconstructTheActiveFormattingElements();
746         if (m_tree.openElements()->inScope(nobrTag)) {
747             parseError(token);
748             processFakeEndTag(nobrTag);
749             m_tree.reconstructTheActiveFormattingElements();
750         }
751         m_tree.insertFormattingElement(token);
752         return;
753     }
754     if (token->name() == appletTag
755         || token->name() == embedTag
756         || token->name() == objectTag) {
757         if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
758             return;
759     }
760     if (token->name() == appletTag
761         || token->name() == marqueeTag
762         || token->name() == objectTag) {
763         m_tree.reconstructTheActiveFormattingElements();
764         m_tree.insertHTMLElement(token);
765         m_tree.activeFormattingElements()->appendMarker();
766         m_framesetOk = false;
767         return;
768     }
769     if (token->name() == tableTag) {
770         if (!m_tree.inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
771             processFakeEndTag(pTag);
772         m_tree.insertHTMLElement(token);
773         m_framesetOk = false;
774         setInsertionMode(InTableMode);
775         return;
776     }
777     if (token->name() == imageTag) {
778         parseError(token);
779         // Apparently we're not supposed to ask.
780         token->setName(imgTag.localName());
781         // Note the fall through to the imgTag handling below!
782     }
783     if (token->name() == areaTag
784         || token->name() == brTag
785         || token->name() == embedTag
786         || token->name() == imgTag
787         || token->name() == keygenTag
788         || token->name() == wbrTag) {
789         m_tree.reconstructTheActiveFormattingElements();
790         m_tree.insertSelfClosingHTMLElement(token);
791         m_framesetOk = false;
792         return;
793     }
794     if (token->name() == inputTag) {
795         Attribute* typeAttribute = token->getAttributeItem(typeAttr);
796         m_tree.reconstructTheActiveFormattingElements();
797         m_tree.insertSelfClosingHTMLElement(token);
798         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
799             m_framesetOk = false;
800         return;
801     }
802     if (token->name() == paramTag
803         || token->name() == sourceTag
804         || token->name() == trackTag) {
805         m_tree.insertSelfClosingHTMLElement(token);
806         return;
807     }
808     if (token->name() == hrTag) {
809         processFakePEndTagIfPInButtonScope();
810         m_tree.insertSelfClosingHTMLElement(token);
811         m_framesetOk = false;
812         return;
813     }
814     if (token->name() == textareaTag) {
815         m_tree.insertHTMLElement(token);
816         m_shouldSkipLeadingNewline = true;
817         if (m_parser->tokenizer())
818             m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
819         m_originalInsertionMode = m_insertionMode;
820         m_framesetOk = false;
821         setInsertionMode(TextMode);
822         return;
823     }
824     if (token->name() == xmpTag) {
825         processFakePEndTagIfPInButtonScope();
826         m_tree.reconstructTheActiveFormattingElements();
827         m_framesetOk = false;
828         processGenericRawTextStartTag(token);
829         return;
830     }
831     if (token->name() == iframeTag) {
832         m_framesetOk = false;
833         processGenericRawTextStartTag(token);
834         return;
835     }
836     if (token->name() == noembedTag && m_options.pluginsEnabled) {
837         processGenericRawTextStartTag(token);
838         return;
839     }
840     if (token->name() == noscriptTag && m_options.scriptEnabled) {
841         processGenericRawTextStartTag(token);
842         return;
843     }
844     if (token->name() == selectTag) {
845         m_tree.reconstructTheActiveFormattingElements();
846         m_tree.insertHTMLElement(token);
847         m_framesetOk = false;
848         if (m_insertionMode == InTableMode
849              || m_insertionMode == InCaptionMode
850              || m_insertionMode == InColumnGroupMode
851              || m_insertionMode == InTableBodyMode
852              || m_insertionMode == InRowMode
853              || m_insertionMode == InCellMode)
854             setInsertionMode(InSelectInTableMode);
855         else
856             setInsertionMode(InSelectMode);
857         return;
858     }
859     if (token->name() == optgroupTag || token->name() == optionTag) {
860         if (m_tree.currentStackItem()->hasTagName(optionTag)) {
861             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
862             processEndTag(&endOption);
863         }
864         m_tree.reconstructTheActiveFormattingElements();
865         m_tree.insertHTMLElement(token);
866         return;
867     }
868     if (token->name() == rpTag || token->name() == rtTag) {
869         if (m_tree.openElements()->inScope(rubyTag.localName())) {
870             m_tree.generateImpliedEndTags();
871             if (!m_tree.currentStackItem()->hasTagName(rubyTag))
872                 parseError(token);
873         }
874         m_tree.insertHTMLElement(token);
875         return;
876     }
877     if (token->name() == MathMLNames::mathTag.localName()) {
878         m_tree.reconstructTheActiveFormattingElements();
879         adjustMathMLAttributes(token);
880         adjustForeignAttributes(token);
881         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
882         return;
883     }
884     if (token->name() == SVGNames::svgTag.localName()) {
885         m_tree.reconstructTheActiveFormattingElements();
886         adjustSVGAttributes(token);
887         adjustForeignAttributes(token);
888         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
889         return;
890     }
891     if (isCaptionColOrColgroupTag(token->name())
892         || token->name() == frameTag
893         || token->name() == headTag
894         || isTableBodyContextTag(token->name())
895         || isTableCellContextTag(token->name())
896         || token->name() == trTag) {
897         parseError(token);
898         return;
899     }
900     if (token->name() == templateTag) {
901         processTemplateStartTag(token);
902         return;
903     }
904     m_tree.reconstructTheActiveFormattingElements();
905     m_tree.insertHTMLElement(token);
906 }
907 
processTemplateStartTag(AtomicHTMLToken * token)908 void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken* token)
909 {
910     m_tree.activeFormattingElements()->appendMarker();
911     m_tree.insertHTMLElement(token);
912     m_templateInsertionModes.append(TemplateContentsMode);
913     setInsertionMode(TemplateContentsMode);
914 }
915 
processTemplateEndTag(AtomicHTMLToken * token)916 bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken* token)
917 {
918     ASSERT(token->name() == templateTag.localName());
919     if (!m_tree.openElements()->hasTemplateInHTMLScope()) {
920         ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && isHTMLTemplateElement(m_fragmentContext.contextElement())));
921         parseError(token);
922         return false;
923     }
924     m_tree.generateImpliedEndTags();
925     if (!m_tree.currentStackItem()->hasTagName(templateTag))
926         parseError(token);
927     m_tree.openElements()->popUntilPopped(templateTag);
928     m_tree.activeFormattingElements()->clearToLastMarker();
929     m_templateInsertionModes.removeLast();
930     resetInsertionModeAppropriately();
931     return true;
932 }
933 
processEndOfFileForInTemplateContents(AtomicHTMLToken * token)934 bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken* token)
935 {
936     AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag.localName());
937     if (!processTemplateEndTag(&endTemplate))
938         return false;
939 
940     processEndOfFile(token);
941     return true;
942 }
943 
processColgroupEndTagForInColumnGroup()944 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
945 {
946     if (m_tree.currentIsRootNode() || isHTMLTemplateElement(*m_tree.currentNode())) {
947         ASSERT(isParsingFragmentOrTemplateContents());
948         // FIXME: parse error
949         return false;
950     }
951     m_tree.openElements()->pop();
952     setInsertionMode(InTableMode);
953     return true;
954 }
955 
956 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
adjustedCurrentStackItem() const957 HTMLStackItem* HTMLTreeBuilder::adjustedCurrentStackItem() const
958 {
959     ASSERT(!m_tree.isEmpty());
960     if (isParsingFragment() && m_tree.openElements()->hasOnlyOneElement())
961         return m_fragmentContext.contextElementStackItem();
962 
963     return m_tree.currentStackItem();
964 }
965 
966 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()967 void HTMLTreeBuilder::closeTheCell()
968 {
969     ASSERT(insertionMode() == InCellMode);
970     if (m_tree.openElements()->inTableScope(tdTag)) {
971         ASSERT(!m_tree.openElements()->inTableScope(thTag));
972         processFakeEndTag(tdTag);
973         return;
974     }
975     ASSERT(m_tree.openElements()->inTableScope(thTag));
976     processFakeEndTag(thTag);
977     ASSERT(insertionMode() == InRowMode);
978 }
979 
processStartTagForInTable(AtomicHTMLToken * token)980 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken* token)
981 {
982     ASSERT(token->type() == HTMLToken::StartTag);
983     if (token->name() == captionTag) {
984         m_tree.openElements()->popUntilTableScopeMarker();
985         m_tree.activeFormattingElements()->appendMarker();
986         m_tree.insertHTMLElement(token);
987         setInsertionMode(InCaptionMode);
988         return;
989     }
990     if (token->name() == colgroupTag) {
991         m_tree.openElements()->popUntilTableScopeMarker();
992         m_tree.insertHTMLElement(token);
993         setInsertionMode(InColumnGroupMode);
994         return;
995     }
996     if (token->name() == colTag) {
997         processFakeStartTag(colgroupTag);
998         ASSERT(InColumnGroupMode);
999         processStartTag(token);
1000         return;
1001     }
1002     if (isTableBodyContextTag(token->name())) {
1003         m_tree.openElements()->popUntilTableScopeMarker();
1004         m_tree.insertHTMLElement(token);
1005         setInsertionMode(InTableBodyMode);
1006         return;
1007     }
1008     if (isTableCellContextTag(token->name())
1009         || token->name() == trTag) {
1010         processFakeStartTag(tbodyTag);
1011         ASSERT(insertionMode() == InTableBodyMode);
1012         processStartTag(token);
1013         return;
1014     }
1015     if (token->name() == tableTag) {
1016         parseError(token);
1017         if (!processTableEndTagForInTable()) {
1018             ASSERT(isParsingFragmentOrTemplateContents());
1019             return;
1020         }
1021         processStartTag(token);
1022         return;
1023     }
1024     if (token->name() == styleTag || token->name() == scriptTag) {
1025         processStartTagForInHead(token);
1026         return;
1027     }
1028     if (token->name() == inputTag) {
1029         Attribute* typeAttribute = token->getAttributeItem(typeAttr);
1030         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1031             parseError(token);
1032             m_tree.insertSelfClosingHTMLElement(token);
1033             return;
1034         }
1035         // Fall through to "anything else" case.
1036     }
1037     if (token->name() == formTag) {
1038         parseError(token);
1039         if (m_tree.form())
1040             return;
1041         m_tree.insertHTMLFormElement(token, true);
1042         m_tree.openElements()->pop();
1043         return;
1044     }
1045     if (token->name() == templateTag) {
1046         processTemplateStartTag(token);
1047         return;
1048     }
1049     parseError(token);
1050     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1051     processStartTagForInBody(token);
1052 }
1053 
processStartTag(AtomicHTMLToken * token)1054 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token)
1055 {
1056     ASSERT(token->type() == HTMLToken::StartTag);
1057     switch (insertionMode()) {
1058     case InitialMode:
1059         ASSERT(insertionMode() == InitialMode);
1060         defaultForInitial();
1061         // Fall through.
1062     case BeforeHTMLMode:
1063         ASSERT(insertionMode() == BeforeHTMLMode);
1064         if (token->name() == htmlTag) {
1065             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1066             setInsertionMode(BeforeHeadMode);
1067             return;
1068         }
1069         defaultForBeforeHTML();
1070         // Fall through.
1071     case BeforeHeadMode:
1072         ASSERT(insertionMode() == BeforeHeadMode);
1073         if (token->name() == htmlTag) {
1074             processHtmlStartTagForInBody(token);
1075             return;
1076         }
1077         if (token->name() == headTag) {
1078             m_tree.insertHTMLHeadElement(token);
1079             setInsertionMode(InHeadMode);
1080             return;
1081         }
1082         defaultForBeforeHead();
1083         // Fall through.
1084     case InHeadMode:
1085         ASSERT(insertionMode() == InHeadMode);
1086         if (processStartTagForInHead(token))
1087             return;
1088         defaultForInHead();
1089         // Fall through.
1090     case AfterHeadMode:
1091         ASSERT(insertionMode() == AfterHeadMode);
1092         if (token->name() == htmlTag) {
1093             processHtmlStartTagForInBody(token);
1094             return;
1095         }
1096         if (token->name() == bodyTag) {
1097             m_framesetOk = false;
1098             m_tree.insertHTMLBodyElement(token);
1099             setInsertionMode(InBodyMode);
1100             return;
1101         }
1102         if (token->name() == framesetTag) {
1103             m_tree.insertHTMLElement(token);
1104             setInsertionMode(InFramesetMode);
1105             return;
1106         }
1107         if (token->name() == baseTag
1108             || token->name() == basefontTag
1109             || token->name() == bgsoundTag
1110             || token->name() == linkTag
1111             || token->name() == metaTag
1112             || token->name() == noframesTag
1113             || token->name() == scriptTag
1114             || token->name() == styleTag
1115             || token->name() == templateTag
1116             || token->name() == titleTag) {
1117             parseError(token);
1118             ASSERT(m_tree.head());
1119             m_tree.openElements()->pushHTMLHeadElement(m_tree.headStackItem());
1120             processStartTagForInHead(token);
1121             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1122             return;
1123         }
1124         if (token->name() == headTag) {
1125             parseError(token);
1126             return;
1127         }
1128         defaultForAfterHead();
1129         // Fall through
1130     case InBodyMode:
1131         ASSERT(insertionMode() == InBodyMode);
1132         processStartTagForInBody(token);
1133         break;
1134     case InTableMode:
1135         ASSERT(insertionMode() == InTableMode);
1136         processStartTagForInTable(token);
1137         break;
1138     case InCaptionMode:
1139         ASSERT(insertionMode() == InCaptionMode);
1140         if (isCaptionColOrColgroupTag(token->name())
1141             || isTableBodyContextTag(token->name())
1142             || isTableCellContextTag(token->name())
1143             || token->name() == trTag) {
1144             parseError(token);
1145             if (!processCaptionEndTagForInCaption()) {
1146                 ASSERT(isParsingFragment());
1147                 return;
1148             }
1149             processStartTag(token);
1150             return;
1151         }
1152         processStartTagForInBody(token);
1153         break;
1154     case InColumnGroupMode:
1155         ASSERT(insertionMode() == InColumnGroupMode);
1156         if (token->name() == htmlTag) {
1157             processHtmlStartTagForInBody(token);
1158             return;
1159         }
1160         if (token->name() == colTag) {
1161             m_tree.insertSelfClosingHTMLElement(token);
1162             return;
1163         }
1164         if (token->name() == templateTag) {
1165             processTemplateStartTag(token);
1166             return;
1167         }
1168         if (!processColgroupEndTagForInColumnGroup()) {
1169             ASSERT(isParsingFragmentOrTemplateContents());
1170             return;
1171         }
1172         processStartTag(token);
1173         break;
1174     case InTableBodyMode:
1175         ASSERT(insertionMode() == InTableBodyMode);
1176         if (token->name() == trTag) {
1177             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1178             m_tree.insertHTMLElement(token);
1179             setInsertionMode(InRowMode);
1180             return;
1181         }
1182         if (isTableCellContextTag(token->name())) {
1183             parseError(token);
1184             processFakeStartTag(trTag);
1185             ASSERT(insertionMode() == InRowMode);
1186             processStartTag(token);
1187             return;
1188         }
1189         if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) {
1190             // FIXME: This is slow.
1191             if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1192                 ASSERT(isParsingFragmentOrTemplateContents());
1193                 parseError(token);
1194                 return;
1195             }
1196             m_tree.openElements()->popUntilTableBodyScopeMarker();
1197             ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1198             processFakeEndTag(m_tree.currentStackItem()->localName());
1199             processStartTag(token);
1200             return;
1201         }
1202         processStartTagForInTable(token);
1203         break;
1204     case InRowMode:
1205         ASSERT(insertionMode() == InRowMode);
1206         if (isTableCellContextTag(token->name())) {
1207             m_tree.openElements()->popUntilTableRowScopeMarker();
1208             m_tree.insertHTMLElement(token);
1209             setInsertionMode(InCellMode);
1210             m_tree.activeFormattingElements()->appendMarker();
1211             return;
1212         }
1213         if (token->name() == trTag
1214             || isCaptionColOrColgroupTag(token->name())
1215             || isTableBodyContextTag(token->name())) {
1216             if (!processTrEndTagForInRow()) {
1217                 ASSERT(isParsingFragmentOrTemplateContents());
1218                 return;
1219             }
1220             ASSERT(insertionMode() == InTableBodyMode);
1221             processStartTag(token);
1222             return;
1223         }
1224         processStartTagForInTable(token);
1225         break;
1226     case InCellMode:
1227         ASSERT(insertionMode() == InCellMode);
1228         if (isCaptionColOrColgroupTag(token->name())
1229             || isTableCellContextTag(token->name())
1230             || token->name() == trTag
1231             || isTableBodyContextTag(token->name())) {
1232             // FIXME: This could be more efficient.
1233             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1234                 ASSERT(isParsingFragment());
1235                 parseError(token);
1236                 return;
1237             }
1238             closeTheCell();
1239             processStartTag(token);
1240             return;
1241         }
1242         processStartTagForInBody(token);
1243         break;
1244     case AfterBodyMode:
1245     case AfterAfterBodyMode:
1246         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1247         if (token->name() == htmlTag) {
1248             processHtmlStartTagForInBody(token);
1249             return;
1250         }
1251         setInsertionMode(InBodyMode);
1252         processStartTag(token);
1253         break;
1254     case InHeadNoscriptMode:
1255         ASSERT(insertionMode() == InHeadNoscriptMode);
1256         if (token->name() == htmlTag) {
1257             processHtmlStartTagForInBody(token);
1258             return;
1259         }
1260         if (token->name() == basefontTag
1261             || token->name() == bgsoundTag
1262             || token->name() == linkTag
1263             || token->name() == metaTag
1264             || token->name() == noframesTag
1265             || token->name() == styleTag) {
1266             bool didProcess = processStartTagForInHead(token);
1267             ASSERT_UNUSED(didProcess, didProcess);
1268             return;
1269         }
1270         if (token->name() == htmlTag || token->name() == noscriptTag) {
1271             parseError(token);
1272             return;
1273         }
1274         defaultForInHeadNoscript();
1275         processToken(token);
1276         break;
1277     case InFramesetMode:
1278         ASSERT(insertionMode() == InFramesetMode);
1279         if (token->name() == htmlTag) {
1280             processHtmlStartTagForInBody(token);
1281             return;
1282         }
1283         if (token->name() == framesetTag) {
1284             m_tree.insertHTMLElement(token);
1285             return;
1286         }
1287         if (token->name() == frameTag) {
1288             m_tree.insertSelfClosingHTMLElement(token);
1289             return;
1290         }
1291         if (token->name() == noframesTag) {
1292             processStartTagForInHead(token);
1293             return;
1294         }
1295         if (token->name() == templateTag) {
1296             processTemplateStartTag(token);
1297             return;
1298         }
1299         parseError(token);
1300         break;
1301     case AfterFramesetMode:
1302     case AfterAfterFramesetMode:
1303         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1304         if (token->name() == htmlTag) {
1305             processHtmlStartTagForInBody(token);
1306             return;
1307         }
1308         if (token->name() == noframesTag) {
1309             processStartTagForInHead(token);
1310             return;
1311         }
1312         parseError(token);
1313         break;
1314     case InSelectInTableMode:
1315         ASSERT(insertionMode() == InSelectInTableMode);
1316         if (token->name() == captionTag
1317             || token->name() == tableTag
1318             || isTableBodyContextTag(token->name())
1319             || token->name() == trTag
1320             || isTableCellContextTag(token->name())) {
1321             parseError(token);
1322             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1323             processEndTag(&endSelect);
1324             processStartTag(token);
1325             return;
1326         }
1327         // Fall through
1328     case InSelectMode:
1329         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1330         if (token->name() == htmlTag) {
1331             processHtmlStartTagForInBody(token);
1332             return;
1333         }
1334         if (token->name() == optionTag) {
1335             if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1336                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1337                 processEndTag(&endOption);
1338             }
1339             m_tree.insertHTMLElement(token);
1340             return;
1341         }
1342         if (token->name() == optgroupTag) {
1343             if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1344                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1345                 processEndTag(&endOption);
1346             }
1347             if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
1348                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1349                 processEndTag(&endOptgroup);
1350             }
1351             m_tree.insertHTMLElement(token);
1352             return;
1353         }
1354         if (token->name() == selectTag) {
1355             parseError(token);
1356             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1357             processEndTag(&endSelect);
1358             return;
1359         }
1360         if (token->name() == inputTag
1361             || token->name() == keygenTag
1362             || token->name() == textareaTag) {
1363             parseError(token);
1364             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1365                 ASSERT(isParsingFragment());
1366                 return;
1367             }
1368             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1369             processEndTag(&endSelect);
1370             processStartTag(token);
1371             return;
1372         }
1373         if (token->name() == scriptTag) {
1374             bool didProcess = processStartTagForInHead(token);
1375             ASSERT_UNUSED(didProcess, didProcess);
1376             return;
1377         }
1378         if (token->name() == templateTag) {
1379             processTemplateStartTag(token);
1380             return;
1381         }
1382         break;
1383     case InTableTextMode:
1384         defaultForInTableText();
1385         processStartTag(token);
1386         break;
1387     case TextMode:
1388         ASSERT_NOT_REACHED();
1389         break;
1390     case TemplateContentsMode:
1391         if (token->name() == templateTag) {
1392             processTemplateStartTag(token);
1393             return;
1394         }
1395 
1396         if (token->name() == linkTag
1397             || token->name() == scriptTag
1398             || token->name() == styleTag
1399             || token->name() == metaTag) {
1400             processStartTagForInHead(token);
1401             return;
1402         }
1403 
1404         InsertionMode insertionMode = TemplateContentsMode;
1405         if (token->name() == frameTag)
1406             insertionMode = InFramesetMode;
1407         else if (token->name() == colTag)
1408             insertionMode = InColumnGroupMode;
1409         else if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name()))
1410             insertionMode = InTableMode;
1411         else if (token->name() == trTag)
1412             insertionMode = InTableBodyMode;
1413         else if (isTableCellContextTag(token->name()))
1414             insertionMode = InRowMode;
1415         else
1416             insertionMode = InBodyMode;
1417 
1418         ASSERT(insertionMode != TemplateContentsMode);
1419         ASSERT(m_templateInsertionModes.last() == TemplateContentsMode);
1420         m_templateInsertionModes.last() = insertionMode;
1421         setInsertionMode(insertionMode);
1422 
1423         processStartTag(token);
1424         break;
1425     }
1426 }
1427 
processHtmlStartTagForInBody(AtomicHTMLToken * token)1428 void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken* token)
1429 {
1430     parseError(token);
1431     if (m_tree.openElements()->hasTemplateInHTMLScope()) {
1432         ASSERT(isParsingTemplateContents());
1433         return;
1434     }
1435     m_tree.insertHTMLHtmlStartTagInBody(token);
1436 }
1437 
processBodyEndTagForInBody(AtomicHTMLToken * token)1438 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken* token)
1439 {
1440     ASSERT(token->type() == HTMLToken::EndTag);
1441     ASSERT(token->name() == bodyTag);
1442     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1443         parseError(token);
1444         return false;
1445     }
1446     notImplemented(); // Emit a more specific parse error based on stack contents.
1447     setInsertionMode(AfterBodyMode);
1448     return true;
1449 }
1450 
processAnyOtherEndTagForInBody(AtomicHTMLToken * token)1451 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token)
1452 {
1453     ASSERT(token->type() == HTMLToken::EndTag);
1454     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1455     while (1) {
1456         RefPtrWillBeRawPtr<HTMLStackItem> item = record->stackItem();
1457         if (item->matchesHTMLTag(token->name())) {
1458             m_tree.generateImpliedEndTagsWithExclusion(token->name());
1459             if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1460                 parseError(token);
1461             m_tree.openElements()->popUntilPopped(item->element());
1462             return;
1463         }
1464         if (item->isSpecialNode()) {
1465             parseError(token);
1466             return;
1467         }
1468         record = record->next();
1469     }
1470 }
1471 
1472 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
callTheAdoptionAgency(AtomicHTMLToken * token)1473 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token)
1474 {
1475     // The adoption agency algorithm is N^2. We limit the number of iterations
1476     // to stop from hanging the whole browser. This limit is specified in the
1477     // adoption agency algorithm:
1478     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1479     static const int outerIterationLimit = 8;
1480     static const int innerIterationLimit = 3;
1481 
1482     // 1, 2, 3 and 16 are covered by the for() loop.
1483     for (int i = 0; i < outerIterationLimit; ++i) {
1484         // 4.
1485         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token->name());
1486         // 4.a
1487         if (!formattingElement)
1488             return processAnyOtherEndTagForInBody(token);
1489         // 4.c
1490         if ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement)) {
1491             parseError(token);
1492             notImplemented(); // Check the stack of open elements for a more specific parse error.
1493             return;
1494         }
1495         // 4.b
1496         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1497         if (!formattingElementRecord) {
1498             parseError(token);
1499             m_tree.activeFormattingElements()->remove(formattingElement);
1500             return;
1501         }
1502         // 4.d
1503         if (formattingElement != m_tree.currentElement())
1504             parseError(token);
1505         // 5.
1506         HTMLElementStack::ElementRecord* furthestBlock = m_tree.openElements()->furthestBlockForFormattingElement(formattingElement);
1507         // 6.
1508         if (!furthestBlock) {
1509             m_tree.openElements()->popUntilPopped(formattingElement);
1510             m_tree.activeFormattingElements()->remove(formattingElement);
1511             return;
1512         }
1513         // 7.
1514         ASSERT(furthestBlock->isAbove(formattingElementRecord));
1515         RefPtrWillBeRawPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem();
1516         // 8.
1517         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1518         // 9.
1519         HTMLElementStack::ElementRecord* node = furthestBlock;
1520         HTMLElementStack::ElementRecord* nextNode = node->next();
1521         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1522         // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
1523         for (int i = 0; i < innerIterationLimit; ++i) {
1524             // 9.4
1525             node = nextNode;
1526             ASSERT(node);
1527             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5.
1528             // 9.5
1529             if (!m_tree.activeFormattingElements()->contains(node->element())) {
1530                 m_tree.openElements()->remove(node->element());
1531                 node = 0;
1532                 continue;
1533             }
1534             // 9.6
1535             if (node == formattingElementRecord)
1536                 break;
1537             // 9.7
1538             RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get());
1539 
1540             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1541             nodeEntry->replaceElement(newItem);
1542             node->replaceElement(newItem.release());
1543 
1544             // 9.8
1545             if (lastNode == furthestBlock)
1546                 bookmark.moveToAfter(nodeEntry);
1547             // 9.9
1548             m_tree.reparent(node, lastNode);
1549             // 9.10
1550             lastNode = node;
1551         }
1552         // 10.
1553         m_tree.insertAlreadyParsedChild(commonAncestor.get(), lastNode);
1554         // 11.
1555         RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get());
1556         // 12.
1557         m_tree.takeAllChildren(newItem.get(), furthestBlock);
1558         // 13.
1559         m_tree.reparent(furthestBlock, newItem.get());
1560         // 14.
1561         m_tree.activeFormattingElements()->swapTo(formattingElement, newItem, bookmark);
1562         // 15.
1563         m_tree.openElements()->remove(formattingElement);
1564         m_tree.openElements()->insertAbove(newItem, furthestBlock);
1565     }
1566 }
1567 
resetInsertionModeAppropriately()1568 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1569 {
1570     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1571     bool last = false;
1572     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1573     while (1) {
1574         RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
1575         if (item->node() == m_tree.openElements()->rootNode()) {
1576             last = true;
1577             if (isParsingFragment())
1578                 item = m_fragmentContext.contextElementStackItem();
1579         }
1580         if (item->hasTagName(templateTag))
1581             return setInsertionMode(m_templateInsertionModes.last());
1582         if (item->hasTagName(selectTag)) {
1583             if (!last) {
1584                 while (item->node() != m_tree.openElements()->rootNode() && !item->hasTagName(templateTag)) {
1585                     nodeRecord = nodeRecord->next();
1586                     item = nodeRecord->stackItem();
1587                     if (item->hasTagName(tableTag))
1588                         return setInsertionMode(InSelectInTableMode);
1589                 }
1590             }
1591             return setInsertionMode(InSelectMode);
1592         }
1593         if (item->hasTagName(tdTag) || item->hasTagName(thTag))
1594             return setInsertionMode(InCellMode);
1595         if (item->hasTagName(trTag))
1596             return setInsertionMode(InRowMode);
1597         if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag))
1598             return setInsertionMode(InTableBodyMode);
1599         if (item->hasTagName(captionTag))
1600             return setInsertionMode(InCaptionMode);
1601         if (item->hasTagName(colgroupTag)) {
1602             return setInsertionMode(InColumnGroupMode);
1603         }
1604         if (item->hasTagName(tableTag))
1605             return setInsertionMode(InTableMode);
1606         if (item->hasTagName(headTag)) {
1607             if (!m_fragmentContext.fragment() || m_fragmentContext.contextElement() != item->node())
1608                 return setInsertionMode(InHeadMode);
1609             return setInsertionMode(InBodyMode);
1610         }
1611         if (item->hasTagName(bodyTag))
1612             return setInsertionMode(InBodyMode);
1613         if (item->hasTagName(framesetTag)) {
1614             return setInsertionMode(InFramesetMode);
1615         }
1616         if (item->hasTagName(htmlTag)) {
1617             if (m_tree.headStackItem())
1618                 return setInsertionMode(AfterHeadMode);
1619 
1620             ASSERT(isParsingFragment());
1621             return setInsertionMode(BeforeHeadMode);
1622         }
1623         if (last) {
1624             ASSERT(isParsingFragment());
1625             return setInsertionMode(InBodyMode);
1626         }
1627         nodeRecord = nodeRecord->next();
1628     }
1629 }
1630 
processEndTagForInTableBody(AtomicHTMLToken * token)1631 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken* token)
1632 {
1633     ASSERT(token->type() == HTMLToken::EndTag);
1634     if (isTableBodyContextTag(token->name())) {
1635         if (!m_tree.openElements()->inTableScope(token->name())) {
1636             parseError(token);
1637             return;
1638         }
1639         m_tree.openElements()->popUntilTableBodyScopeMarker();
1640         m_tree.openElements()->pop();
1641         setInsertionMode(InTableMode);
1642         return;
1643     }
1644     if (token->name() == tableTag) {
1645         // FIXME: This is slow.
1646         if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1647             ASSERT(isParsingFragmentOrTemplateContents());
1648             parseError(token);
1649             return;
1650         }
1651         m_tree.openElements()->popUntilTableBodyScopeMarker();
1652         ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1653         processFakeEndTag(m_tree.currentStackItem()->localName());
1654         processEndTag(token);
1655         return;
1656     }
1657     if (token->name() == bodyTag
1658         || isCaptionColOrColgroupTag(token->name())
1659         || token->name() == htmlTag
1660         || isTableCellContextTag(token->name())
1661         || token->name() == trTag) {
1662         parseError(token);
1663         return;
1664     }
1665     processEndTagForInTable(token);
1666 }
1667 
processEndTagForInRow(AtomicHTMLToken * token)1668 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken* token)
1669 {
1670     ASSERT(token->type() == HTMLToken::EndTag);
1671     if (token->name() == trTag) {
1672         processTrEndTagForInRow();
1673         return;
1674     }
1675     if (token->name() == tableTag) {
1676         if (!processTrEndTagForInRow()) {
1677             ASSERT(isParsingFragmentOrTemplateContents());
1678             return;
1679         }
1680         ASSERT(insertionMode() == InTableBodyMode);
1681         processEndTag(token);
1682         return;
1683     }
1684     if (isTableBodyContextTag(token->name())) {
1685         if (!m_tree.openElements()->inTableScope(token->name())) {
1686             parseError(token);
1687             return;
1688         }
1689         processFakeEndTag(trTag);
1690         ASSERT(insertionMode() == InTableBodyMode);
1691         processEndTag(token);
1692         return;
1693     }
1694     if (token->name() == bodyTag
1695         || isCaptionColOrColgroupTag(token->name())
1696         || token->name() == htmlTag
1697         || isTableCellContextTag(token->name())) {
1698         parseError(token);
1699         return;
1700     }
1701     processEndTagForInTable(token);
1702 }
1703 
processEndTagForInCell(AtomicHTMLToken * token)1704 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken* token)
1705 {
1706     ASSERT(token->type() == HTMLToken::EndTag);
1707     if (isTableCellContextTag(token->name())) {
1708         if (!m_tree.openElements()->inTableScope(token->name())) {
1709             parseError(token);
1710             return;
1711         }
1712         m_tree.generateImpliedEndTags();
1713         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1714             parseError(token);
1715         m_tree.openElements()->popUntilPopped(token->name());
1716         m_tree.activeFormattingElements()->clearToLastMarker();
1717         setInsertionMode(InRowMode);
1718         return;
1719     }
1720     if (token->name() == bodyTag
1721         || isCaptionColOrColgroupTag(token->name())
1722         || token->name() == htmlTag) {
1723         parseError(token);
1724         return;
1725     }
1726     if (token->name() == tableTag
1727         || token->name() == trTag
1728         || isTableBodyContextTag(token->name())) {
1729         if (!m_tree.openElements()->inTableScope(token->name())) {
1730             ASSERT(isTableBodyContextTag(token->name()) || m_tree.openElements()->inTableScope(templateTag) || isParsingFragment());
1731             parseError(token);
1732             return;
1733         }
1734         closeTheCell();
1735         processEndTag(token);
1736         return;
1737     }
1738     processEndTagForInBody(token);
1739 }
1740 
processEndTagForInBody(AtomicHTMLToken * token)1741 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token)
1742 {
1743     ASSERT(token->type() == HTMLToken::EndTag);
1744     if (token->name() == bodyTag) {
1745         processBodyEndTagForInBody(token);
1746         return;
1747     }
1748     if (token->name() == htmlTag) {
1749         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1750         if (processBodyEndTagForInBody(&endBody))
1751             processEndTag(token);
1752         return;
1753     }
1754     if (token->name() == addressTag
1755         || token->name() == articleTag
1756         || token->name() == asideTag
1757         || token->name() == blockquoteTag
1758         || token->name() == buttonTag
1759         || token->name() == centerTag
1760         || token->name() == detailsTag
1761         || token->name() == dirTag
1762         || token->name() == divTag
1763         || token->name() == dlTag
1764         || token->name() == fieldsetTag
1765         || token->name() == figcaptionTag
1766         || token->name() == figureTag
1767         || token->name() == footerTag
1768         || token->name() == headerTag
1769         || token->name() == hgroupTag
1770         || token->name() == listingTag
1771         || token->name() == mainTag
1772         || token->name() == menuTag
1773         || token->name() == navTag
1774         || token->name() == olTag
1775         || token->name() == preTag
1776         || token->name() == sectionTag
1777         || token->name() == summaryTag
1778         || token->name() == ulTag) {
1779         if (!m_tree.openElements()->inScope(token->name())) {
1780             parseError(token);
1781             return;
1782         }
1783         m_tree.generateImpliedEndTags();
1784         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1785             parseError(token);
1786         m_tree.openElements()->popUntilPopped(token->name());
1787         return;
1788     }
1789     if (token->name() == formTag) {
1790         RefPtrWillBeRawPtr<Element> node = m_tree.takeForm();
1791         if (!node || !m_tree.openElements()->inScope(node.get())) {
1792             parseError(token);
1793             return;
1794         }
1795         m_tree.generateImpliedEndTags();
1796         if (m_tree.currentElement() != node.get())
1797             parseError(token);
1798         m_tree.openElements()->remove(node.get());
1799     }
1800     if (token->name() == pTag) {
1801         if (!m_tree.openElements()->inButtonScope(token->name())) {
1802             parseError(token);
1803             processFakeStartTag(pTag);
1804             ASSERT(m_tree.openElements()->inScope(token->name()));
1805             processEndTag(token);
1806             return;
1807         }
1808         m_tree.generateImpliedEndTagsWithExclusion(token->name());
1809         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1810             parseError(token);
1811         m_tree.openElements()->popUntilPopped(token->name());
1812         return;
1813     }
1814     if (token->name() == liTag) {
1815         if (!m_tree.openElements()->inListItemScope(token->name())) {
1816             parseError(token);
1817             return;
1818         }
1819         m_tree.generateImpliedEndTagsWithExclusion(token->name());
1820         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1821             parseError(token);
1822         m_tree.openElements()->popUntilPopped(token->name());
1823         return;
1824     }
1825     if (token->name() == ddTag
1826         || token->name() == dtTag) {
1827         if (!m_tree.openElements()->inScope(token->name())) {
1828             parseError(token);
1829             return;
1830         }
1831         m_tree.generateImpliedEndTagsWithExclusion(token->name());
1832         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1833             parseError(token);
1834         m_tree.openElements()->popUntilPopped(token->name());
1835         return;
1836     }
1837     if (isNumberedHeaderTag(token->name())) {
1838         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1839             parseError(token);
1840             return;
1841         }
1842         m_tree.generateImpliedEndTags();
1843         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1844             parseError(token);
1845         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1846         return;
1847     }
1848     if (isFormattingTag(token->name())) {
1849         callTheAdoptionAgency(token);
1850         return;
1851     }
1852     if (token->name() == appletTag
1853         || token->name() == marqueeTag
1854         || token->name() == objectTag) {
1855         if (!m_tree.openElements()->inScope(token->name())) {
1856             parseError(token);
1857             return;
1858         }
1859         m_tree.generateImpliedEndTags();
1860         if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1861             parseError(token);
1862         m_tree.openElements()->popUntilPopped(token->name());
1863         m_tree.activeFormattingElements()->clearToLastMarker();
1864         return;
1865     }
1866     if (token->name() == brTag) {
1867         parseError(token);
1868         processFakeStartTag(brTag);
1869         return;
1870     }
1871     if (token->name() == templateTag) {
1872         processTemplateEndTag(token);
1873         return;
1874     }
1875     processAnyOtherEndTagForInBody(token);
1876 }
1877 
processCaptionEndTagForInCaption()1878 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1879 {
1880     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1881         ASSERT(isParsingFragment());
1882         // FIXME: parse error
1883         return false;
1884     }
1885     m_tree.generateImpliedEndTags();
1886     // FIXME: parse error if (!m_tree.currentStackItem()->hasTagName(captionTag))
1887     m_tree.openElements()->popUntilPopped(captionTag.localName());
1888     m_tree.activeFormattingElements()->clearToLastMarker();
1889     setInsertionMode(InTableMode);
1890     return true;
1891 }
1892 
processTrEndTagForInRow()1893 bool HTMLTreeBuilder::processTrEndTagForInRow()
1894 {
1895     if (!m_tree.openElements()->inTableScope(trTag)) {
1896         ASSERT(isParsingFragmentOrTemplateContents());
1897         // FIXME: parse error
1898         return false;
1899     }
1900     m_tree.openElements()->popUntilTableRowScopeMarker();
1901     ASSERT(m_tree.currentStackItem()->hasTagName(trTag));
1902     m_tree.openElements()->pop();
1903     setInsertionMode(InTableBodyMode);
1904     return true;
1905 }
1906 
processTableEndTagForInTable()1907 bool HTMLTreeBuilder::processTableEndTagForInTable()
1908 {
1909     if (!m_tree.openElements()->inTableScope(tableTag)) {
1910         ASSERT(isParsingFragmentOrTemplateContents());
1911         // FIXME: parse error.
1912         return false;
1913     }
1914     m_tree.openElements()->popUntilPopped(tableTag.localName());
1915     resetInsertionModeAppropriately();
1916     return true;
1917 }
1918 
processEndTagForInTable(AtomicHTMLToken * token)1919 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken* token)
1920 {
1921     ASSERT(token->type() == HTMLToken::EndTag);
1922     if (token->name() == tableTag) {
1923         processTableEndTagForInTable();
1924         return;
1925     }
1926     if (token->name() == bodyTag
1927         || isCaptionColOrColgroupTag(token->name())
1928         || token->name() == htmlTag
1929         || isTableBodyContextTag(token->name())
1930         || isTableCellContextTag(token->name())
1931         || token->name() == trTag) {
1932         parseError(token);
1933         return;
1934     }
1935     parseError(token);
1936     // Is this redirection necessary here?
1937     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1938     processEndTagForInBody(token);
1939 }
1940 
processEndTag(AtomicHTMLToken * token)1941 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
1942 {
1943     ASSERT(token->type() == HTMLToken::EndTag);
1944     switch (insertionMode()) {
1945     case InitialMode:
1946         ASSERT(insertionMode() == InitialMode);
1947         defaultForInitial();
1948         // Fall through.
1949     case BeforeHTMLMode:
1950         ASSERT(insertionMode() == BeforeHTMLMode);
1951         if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1952             parseError(token);
1953             return;
1954         }
1955         defaultForBeforeHTML();
1956         // Fall through.
1957     case BeforeHeadMode:
1958         ASSERT(insertionMode() == BeforeHeadMode);
1959         if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1960             parseError(token);
1961             return;
1962         }
1963         defaultForBeforeHead();
1964         // Fall through.
1965     case InHeadMode:
1966         ASSERT(insertionMode() == InHeadMode);
1967         // FIXME: This case should be broken out into processEndTagForInHead,
1968         // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
1969         // but because the logic falls through to AfterHeadMode, that gets a little messy.
1970         if (token->name() == templateTag) {
1971             processTemplateEndTag(token);
1972             return;
1973         }
1974         if (token->name() == headTag) {
1975             m_tree.openElements()->popHTMLHeadElement();
1976             setInsertionMode(AfterHeadMode);
1977             return;
1978         }
1979         if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1980             parseError(token);
1981             return;
1982         }
1983         defaultForInHead();
1984         // Fall through.
1985     case AfterHeadMode:
1986         ASSERT(insertionMode() == AfterHeadMode);
1987         if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1988             parseError(token);
1989             return;
1990         }
1991         defaultForAfterHead();
1992         // Fall through
1993     case InBodyMode:
1994         ASSERT(insertionMode() == InBodyMode);
1995         processEndTagForInBody(token);
1996         break;
1997     case InTableMode:
1998         ASSERT(insertionMode() == InTableMode);
1999         processEndTagForInTable(token);
2000         break;
2001     case InCaptionMode:
2002         ASSERT(insertionMode() == InCaptionMode);
2003         if (token->name() == captionTag) {
2004             processCaptionEndTagForInCaption();
2005             return;
2006         }
2007         if (token->name() == tableTag) {
2008             parseError(token);
2009             if (!processCaptionEndTagForInCaption()) {
2010                 ASSERT(isParsingFragment());
2011                 return;
2012             }
2013             processEndTag(token);
2014             return;
2015         }
2016         if (token->name() == bodyTag
2017             || token->name() == colTag
2018             || token->name() == colgroupTag
2019             || token->name() == htmlTag
2020             || isTableBodyContextTag(token->name())
2021             || isTableCellContextTag(token->name())
2022             || token->name() == trTag) {
2023             parseError(token);
2024             return;
2025         }
2026         processEndTagForInBody(token);
2027         break;
2028     case InColumnGroupMode:
2029         ASSERT(insertionMode() == InColumnGroupMode);
2030         if (token->name() == colgroupTag) {
2031             processColgroupEndTagForInColumnGroup();
2032             return;
2033         }
2034         if (token->name() == colTag) {
2035             parseError(token);
2036             return;
2037         }
2038         if (token->name() == templateTag) {
2039             processTemplateEndTag(token);
2040             return;
2041         }
2042         if (!processColgroupEndTagForInColumnGroup()) {
2043             ASSERT(isParsingFragmentOrTemplateContents());
2044             return;
2045         }
2046         processEndTag(token);
2047         break;
2048     case InRowMode:
2049         ASSERT(insertionMode() == InRowMode);
2050         processEndTagForInRow(token);
2051         break;
2052     case InCellMode:
2053         ASSERT(insertionMode() == InCellMode);
2054         processEndTagForInCell(token);
2055         break;
2056     case InTableBodyMode:
2057         ASSERT(insertionMode() == InTableBodyMode);
2058         processEndTagForInTableBody(token);
2059         break;
2060     case AfterBodyMode:
2061         ASSERT(insertionMode() == AfterBodyMode);
2062         if (token->name() == htmlTag) {
2063             if (isParsingFragment()) {
2064                 parseError(token);
2065                 return;
2066             }
2067             setInsertionMode(AfterAfterBodyMode);
2068             return;
2069         }
2070         // Fall through.
2071     case AfterAfterBodyMode:
2072         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2073         parseError(token);
2074         setInsertionMode(InBodyMode);
2075         processEndTag(token);
2076         break;
2077     case InHeadNoscriptMode:
2078         ASSERT(insertionMode() == InHeadNoscriptMode);
2079         if (token->name() == noscriptTag) {
2080             ASSERT(m_tree.currentStackItem()->hasTagName(noscriptTag));
2081             m_tree.openElements()->pop();
2082             ASSERT(m_tree.currentStackItem()->hasTagName(headTag));
2083             setInsertionMode(InHeadMode);
2084             return;
2085         }
2086         if (token->name() != brTag) {
2087             parseError(token);
2088             return;
2089         }
2090         defaultForInHeadNoscript();
2091         processToken(token);
2092         break;
2093     case TextMode:
2094         if (token->name() == scriptTag) {
2095             // Pause ourselves so that parsing stops until the script can be processed by the caller.
2096             ASSERT(m_tree.currentStackItem()->hasTagName(scriptTag));
2097             if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2098                 m_scriptToProcess = m_tree.currentElement();
2099             m_tree.openElements()->pop();
2100             setInsertionMode(m_originalInsertionMode);
2101 
2102             if (m_parser->tokenizer()) {
2103                 // We must set the tokenizer's state to
2104                 // DataState explicitly if the tokenizer didn't have a chance to.
2105                 ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_options.useThreading);
2106                 m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
2107             }
2108             return;
2109         }
2110         m_tree.openElements()->pop();
2111         setInsertionMode(m_originalInsertionMode);
2112         break;
2113     case InFramesetMode:
2114         ASSERT(insertionMode() == InFramesetMode);
2115         if (token->name() == framesetTag) {
2116             bool ignoreFramesetForFragmentParsing  = m_tree.currentIsRootNode();
2117             ignoreFramesetForFragmentParsing = ignoreFramesetForFragmentParsing || m_tree.openElements()->hasTemplateInHTMLScope();
2118             if (ignoreFramesetForFragmentParsing) {
2119                 ASSERT(isParsingFragmentOrTemplateContents());
2120                 parseError(token);
2121                 return;
2122             }
2123             m_tree.openElements()->pop();
2124             if (!isParsingFragment() && !m_tree.currentStackItem()->hasTagName(framesetTag))
2125                 setInsertionMode(AfterFramesetMode);
2126             return;
2127         }
2128         if (token->name() == templateTag) {
2129             processTemplateEndTag(token);
2130             return;
2131         }
2132         break;
2133     case AfterFramesetMode:
2134         ASSERT(insertionMode() == AfterFramesetMode);
2135         if (token->name() == htmlTag) {
2136             setInsertionMode(AfterAfterFramesetMode);
2137             return;
2138         }
2139         // Fall through.
2140     case AfterAfterFramesetMode:
2141         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2142         parseError(token);
2143         break;
2144     case InSelectInTableMode:
2145         ASSERT(insertionMode() == InSelectInTableMode);
2146         if (token->name() == captionTag
2147             || token->name() == tableTag
2148             || isTableBodyContextTag(token->name())
2149             || token->name() == trTag
2150             || isTableCellContextTag(token->name())) {
2151             parseError(token);
2152             if (m_tree.openElements()->inTableScope(token->name())) {
2153                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2154                 processEndTag(&endSelect);
2155                 processEndTag(token);
2156             }
2157             return;
2158         }
2159         // Fall through.
2160     case InSelectMode:
2161         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2162         if (token->name() == optgroupTag) {
2163             if (m_tree.currentStackItem()->hasTagName(optionTag) && m_tree.oneBelowTop() && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2164                 processFakeEndTag(optionTag);
2165             if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
2166                 m_tree.openElements()->pop();
2167                 return;
2168             }
2169             parseError(token);
2170             return;
2171         }
2172         if (token->name() == optionTag) {
2173             if (m_tree.currentStackItem()->hasTagName(optionTag)) {
2174                 m_tree.openElements()->pop();
2175                 return;
2176             }
2177             parseError(token);
2178             return;
2179         }
2180         if (token->name() == selectTag) {
2181             if (!m_tree.openElements()->inSelectScope(token->name())) {
2182                 ASSERT(isParsingFragment());
2183                 parseError(token);
2184                 return;
2185             }
2186             m_tree.openElements()->popUntilPopped(selectTag.localName());
2187             resetInsertionModeAppropriately();
2188             return;
2189         }
2190         if (token->name() == templateTag) {
2191             processTemplateEndTag(token);
2192             return;
2193         }
2194         break;
2195     case InTableTextMode:
2196         defaultForInTableText();
2197         processEndTag(token);
2198         break;
2199     case TemplateContentsMode:
2200         if (token->name() == templateTag) {
2201             processTemplateEndTag(token);
2202             return;
2203         }
2204         break;
2205     }
2206 }
2207 
processComment(AtomicHTMLToken * token)2208 void HTMLTreeBuilder::processComment(AtomicHTMLToken* token)
2209 {
2210     ASSERT(token->type() == HTMLToken::Comment);
2211     if (m_insertionMode == InitialMode
2212         || m_insertionMode == BeforeHTMLMode
2213         || m_insertionMode == AfterAfterBodyMode
2214         || m_insertionMode == AfterAfterFramesetMode) {
2215         m_tree.insertCommentOnDocument(token);
2216         return;
2217     }
2218     if (m_insertionMode == AfterBodyMode) {
2219         m_tree.insertCommentOnHTMLHtmlElement(token);
2220         return;
2221     }
2222     if (m_insertionMode == InTableTextMode) {
2223         defaultForInTableText();
2224         processComment(token);
2225         return;
2226     }
2227     m_tree.insertComment(token);
2228 }
2229 
processCharacter(AtomicHTMLToken * token)2230 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken* token)
2231 {
2232     ASSERT(token->type() == HTMLToken::Character);
2233     CharacterTokenBuffer buffer(token);
2234     processCharacterBuffer(buffer);
2235 }
2236 
processCharacterBuffer(CharacterTokenBuffer & buffer)2237 void HTMLTreeBuilder::processCharacterBuffer(CharacterTokenBuffer& buffer)
2238 {
2239 ReprocessBuffer:
2240     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2241     // Note that this logic is different than the generic \r\n collapsing
2242     // handled in the input stream preprocessor. This logic is here as an
2243     // "authoring convenience" so folks can write:
2244     //
2245     // <pre>
2246     // lorem ipsum
2247     // lorem ipsum
2248     // </pre>
2249     //
2250     // without getting an extra newline at the start of their <pre> element.
2251     if (m_shouldSkipLeadingNewline) {
2252         m_shouldSkipLeadingNewline = false;
2253         buffer.skipAtMostOneLeadingNewline();
2254         if (buffer.isEmpty())
2255             return;
2256     }
2257 
2258     switch (insertionMode()) {
2259     case InitialMode: {
2260         ASSERT(insertionMode() == InitialMode);
2261         buffer.skipLeadingWhitespace();
2262         if (buffer.isEmpty())
2263             return;
2264         defaultForInitial();
2265         // Fall through.
2266     }
2267     case BeforeHTMLMode: {
2268         ASSERT(insertionMode() == BeforeHTMLMode);
2269         buffer.skipLeadingWhitespace();
2270         if (buffer.isEmpty())
2271             return;
2272         defaultForBeforeHTML();
2273         // Fall through.
2274     }
2275     case BeforeHeadMode: {
2276         ASSERT(insertionMode() == BeforeHeadMode);
2277         buffer.skipLeadingWhitespace();
2278         if (buffer.isEmpty())
2279             return;
2280         defaultForBeforeHead();
2281         // Fall through.
2282     }
2283     case InHeadMode: {
2284         ASSERT(insertionMode() == InHeadMode);
2285         String leadingWhitespace = buffer.takeLeadingWhitespace();
2286         if (!leadingWhitespace.isEmpty())
2287             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2288         if (buffer.isEmpty())
2289             return;
2290         defaultForInHead();
2291         // Fall through.
2292     }
2293     case AfterHeadMode: {
2294         ASSERT(insertionMode() == AfterHeadMode);
2295         String leadingWhitespace = buffer.takeLeadingWhitespace();
2296         if (!leadingWhitespace.isEmpty())
2297             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2298         if (buffer.isEmpty())
2299             return;
2300         defaultForAfterHead();
2301         // Fall through.
2302     }
2303     case InBodyMode:
2304     case InCaptionMode:
2305     case TemplateContentsMode:
2306     case InCellMode: {
2307         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode || insertionMode() == TemplateContentsMode);
2308         processCharacterBufferForInBody(buffer);
2309         break;
2310     }
2311     case InTableMode:
2312     case InTableBodyMode:
2313     case InRowMode: {
2314         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2315         ASSERT(m_pendingTableCharacters.isEmpty());
2316         if (m_tree.currentStackItem()->isElementNode()
2317             && (m_tree.currentStackItem()->hasTagName(tableTag)
2318                 || m_tree.currentStackItem()->hasTagName(tbodyTag)
2319                 || m_tree.currentStackItem()->hasTagName(tfootTag)
2320                 || m_tree.currentStackItem()->hasTagName(theadTag)
2321                 || m_tree.currentStackItem()->hasTagName(trTag))) {
2322             m_originalInsertionMode = m_insertionMode;
2323             setInsertionMode(InTableTextMode);
2324             // Note that we fall through to the InTableTextMode case below.
2325         } else {
2326             HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2327             processCharacterBufferForInBody(buffer);
2328             break;
2329         }
2330         // Fall through.
2331     }
2332     case InTableTextMode: {
2333         buffer.giveRemainingTo(m_pendingTableCharacters);
2334         break;
2335     }
2336     case InColumnGroupMode: {
2337         ASSERT(insertionMode() == InColumnGroupMode);
2338         String leadingWhitespace = buffer.takeLeadingWhitespace();
2339         if (!leadingWhitespace.isEmpty())
2340             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2341         if (buffer.isEmpty())
2342             return;
2343         if (!processColgroupEndTagForInColumnGroup()) {
2344             ASSERT(isParsingFragmentOrTemplateContents());
2345             // The spec tells us to drop these characters on the floor.
2346             buffer.skipLeadingNonWhitespace();
2347             if (buffer.isEmpty())
2348                 return;
2349         }
2350         goto ReprocessBuffer;
2351     }
2352     case AfterBodyMode:
2353     case AfterAfterBodyMode: {
2354         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2355         // FIXME: parse error
2356         setInsertionMode(InBodyMode);
2357         goto ReprocessBuffer;
2358     }
2359     case TextMode: {
2360         ASSERT(insertionMode() == TextMode);
2361         m_tree.insertTextNode(buffer.takeRemaining());
2362         break;
2363     }
2364     case InHeadNoscriptMode: {
2365         ASSERT(insertionMode() == InHeadNoscriptMode);
2366         String leadingWhitespace = buffer.takeLeadingWhitespace();
2367         if (!leadingWhitespace.isEmpty())
2368             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2369         if (buffer.isEmpty())
2370             return;
2371         defaultForInHeadNoscript();
2372         goto ReprocessBuffer;
2373     }
2374     case InFramesetMode:
2375     case AfterFramesetMode: {
2376         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2377         String leadingWhitespace = buffer.takeRemainingWhitespace();
2378         if (!leadingWhitespace.isEmpty())
2379             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2380         // FIXME: We should generate a parse error if we skipped over any
2381         // non-whitespace characters.
2382         break;
2383     }
2384     case InSelectInTableMode:
2385     case InSelectMode: {
2386         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2387         m_tree.insertTextNode(buffer.takeRemaining());
2388         break;
2389     }
2390     case AfterAfterFramesetMode: {
2391         String leadingWhitespace = buffer.takeRemainingWhitespace();
2392         if (!leadingWhitespace.isEmpty()) {
2393             m_tree.reconstructTheActiveFormattingElements();
2394             m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2395         }
2396         // FIXME: We should generate a parse error if we skipped over any
2397         // non-whitespace characters.
2398         break;
2399     }
2400     }
2401 }
2402 
processCharacterBufferForInBody(CharacterTokenBuffer & buffer)2403 void HTMLTreeBuilder::processCharacterBufferForInBody(CharacterTokenBuffer& buffer)
2404 {
2405     m_tree.reconstructTheActiveFormattingElements();
2406     const String& characters = buffer.takeRemaining();
2407     m_tree.insertTextNode(characters);
2408     if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2409         m_framesetOk = false;
2410 }
2411 
processEndOfFile(AtomicHTMLToken * token)2412 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token)
2413 {
2414     ASSERT(token->type() == HTMLToken::EndOfFile);
2415     switch (insertionMode()) {
2416     case InitialMode:
2417         ASSERT(insertionMode() == InitialMode);
2418         defaultForInitial();
2419         // Fall through.
2420     case BeforeHTMLMode:
2421         ASSERT(insertionMode() == BeforeHTMLMode);
2422         defaultForBeforeHTML();
2423         // Fall through.
2424     case BeforeHeadMode:
2425         ASSERT(insertionMode() == BeforeHeadMode);
2426         defaultForBeforeHead();
2427         // Fall through.
2428     case InHeadMode:
2429         ASSERT(insertionMode() == InHeadMode);
2430         defaultForInHead();
2431         // Fall through.
2432     case AfterHeadMode:
2433         ASSERT(insertionMode() == AfterHeadMode);
2434         defaultForAfterHead();
2435         // Fall through
2436     case InBodyMode:
2437     case InCellMode:
2438     case InCaptionMode:
2439     case InRowMode:
2440         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode || insertionMode() == TemplateContentsMode);
2441         notImplemented(); // Emit parse error based on what elements are still open.
2442         if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
2443             return;
2444         break;
2445     case AfterBodyMode:
2446     case AfterAfterBodyMode:
2447         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2448         break;
2449     case InHeadNoscriptMode:
2450         ASSERT(insertionMode() == InHeadNoscriptMode);
2451         defaultForInHeadNoscript();
2452         processEndOfFile(token);
2453         return;
2454     case AfterFramesetMode:
2455     case AfterAfterFramesetMode:
2456         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2457         break;
2458     case InColumnGroupMode:
2459         if (m_tree.currentIsRootNode()) {
2460             ASSERT(isParsingFragment());
2461             return; // FIXME: Should we break here instead of returning?
2462         }
2463         ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || isHTMLTemplateElement(m_tree.currentNode()));
2464         processColgroupEndTagForInColumnGroup();
2465         // Fall through
2466     case InFramesetMode:
2467     case InTableMode:
2468     case InTableBodyMode:
2469     case InSelectInTableMode:
2470     case InSelectMode:
2471         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode || insertionMode() == InColumnGroupMode);
2472         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2473             parseError(token);
2474         if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
2475             return;
2476         break;
2477     case InTableTextMode:
2478         defaultForInTableText();
2479         processEndOfFile(token);
2480         return;
2481     case TextMode:
2482         parseError(token);
2483         if (m_tree.currentStackItem()->hasTagName(scriptTag))
2484             notImplemented(); // mark the script element as "already started".
2485         m_tree.openElements()->pop();
2486         ASSERT(m_originalInsertionMode != TextMode);
2487         setInsertionMode(m_originalInsertionMode);
2488         processEndOfFile(token);
2489         return;
2490     case TemplateContentsMode:
2491         if (processEndOfFileForInTemplateContents(token))
2492             return;
2493         break;
2494     }
2495     m_tree.processEndOfFile();
2496 }
2497 
defaultForInitial()2498 void HTMLTreeBuilder::defaultForInitial()
2499 {
2500     notImplemented();
2501     m_tree.setDefaultCompatibilityMode();
2502     // FIXME: parse error
2503     setInsertionMode(BeforeHTMLMode);
2504 }
2505 
defaultForBeforeHTML()2506 void HTMLTreeBuilder::defaultForBeforeHTML()
2507 {
2508     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2509     m_tree.insertHTMLHtmlStartTagBeforeHTML(&startHTML);
2510     setInsertionMode(BeforeHeadMode);
2511 }
2512 
defaultForBeforeHead()2513 void HTMLTreeBuilder::defaultForBeforeHead()
2514 {
2515     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2516     processStartTag(&startHead);
2517 }
2518 
defaultForInHead()2519 void HTMLTreeBuilder::defaultForInHead()
2520 {
2521     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2522     processEndTag(&endHead);
2523 }
2524 
defaultForInHeadNoscript()2525 void HTMLTreeBuilder::defaultForInHeadNoscript()
2526 {
2527     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2528     processEndTag(&endNoscript);
2529 }
2530 
defaultForAfterHead()2531 void HTMLTreeBuilder::defaultForAfterHead()
2532 {
2533     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2534     processStartTag(&startBody);
2535     m_framesetOk = true;
2536 }
2537 
defaultForInTableText()2538 void HTMLTreeBuilder::defaultForInTableText()
2539 {
2540     String characters = m_pendingTableCharacters.toString();
2541     m_pendingTableCharacters.clear();
2542     if (!isAllWhitespace(characters)) {
2543         // FIXME: parse error
2544         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2545         m_tree.reconstructTheActiveFormattingElements();
2546         m_tree.insertTextNode(characters, NotAllWhitespace);
2547         m_framesetOk = false;
2548         setInsertionMode(m_originalInsertionMode);
2549         return;
2550     }
2551     m_tree.insertTextNode(characters);
2552     setInsertionMode(m_originalInsertionMode);
2553 }
2554 
processStartTagForInHead(AtomicHTMLToken * token)2555 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken* token)
2556 {
2557     ASSERT(token->type() == HTMLToken::StartTag);
2558     if (token->name() == htmlTag) {
2559         processHtmlStartTagForInBody(token);
2560         return true;
2561     }
2562     if (token->name() == baseTag
2563         || token->name() == basefontTag
2564         || token->name() == bgsoundTag
2565         || token->name() == commandTag
2566         || token->name() == linkTag
2567         || token->name() == metaTag) {
2568         m_tree.insertSelfClosingHTMLElement(token);
2569         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2570         return true;
2571     }
2572     if (token->name() == titleTag) {
2573         processGenericRCDATAStartTag(token);
2574         return true;
2575     }
2576     if (token->name() == noscriptTag) {
2577         if (m_options.scriptEnabled) {
2578             processGenericRawTextStartTag(token);
2579             return true;
2580         }
2581         m_tree.insertHTMLElement(token);
2582         setInsertionMode(InHeadNoscriptMode);
2583         return true;
2584     }
2585     if (token->name() == noframesTag || token->name() == styleTag) {
2586         processGenericRawTextStartTag(token);
2587         return true;
2588     }
2589     if (token->name() == scriptTag) {
2590         processScriptStartTag(token);
2591         return true;
2592     }
2593     if (token->name() == templateTag) {
2594         processTemplateStartTag(token);
2595         return true;
2596     }
2597     if (token->name() == headTag) {
2598         parseError(token);
2599         return true;
2600     }
2601     return false;
2602 }
2603 
processGenericRCDATAStartTag(AtomicHTMLToken * token)2604 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
2605 {
2606     ASSERT(token->type() == HTMLToken::StartTag);
2607     m_tree.insertHTMLElement(token);
2608     if (m_parser->tokenizer())
2609         m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2610     m_originalInsertionMode = m_insertionMode;
2611     setInsertionMode(TextMode);
2612 }
2613 
processGenericRawTextStartTag(AtomicHTMLToken * token)2614 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
2615 {
2616     ASSERT(token->type() == HTMLToken::StartTag);
2617     m_tree.insertHTMLElement(token);
2618     if (m_parser->tokenizer())
2619         m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2620     m_originalInsertionMode = m_insertionMode;
2621     setInsertionMode(TextMode);
2622 }
2623 
processScriptStartTag(AtomicHTMLToken * token)2624 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
2625 {
2626     ASSERT(token->type() == HTMLToken::StartTag);
2627     m_tree.insertScriptElement(token);
2628     if (m_parser->tokenizer())
2629         m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2630     m_originalInsertionMode = m_insertionMode;
2631 
2632     TextPosition position = m_parser->textPosition();
2633 
2634     m_scriptToProcessStartPosition = position;
2635 
2636     setInsertionMode(TextMode);
2637 }
2638 
2639 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
shouldProcessTokenInForeignContent(AtomicHTMLToken * token)2640 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken* token)
2641 {
2642     if (m_tree.isEmpty())
2643         return false;
2644     HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2645 
2646     if (adjustedCurrentNode->isInHTMLNamespace())
2647         return false;
2648     if (HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode)) {
2649         if (token->type() == HTMLToken::StartTag
2650             && token->name() != MathMLNames::mglyphTag
2651             && token->name() != MathMLNames::malignmarkTag)
2652             return false;
2653         if (token->type() == HTMLToken::Character)
2654             return false;
2655     }
2656     if (adjustedCurrentNode->hasTagName(MathMLNames::annotation_xmlTag)
2657         && token->type() == HTMLToken::StartTag
2658         && token->name() == SVGNames::svgTag)
2659         return false;
2660     if (HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)) {
2661         if (token->type() == HTMLToken::StartTag)
2662             return false;
2663         if (token->type() == HTMLToken::Character)
2664             return false;
2665     }
2666     if (token->type() == HTMLToken::EndOfFile)
2667         return false;
2668     return true;
2669 }
2670 
processTokenInForeignContent(AtomicHTMLToken * token)2671 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken* token)
2672 {
2673     if (token->type() == HTMLToken::Character) {
2674         const String& characters = token->characters();
2675         m_tree.insertTextNode(characters);
2676         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2677             m_framesetOk = false;
2678         return;
2679     }
2680 
2681     m_tree.flush();
2682     HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2683 
2684     switch (token->type()) {
2685     case HTMLToken::Uninitialized:
2686         ASSERT_NOT_REACHED();
2687         break;
2688     case HTMLToken::DOCTYPE:
2689         parseError(token);
2690         break;
2691     case HTMLToken::StartTag: {
2692         if (token->name() == bTag
2693             || token->name() == bigTag
2694             || token->name() == blockquoteTag
2695             || token->name() == bodyTag
2696             || token->name() == brTag
2697             || token->name() == centerTag
2698             || token->name() == codeTag
2699             || token->name() == ddTag
2700             || token->name() == divTag
2701             || token->name() == dlTag
2702             || token->name() == dtTag
2703             || token->name() == emTag
2704             || token->name() == embedTag
2705             || isNumberedHeaderTag(token->name())
2706             || token->name() == headTag
2707             || token->name() == hrTag
2708             || token->name() == iTag
2709             || token->name() == imgTag
2710             || token->name() == liTag
2711             || token->name() == listingTag
2712             || token->name() == menuTag
2713             || token->name() == metaTag
2714             || token->name() == nobrTag
2715             || token->name() == olTag
2716             || token->name() == pTag
2717             || token->name() == preTag
2718             || token->name() == rubyTag
2719             || token->name() == sTag
2720             || token->name() == smallTag
2721             || token->name() == spanTag
2722             || token->name() == strongTag
2723             || token->name() == strikeTag
2724             || token->name() == subTag
2725             || token->name() == supTag
2726             || token->name() == tableTag
2727             || token->name() == ttTag
2728             || token->name() == uTag
2729             || token->name() == ulTag
2730             || token->name() == varTag
2731             || (token->name() == fontTag && (token->getAttributeItem(colorAttr) || token->getAttributeItem(faceAttr) || token->getAttributeItem(sizeAttr)))) {
2732             parseError(token);
2733             m_tree.openElements()->popUntilForeignContentScopeMarker();
2734             processStartTag(token);
2735             return;
2736         }
2737         const AtomicString& currentNamespace = adjustedCurrentNode->namespaceURI();
2738         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2739             adjustMathMLAttributes(token);
2740         if (currentNamespace == SVGNames::svgNamespaceURI) {
2741             adjustSVGTagNameCase(token);
2742             adjustSVGAttributes(token);
2743         }
2744         adjustForeignAttributes(token);
2745         m_tree.insertForeignElement(token, currentNamespace);
2746         break;
2747     }
2748     case HTMLToken::EndTag: {
2749         if (adjustedCurrentNode->namespaceURI() == SVGNames::svgNamespaceURI)
2750             adjustSVGTagNameCase(token);
2751 
2752         if (token->name() == SVGNames::scriptTag && m_tree.currentStackItem()->hasTagName(SVGNames::scriptTag)) {
2753             if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2754                 m_scriptToProcess = m_tree.currentElement();
2755             m_tree.openElements()->pop();
2756             return;
2757         }
2758         if (!m_tree.currentStackItem()->isInHTMLNamespace()) {
2759             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2760             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2761             if (!nodeRecord->stackItem()->hasLocalName(token->name()))
2762                 parseError(token);
2763             while (1) {
2764                 if (nodeRecord->stackItem()->hasLocalName(token->name())) {
2765                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
2766                     return;
2767                 }
2768                 nodeRecord = nodeRecord->next();
2769 
2770                 if (nodeRecord->stackItem()->isInHTMLNamespace())
2771                     break;
2772             }
2773         }
2774         // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2775         processEndTag(token);
2776         break;
2777     }
2778     case HTMLToken::Comment:
2779         m_tree.insertComment(token);
2780         break;
2781     case HTMLToken::Character:
2782     case HTMLToken::EndOfFile:
2783         ASSERT_NOT_REACHED();
2784         break;
2785     }
2786 }
2787 
finished()2788 void HTMLTreeBuilder::finished()
2789 {
2790     if (isParsingFragment())
2791         return;
2792 
2793     ASSERT(m_templateInsertionModes.isEmpty());
2794     ASSERT(m_isAttached);
2795     // Warning, this may detach the parser. Do not do anything else after this.
2796     m_tree.finishedParsing();
2797 }
2798 
parseError(AtomicHTMLToken *)2799 void HTMLTreeBuilder::parseError(AtomicHTMLToken*)
2800 {
2801 }
2802 
2803 } // namespace WebCore
2804