1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "ScriptController.h"
50 #include "Text.h"
51 #include "XLinkNames.h"
52 #include "XMLNSNames.h"
53 #include "XMLNames.h"
54 #include <wtf/unicode/CharacterNames.h>
55
56 namespace WebCore {
57
58 using namespace HTMLNames;
59
60 static const int uninitializedLineNumberValue = -1;
61
uninitializedPositionValue1()62 static TextPosition1 uninitializedPositionValue1()
63 {
64 return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
65 }
66
67 namespace {
68
isHTMLSpaceOrReplacementCharacter(UChar character)69 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
70 {
71 return isHTMLSpace(character) || character == replacementCharacter;
72 }
73
isAllWhitespace(const String & string)74 inline bool isAllWhitespace(const String& string)
75 {
76 return string.isAllSpecialCharacters<isHTMLSpace>();
77 }
78
isAllWhitespaceOrReplacementCharacters(const String & string)79 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
80 {
81 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
82 }
83
isNumberedHeaderTag(const AtomicString & tagName)84 bool isNumberedHeaderTag(const AtomicString& tagName)
85 {
86 return tagName == h1Tag
87 || tagName == h2Tag
88 || tagName == h3Tag
89 || tagName == h4Tag
90 || tagName == h5Tag
91 || tagName == h6Tag;
92 }
93
isCaptionColOrColgroupTag(const AtomicString & tagName)94 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
95 {
96 return tagName == captionTag
97 || tagName == colTag
98 || tagName == colgroupTag;
99 }
100
isTableCellContextTag(const AtomicString & tagName)101 bool isTableCellContextTag(const AtomicString& tagName)
102 {
103 return tagName == thTag || tagName == tdTag;
104 }
105
isTableBodyContextTag(const AtomicString & tagName)106 bool isTableBodyContextTag(const AtomicString& tagName)
107 {
108 return tagName == tbodyTag
109 || tagName == tfootTag
110 || tagName == theadTag;
111 }
112
113 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
isSpecialNode(Node * node)114 bool isSpecialNode(Node* node)
115 {
116 if (node->hasTagName(MathMLNames::miTag)
117 || node->hasTagName(MathMLNames::moTag)
118 || node->hasTagName(MathMLNames::mnTag)
119 || node->hasTagName(MathMLNames::msTag)
120 || node->hasTagName(MathMLNames::mtextTag)
121 || node->hasTagName(MathMLNames::annotation_xmlTag)
122 || node->hasTagName(SVGNames::foreignObjectTag)
123 || node->hasTagName(SVGNames::descTag)
124 || node->hasTagName(SVGNames::titleTag))
125 return true;
126 if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
127 return true;
128 if (!isInHTMLNamespace(node))
129 return false;
130 const AtomicString& tagName = node->localName();
131 return tagName == addressTag
132 || tagName == appletTag
133 || tagName == areaTag
134 || tagName == articleTag
135 || tagName == asideTag
136 || tagName == baseTag
137 || tagName == basefontTag
138 || tagName == bgsoundTag
139 || tagName == blockquoteTag
140 || tagName == bodyTag
141 || tagName == brTag
142 || tagName == buttonTag
143 || tagName == captionTag
144 || tagName == centerTag
145 || tagName == colTag
146 || tagName == colgroupTag
147 || tagName == commandTag
148 || tagName == ddTag
149 || tagName == detailsTag
150 || tagName == dirTag
151 || tagName == divTag
152 || tagName == dlTag
153 || tagName == dtTag
154 || tagName == embedTag
155 || tagName == fieldsetTag
156 || tagName == figcaptionTag
157 || tagName == figureTag
158 || tagName == footerTag
159 || tagName == formTag
160 || tagName == frameTag
161 || tagName == framesetTag
162 || isNumberedHeaderTag(tagName)
163 || tagName == headTag
164 || tagName == headerTag
165 || tagName == hgroupTag
166 || tagName == hrTag
167 || tagName == htmlTag
168 || tagName == iframeTag
169 || tagName == imgTag
170 || tagName == inputTag
171 || tagName == isindexTag
172 || tagName == liTag
173 || tagName == linkTag
174 || tagName == listingTag
175 || tagName == marqueeTag
176 || tagName == menuTag
177 || tagName == metaTag
178 || tagName == navTag
179 || tagName == noembedTag
180 || tagName == noframesTag
181 || tagName == noscriptTag
182 || tagName == objectTag
183 || tagName == olTag
184 || tagName == pTag
185 || tagName == paramTag
186 || tagName == plaintextTag
187 || tagName == preTag
188 || tagName == scriptTag
189 || tagName == sectionTag
190 || tagName == selectTag
191 || tagName == styleTag
192 || tagName == summaryTag
193 || tagName == tableTag
194 || isTableBodyContextTag(tagName)
195 || tagName == tdTag
196 || tagName == textareaTag
197 || tagName == thTag
198 || tagName == titleTag
199 || tagName == trTag
200 || tagName == ulTag
201 || tagName == wbrTag
202 || tagName == xmpTag;
203 }
204
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)205 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
206 {
207 return tagName == bTag
208 || tagName == bigTag
209 || tagName == codeTag
210 || tagName == emTag
211 || tagName == fontTag
212 || tagName == iTag
213 || tagName == sTag
214 || tagName == smallTag
215 || tagName == strikeTag
216 || tagName == strongTag
217 || tagName == ttTag
218 || tagName == uTag;
219 }
220
isNonAnchorFormattingTag(const AtomicString & tagName)221 bool isNonAnchorFormattingTag(const AtomicString& tagName)
222 {
223 return tagName == nobrTag
224 || isNonAnchorNonNobrFormattingTag(tagName);
225 }
226
227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)228 bool isFormattingTag(const AtomicString& tagName)
229 {
230 return tagName == aTag || isNonAnchorFormattingTag(tagName);
231 }
232
closestFormAncestor(Element * element)233 HTMLFormElement* closestFormAncestor(Element* element)
234 {
235 while (element) {
236 if (element->hasTagName(formTag))
237 return static_cast<HTMLFormElement*>(element);
238 ContainerNode* parent = element->parentNode();
239 if (!parent || !parent->isElementNode())
240 return 0;
241 element = static_cast<Element*>(parent);
242 }
243 return 0;
244 }
245
246 } // namespace
247
248 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
249 WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
250 public:
ExternalCharacterTokenBuffer(AtomicHTMLToken & token)251 explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
252 : m_current(token.characters().data())
253 , m_end(m_current + token.characters().size())
254 {
255 ASSERT(!isEmpty());
256 }
257
ExternalCharacterTokenBuffer(const String & string)258 explicit ExternalCharacterTokenBuffer(const String& string)
259 : m_current(string.characters())
260 , m_end(m_current + string.length())
261 {
262 ASSERT(!isEmpty());
263 }
264
~ExternalCharacterTokenBuffer()265 ~ExternalCharacterTokenBuffer()
266 {
267 ASSERT(isEmpty());
268 }
269
isEmpty() const270 bool isEmpty() const { return m_current == m_end; }
271
skipLeadingWhitespace()272 void skipLeadingWhitespace()
273 {
274 skipLeading<isHTMLSpace>();
275 }
276
takeLeadingWhitespace()277 String takeLeadingWhitespace()
278 {
279 return takeLeading<isHTMLSpace>();
280 }
281
takeLeadingNonWhitespace()282 String takeLeadingNonWhitespace()
283 {
284 return takeLeading<isNotHTMLSpace>();
285 }
286
takeRemaining()287 String takeRemaining()
288 {
289 ASSERT(!isEmpty());
290 const UChar* start = m_current;
291 m_current = m_end;
292 return String(start, m_current - start);
293 }
294
giveRemainingTo(Vector<UChar> & recipient)295 void giveRemainingTo(Vector<UChar>& recipient)
296 {
297 recipient.append(m_current, m_end - m_current);
298 m_current = m_end;
299 }
300
takeRemainingWhitespace()301 String takeRemainingWhitespace()
302 {
303 ASSERT(!isEmpty());
304 Vector<UChar> whitespace;
305 do {
306 UChar cc = *m_current++;
307 if (isHTMLSpace(cc))
308 whitespace.append(cc);
309 } while (m_current < m_end);
310 // Returning the null string when there aren't any whitespace
311 // characters is slightly cleaner semantically because we don't want
312 // to insert a text node (as opposed to inserting an empty text node).
313 if (whitespace.isEmpty())
314 return String();
315 return String::adopt(whitespace);
316 }
317
318 private:
319 template<bool characterPredicate(UChar)>
skipLeading()320 void skipLeading()
321 {
322 ASSERT(!isEmpty());
323 while (characterPredicate(*m_current)) {
324 if (++m_current == m_end)
325 return;
326 }
327 }
328
329 template<bool characterPredicate(UChar)>
takeLeading()330 String takeLeading()
331 {
332 ASSERT(!isEmpty());
333 const UChar* start = m_current;
334 skipLeading<characterPredicate>();
335 if (start == m_current)
336 return String();
337 return String(start, m_current - start);
338 }
339
340 const UChar* m_current;
341 const UChar* m_end;
342 };
343
344
HTMLTreeBuilder(HTMLDocumentParser * parser,HTMLDocument * document,bool reportErrors,bool usePreHTML5ParserQuirks)345 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
346 : m_framesetOk(true)
347 , m_document(document)
348 , m_tree(document)
349 , m_reportErrors(reportErrors)
350 , m_isPaused(false)
351 , m_insertionMode(InitialMode)
352 , m_originalInsertionMode(InitialMode)
353 , m_parser(parser)
354 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
355 , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
356 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
357 , m_hasPendingForeignInsertionModeSteps(false)
358 {
359 }
360
361 // FIXME: Member variables should be grouped into self-initializing structs to
362 // minimize code duplication between these constructors.
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission,bool usePreHTML5ParserQuirks)363 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
364 : m_framesetOk(true)
365 , m_fragmentContext(fragment, contextElement, scriptingPermission)
366 , m_document(fragment->document())
367 , m_tree(fragment, scriptingPermission)
368 , m_reportErrors(false) // FIXME: Why not report errors in fragments?
369 , m_isPaused(false)
370 , m_insertionMode(InitialMode)
371 , m_originalInsertionMode(InitialMode)
372 , m_parser(parser)
373 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
374 , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
375 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
376 , m_hasPendingForeignInsertionModeSteps(false)
377 {
378 if (contextElement) {
379 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
380 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
381 // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
382 // and instead use the DocumentFragment as a root node.
383 m_tree.openElements()->pushRootNode(fragment);
384 resetInsertionModeAppropriately();
385 m_tree.setForm(closestFormAncestor(contextElement));
386 }
387 }
388
~HTMLTreeBuilder()389 HTMLTreeBuilder::~HTMLTreeBuilder()
390 {
391 }
392
detach()393 void HTMLTreeBuilder::detach()
394 {
395 // This call makes little sense in fragment mode, but for consistency
396 // DocumentParser expects detach() to always be called before it's destroyed.
397 m_document = 0;
398 // HTMLConstructionSite might be on the callstack when detach() is called
399 // otherwise we'd just call m_tree.clear() here instead.
400 m_tree.detach();
401 }
402
FragmentParsingContext()403 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
404 : m_fragment(0)
405 , m_contextElement(0)
406 , m_scriptingPermission(FragmentScriptingAllowed)
407 {
408 }
409
FragmentParsingContext(DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission)410 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
411 : m_fragment(fragment)
412 , m_contextElement(contextElement)
413 , m_scriptingPermission(scriptingPermission)
414 {
415 ASSERT(!fragment->hasChildNodes());
416 }
417
~FragmentParsingContext()418 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
419 {
420 }
421
takeScriptToProcess(TextPosition1 & scriptStartPosition)422 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
423 {
424 // Unpause ourselves, callers may pause us again when processing the script.
425 // The HTML5 spec is written as though scripts are executed inside the tree
426 // builder. We pause the parser to exit the tree builder, and then resume
427 // before running scripts.
428 m_isPaused = false;
429 scriptStartPosition = m_scriptToProcessStartPosition;
430 m_scriptToProcessStartPosition = uninitializedPositionValue1();
431 return m_scriptToProcess.release();
432 }
433
constructTreeFromToken(HTMLToken & rawToken)434 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
435 {
436 AtomicHTMLToken token(rawToken);
437
438 // We clear the rawToken in case constructTreeFromAtomicToken
439 // synchronously re-enters the parser. We don't clear the token immedately
440 // for Character tokens because the AtomicHTMLToken avoids copying the
441 // characters by keeping a pointer to the underlying buffer in the
442 // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
443 // the parser.
444 //
445 // FIXME: Top clearing the rawToken once we start running the parser off
446 // the main thread or once we stop allowing synchronous JavaScript
447 // execution from parseMappedAttribute.
448 if (rawToken.type() != HTMLToken::Character)
449 rawToken.clear();
450
451 constructTreeFromAtomicToken(token);
452
453 if (!rawToken.isUninitialized()) {
454 ASSERT(rawToken.type() == HTMLToken::Character);
455 rawToken.clear();
456 }
457 }
458
constructTreeFromAtomicToken(AtomicHTMLToken & token)459 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
460 {
461 processToken(token);
462
463 // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
464 // the U+0000 characters into replacement characters has compatibility
465 // problems.
466 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
467 m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && !isInHTMLNamespace(m_tree.currentNode()));
468 }
469
processToken(AtomicHTMLToken & token)470 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
471 {
472 switch (token.type()) {
473 case HTMLToken::Uninitialized:
474 ASSERT_NOT_REACHED();
475 break;
476 case HTMLToken::DOCTYPE:
477 processDoctypeToken(token);
478 break;
479 case HTMLToken::StartTag:
480 processStartTag(token);
481 break;
482 case HTMLToken::EndTag:
483 processEndTag(token);
484 break;
485 case HTMLToken::Comment:
486 processComment(token);
487 return;
488 case HTMLToken::Character:
489 processCharacter(token);
490 break;
491 case HTMLToken::EndOfFile:
492 processEndOfFile(token);
493 break;
494 }
495 }
496
processDoctypeToken(AtomicHTMLToken & token)497 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
498 {
499 ASSERT(token.type() == HTMLToken::DOCTYPE);
500 if (m_insertionMode == InitialMode) {
501 m_tree.insertDoctype(token);
502 setInsertionMode(BeforeHTMLMode);
503 return;
504 }
505 if (m_insertionMode == InTableTextMode) {
506 defaultForInTableText();
507 processDoctypeToken(token);
508 return;
509 }
510 parseError(token);
511 }
512
processFakeStartTag(const QualifiedName & tagName,PassRefPtr<NamedNodeMap> attributes)513 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
514 {
515 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
516 AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
517 processStartTag(fakeToken);
518 }
519
processFakeEndTag(const QualifiedName & tagName)520 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
521 {
522 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
523 AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
524 processEndTag(fakeToken);
525 }
526
processFakeCharacters(const String & characters)527 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
528 {
529 ASSERT(!characters.isEmpty());
530 ExternalCharacterTokenBuffer buffer(characters);
531 processCharacterBuffer(buffer);
532 }
533
processFakePEndTagIfPInButtonScope()534 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
535 {
536 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
537 return;
538 AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
539 processEndTag(endP);
540 }
541
attributesForIsindexInput(AtomicHTMLToken & token)542 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
543 {
544 RefPtr<NamedNodeMap> attributes = token.takeAtributes();
545 if (!attributes)
546 attributes = NamedNodeMap::create();
547 else {
548 attributes->removeAttribute(nameAttr);
549 attributes->removeAttribute(actionAttr);
550 attributes->removeAttribute(promptAttr);
551 }
552
553 RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
554 attributes->insertAttribute(mappedAttribute.release(), false);
555 return attributes.release();
556 }
557
processIsindexStartTagForInBody(AtomicHTMLToken & token)558 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
559 {
560 ASSERT(token.type() == HTMLToken::StartTag);
561 ASSERT(token.name() == isindexTag);
562 parseError(token);
563 if (m_tree.form())
564 return;
565 notImplemented(); // Acknowledge self-closing flag
566 processFakeStartTag(formTag);
567 RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
568 if (actionAttribute) {
569 ASSERT(m_tree.currentElement()->hasTagName(formTag));
570 m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
571 }
572 processFakeStartTag(hrTag);
573 processFakeStartTag(labelTag);
574 RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
575 if (promptAttribute)
576 processFakeCharacters(promptAttribute->value());
577 else
578 processFakeCharacters(searchableIndexIntroduction());
579 processFakeStartTag(inputTag, attributesForIsindexInput(token));
580 notImplemented(); // This second set of characters may be needed by non-english locales.
581 processFakeEndTag(labelTag);
582 processFakeStartTag(hrTag);
583 processFakeEndTag(formTag);
584 }
585
586 namespace {
587
isLi(const ContainerNode * element)588 bool isLi(const ContainerNode* element)
589 {
590 return element->hasTagName(liTag);
591 }
592
isDdOrDt(const ContainerNode * element)593 bool isDdOrDt(const ContainerNode* element)
594 {
595 return element->hasTagName(ddTag)
596 || element->hasTagName(dtTag);
597 }
598
599 }
600
601 template <bool shouldClose(const ContainerNode*)>
processCloseWhenNestedTag(AtomicHTMLToken & token)602 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
603 {
604 m_framesetOk = false;
605 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
606 while (1) {
607 ContainerNode* node = nodeRecord->node();
608 if (shouldClose(node)) {
609 ASSERT(node->isElementNode());
610 processFakeEndTag(toElement(node)->tagQName());
611 break;
612 }
613 if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
614 break;
615 nodeRecord = nodeRecord->next();
616 }
617 processFakePEndTagIfPInButtonScope();
618 m_tree.insertHTMLElement(token);
619 }
620
621 namespace {
622
623 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
624
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,QualifiedName ** names,size_t length)625 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
626 {
627 for (size_t i = 0; i < length; ++i) {
628 const QualifiedName& name = *names[i];
629 const AtomicString& localName = name.localName();
630 AtomicString loweredLocalName = localName.lower();
631 if (loweredLocalName != localName)
632 map->add(loweredLocalName, name);
633 }
634 }
635
adjustSVGTagNameCase(AtomicHTMLToken & token)636 void adjustSVGTagNameCase(AtomicHTMLToken& token)
637 {
638 static PrefixedNameToQualifiedNameMap* caseMap = 0;
639 if (!caseMap) {
640 caseMap = new PrefixedNameToQualifiedNameMap;
641 size_t length = 0;
642 QualifiedName** svgTags = SVGNames::getSVGTags(&length);
643 mapLoweredLocalNameToName(caseMap, svgTags, length);
644 }
645
646 const QualifiedName& casedName = caseMap->get(token.name());
647 if (casedName.localName().isNull())
648 return;
649 token.setName(casedName.localName());
650 }
651
652 template<QualifiedName** getAttrs(size_t* length)>
adjustAttributes(AtomicHTMLToken & token)653 void adjustAttributes(AtomicHTMLToken& token)
654 {
655 static PrefixedNameToQualifiedNameMap* caseMap = 0;
656 if (!caseMap) {
657 caseMap = new PrefixedNameToQualifiedNameMap;
658 size_t length = 0;
659 QualifiedName** attrs = getAttrs(&length);
660 mapLoweredLocalNameToName(caseMap, attrs, length);
661 }
662
663 NamedNodeMap* attributes = token.attributes();
664 if (!attributes)
665 return;
666
667 for (unsigned x = 0; x < attributes->length(); ++x) {
668 Attribute* attribute = attributes->attributeItem(x);
669 const QualifiedName& casedName = caseMap->get(attribute->localName());
670 if (!casedName.localName().isNull())
671 attribute->parserSetName(casedName);
672 }
673 }
674
adjustSVGAttributes(AtomicHTMLToken & token)675 void adjustSVGAttributes(AtomicHTMLToken& token)
676 {
677 adjustAttributes<SVGNames::getSVGAttrs>(token);
678 }
679
adjustMathMLAttributes(AtomicHTMLToken & token)680 void adjustMathMLAttributes(AtomicHTMLToken& token)
681 {
682 adjustAttributes<MathMLNames::getMathMLAttrs>(token);
683 }
684
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,QualifiedName ** names,size_t length)685 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
686 {
687 for (size_t i = 0; i < length; ++i) {
688 QualifiedName* name = names[i];
689 const AtomicString& localName = name->localName();
690 AtomicString prefixColonLocalName(prefix + ":" + localName);
691 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
692 map->add(prefixColonLocalName, nameWithPrefix);
693 }
694 }
695
adjustForeignAttributes(AtomicHTMLToken & token)696 void adjustForeignAttributes(AtomicHTMLToken& token)
697 {
698 static PrefixedNameToQualifiedNameMap* map = 0;
699 if (!map) {
700 map = new PrefixedNameToQualifiedNameMap;
701 size_t length = 0;
702 QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
703 addNamesWithPrefix(map, "xlink", attrs, length);
704
705 attrs = XMLNames::getXMLAttrs(&length);
706 addNamesWithPrefix(map, "xml", attrs, length);
707
708 map->add("xmlns", XMLNSNames::xmlnsAttr);
709 map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
710 }
711
712 NamedNodeMap* attributes = token.attributes();
713 if (!attributes)
714 return;
715
716 for (unsigned x = 0; x < attributes->length(); ++x) {
717 Attribute* attribute = attributes->attributeItem(x);
718 const QualifiedName& name = map->get(attribute->localName());
719 if (!name.localName().isNull())
720 attribute->parserSetName(name);
721 }
722 }
723
724 }
725
processStartTagForInBody(AtomicHTMLToken & token)726 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
727 {
728 ASSERT(token.type() == HTMLToken::StartTag);
729 if (token.name() == htmlTag) {
730 m_tree.insertHTMLHtmlStartTagInBody(token);
731 return;
732 }
733 if (token.name() == baseTag
734 || token.name() == basefontTag
735 || token.name() == bgsoundTag
736 || token.name() == commandTag
737 || token.name() == linkTag
738 || token.name() == metaTag
739 || token.name() == noframesTag
740 || token.name() == scriptTag
741 || token.name() == styleTag
742 || token.name() == titleTag) {
743 bool didProcess = processStartTagForInHead(token);
744 ASSERT_UNUSED(didProcess, didProcess);
745 return;
746 }
747 if (token.name() == bodyTag) {
748 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
749 ASSERT(isParsingFragment());
750 return;
751 }
752 m_tree.insertHTMLBodyStartTagInBody(token);
753 return;
754 }
755 if (token.name() == framesetTag) {
756 parseError(token);
757 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
758 ASSERT(isParsingFragment());
759 return;
760 }
761 if (!m_framesetOk)
762 return;
763 ExceptionCode ec = 0;
764 m_tree.openElements()->bodyElement()->remove(ec);
765 ASSERT(!ec);
766 m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
767 m_tree.openElements()->popHTMLBodyElement();
768 ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
769 m_tree.insertHTMLElement(token);
770 setInsertionMode(InFramesetMode);
771 return;
772 }
773 if (token.name() == addressTag
774 || token.name() == articleTag
775 || token.name() == asideTag
776 || token.name() == blockquoteTag
777 || token.name() == centerTag
778 || token.name() == detailsTag
779 || token.name() == dirTag
780 || token.name() == divTag
781 || token.name() == dlTag
782 || token.name() == fieldsetTag
783 || token.name() == figcaptionTag
784 || token.name() == figureTag
785 || token.name() == footerTag
786 || token.name() == headerTag
787 || token.name() == hgroupTag
788 || token.name() == menuTag
789 || token.name() == navTag
790 || token.name() == olTag
791 || token.name() == pTag
792 || token.name() == sectionTag
793 || token.name() == summaryTag
794 || token.name() == ulTag) {
795 processFakePEndTagIfPInButtonScope();
796 m_tree.insertHTMLElement(token);
797 return;
798 }
799 if (isNumberedHeaderTag(token.name())) {
800 processFakePEndTagIfPInButtonScope();
801 if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
802 parseError(token);
803 m_tree.openElements()->pop();
804 }
805 m_tree.insertHTMLElement(token);
806 return;
807 }
808 if (token.name() == preTag || token.name() == listingTag) {
809 processFakePEndTagIfPInButtonScope();
810 m_tree.insertHTMLElement(token);
811 m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
812 m_framesetOk = false;
813 return;
814 }
815 if (token.name() == formTag) {
816 if (m_tree.form()) {
817 parseError(token);
818 return;
819 }
820 processFakePEndTagIfPInButtonScope();
821 m_tree.insertHTMLFormElement(token);
822 return;
823 }
824 if (token.name() == liTag) {
825 processCloseWhenNestedTag<isLi>(token);
826 return;
827 }
828 if (token.name() == ddTag || token.name() == dtTag) {
829 processCloseWhenNestedTag<isDdOrDt>(token);
830 return;
831 }
832 if (token.name() == plaintextTag) {
833 processFakePEndTagIfPInButtonScope();
834 m_tree.insertHTMLElement(token);
835 m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
836 return;
837 }
838 if (token.name() == buttonTag) {
839 if (m_tree.openElements()->inScope(buttonTag)) {
840 parseError(token);
841 processFakeEndTag(buttonTag);
842 reprocessStartTag(token); // FIXME: Could we just fall through here?
843 return;
844 }
845 m_tree.reconstructTheActiveFormattingElements();
846 m_tree.insertHTMLElement(token);
847 m_framesetOk = false;
848 return;
849 }
850 if (token.name() == aTag) {
851 Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
852 if (activeATag) {
853 parseError(token);
854 processFakeEndTag(aTag);
855 m_tree.activeFormattingElements()->remove(activeATag);
856 if (m_tree.openElements()->contains(activeATag))
857 m_tree.openElements()->remove(activeATag);
858 }
859 m_tree.reconstructTheActiveFormattingElements();
860 m_tree.insertFormattingElement(token);
861 return;
862 }
863 if (isNonAnchorNonNobrFormattingTag(token.name())) {
864 m_tree.reconstructTheActiveFormattingElements();
865 m_tree.insertFormattingElement(token);
866 return;
867 }
868 if (token.name() == nobrTag) {
869 m_tree.reconstructTheActiveFormattingElements();
870 if (m_tree.openElements()->inScope(nobrTag)) {
871 parseError(token);
872 processFakeEndTag(nobrTag);
873 m_tree.reconstructTheActiveFormattingElements();
874 }
875 m_tree.insertFormattingElement(token);
876 return;
877 }
878 if (token.name() == appletTag
879 || token.name() == marqueeTag
880 || token.name() == objectTag) {
881 m_tree.reconstructTheActiveFormattingElements();
882 m_tree.insertHTMLElement(token);
883 m_tree.activeFormattingElements()->appendMarker();
884 m_framesetOk = false;
885 return;
886 }
887 if (token.name() == tableTag) {
888 if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
889 processFakeEndTag(pTag);
890 m_tree.insertHTMLElement(token);
891 m_framesetOk = false;
892 setInsertionMode(InTableMode);
893 return;
894 }
895 if (token.name() == imageTag) {
896 parseError(token);
897 // Apparently we're not supposed to ask.
898 token.setName(imgTag.localName());
899 prepareToReprocessToken();
900 // Note the fall through to the imgTag handling below!
901 }
902 if (token.name() == areaTag
903 || token.name() == brTag
904 || token.name() == embedTag
905 || token.name() == imgTag
906 || token.name() == keygenTag
907 || token.name() == wbrTag) {
908 m_tree.reconstructTheActiveFormattingElements();
909 m_tree.insertSelfClosingHTMLElement(token);
910 m_framesetOk = false;
911 return;
912 }
913 if (token.name() == inputTag) {
914 RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
915 m_tree.reconstructTheActiveFormattingElements();
916 m_tree.insertSelfClosingHTMLElement(token);
917 if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
918 m_framesetOk = false;
919 return;
920 }
921 if (token.name() == paramTag
922 || token.name() == sourceTag
923 || token.name() == trackTag) {
924 m_tree.insertSelfClosingHTMLElement(token);
925 return;
926 }
927 if (token.name() == hrTag) {
928 processFakePEndTagIfPInButtonScope();
929 m_tree.insertSelfClosingHTMLElement(token);
930 m_framesetOk = false;
931 return;
932 }
933 if (token.name() == isindexTag) {
934 processIsindexStartTagForInBody(token);
935 return;
936 }
937 if (token.name() == textareaTag) {
938 m_tree.insertHTMLElement(token);
939 m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
940 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
941 m_originalInsertionMode = m_insertionMode;
942 m_framesetOk = false;
943 setInsertionMode(TextMode);
944 return;
945 }
946 if (token.name() == xmpTag) {
947 processFakePEndTagIfPInButtonScope();
948 m_tree.reconstructTheActiveFormattingElements();
949 m_framesetOk = false;
950 processGenericRawTextStartTag(token);
951 return;
952 }
953 if (token.name() == iframeTag) {
954 m_framesetOk = false;
955 processGenericRawTextStartTag(token);
956 return;
957 }
958 if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
959 processGenericRawTextStartTag(token);
960 return;
961 }
962 if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
963 processGenericRawTextStartTag(token);
964 return;
965 }
966 if (token.name() == selectTag) {
967 m_tree.reconstructTheActiveFormattingElements();
968 m_tree.insertHTMLElement(token);
969 m_framesetOk = false;
970 if (m_insertionMode == InTableMode
971 || m_insertionMode == InCaptionMode
972 || m_insertionMode == InColumnGroupMode
973 || m_insertionMode == InTableBodyMode
974 || m_insertionMode == InRowMode
975 || m_insertionMode == InCellMode)
976 setInsertionMode(InSelectInTableMode);
977 else
978 setInsertionMode(InSelectMode);
979 return;
980 }
981 if (token.name() == optgroupTag || token.name() == optionTag) {
982 if (m_tree.openElements()->inScope(optionTag.localName())) {
983 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
984 processEndTag(endOption);
985 }
986 m_tree.reconstructTheActiveFormattingElements();
987 m_tree.insertHTMLElement(token);
988 return;
989 }
990 if (token.name() == rpTag || token.name() == rtTag) {
991 if (m_tree.openElements()->inScope(rubyTag.localName())) {
992 m_tree.generateImpliedEndTags();
993 if (!m_tree.currentNode()->hasTagName(rubyTag)) {
994 parseError(token);
995 m_tree.openElements()->popUntil(rubyTag.localName());
996 }
997 }
998 m_tree.insertHTMLElement(token);
999 return;
1000 }
1001 if (token.name() == MathMLNames::mathTag.localName()) {
1002 m_tree.reconstructTheActiveFormattingElements();
1003 adjustMathMLAttributes(token);
1004 adjustForeignAttributes(token);
1005 m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1006 if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1007 setInsertionMode(InForeignContentMode);
1008 return;
1009 }
1010 if (token.name() == SVGNames::svgTag.localName()) {
1011 m_tree.reconstructTheActiveFormattingElements();
1012 adjustSVGAttributes(token);
1013 adjustForeignAttributes(token);
1014 m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1015 if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1016 setInsertionMode(InForeignContentMode);
1017 return;
1018 }
1019 if (isCaptionColOrColgroupTag(token.name())
1020 || token.name() == frameTag
1021 || token.name() == headTag
1022 || isTableBodyContextTag(token.name())
1023 || isTableCellContextTag(token.name())
1024 || token.name() == trTag) {
1025 parseError(token);
1026 return;
1027 }
1028 m_tree.reconstructTheActiveFormattingElements();
1029 m_tree.insertHTMLElement(token);
1030 }
1031
processColgroupEndTagForInColumnGroup()1032 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1033 {
1034 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1035 ASSERT(isParsingFragment());
1036 // FIXME: parse error
1037 return false;
1038 }
1039 m_tree.openElements()->pop();
1040 setInsertionMode(InTableMode);
1041 return true;
1042 }
1043
1044 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()1045 void HTMLTreeBuilder::closeTheCell()
1046 {
1047 ASSERT(insertionMode() == InCellMode);
1048 if (m_tree.openElements()->inTableScope(tdTag)) {
1049 ASSERT(!m_tree.openElements()->inTableScope(thTag));
1050 processFakeEndTag(tdTag);
1051 return;
1052 }
1053 ASSERT(m_tree.openElements()->inTableScope(thTag));
1054 processFakeEndTag(thTag);
1055 ASSERT(insertionMode() == InRowMode);
1056 }
1057
processStartTagForInTable(AtomicHTMLToken & token)1058 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1059 {
1060 ASSERT(token.type() == HTMLToken::StartTag);
1061 if (token.name() == captionTag) {
1062 m_tree.openElements()->popUntilTableScopeMarker();
1063 m_tree.activeFormattingElements()->appendMarker();
1064 m_tree.insertHTMLElement(token);
1065 setInsertionMode(InCaptionMode);
1066 return;
1067 }
1068 if (token.name() == colgroupTag) {
1069 m_tree.openElements()->popUntilTableScopeMarker();
1070 m_tree.insertHTMLElement(token);
1071 setInsertionMode(InColumnGroupMode);
1072 return;
1073 }
1074 if (token.name() == colTag) {
1075 processFakeStartTag(colgroupTag);
1076 ASSERT(InColumnGroupMode);
1077 reprocessStartTag(token);
1078 return;
1079 }
1080 if (isTableBodyContextTag(token.name())) {
1081 m_tree.openElements()->popUntilTableScopeMarker();
1082 m_tree.insertHTMLElement(token);
1083 setInsertionMode(InTableBodyMode);
1084 return;
1085 }
1086 if (isTableCellContextTag(token.name())
1087 || token.name() == trTag) {
1088 processFakeStartTag(tbodyTag);
1089 ASSERT(insertionMode() == InTableBodyMode);
1090 reprocessStartTag(token);
1091 return;
1092 }
1093 if (token.name() == tableTag) {
1094 parseError(token);
1095 if (!processTableEndTagForInTable()) {
1096 ASSERT(isParsingFragment());
1097 return;
1098 }
1099 reprocessStartTag(token);
1100 return;
1101 }
1102 if (token.name() == styleTag || token.name() == scriptTag) {
1103 processStartTagForInHead(token);
1104 return;
1105 }
1106 if (token.name() == inputTag) {
1107 Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1108 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1109 parseError(token);
1110 m_tree.insertSelfClosingHTMLElement(token);
1111 return;
1112 }
1113 // Fall through to "anything else" case.
1114 }
1115 if (token.name() == formTag) {
1116 parseError(token);
1117 if (m_tree.form())
1118 return;
1119 m_tree.insertHTMLFormElement(token, true);
1120 m_tree.openElements()->pop();
1121 return;
1122 }
1123 parseError(token);
1124 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1125 processStartTagForInBody(token);
1126 }
1127
1128 namespace {
1129
shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken & token,ContainerNode * currentElement)1130 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement)
1131 {
1132 ASSERT(token.type() == HTMLToken::StartTag);
1133 if (currentElement->hasTagName(MathMLNames::miTag)
1134 || currentElement->hasTagName(MathMLNames::moTag)
1135 || currentElement->hasTagName(MathMLNames::mnTag)
1136 || currentElement->hasTagName(MathMLNames::msTag)
1137 || currentElement->hasTagName(MathMLNames::mtextTag)) {
1138 return token.name() != MathMLNames::mglyphTag
1139 && token.name() != MathMLNames::malignmarkTag;
1140 }
1141 if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1142 return token.name() == SVGNames::svgTag;
1143 if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1144 || currentElement->hasTagName(SVGNames::descTag)
1145 || currentElement->hasTagName(SVGNames::titleTag))
1146 return true;
1147 return isInHTMLNamespace(currentElement);
1148 }
1149
1150 }
1151
processStartTag(AtomicHTMLToken & token)1152 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1153 {
1154 ASSERT(token.type() == HTMLToken::StartTag);
1155 switch (insertionMode()) {
1156 case InitialMode:
1157 ASSERT(insertionMode() == InitialMode);
1158 defaultForInitial();
1159 // Fall through.
1160 case BeforeHTMLMode:
1161 ASSERT(insertionMode() == BeforeHTMLMode);
1162 if (token.name() == htmlTag) {
1163 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1164 setInsertionMode(BeforeHeadMode);
1165 return;
1166 }
1167 defaultForBeforeHTML();
1168 // Fall through.
1169 case BeforeHeadMode:
1170 ASSERT(insertionMode() == BeforeHeadMode);
1171 if (token.name() == htmlTag) {
1172 m_tree.insertHTMLHtmlStartTagInBody(token);
1173 return;
1174 }
1175 if (token.name() == headTag) {
1176 m_tree.insertHTMLHeadElement(token);
1177 setInsertionMode(InHeadMode);
1178 return;
1179 }
1180 defaultForBeforeHead();
1181 // Fall through.
1182 case InHeadMode:
1183 ASSERT(insertionMode() == InHeadMode);
1184 if (processStartTagForInHead(token))
1185 return;
1186 defaultForInHead();
1187 // Fall through.
1188 case AfterHeadMode:
1189 ASSERT(insertionMode() == AfterHeadMode);
1190 if (token.name() == htmlTag) {
1191 m_tree.insertHTMLHtmlStartTagInBody(token);
1192 return;
1193 }
1194 if (token.name() == bodyTag) {
1195 m_framesetOk = false;
1196 m_tree.insertHTMLBodyElement(token);
1197 setInsertionMode(InBodyMode);
1198 return;
1199 }
1200 if (token.name() == framesetTag) {
1201 m_tree.insertHTMLElement(token);
1202 setInsertionMode(InFramesetMode);
1203 return;
1204 }
1205 if (token.name() == baseTag
1206 || token.name() == basefontTag
1207 || token.name() == bgsoundTag
1208 || token.name() == linkTag
1209 || token.name() == metaTag
1210 || token.name() == noframesTag
1211 || token.name() == scriptTag
1212 || token.name() == styleTag
1213 || token.name() == titleTag) {
1214 parseError(token);
1215 ASSERT(m_tree.head());
1216 m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1217 processStartTagForInHead(token);
1218 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1219 return;
1220 }
1221 if (token.name() == headTag) {
1222 parseError(token);
1223 return;
1224 }
1225 defaultForAfterHead();
1226 // Fall through
1227 case InBodyMode:
1228 ASSERT(insertionMode() == InBodyMode);
1229 processStartTagForInBody(token);
1230 break;
1231 case InTableMode:
1232 ASSERT(insertionMode() == InTableMode);
1233 processStartTagForInTable(token);
1234 break;
1235 case InCaptionMode:
1236 ASSERT(insertionMode() == InCaptionMode);
1237 if (isCaptionColOrColgroupTag(token.name())
1238 || isTableBodyContextTag(token.name())
1239 || isTableCellContextTag(token.name())
1240 || token.name() == trTag) {
1241 parseError(token);
1242 if (!processCaptionEndTagForInCaption()) {
1243 ASSERT(isParsingFragment());
1244 return;
1245 }
1246 reprocessStartTag(token);
1247 return;
1248 }
1249 processStartTagForInBody(token);
1250 break;
1251 case InColumnGroupMode:
1252 ASSERT(insertionMode() == InColumnGroupMode);
1253 if (token.name() == htmlTag) {
1254 m_tree.insertHTMLHtmlStartTagInBody(token);
1255 return;
1256 }
1257 if (token.name() == colTag) {
1258 m_tree.insertSelfClosingHTMLElement(token);
1259 return;
1260 }
1261 if (!processColgroupEndTagForInColumnGroup()) {
1262 ASSERT(isParsingFragment());
1263 return;
1264 }
1265 reprocessStartTag(token);
1266 break;
1267 case InTableBodyMode:
1268 ASSERT(insertionMode() == InTableBodyMode);
1269 if (token.name() == trTag) {
1270 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1271 m_tree.insertHTMLElement(token);
1272 setInsertionMode(InRowMode);
1273 return;
1274 }
1275 if (isTableCellContextTag(token.name())) {
1276 parseError(token);
1277 processFakeStartTag(trTag);
1278 ASSERT(insertionMode() == InRowMode);
1279 reprocessStartTag(token);
1280 return;
1281 }
1282 if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1283 // FIXME: This is slow.
1284 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1285 ASSERT(isParsingFragment());
1286 parseError(token);
1287 return;
1288 }
1289 m_tree.openElements()->popUntilTableBodyScopeMarker();
1290 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1291 processFakeEndTag(m_tree.currentElement()->tagQName());
1292 reprocessStartTag(token);
1293 return;
1294 }
1295 processStartTagForInTable(token);
1296 break;
1297 case InRowMode:
1298 ASSERT(insertionMode() == InRowMode);
1299 if (isTableCellContextTag(token.name())) {
1300 m_tree.openElements()->popUntilTableRowScopeMarker();
1301 m_tree.insertHTMLElement(token);
1302 setInsertionMode(InCellMode);
1303 m_tree.activeFormattingElements()->appendMarker();
1304 return;
1305 }
1306 if (token.name() == trTag
1307 || isCaptionColOrColgroupTag(token.name())
1308 || isTableBodyContextTag(token.name())) {
1309 if (!processTrEndTagForInRow()) {
1310 ASSERT(isParsingFragment());
1311 return;
1312 }
1313 ASSERT(insertionMode() == InTableBodyMode);
1314 reprocessStartTag(token);
1315 return;
1316 }
1317 processStartTagForInTable(token);
1318 break;
1319 case InCellMode:
1320 ASSERT(insertionMode() == InCellMode);
1321 if (isCaptionColOrColgroupTag(token.name())
1322 || isTableCellContextTag(token.name())
1323 || token.name() == trTag
1324 || isTableBodyContextTag(token.name())) {
1325 // FIXME: This could be more efficient.
1326 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1327 ASSERT(isParsingFragment());
1328 parseError(token);
1329 return;
1330 }
1331 closeTheCell();
1332 reprocessStartTag(token);
1333 return;
1334 }
1335 processStartTagForInBody(token);
1336 break;
1337 case AfterBodyMode:
1338 case AfterAfterBodyMode:
1339 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1340 if (token.name() == htmlTag) {
1341 m_tree.insertHTMLHtmlStartTagInBody(token);
1342 return;
1343 }
1344 setInsertionMode(InBodyMode);
1345 reprocessStartTag(token);
1346 break;
1347 case InHeadNoscriptMode:
1348 ASSERT(insertionMode() == InHeadNoscriptMode);
1349 if (token.name() == htmlTag) {
1350 m_tree.insertHTMLHtmlStartTagInBody(token);
1351 return;
1352 }
1353 if (token.name() == basefontTag
1354 || token.name() == bgsoundTag
1355 || token.name() == linkTag
1356 || token.name() == metaTag
1357 || token.name() == noframesTag
1358 || token.name() == styleTag) {
1359 bool didProcess = processStartTagForInHead(token);
1360 ASSERT_UNUSED(didProcess, didProcess);
1361 return;
1362 }
1363 if (token.name() == htmlTag || token.name() == noscriptTag) {
1364 parseError(token);
1365 return;
1366 }
1367 defaultForInHeadNoscript();
1368 processToken(token);
1369 break;
1370 case InFramesetMode:
1371 ASSERT(insertionMode() == InFramesetMode);
1372 if (token.name() == htmlTag) {
1373 m_tree.insertHTMLHtmlStartTagInBody(token);
1374 return;
1375 }
1376 if (token.name() == framesetTag) {
1377 m_tree.insertHTMLElement(token);
1378 return;
1379 }
1380 if (token.name() == frameTag) {
1381 m_tree.insertSelfClosingHTMLElement(token);
1382 return;
1383 }
1384 if (token.name() == noframesTag) {
1385 processStartTagForInHead(token);
1386 return;
1387 }
1388 parseError(token);
1389 break;
1390 case AfterFramesetMode:
1391 case AfterAfterFramesetMode:
1392 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1393 if (token.name() == htmlTag) {
1394 m_tree.insertHTMLHtmlStartTagInBody(token);
1395 return;
1396 }
1397 if (token.name() == noframesTag) {
1398 processStartTagForInHead(token);
1399 return;
1400 }
1401 parseError(token);
1402 break;
1403 case InSelectInTableMode:
1404 ASSERT(insertionMode() == InSelectInTableMode);
1405 if (token.name() == captionTag
1406 || token.name() == tableTag
1407 || isTableBodyContextTag(token.name())
1408 || token.name() == trTag
1409 || isTableCellContextTag(token.name())) {
1410 parseError(token);
1411 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1412 processEndTag(endSelect);
1413 reprocessStartTag(token);
1414 return;
1415 }
1416 // Fall through
1417 case InSelectMode:
1418 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1419 if (token.name() == htmlTag) {
1420 m_tree.insertHTMLHtmlStartTagInBody(token);
1421 return;
1422 }
1423 if (token.name() == optionTag) {
1424 if (m_tree.currentNode()->hasTagName(optionTag)) {
1425 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1426 processEndTag(endOption);
1427 }
1428 m_tree.insertHTMLElement(token);
1429 return;
1430 }
1431 if (token.name() == optgroupTag) {
1432 if (m_tree.currentNode()->hasTagName(optionTag)) {
1433 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1434 processEndTag(endOption);
1435 }
1436 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1437 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1438 processEndTag(endOptgroup);
1439 }
1440 m_tree.insertHTMLElement(token);
1441 return;
1442 }
1443 if (token.name() == selectTag) {
1444 parseError(token);
1445 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1446 processEndTag(endSelect);
1447 return;
1448 }
1449 if (token.name() == inputTag
1450 || token.name() == keygenTag
1451 || token.name() == textareaTag) {
1452 parseError(token);
1453 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1454 ASSERT(isParsingFragment());
1455 return;
1456 }
1457 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1458 processEndTag(endSelect);
1459 reprocessStartTag(token);
1460 return;
1461 }
1462 if (token.name() == scriptTag) {
1463 bool didProcess = processStartTagForInHead(token);
1464 ASSERT_UNUSED(didProcess, didProcess);
1465 return;
1466 }
1467 break;
1468 case InTableTextMode:
1469 defaultForInTableText();
1470 processStartTag(token);
1471 break;
1472 case InForeignContentMode: {
1473 if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentNode())) {
1474 processForeignContentUsingInBodyModeAndResetMode(token);
1475 return;
1476 }
1477 if (token.name() == bTag
1478 || token.name() == bigTag
1479 || token.name() == blockquoteTag
1480 || token.name() == bodyTag
1481 || token.name() == brTag
1482 || token.name() == centerTag
1483 || token.name() == codeTag
1484 || token.name() == ddTag
1485 || token.name() == divTag
1486 || token.name() == dlTag
1487 || token.name() == dtTag
1488 || token.name() == emTag
1489 || token.name() == embedTag
1490 || isNumberedHeaderTag(token.name())
1491 || token.name() == headTag
1492 || token.name() == hrTag
1493 || token.name() == iTag
1494 || token.name() == imgTag
1495 || token.name() == liTag
1496 || token.name() == listingTag
1497 || token.name() == menuTag
1498 || token.name() == metaTag
1499 || token.name() == nobrTag
1500 || token.name() == olTag
1501 || token.name() == pTag
1502 || token.name() == preTag
1503 || token.name() == rubyTag
1504 || token.name() == sTag
1505 || token.name() == smallTag
1506 || token.name() == spanTag
1507 || token.name() == strongTag
1508 || token.name() == strikeTag
1509 || token.name() == subTag
1510 || token.name() == supTag
1511 || token.name() == tableTag
1512 || token.name() == ttTag
1513 || token.name() == uTag
1514 || token.name() == ulTag
1515 || token.name() == varTag
1516 || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1517 parseError(token);
1518 m_tree.openElements()->popUntilForeignContentScopeMarker();
1519 resetInsertionModeAppropriately();
1520 reprocessStartTag(token);
1521 return;
1522 }
1523 const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1524 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1525 adjustMathMLAttributes(token);
1526 if (currentNamespace == SVGNames::svgNamespaceURI) {
1527 adjustSVGTagNameCase(token);
1528 adjustSVGAttributes(token);
1529 }
1530 adjustForeignAttributes(token);
1531 m_tree.insertForeignElement(token, currentNamespace);
1532 break;
1533 }
1534 case TextMode:
1535 ASSERT_NOT_REACHED();
1536 break;
1537 }
1538 }
1539
processBodyEndTagForInBody(AtomicHTMLToken & token)1540 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1541 {
1542 ASSERT(token.type() == HTMLToken::EndTag);
1543 ASSERT(token.name() == bodyTag);
1544 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1545 parseError(token);
1546 return false;
1547 }
1548 notImplemented(); // Emit a more specific parse error based on stack contents.
1549 setInsertionMode(AfterBodyMode);
1550 return true;
1551 }
1552
processAnyOtherEndTagForInBody(AtomicHTMLToken & token)1553 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1554 {
1555 ASSERT(token.type() == HTMLToken::EndTag);
1556 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1557 while (1) {
1558 ContainerNode* node = record->node();
1559 if (node->hasLocalName(token.name())) {
1560 m_tree.generateImpliedEndTags();
1561 // FIXME: The ElementRecord pointed to by record might be deleted by
1562 // the preceding call. Perhaps we should hold a RefPtr so that it
1563 // stays alive for the duration of record's scope.
1564 record = 0;
1565 if (!m_tree.currentNode()->hasLocalName(token.name())) {
1566 parseError(token);
1567 // FIXME: This is either a bug in the spec, or a bug in our
1568 // implementation. Filed a bug with HTML5:
1569 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1570 // We might have already popped the node for the token in
1571 // generateImpliedEndTags, just abort.
1572 if (!m_tree.openElements()->contains(toElement(node)))
1573 return;
1574 }
1575 m_tree.openElements()->popUntilPopped(toElement(node));
1576 return;
1577 }
1578 if (isSpecialNode(node)) {
1579 parseError(token);
1580 return;
1581 }
1582 record = record->next();
1583 }
1584 }
1585
1586 // FIXME: This probably belongs on HTMLElementStack.
furthestBlockForFormattingElement(Element * formattingElement)1587 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1588 {
1589 HTMLElementStack::ElementRecord* furthestBlock = 0;
1590 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1591 for (; record; record = record->next()) {
1592 if (record->element() == formattingElement)
1593 return furthestBlock;
1594 if (isSpecialNode(record->element()))
1595 furthestBlock = record;
1596 }
1597 ASSERT_NOT_REACHED();
1598 return 0;
1599 }
1600
1601 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
callTheAdoptionAgency(AtomicHTMLToken & token)1602 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1603 {
1604 // The adoption agency algorithm is N^2. We limit the number of iterations
1605 // to stop from hanging the whole browser. This limit is copied from the
1606 // legacy tree builder and might need to be tweaked in the future.
1607 static const int adoptionAgencyIterationLimit = 10;
1608
1609 for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1610 // 1.
1611 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1612 if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1613 parseError(token);
1614 notImplemented(); // Check the stack of open elements for a more specific parse error.
1615 return;
1616 }
1617 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1618 if (!formattingElementRecord) {
1619 parseError(token);
1620 m_tree.activeFormattingElements()->remove(formattingElement);
1621 return;
1622 }
1623 if (formattingElement != m_tree.currentElement())
1624 parseError(token);
1625 // 2.
1626 HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1627 // 3.
1628 if (!furthestBlock) {
1629 m_tree.openElements()->popUntilPopped(formattingElement);
1630 m_tree.activeFormattingElements()->remove(formattingElement);
1631 return;
1632 }
1633 // 4.
1634 ASSERT(furthestBlock->isAbove(formattingElementRecord));
1635 ContainerNode* commonAncestor = formattingElementRecord->next()->node();
1636 // 5.
1637 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1638 // 6.
1639 HTMLElementStack::ElementRecord* node = furthestBlock;
1640 HTMLElementStack::ElementRecord* nextNode = node->next();
1641 HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1642 for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1643 // 6.1
1644 node = nextNode;
1645 ASSERT(node);
1646 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1647 // 6.2
1648 if (!m_tree.activeFormattingElements()->contains(node->element())) {
1649 m_tree.openElements()->remove(node->element());
1650 node = 0;
1651 continue;
1652 }
1653 // 6.3
1654 if (node == formattingElementRecord)
1655 break;
1656 // 6.5
1657 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1658 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1659 nodeEntry->replaceElement(newElement.get());
1660 node->replaceElement(newElement.release());
1661 // 6.4 -- Intentionally out of order to handle the case where node
1662 // was replaced in 6.5.
1663 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1664 if (lastNode == furthestBlock)
1665 bookmark.moveToAfter(nodeEntry);
1666 // 6.6
1667 if (Element* parent = lastNode->element()->parentElement())
1668 parent->parserRemoveChild(lastNode->element());
1669 node->element()->parserAddChild(lastNode->element());
1670 if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1671 lastNode->element()->lazyAttach();
1672 // 6.7
1673 lastNode = node;
1674 }
1675 // 7
1676 const AtomicString& commonAncestorTag = commonAncestor->localName();
1677 if (Element* parent = lastNode->element()->parentElement())
1678 parent->parserRemoveChild(lastNode->element());
1679 // FIXME: If this moves to HTMLConstructionSite, this check should use
1680 // causesFosterParenting(tagName) instead.
1681 if (commonAncestorTag == tableTag
1682 || commonAncestorTag == trTag
1683 || isTableBodyContextTag(commonAncestorTag))
1684 m_tree.fosterParent(lastNode->element());
1685 else {
1686 commonAncestor->parserAddChild(lastNode->element());
1687 ASSERT(lastNode->node()->isElementNode());
1688 ASSERT(lastNode->element()->parentNode());
1689 if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1690 lastNode->element()->lazyAttach();
1691 }
1692 // 8
1693 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1694 // 9
1695 newElement->takeAllChildrenFrom(furthestBlock->element());
1696 // 10
1697 Element* furthestBlockElement = furthestBlock->element();
1698 // FIXME: All this creation / parserAddChild / attach business should
1699 // be in HTMLConstructionSite. My guess is that steps 8--12
1700 // should all be in some HTMLConstructionSite function.
1701 furthestBlockElement->parserAddChild(newElement);
1702 if (furthestBlockElement->attached() && !newElement->attached()) {
1703 // Notice that newElement might already be attached if, for example, one of the reparented
1704 // children is a style element, which attaches itself automatically.
1705 newElement->attach();
1706 }
1707 // 11
1708 m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1709 // 12
1710 m_tree.openElements()->remove(formattingElement);
1711 m_tree.openElements()->insertAbove(newElement, furthestBlock);
1712 }
1713 }
1714
resetInsertionModeAppropriately()1715 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1716 {
1717 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1718 bool last = false;
1719 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1720 while (1) {
1721 ContainerNode* node = nodeRecord->node();
1722 if (node == m_tree.openElements()->rootNode()) {
1723 ASSERT(isParsingFragment());
1724 last = true;
1725 node = m_fragmentContext.contextElement();
1726 }
1727 if (node->hasTagName(selectTag)) {
1728 ASSERT(isParsingFragment());
1729 return setInsertionMode(InSelectMode);
1730 }
1731 if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1732 return setInsertionMode(InCellMode);
1733 if (node->hasTagName(trTag))
1734 return setInsertionMode(InRowMode);
1735 if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1736 return setInsertionMode(InTableBodyMode);
1737 if (node->hasTagName(captionTag))
1738 return setInsertionMode(InCaptionMode);
1739 if (node->hasTagName(colgroupTag)) {
1740 ASSERT(isParsingFragment());
1741 return setInsertionMode(InColumnGroupMode);
1742 }
1743 if (node->hasTagName(tableTag))
1744 return setInsertionMode(InTableMode);
1745 if (node->hasTagName(headTag)) {
1746 ASSERT(isParsingFragment());
1747 return setInsertionMode(InBodyMode);
1748 }
1749 if (node->hasTagName(bodyTag))
1750 return setInsertionMode(InBodyMode);
1751 if (node->hasTagName(framesetTag)) {
1752 ASSERT(isParsingFragment());
1753 return setInsertionMode(InFramesetMode);
1754 }
1755 if (node->hasTagName(htmlTag)) {
1756 ASSERT(isParsingFragment());
1757 return setInsertionMode(BeforeHeadMode);
1758 }
1759 if (node->namespaceURI() == SVGNames::svgNamespaceURI
1760 || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1761 return setInsertionMode(InForeignContentMode);
1762 if (last) {
1763 ASSERT(isParsingFragment());
1764 return setInsertionMode(InBodyMode);
1765 }
1766 nodeRecord = nodeRecord->next();
1767 }
1768 }
1769
processEndTagForInTableBody(AtomicHTMLToken & token)1770 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1771 {
1772 ASSERT(token.type() == HTMLToken::EndTag);
1773 if (isTableBodyContextTag(token.name())) {
1774 if (!m_tree.openElements()->inTableScope(token.name())) {
1775 parseError(token);
1776 return;
1777 }
1778 m_tree.openElements()->popUntilTableBodyScopeMarker();
1779 m_tree.openElements()->pop();
1780 setInsertionMode(InTableMode);
1781 return;
1782 }
1783 if (token.name() == tableTag) {
1784 // FIXME: This is slow.
1785 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1786 ASSERT(isParsingFragment());
1787 parseError(token);
1788 return;
1789 }
1790 m_tree.openElements()->popUntilTableBodyScopeMarker();
1791 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1792 processFakeEndTag(m_tree.currentElement()->tagQName());
1793 reprocessEndTag(token);
1794 return;
1795 }
1796 if (token.name() == bodyTag
1797 || isCaptionColOrColgroupTag(token.name())
1798 || token.name() == htmlTag
1799 || isTableCellContextTag(token.name())
1800 || token.name() == trTag) {
1801 parseError(token);
1802 return;
1803 }
1804 processEndTagForInTable(token);
1805 }
1806
processEndTagForInRow(AtomicHTMLToken & token)1807 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1808 {
1809 ASSERT(token.type() == HTMLToken::EndTag);
1810 if (token.name() == trTag) {
1811 processTrEndTagForInRow();
1812 return;
1813 }
1814 if (token.name() == tableTag) {
1815 if (!processTrEndTagForInRow()) {
1816 ASSERT(isParsingFragment());
1817 return;
1818 }
1819 ASSERT(insertionMode() == InTableBodyMode);
1820 reprocessEndTag(token);
1821 return;
1822 }
1823 if (isTableBodyContextTag(token.name())) {
1824 if (!m_tree.openElements()->inTableScope(token.name())) {
1825 parseError(token);
1826 return;
1827 }
1828 processFakeEndTag(trTag);
1829 ASSERT(insertionMode() == InTableBodyMode);
1830 reprocessEndTag(token);
1831 return;
1832 }
1833 if (token.name() == bodyTag
1834 || isCaptionColOrColgroupTag(token.name())
1835 || token.name() == htmlTag
1836 || isTableCellContextTag(token.name())) {
1837 parseError(token);
1838 return;
1839 }
1840 processEndTagForInTable(token);
1841 }
1842
processEndTagForInCell(AtomicHTMLToken & token)1843 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1844 {
1845 ASSERT(token.type() == HTMLToken::EndTag);
1846 if (isTableCellContextTag(token.name())) {
1847 if (!m_tree.openElements()->inTableScope(token.name())) {
1848 parseError(token);
1849 return;
1850 }
1851 m_tree.generateImpliedEndTags();
1852 if (!m_tree.currentNode()->hasLocalName(token.name()))
1853 parseError(token);
1854 m_tree.openElements()->popUntilPopped(token.name());
1855 m_tree.activeFormattingElements()->clearToLastMarker();
1856 setInsertionMode(InRowMode);
1857 return;
1858 }
1859 if (token.name() == bodyTag
1860 || isCaptionColOrColgroupTag(token.name())
1861 || token.name() == htmlTag) {
1862 parseError(token);
1863 return;
1864 }
1865 if (token.name() == tableTag
1866 || token.name() == trTag
1867 || isTableBodyContextTag(token.name())) {
1868 if (!m_tree.openElements()->inTableScope(token.name())) {
1869 ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1870 parseError(token);
1871 return;
1872 }
1873 closeTheCell();
1874 reprocessEndTag(token);
1875 return;
1876 }
1877 processEndTagForInBody(token);
1878 }
1879
processEndTagForInBody(AtomicHTMLToken & token)1880 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1881 {
1882 ASSERT(token.type() == HTMLToken::EndTag);
1883 if (token.name() == bodyTag) {
1884 processBodyEndTagForInBody(token);
1885 return;
1886 }
1887 if (token.name() == htmlTag) {
1888 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1889 if (processBodyEndTagForInBody(endBody))
1890 reprocessEndTag(token);
1891 return;
1892 }
1893 if (token.name() == addressTag
1894 || token.name() == articleTag
1895 || token.name() == asideTag
1896 || token.name() == blockquoteTag
1897 || token.name() == buttonTag
1898 || token.name() == centerTag
1899 || token.name() == detailsTag
1900 || token.name() == dirTag
1901 || token.name() == divTag
1902 || token.name() == dlTag
1903 || token.name() == fieldsetTag
1904 || token.name() == figcaptionTag
1905 || token.name() == figureTag
1906 || token.name() == footerTag
1907 || token.name() == headerTag
1908 || token.name() == hgroupTag
1909 || token.name() == listingTag
1910 || token.name() == menuTag
1911 || token.name() == navTag
1912 || token.name() == olTag
1913 || token.name() == preTag
1914 || token.name() == sectionTag
1915 || token.name() == summaryTag
1916 || token.name() == ulTag) {
1917 if (!m_tree.openElements()->inScope(token.name())) {
1918 parseError(token);
1919 return;
1920 }
1921 m_tree.generateImpliedEndTags();
1922 if (!m_tree.currentNode()->hasLocalName(token.name()))
1923 parseError(token);
1924 m_tree.openElements()->popUntilPopped(token.name());
1925 return;
1926 }
1927 if (token.name() == formTag) {
1928 RefPtr<Element> node = m_tree.takeForm();
1929 if (!node || !m_tree.openElements()->inScope(node.get())) {
1930 parseError(token);
1931 return;
1932 }
1933 m_tree.generateImpliedEndTags();
1934 if (m_tree.currentElement() != node.get())
1935 parseError(token);
1936 m_tree.openElements()->remove(node.get());
1937 }
1938 if (token.name() == pTag) {
1939 if (!m_tree.openElements()->inButtonScope(token.name())) {
1940 parseError(token);
1941 processFakeStartTag(pTag);
1942 ASSERT(m_tree.openElements()->inScope(token.name()));
1943 reprocessEndTag(token);
1944 return;
1945 }
1946 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1947 if (!m_tree.currentNode()->hasLocalName(token.name()))
1948 parseError(token);
1949 m_tree.openElements()->popUntilPopped(token.name());
1950 return;
1951 }
1952 if (token.name() == liTag) {
1953 if (!m_tree.openElements()->inListItemScope(token.name())) {
1954 parseError(token);
1955 return;
1956 }
1957 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1958 if (!m_tree.currentNode()->hasLocalName(token.name()))
1959 parseError(token);
1960 m_tree.openElements()->popUntilPopped(token.name());
1961 return;
1962 }
1963 if (token.name() == ddTag
1964 || token.name() == dtTag) {
1965 if (!m_tree.openElements()->inScope(token.name())) {
1966 parseError(token);
1967 return;
1968 }
1969 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1970 if (!m_tree.currentNode()->hasLocalName(token.name()))
1971 parseError(token);
1972 m_tree.openElements()->popUntilPopped(token.name());
1973 return;
1974 }
1975 if (isNumberedHeaderTag(token.name())) {
1976 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1977 parseError(token);
1978 return;
1979 }
1980 m_tree.generateImpliedEndTags();
1981 if (!m_tree.currentNode()->hasLocalName(token.name()))
1982 parseError(token);
1983 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1984 return;
1985 }
1986 if (isFormattingTag(token.name())) {
1987 callTheAdoptionAgency(token);
1988 return;
1989 }
1990 if (token.name() == appletTag
1991 || token.name() == marqueeTag
1992 || token.name() == objectTag) {
1993 if (!m_tree.openElements()->inScope(token.name())) {
1994 parseError(token);
1995 return;
1996 }
1997 m_tree.generateImpliedEndTags();
1998 if (!m_tree.currentNode()->hasLocalName(token.name()))
1999 parseError(token);
2000 m_tree.openElements()->popUntilPopped(token.name());
2001 m_tree.activeFormattingElements()->clearToLastMarker();
2002 return;
2003 }
2004 if (token.name() == brTag) {
2005 parseError(token);
2006 processFakeStartTag(brTag);
2007 return;
2008 }
2009 processAnyOtherEndTagForInBody(token);
2010 }
2011
processCaptionEndTagForInCaption()2012 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2013 {
2014 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2015 ASSERT(isParsingFragment());
2016 // FIXME: parse error
2017 return false;
2018 }
2019 m_tree.generateImpliedEndTags();
2020 // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2021 m_tree.openElements()->popUntilPopped(captionTag.localName());
2022 m_tree.activeFormattingElements()->clearToLastMarker();
2023 setInsertionMode(InTableMode);
2024 return true;
2025 }
2026
processTrEndTagForInRow()2027 bool HTMLTreeBuilder::processTrEndTagForInRow()
2028 {
2029 if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2030 ASSERT(isParsingFragment());
2031 // FIXME: parse error
2032 return false;
2033 }
2034 m_tree.openElements()->popUntilTableRowScopeMarker();
2035 ASSERT(m_tree.currentElement()->hasTagName(trTag));
2036 m_tree.openElements()->pop();
2037 setInsertionMode(InTableBodyMode);
2038 return true;
2039 }
2040
processTableEndTagForInTable()2041 bool HTMLTreeBuilder::processTableEndTagForInTable()
2042 {
2043 if (!m_tree.openElements()->inTableScope(tableTag)) {
2044 ASSERT(isParsingFragment());
2045 // FIXME: parse error.
2046 return false;
2047 }
2048 m_tree.openElements()->popUntilPopped(tableTag.localName());
2049 resetInsertionModeAppropriately();
2050 return true;
2051 }
2052
processEndTagForInTable(AtomicHTMLToken & token)2053 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2054 {
2055 ASSERT(token.type() == HTMLToken::EndTag);
2056 if (token.name() == tableTag) {
2057 processTableEndTagForInTable();
2058 return;
2059 }
2060 if (token.name() == bodyTag
2061 || isCaptionColOrColgroupTag(token.name())
2062 || token.name() == htmlTag
2063 || isTableBodyContextTag(token.name())
2064 || isTableCellContextTag(token.name())
2065 || token.name() == trTag) {
2066 parseError(token);
2067 return;
2068 }
2069 // Is this redirection necessary here?
2070 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2071 processEndTagForInBody(token);
2072 }
2073
// Dispatches an end tag token according to the current insertion mode.
// Several cases deliberately fall through: the "defaultFor..." helpers move
// the builder into the next mode, and the following case label then handles
// the same token under that mode (each case asserts the mode it expects).
void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::EndTag);
    switch (insertionMode()) {
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        // Only </head>, </body>, </html> and </br> are acted on here; every
        // other end tag is dropped.
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        if (token.name() == headTag) {
            m_tree.openElements()->popHTMLHeadElement();
            setInsertionMode(AfterHeadMode);
            return;
        }
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForAfterHead();
        // Fall through
    case InBodyMode:
        ASSERT(insertionMode() == InBodyMode);
        processEndTagForInBody(token);
        break;
    case InTableMode:
        ASSERT(insertionMode() == InTableMode);
        processEndTagForInTable(token);
        break;
    case InCaptionMode:
        ASSERT(insertionMode() == InCaptionMode);
        if (token.name() == captionTag) {
            processCaptionEndTagForInCaption();
            return;
        }
        if (token.name() == tableTag) {
            // Close the caption first; reprocess </table> only if that worked.
            parseError(token);
            if (!processCaptionEndTagForInCaption()) {
                ASSERT(isParsingFragment());
                return;
            }
            reprocessEndTag(token);
            return;
        }
        if (token.name() == bodyTag
            || token.name() == colTag
            || token.name() == colgroupTag
            || token.name() == htmlTag
            || isTableBodyContextTag(token.name())
            || isTableCellContextTag(token.name())
            || token.name() == trTag) {
            parseError(token);
            return;
        }
        processEndTagForInBody(token);
        break;
    case InColumnGroupMode:
        ASSERT(insertionMode() == InColumnGroupMode);
        if (token.name() == colgroupTag) {
            processColgroupEndTagForInColumnGroup();
            return;
        }
        if (token.name() == colTag) {
            parseError(token);
            return;
        }
        // Any other end tag: close the colgroup, then reprocess the token.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            return;
        }
        reprocessEndTag(token);
        break;
    case InRowMode:
        ASSERT(insertionMode() == InRowMode);
        processEndTagForInRow(token);
        break;
    case InCellMode:
        ASSERT(insertionMode() == InCellMode);
        processEndTagForInCell(token);
        break;
    case InTableBodyMode:
        ASSERT(insertionMode() == InTableBodyMode);
        processEndTagForInTableBody(token);
        break;
    case AfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode);
        if (token.name() == htmlTag) {
            if (isParsingFragment()) {
                parseError(token);
                return;
            }
            setInsertionMode(AfterAfterBodyMode);
            return;
        }
        prepareToReprocessToken();
        // Fall through.
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // Switch back to InBodyMode and handle the token there.
        parseError(token);
        setInsertionMode(InBodyMode);
        reprocessEndTag(token);
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        if (token.name() == noscriptTag) {
            ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
            m_tree.openElements()->pop();
            ASSERT(m_tree.currentElement()->hasTagName(headTag));
            setInsertionMode(InHeadMode);
            return;
        }
        if (token.name() != brTag) {
            parseError(token);
            return;
        }
        // </br>: leave the noscript element and process the token again.
        defaultForInHeadNoscript();
        processToken(token);
        break;
    case TextMode:
        if (token.name() == scriptTag) {
            // Pause ourselves so that parsing stops until the script can be processed by the caller.
            m_isPaused = true;
            ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
            m_scriptToProcess = m_tree.currentElement();
            m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
            m_tree.openElements()->pop();
            // Script content is stripped when the fragment context forbids scripting.
            if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
                m_scriptToProcess->removeAllChildren();
            setInsertionMode(m_originalInsertionMode);

            // This token will not have been created by the tokenizer if a
            // self-closing script tag was encountered and pre-HTML5 parser
            // quirks are enabled. We must set the tokenizer's state to
            // DataState explicitly if the tokenizer didn't have a chance to.
            ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
            m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
            return;
        }
        // Any other end tag in text mode: pop the current element and return
        // to the mode that was active before entering TextMode.
        m_tree.openElements()->pop();
        setInsertionMode(m_originalInsertionMode);
        break;
    case InFramesetMode:
        ASSERT(insertionMode() == InFramesetMode);
        if (token.name() == framesetTag) {
            if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
                parseError(token);
                return;
            }
            m_tree.openElements()->pop();
            // Leave frameset mode once the outermost frameset has been closed
            // (never when parsing a fragment).
            if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
                setInsertionMode(AfterFramesetMode);
            return;
        }
        break;
    case AfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode);
        if (token.name() == htmlTag) {
            setInsertionMode(AfterAfterFramesetMode);
            return;
        }
        // Fall through.
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        parseError(token);
        break;
    case InSelectInTableMode:
        ASSERT(insertionMode() == InSelectInTableMode);
        if (token.name() == captionTag
            || token.name() == tableTag
            || isTableBodyContextTag(token.name())
            || token.name() == trTag
            || isTableCellContextTag(token.name())) {
            parseError(token);
            // A table-structure end tag closes the select first (via a
            // synthesized </select>), but only if the named element is open.
            if (m_tree.openElements()->inTableScope(token.name())) {
                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
                processEndTag(endSelect);
                reprocessEndTag(token);
            }
            return;
        }
        // Fall through.
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        if (token.name() == optgroupTag) {
            // An <option> directly inside an <optgroup> is closed implicitly first.
            if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
                processFakeEndTag(optionTag);
            if (m_tree.currentNode()->hasTagName(optgroupTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == optionTag) {
            if (m_tree.currentNode()->hasTagName(optionTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == selectTag) {
            if (!m_tree.openElements()->inSelectScope(token.name())) {
                ASSERT(isParsingFragment());
                parseError(token);
                return;
            }
            m_tree.openElements()->popUntilPopped(selectTag.localName());
            resetInsertionModeAppropriately();
            return;
        }
        break;
    case InTableTextMode:
        // Flush the pending table characters (which also restores the
        // original insertion mode), then handle the token in that mode.
        defaultForInTableText();
        processEndTag(token);
        break;
    case InForeignContentMode:
        if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
            notImplemented();
            return;
        }
        if (!isInHTMLNamespace(m_tree.currentNode())) {
            // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
            HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
            if (!nodeRecord->node()->hasLocalName(token.name()))
                parseError(token);
            // Walk down the stack until a matching element is found (pop
            // through it) or an HTML-namespace element is reached.
            while (1) {
                if (nodeRecord->node()->hasLocalName(token.name())) {
                    m_tree.openElements()->popUntilPopped(nodeRecord->element());
                    resetForeignInsertionMode();
                    return;
                }
                nodeRecord = nodeRecord->next();

                if (isInHTMLNamespace(nodeRecord->node()))
                    break;
            }
        }
        // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
        processForeignContentUsingInBodyModeAndResetMode(token);
        break;
    }
}
2341
prepareToReprocessToken()2342 void HTMLTreeBuilder::prepareToReprocessToken()
2343 {
2344 if (m_hasPendingForeignInsertionModeSteps) {
2345 resetForeignInsertionMode();
2346 m_hasPendingForeignInsertionModeSteps = false;
2347 }
2348 }
2349
// Processes a start tag token again after the insertion mode has changed.
// Any pending foreign-content mode reset is applied first.
void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
{
    prepareToReprocessToken();
    processStartTag(token);
}
2355
// Processes an end tag token again after the insertion mode has changed.
// Any pending foreign-content mode reset is applied first.
void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
{
    prepareToReprocessToken();
    processEndTag(token);
}
2361
2362 class HTMLTreeBuilder::FakeInsertionMode {
2363 WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2364 public:
FakeInsertionMode(HTMLTreeBuilder * treeBuilder,InsertionMode mode)2365 FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2366 : m_treeBuilder(treeBuilder)
2367 , m_originalMode(treeBuilder->insertionMode())
2368 {
2369 m_treeBuilder->setFakeInsertionMode(mode);
2370 }
2371
~FakeInsertionMode()2372 ~FakeInsertionMode()
2373 {
2374 if (m_treeBuilder->isFakeInsertionMode())
2375 m_treeBuilder->setInsertionMode(m_originalMode);
2376 }
2377
2378 private:
2379 HTMLTreeBuilder* m_treeBuilder;
2380 InsertionMode m_originalMode;
2381 };
2382
processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken & token)2383 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2384 {
2385 m_hasPendingForeignInsertionModeSteps = true;
2386 {
2387 FakeInsertionMode fakeMode(this, InBodyMode);
2388 processToken(token);
2389 }
2390 if (m_hasPendingForeignInsertionModeSteps)
2391 resetForeignInsertionMode();
2392 }
2393
resetForeignInsertionMode()2394 void HTMLTreeBuilder::resetForeignInsertionMode()
2395 {
2396 if (insertionMode() == InForeignContentMode)
2397 resetInsertionModeAppropriately();
2398 }
2399
processComment(AtomicHTMLToken & token)2400 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2401 {
2402 ASSERT(token.type() == HTMLToken::Comment);
2403 if (m_insertionMode == InitialMode
2404 || m_insertionMode == BeforeHTMLMode
2405 || m_insertionMode == AfterAfterBodyMode
2406 || m_insertionMode == AfterAfterFramesetMode) {
2407 m_tree.insertCommentOnDocument(token);
2408 return;
2409 }
2410 if (m_insertionMode == AfterBodyMode) {
2411 m_tree.insertCommentOnHTMLHtmlElement(token);
2412 return;
2413 }
2414 if (m_insertionMode == InTableTextMode) {
2415 defaultForInTableText();
2416 processComment(token);
2417 return;
2418 }
2419 m_tree.insertComment(token);
2420 }
2421
processCharacter(AtomicHTMLToken & token)2422 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2423 {
2424 ASSERT(token.type() == HTMLToken::Character);
2425 ExternalCharacterTokenBuffer buffer(token);
2426 processCharacterBuffer(buffer);
2427 }
2428
// Consumes the characters in |buffer| according to the current insertion
// mode. Modes that only accept leading whitespace consume it and then either
// fall through to the next case or jump back to ReprocessBuffer so the rest
// of the buffer is handled under the new mode.
void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
{
ReprocessBuffer:
    switch (insertionMode()) {
    case InitialMode: {
        ASSERT(insertionMode() == InitialMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForInitial();
        // Fall through.
    }
    case BeforeHTMLMode: {
        ASSERT(insertionMode() == BeforeHTMLMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHTML();
        // Fall through.
    }
    case BeforeHeadMode: {
        ASSERT(insertionMode() == BeforeHeadMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHead();
        // Fall through.
    }
    case InHeadMode: {
        ASSERT(insertionMode() == InHeadMode);
        // Unlike the earlier modes, leading whitespace is inserted as text here.
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHead();
        // Fall through.
    }
    case AfterHeadMode: {
        ASSERT(insertionMode() == AfterHeadMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForAfterHead();
        // Fall through.
    }
    case InBodyMode:
    case InCaptionMode:
    case InCellMode: {
        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
        m_tree.reconstructTheActiveFormattingElements();
        String characters = buffer.takeRemaining();
        m_tree.insertTextNode(characters);
        // Text other than whitespace/replacement characters clears m_framesetOk.
        if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
            m_framesetOk = false;
        break;
    }
    case InTableMode:
    case InTableBodyMode:
    case InRowMode: {
        ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
        ASSERT(m_pendingTableCharacters.isEmpty());
        // Remember the table mode and switch to InTableTextMode, where the
        // characters are buffered instead of being inserted immediately.
        m_originalInsertionMode = m_insertionMode;
        setInsertionMode(InTableTextMode);
        prepareToReprocessToken();
        // Fall through.
    }
    case InTableTextMode: {
        buffer.giveRemainingTo(m_pendingTableCharacters);
        break;
    }
    case InColumnGroupMode: {
        ASSERT(insertionMode() == InColumnGroupMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            // The spec tells us to drop these characters on the floor.
            buffer.takeLeadingNonWhitespace();
            if (buffer.isEmpty())
                return;
        }
        prepareToReprocessToken();
        goto ReprocessBuffer;
    }
    case AfterBodyMode:
    case AfterAfterBodyMode: {
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // FIXME: parse error
        setInsertionMode(InBodyMode);
        prepareToReprocessToken();
        goto ReprocessBuffer;
        // Not reached: the goto above always leaves this case.
        break;
    }
    case TextMode: {
        ASSERT(insertionMode() == TextMode);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InHeadNoscriptMode: {
        ASSERT(insertionMode() == InHeadNoscriptMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHeadNoscript();
        goto ReprocessBuffer;
        // Not reached: the goto above always leaves this case.
        break;
    }
    case InFramesetMode:
    case AfterFramesetMode: {
        ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        // Only the whitespace in the buffer is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace);
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    case InSelectInTableMode:
    case InSelectMode: {
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InForeignContentMode: {
        ASSERT(insertionMode() == InForeignContentMode);
        String characters = buffer.takeRemaining();
        m_tree.insertTextNode(characters);
        // Note: this uses isAllWhitespace(), whereas the in-body case above
        // uses isAllWhitespaceOrReplacementCharacters().
        if (m_framesetOk && !isAllWhitespace(characters))
            m_framesetOk = false;
        break;
    }
    case AfterAfterFramesetMode: {
        // Same as the frameset cases above, except that the active formatting
        // elements are reconstructed before the whitespace is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty()) {
            m_tree.reconstructTheActiveFormattingElements();
            m_tree.insertTextNode(leadingWhitespace);
        }
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    }
}
2580
// Handles the end-of-file token. Cases that `break` reach the bottom of the
// function, where every open element is popped. Cases that `return` have
// either re-entered processEndOfFile() under a different insertion mode or
// stop early (fragment-only InColumnGroupMode paths).
void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::EndOfFile);
    switch (insertionMode()) {
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        defaultForAfterHead();
        // Fall through
    case InBodyMode:
    case InCellMode:
    case InCaptionMode:
    case InRowMode:
        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
        notImplemented(); // Emit parse error based on what elements are still open.
        break;
    case AfterBodyMode:
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        // Close the noscript element, then handle EOF in the resulting mode.
        defaultForInHeadNoscript();
        processEndOfFile(token);
        return;
    case AfterFramesetMode:
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        break;
    case InFramesetMode:
    case InTableMode:
    case InTableBodyMode:
    case InSelectInTableMode:
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
        // Reaching EOF with anything but the root element current is an error.
        if (m_tree.currentNode() != m_tree.openElements()->rootNode())
            parseError(token);
        break;
    case InColumnGroupMode:
        if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
            ASSERT(isParsingFragment());
            return; // FIXME: Should we break here instead of returning?
        }
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            return; // FIXME: Should we break here instead of returning?
        }
        prepareToReprocessToken();
        processEndOfFile(token);
        return;
    case InForeignContentMode:
        setInsertionMode(InBodyMode);
        processEndOfFile(token);
        return;
    case InTableTextMode:
        // Flush pending table characters, then handle EOF in the restored mode.
        defaultForInTableText();
        processEndOfFile(token);
        return;
    case TextMode:
        parseError(token);
        if (m_tree.currentNode()->hasTagName(scriptTag))
            notImplemented(); // mark the script element as "already started".
        // Pop the unfinished text element and go back to the mode that was
        // active before TextMode was entered.
        m_tree.openElements()->pop();
        setInsertionMode(m_originalInsertionMode);
        prepareToReprocessToken();
        processEndOfFile(token);
        return;
    }
    ASSERT(m_tree.currentNode());
    m_tree.openElements()->popAll();
}
2667
defaultForInitial()2668 void HTMLTreeBuilder::defaultForInitial()
2669 {
2670 notImplemented();
2671 if (!m_fragmentContext.fragment())
2672 m_document->setCompatibilityMode(Document::QuirksMode);
2673 // FIXME: parse error
2674 setInsertionMode(BeforeHTMLMode);
2675 prepareToReprocessToken();
2676 }
2677
defaultForBeforeHTML()2678 void HTMLTreeBuilder::defaultForBeforeHTML()
2679 {
2680 AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2681 m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2682 setInsertionMode(BeforeHeadMode);
2683 prepareToReprocessToken();
2684 }
2685
defaultForBeforeHead()2686 void HTMLTreeBuilder::defaultForBeforeHead()
2687 {
2688 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2689 processStartTag(startHead);
2690 prepareToReprocessToken();
2691 }
2692
defaultForInHead()2693 void HTMLTreeBuilder::defaultForInHead()
2694 {
2695 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2696 processEndTag(endHead);
2697 prepareToReprocessToken();
2698 }
2699
defaultForInHeadNoscript()2700 void HTMLTreeBuilder::defaultForInHeadNoscript()
2701 {
2702 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2703 processEndTag(endNoscript);
2704 prepareToReprocessToken();
2705 }
2706
defaultForAfterHead()2707 void HTMLTreeBuilder::defaultForAfterHead()
2708 {
2709 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2710 processStartTag(startBody);
2711 m_framesetOk = true;
2712 prepareToReprocessToken();
2713 }
2714
defaultForInTableText()2715 void HTMLTreeBuilder::defaultForInTableText()
2716 {
2717 String characters = String::adopt(m_pendingTableCharacters);
2718 if (!isAllWhitespace(characters)) {
2719 // FIXME: parse error
2720 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2721 m_tree.reconstructTheActiveFormattingElements();
2722 m_tree.insertTextNode(characters);
2723 m_framesetOk = false;
2724 setInsertionMode(m_originalInsertionMode);
2725 prepareToReprocessToken();
2726 return;
2727 }
2728 m_tree.insertTextNode(characters);
2729 setInsertionMode(m_originalInsertionMode);
2730 prepareToReprocessToken();
2731 }
2732
processStartTagForInHead(AtomicHTMLToken & token)2733 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2734 {
2735 ASSERT(token.type() == HTMLToken::StartTag);
2736 if (token.name() == htmlTag) {
2737 m_tree.insertHTMLHtmlStartTagInBody(token);
2738 return true;
2739 }
2740 if (token.name() == baseTag
2741 || token.name() == basefontTag
2742 || token.name() == bgsoundTag
2743 || token.name() == commandTag
2744 || token.name() == linkTag
2745 || token.name() == metaTag) {
2746 m_tree.insertSelfClosingHTMLElement(token);
2747 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2748 return true;
2749 }
2750 if (token.name() == titleTag) {
2751 processGenericRCDATAStartTag(token);
2752 return true;
2753 }
2754 if (token.name() == noscriptTag) {
2755 if (scriptEnabled(m_document->frame())) {
2756 processGenericRawTextStartTag(token);
2757 return true;
2758 }
2759 m_tree.insertHTMLElement(token);
2760 setInsertionMode(InHeadNoscriptMode);
2761 return true;
2762 }
2763 if (token.name() == noframesTag || token.name() == styleTag) {
2764 processGenericRawTextStartTag(token);
2765 return true;
2766 }
2767 if (token.name() == scriptTag) {
2768 processScriptStartTag(token);
2769 if (m_usePreHTML5ParserQuirks && token.selfClosing())
2770 processFakeEndTag(scriptTag);
2771 return true;
2772 }
2773 if (token.name() == headTag) {
2774 parseError(token);
2775 return true;
2776 }
2777 return false;
2778 }
2779
processGenericRCDATAStartTag(AtomicHTMLToken & token)2780 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2781 {
2782 ASSERT(token.type() == HTMLToken::StartTag);
2783 m_tree.insertHTMLElement(token);
2784 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2785 m_originalInsertionMode = m_insertionMode;
2786 setInsertionMode(TextMode);
2787 }
2788
processGenericRawTextStartTag(AtomicHTMLToken & token)2789 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2790 {
2791 ASSERT(token.type() == HTMLToken::StartTag);
2792 m_tree.insertHTMLElement(token);
2793 m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2794 m_originalInsertionMode = m_insertionMode;
2795 setInsertionMode(TextMode);
2796 }
2797
processScriptStartTag(AtomicHTMLToken & token)2798 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2799 {
2800 ASSERT(token.type() == HTMLToken::StartTag);
2801 m_tree.insertScriptElement(token);
2802 m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2803 m_originalInsertionMode = m_insertionMode;
2804
2805 TextPosition0 position = m_parser->textPosition();
2806
2807 ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2808
2809 m_lastScriptElementStartPosition = position;
2810
2811 setInsertionMode(TextMode);
2812 }
2813
finished()2814 void HTMLTreeBuilder::finished()
2815 {
2816 if (isParsingFragment())
2817 return;
2818
2819 ASSERT(m_document);
2820 // Warning, this may detach the parser. Do not do anything else after this.
2821 m_document->finishedParsing();
2822 }
2823
parseError(AtomicHTMLToken &)2824 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2825 {
2826 }
2827
scriptEnabled(Frame * frame)2828 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2829 {
2830 if (!frame)
2831 return false;
2832 return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2833 }
2834
pluginsEnabled(Frame * frame)2835 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2836 {
2837 if (!frame)
2838 return false;
2839 return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2840 }
2841
2842 }
2843