1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011, 2014 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "core/html/parser/HTMLTreeBuilder.h"
29
30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/MathMLNames.h"
33 #include "core/SVGNames.h"
34 #include "core/XLinkNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/XMLNames.h"
37 #include "core/dom/DocumentFragment.h"
38 #include "core/dom/ElementTraversal.h"
39 #include "core/html/HTMLDocument.h"
40 #include "core/html/HTMLFormElement.h"
41 #include "core/html/parser/AtomicHTMLToken.h"
42 #include "core/html/parser/HTMLDocumentParser.h"
43 #include "core/html/parser/HTMLParserIdioms.h"
44 #include "core/html/parser/HTMLStackItem.h"
45 #include "core/html/parser/HTMLToken.h"
46 #include "core/html/parser/HTMLTokenizer.h"
47 #include "platform/NotImplemented.h"
48 #include "platform/text/PlatformLocale.h"
49 #include "wtf/MainThread.h"
50 #include "wtf/unicode/CharacterNames.h"
51
52 namespace blink {
53
54 using namespace HTMLNames;
55
56 namespace {
57
isHTMLSpaceOrReplacementCharacter(UChar character)58 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
59 {
60 return isHTMLSpace<UChar>(character) || character == replacementCharacter;
61 }
62
63 }
64
uninitializedPositionValue1()65 static TextPosition uninitializedPositionValue1()
66 {
67 return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
68 }
69
isAllWhitespace(const String & string)70 static inline bool isAllWhitespace(const String& string)
71 {
72 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
73 }
74
isAllWhitespaceOrReplacementCharacters(const String & string)75 static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
76 {
77 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
78 }
79
isNumberedHeaderTag(const AtomicString & tagName)80 static bool isNumberedHeaderTag(const AtomicString& tagName)
81 {
82 return tagName == h1Tag
83 || tagName == h2Tag
84 || tagName == h3Tag
85 || tagName == h4Tag
86 || tagName == h5Tag
87 || tagName == h6Tag;
88 }
89
isCaptionColOrColgroupTag(const AtomicString & tagName)90 static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
91 {
92 return tagName == captionTag
93 || tagName == colTag
94 || tagName == colgroupTag;
95 }
96
isTableCellContextTag(const AtomicString & tagName)97 static bool isTableCellContextTag(const AtomicString& tagName)
98 {
99 return tagName == thTag || tagName == tdTag;
100 }
101
isTableBodyContextTag(const AtomicString & tagName)102 static bool isTableBodyContextTag(const AtomicString& tagName)
103 {
104 return tagName == tbodyTag
105 || tagName == tfootTag
106 || tagName == theadTag;
107 }
108
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)109 static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
110 {
111 return tagName == bTag
112 || tagName == bigTag
113 || tagName == codeTag
114 || tagName == emTag
115 || tagName == fontTag
116 || tagName == iTag
117 || tagName == sTag
118 || tagName == smallTag
119 || tagName == strikeTag
120 || tagName == strongTag
121 || tagName == ttTag
122 || tagName == uTag;
123 }
124
isNonAnchorFormattingTag(const AtomicString & tagName)125 static bool isNonAnchorFormattingTag(const AtomicString& tagName)
126 {
127 return tagName == nobrTag
128 || isNonAnchorNonNobrFormattingTag(tagName);
129 }
130
131 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)132 static bool isFormattingTag(const AtomicString& tagName)
133 {
134 return tagName == aTag || isNonAnchorFormattingTag(tagName);
135 }
136
closestFormAncestor(Element & element)137 static HTMLFormElement* closestFormAncestor(Element& element)
138 {
139 ASSERT(isMainThread());
140 return Traversal<HTMLFormElement>::firstAncestorOrSelf(element);
141 }
142
143 class HTMLTreeBuilder::CharacterTokenBuffer {
144 WTF_MAKE_NONCOPYABLE(CharacterTokenBuffer);
145 public:
CharacterTokenBuffer(AtomicHTMLToken * token)146 explicit CharacterTokenBuffer(AtomicHTMLToken* token)
147 : m_characters(token->characters().impl())
148 , m_current(0)
149 , m_end(token->characters().length())
150 {
151 ASSERT(!isEmpty());
152 }
153
CharacterTokenBuffer(const String & characters)154 explicit CharacterTokenBuffer(const String& characters)
155 : m_characters(characters.impl())
156 , m_current(0)
157 , m_end(characters.length())
158 {
159 ASSERT(!isEmpty());
160 }
161
~CharacterTokenBuffer()162 ~CharacterTokenBuffer()
163 {
164 ASSERT(isEmpty());
165 }
166
isEmpty() const167 bool isEmpty() const { return m_current == m_end; }
168
skipAtMostOneLeadingNewline()169 void skipAtMostOneLeadingNewline()
170 {
171 ASSERT(!isEmpty());
172 if ((*m_characters)[m_current] == '\n')
173 ++m_current;
174 }
175
skipLeadingWhitespace()176 void skipLeadingWhitespace()
177 {
178 skipLeading<isHTMLSpace<UChar> >();
179 }
180
takeLeadingWhitespace()181 String takeLeadingWhitespace()
182 {
183 return takeLeading<isHTMLSpace<UChar> >();
184 }
185
skipLeadingNonWhitespace()186 void skipLeadingNonWhitespace()
187 {
188 skipLeading<isNotHTMLSpace<UChar> >();
189 }
190
takeRemaining()191 String takeRemaining()
192 {
193 ASSERT(!isEmpty());
194 unsigned start = m_current;
195 m_current = m_end;
196 // Notice that substring is smart enough to return *this when start == 0.
197 return String(m_characters->substring(start, m_end - start));
198 }
199
giveRemainingTo(StringBuilder & recipient)200 void giveRemainingTo(StringBuilder& recipient)
201 {
202 if (m_characters->is8Bit())
203 recipient.append(m_characters->characters8() + m_current, m_end - m_current);
204 else
205 recipient.append(m_characters->characters16() + m_current, m_end - m_current);
206 m_current = m_end;
207 }
208
takeRemainingWhitespace()209 String takeRemainingWhitespace()
210 {
211 ASSERT(!isEmpty());
212 const unsigned start = m_current;
213 m_current = m_end; // One way or another, we're taking everything!
214
215 unsigned length = 0;
216 for (unsigned i = start; i < m_end; ++i) {
217 if (isHTMLSpace<UChar>((*m_characters)[i]))
218 ++length;
219 }
220 // Returning the null string when there aren't any whitespace
221 // characters is slightly cleaner semantically because we don't want
222 // to insert a text node (as opposed to inserting an empty text node).
223 if (!length)
224 return String();
225 if (length == start - m_end) // It's all whitespace.
226 return String(m_characters->substring(start, start - m_end));
227
228 StringBuilder result;
229 result.reserveCapacity(length);
230 for (unsigned i = start; i < m_end; ++i) {
231 UChar c = (*m_characters)[i];
232 if (isHTMLSpace<UChar>(c))
233 result.append(c);
234 }
235
236 return result.toString();
237 }
238
239 private:
240 template<bool characterPredicate(UChar)>
skipLeading()241 void skipLeading()
242 {
243 ASSERT(!isEmpty());
244 while (characterPredicate((*m_characters)[m_current])) {
245 if (++m_current == m_end)
246 return;
247 }
248 }
249
250 template<bool characterPredicate(UChar)>
takeLeading()251 String takeLeading()
252 {
253 ASSERT(!isEmpty());
254 const unsigned start = m_current;
255 skipLeading<characterPredicate>();
256 if (start == m_current)
257 return String();
258 return String(m_characters->substring(start, m_current - start));
259 }
260
261 RefPtr<StringImpl> m_characters;
262 unsigned m_current;
263 unsigned m_end;
264 };
265
HTMLTreeBuilder(HTMLDocumentParser * parser,HTMLDocument * document,ParserContentPolicy parserContentPolicy,bool,const HTMLParserOptions & options)266 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool, const HTMLParserOptions& options)
267 : m_framesetOk(true)
268 #if ENABLE(ASSERT)
269 , m_isAttached(true)
270 #endif
271 , m_tree(document, parserContentPolicy)
272 , m_insertionMode(InitialMode)
273 , m_originalInsertionMode(InitialMode)
274 , m_shouldSkipLeadingNewline(false)
275 , m_parser(parser)
276 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
277 , m_options(options)
278 {
279 }
280
281 // FIXME: Member variables should be grouped into self-initializing structs to
282 // minimize code duplication between these constructors.
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * contextElement,ParserContentPolicy parserContentPolicy,const HTMLParserOptions & options)283 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
284 : m_framesetOk(true)
285 #if ENABLE(ASSERT)
286 , m_isAttached(true)
287 #endif
288 , m_fragmentContext(fragment, contextElement)
289 , m_tree(fragment, parserContentPolicy)
290 , m_insertionMode(InitialMode)
291 , m_originalInsertionMode(InitialMode)
292 , m_shouldSkipLeadingNewline(false)
293 , m_parser(parser)
294 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
295 , m_options(options)
296 {
297 ASSERT(isMainThread());
298 ASSERT(contextElement);
299
300 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
301 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
302 // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
303 // and instead use the DocumentFragment as a root node.
304 m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment, HTMLStackItem::ItemForDocumentFragmentNode));
305
306 if (isHTMLTemplateElement(*contextElement))
307 m_templateInsertionModes.append(TemplateContentsMode);
308
309 resetInsertionModeAppropriately();
310 m_tree.setForm(closestFormAncestor(*contextElement));
311 }
312
~HTMLTreeBuilder()313 HTMLTreeBuilder::~HTMLTreeBuilder()
314 {
315 }
316
trace(Visitor * visitor)317 void HTMLTreeBuilder::trace(Visitor* visitor)
318 {
319 visitor->trace(m_fragmentContext);
320 visitor->trace(m_tree);
321 visitor->trace(m_parser);
322 visitor->trace(m_scriptToProcess);
323 }
324
detach()325 void HTMLTreeBuilder::detach()
326 {
327 #if ENABLE(ASSERT)
328 // This call makes little sense in fragment mode, but for consistency
329 // DocumentParser expects detach() to always be called before it's destroyed.
330 m_isAttached = false;
331 #endif
332 // HTMLConstructionSite might be on the callstack when detach() is called
333 // otherwise we'd just call m_tree.clear() here instead.
334 m_tree.detach();
335 }
336
FragmentParsingContext()337 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
338 : m_fragment(nullptr)
339 {
340 }
341
FragmentParsingContext(DocumentFragment * fragment,Element * contextElement)342 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement)
343 : m_fragment(fragment)
344 {
345 ASSERT(!fragment->hasChildren());
346 m_contextElementStackItem = HTMLStackItem::create(contextElement, HTMLStackItem::ItemForContextElement);
347 }
348
~FragmentParsingContext()349 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
350 {
351 }
352
trace(Visitor * visitor)353 void HTMLTreeBuilder::FragmentParsingContext::trace(Visitor* visitor)
354 {
355 visitor->trace(m_fragment);
356 visitor->trace(m_contextElementStackItem);
357 }
358
takeScriptToProcess(TextPosition & scriptStartPosition)359 PassRefPtrWillBeRawPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
360 {
361 ASSERT(m_scriptToProcess);
362 ASSERT(!m_tree.hasPendingTasks());
363 // Unpause ourselves, callers may pause us again when processing the script.
364 // The HTML5 spec is written as though scripts are executed inside the tree
365 // builder. We pause the parser to exit the tree builder, and then resume
366 // before running scripts.
367 scriptStartPosition = m_scriptToProcessStartPosition;
368 m_scriptToProcessStartPosition = uninitializedPositionValue1();
369 return m_scriptToProcess.release();
370 }
371
constructTree(AtomicHTMLToken * token)372 void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
373 {
374 if (shouldProcessTokenInForeignContent(token))
375 processTokenInForeignContent(token);
376 else
377 processToken(token);
378
379 if (m_parser->tokenizer()) {
380 bool inForeignContent = false;
381 if (!m_tree.isEmpty()) {
382 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
383 inForeignContent = !adjustedCurrentNode->isInHTMLNamespace()
384 && !HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)
385 && !HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode);
386 }
387
388 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
389 m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
390 }
391
392 m_tree.executeQueuedTasks();
393 // We might be detached now.
394 }
395
processToken(AtomicHTMLToken * token)396 void HTMLTreeBuilder::processToken(AtomicHTMLToken* token)
397 {
398 if (token->type() == HTMLToken::Character) {
399 processCharacter(token);
400 return;
401 }
402
403 // Any non-character token needs to cause us to flush any pending text immediately.
404 // NOTE: flush() can cause any queued tasks to execute, possibly re-entering the parser.
405 m_tree.flush(FlushAlways);
406 m_shouldSkipLeadingNewline = false;
407
408 switch (token->type()) {
409 case HTMLToken::Uninitialized:
410 case HTMLToken::Character:
411 ASSERT_NOT_REACHED();
412 break;
413 case HTMLToken::DOCTYPE:
414 processDoctypeToken(token);
415 break;
416 case HTMLToken::StartTag:
417 processStartTag(token);
418 break;
419 case HTMLToken::EndTag:
420 processEndTag(token);
421 break;
422 case HTMLToken::Comment:
423 processComment(token);
424 break;
425 case HTMLToken::EndOfFile:
426 processEndOfFile(token);
427 break;
428 }
429 }
430
processDoctypeToken(AtomicHTMLToken * token)431 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken* token)
432 {
433 ASSERT(token->type() == HTMLToken::DOCTYPE);
434 if (m_insertionMode == InitialMode) {
435 m_tree.insertDoctype(token);
436 setInsertionMode(BeforeHTMLMode);
437 return;
438 }
439 if (m_insertionMode == InTableTextMode) {
440 defaultForInTableText();
441 processDoctypeToken(token);
442 return;
443 }
444 parseError(token);
445 }
446
processFakeStartTag(const QualifiedName & tagName,const Vector<Attribute> & attributes)447 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
448 {
449 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
450 AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
451 processStartTag(&fakeToken);
452 }
453
processFakeEndTag(const AtomicString & tagName)454 void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
455 {
456 AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
457 processEndTag(&fakeToken);
458 }
459
processFakeEndTag(const QualifiedName & tagName)460 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
461 {
462 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
463 processFakeEndTag(tagName.localName());
464 }
465
processFakePEndTagIfPInButtonScope()466 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
467 {
468 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
469 return;
470 AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
471 processEndTag(&endP);
472 }
473
474 namespace {
475
isLi(const HTMLStackItem * item)476 bool isLi(const HTMLStackItem* item)
477 {
478 return item->hasTagName(liTag);
479 }
480
isDdOrDt(const HTMLStackItem * item)481 bool isDdOrDt(const HTMLStackItem* item)
482 {
483 return item->hasTagName(ddTag)
484 || item->hasTagName(dtTag);
485 }
486
487 }
488
489 template <bool shouldClose(const HTMLStackItem*)>
processCloseWhenNestedTag(AtomicHTMLToken * token)490 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token)
491 {
492 m_framesetOk = false;
493 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
494 while (1) {
495 RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
496 if (shouldClose(item.get())) {
497 ASSERT(item->isElementNode());
498 processFakeEndTag(item->localName());
499 break;
500 }
501 if (item->isSpecialNode() && !item->hasTagName(addressTag) && !item->hasTagName(divTag) && !item->hasTagName(pTag))
502 break;
503 nodeRecord = nodeRecord->next();
504 }
505 processFakePEndTagIfPInButtonScope();
506 m_tree.insertHTMLElement(token);
507 }
508
509 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
510
511 template <typename TableQualifiedName>
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,const TableQualifiedName * const * names,size_t length)512 static void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, const TableQualifiedName* const* names, size_t length)
513 {
514 for (size_t i = 0; i < length; ++i) {
515 const QualifiedName& name = *names[i];
516 const AtomicString& localName = name.localName();
517 AtomicString loweredLocalName = localName.lower();
518 if (loweredLocalName != localName)
519 map->add(loweredLocalName, name);
520 }
521 }
522
adjustSVGTagNameCase(AtomicHTMLToken * token)523 static void adjustSVGTagNameCase(AtomicHTMLToken* token)
524 {
525 static PrefixedNameToQualifiedNameMap* caseMap = 0;
526 if (!caseMap) {
527 caseMap = new PrefixedNameToQualifiedNameMap;
528 OwnPtr<const SVGQualifiedName*[]> svgTags = SVGNames::getSVGTags();
529 mapLoweredLocalNameToName(caseMap, svgTags.get(), SVGNames::SVGTagsCount);
530 }
531
532 const QualifiedName& casedName = caseMap->get(token->name());
533 if (casedName.localName().isNull())
534 return;
535 token->setName(casedName.localName());
536 }
537
538 template<PassOwnPtr<const QualifiedName*[]> getAttrs(), unsigned length>
adjustAttributes(AtomicHTMLToken * token)539 static void adjustAttributes(AtomicHTMLToken* token)
540 {
541 static PrefixedNameToQualifiedNameMap* caseMap = 0;
542 if (!caseMap) {
543 caseMap = new PrefixedNameToQualifiedNameMap;
544 OwnPtr<const QualifiedName*[]> attrs = getAttrs();
545 mapLoweredLocalNameToName(caseMap, attrs.get(), length);
546 }
547
548 for (unsigned i = 0; i < token->attributes().size(); ++i) {
549 Attribute& tokenAttribute = token->attributes().at(i);
550 const QualifiedName& casedName = caseMap->get(tokenAttribute.localName());
551 if (!casedName.localName().isNull())
552 tokenAttribute.parserSetName(casedName);
553 }
554 }
555
adjustSVGAttributes(AtomicHTMLToken * token)556 static void adjustSVGAttributes(AtomicHTMLToken* token)
557 {
558 adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
559 }
560
adjustMathMLAttributes(AtomicHTMLToken * token)561 static void adjustMathMLAttributes(AtomicHTMLToken* token)
562 {
563 adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
564 }
565
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,const QualifiedName * const * names,size_t length)566 static void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, const QualifiedName* const* names, size_t length)
567 {
568 for (size_t i = 0; i < length; ++i) {
569 const QualifiedName* name = names[i];
570 const AtomicString& localName = name->localName();
571 AtomicString prefixColonLocalName = prefix + ':' + localName;
572 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
573 map->add(prefixColonLocalName, nameWithPrefix);
574 }
575 }
576
adjustForeignAttributes(AtomicHTMLToken * token)577 static void adjustForeignAttributes(AtomicHTMLToken* token)
578 {
579 static PrefixedNameToQualifiedNameMap* map = 0;
580 if (!map) {
581 map = new PrefixedNameToQualifiedNameMap;
582
583 OwnPtr<const QualifiedName*[]> attrs = XLinkNames::getXLinkAttrs();
584 addNamesWithPrefix(map, xlinkAtom, attrs.get(), XLinkNames::XLinkAttrsCount);
585
586 OwnPtr<const QualifiedName*[]> xmlAttrs = XMLNames::getXMLAttrs();
587 addNamesWithPrefix(map, xmlAtom, xmlAttrs.get(), XMLNames::XMLAttrsCount);
588
589 map->add(WTF::xmlnsAtom, XMLNSNames::xmlnsAttr);
590 map->add("xmlns:xlink", QualifiedName(xmlnsAtom, xlinkAtom, XMLNSNames::xmlnsNamespaceURI));
591 }
592
593 for (unsigned i = 0; i < token->attributes().size(); ++i) {
594 Attribute& tokenAttribute = token->attributes().at(i);
595 const QualifiedName& name = map->get(tokenAttribute.localName());
596 if (!name.localName().isNull())
597 tokenAttribute.parserSetName(name);
598 }
599 }
600
// Handles a start tag token by dispatching on token->name(). Each recognized
// group of tag names is handled by one if-block below; most blocks return once
// they are done. Two blocks deliberately do not always return: the plugin
// content-policy check (applet/embed/object) and the "image" rename, both of
// which let the token continue to later blocks. A tag name matched by no block
// reaches the generic path at the bottom: reconstruct the active formatting
// elements, then insert an HTML element for the token.
processStartTagForInBody(AtomicHTMLToken * token)601 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
602 {
603 ASSERT(token->type() == HTMLToken::StartTag);
// <html>: handled by a dedicated helper.
604 if (token->name() == htmlTag) {
605 processHtmlStartTagForInBody(token);
606 return;
607 }
// Tags delegated to processStartTagForInHead(); it is expected to report that
// it handled the token (asserted below).
608 if (token->name() == baseTag
609 || token->name() == basefontTag
610 || token->name() == bgsoundTag
611 || token->name() == commandTag
612 || token->name() == linkTag
613 || token->name() == metaTag
614 || token->name() == noframesTag
615 || token->name() == scriptTag
616 || token->name() == styleTag
617 || token->name() == titleTag) {
618 bool didProcess = processStartTagForInHead(token);
619 ASSERT_UNUSED(didProcess, didProcess);
620 return;
621 }
// <body>: always a parse error. The token is dropped when the second open
// element is not a body element, when only one element is open, or when a
// template is in HTML scope; otherwise frameset-ok is cleared and the token is
// handed to insertHTMLBodyStartTagInBody().
622 if (token->name() == bodyTag) {
623 parseError(token);
624 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement() || m_tree.openElements()->hasTemplateInHTMLScope()) {
625 ASSERT(isParsingFragmentOrTemplateContents());
626 return;
627 }
628 m_framesetOk = false;
629 m_tree.insertHTMLBodyStartTagInBody(token);
630 return;
631 }
// <frameset>: always a parse error. The token is dropped unless there is a
// body element as the second open element and frameset-ok is still set. In
// that case the body element is removed, the stack is popped back to the html
// element, the frameset is inserted and the mode becomes InFramesetMode.
632 if (token->name() == framesetTag) {
633 parseError(token);
634 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
635 ASSERT(isParsingFragmentOrTemplateContents());
636 return;
637 }
638 if (!m_framesetOk)
639 return;
640 m_tree.openElements()->bodyElement()->remove(ASSERT_NO_EXCEPTION);
641 m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
642 m_tree.openElements()->popHTMLBodyElement();
643 ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
644 m_tree.insertHTMLElement(token);
645 setInsertionMode(InFramesetMode);
646 return;
647 }
// Tags that first close a p element in button scope (if any) and are then
// inserted as ordinary HTML elements.
648 if (token->name() == addressTag
649 || token->name() == articleTag
650 || token->name() == asideTag
651 || token->name() == blockquoteTag
652 || token->name() == centerTag
653 || token->name() == detailsTag
654 || token->name() == dirTag
655 || token->name() == divTag
656 || token->name() == dlTag
657 || token->name() == fieldsetTag
658 || token->name() == figcaptionTag
659 || token->name() == figureTag
660 || token->name() == footerTag
661 || token->name() == headerTag
662 || token->name() == hgroupTag
663 || token->name() == mainTag
664 || token->name() == menuTag
665 || token->name() == navTag
666 || token->name() == olTag
667 || token->name() == pTag
668 || token->name() == sectionTag
669 || token->name() == summaryTag
670 || token->name() == ulTag) {
671 processFakePEndTagIfPInButtonScope();
672 m_tree.insertHTMLElement(token);
673 return;
674 }
// h1-h6: a heading directly inside another heading is a parse error and the
// outer heading is popped before the new one is inserted.
675 if (isNumberedHeaderTag(token->name())) {
676 processFakePEndTagIfPInButtonScope();
677 if (m_tree.currentStackItem()->isNumberedHeaderElement()) {
678 parseError(token);
679 m_tree.openElements()->pop();
680 }
681 m_tree.insertHTMLElement(token);
682 return;
683 }
// <pre>/<listing>: inserted like a block element; additionally arms
// m_shouldSkipLeadingNewline and clears frameset-ok.
684 if (token->name() == preTag || token->name() == listingTag) {
685 processFakePEndTagIfPInButtonScope();
686 m_tree.insertHTMLElement(token);
687 m_shouldSkipLeadingNewline = true;
688 m_framesetOk = false;
689 return;
690 }
// <form>: a parse error (token dropped) when the construction site already
// has a form; otherwise inserted via insertHTMLFormElement().
691 if (token->name() == formTag) {
692 if (m_tree.form()) {
693 parseError(token);
694 return;
695 }
696 processFakePEndTagIfPInButtonScope();
697 m_tree.insertHTMLFormElement(token);
698 return;
699 }
// <li> and <dd>/<dt>: implicitly close an open element of the same family.
700 if (token->name() == liTag) {
701 processCloseWhenNestedTag<isLi>(token);
702 return;
703 }
704 if (token->name() == ddTag || token->name() == dtTag) {
705 processCloseWhenNestedTag<isDdOrDt>(token);
706 return;
707 }
// <plaintext>: after insertion the tokenizer (when there is one) is switched
// to PLAINTEXTState.
708 if (token->name() == plaintextTag) {
709 processFakePEndTagIfPInButtonScope();
710 m_tree.insertHTMLElement(token);
711 if (m_parser->tokenizer())
712 m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
713 return;
714 }
// <button>: a button already in scope is a parse error; it is closed with a
// synthesized end tag and the start tag is then processed again.
715 if (token->name() == buttonTag) {
716 if (m_tree.openElements()->inScope(buttonTag)) {
717 parseError(token);
718 processFakeEndTag(buttonTag);
719 processStartTag(token); // FIXME: Could we just fall through here?
720 return;
721 }
722 m_tree.reconstructTheActiveFormattingElements();
723 m_tree.insertHTMLElement(token);
724 m_framesetOk = false;
725 return;
726 }
// <a>: an a element still among the active formatting elements is a parse
// error; it is closed with a synthesized end tag and then removed from the
// active formatting list and, if still present, from the open elements.
727 if (token->name() == aTag) {
728 Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
729 if (activeATag) {
730 parseError(token);
731 processFakeEndTag(aTag);
732 m_tree.activeFormattingElements()->remove(activeATag);
733 if (m_tree.openElements()->contains(activeATag))
734 m_tree.openElements()->remove(activeATag);
735 }
736 m_tree.reconstructTheActiveFormattingElements();
737 m_tree.insertFormattingElement(token);
738 return;
739 }
// Remaining formatting tags other than nobr.
740 if (isNonAnchorNonNobrFormattingTag(token->name())) {
741 m_tree.reconstructTheActiveFormattingElements();
742 m_tree.insertFormattingElement(token);
743 return;
744 }
// <nobr>: a nobr already in scope is a parse error and is closed first.
745 if (token->name() == nobrTag) {
746 m_tree.reconstructTheActiveFormattingElements();
747 if (m_tree.openElements()->inScope(nobrTag)) {
748 parseError(token);
749 processFakeEndTag(nobrTag);
750 m_tree.reconstructTheActiveFormattingElements();
751 }
752 m_tree.insertFormattingElement(token);
753 return;
754 }
// Plugin tags are dropped entirely when the parser content policy does not
// allow plugin content. When it does, this block does NOT return: the token
// continues to the blocks below.
755 if (token->name() == appletTag
756 || token->name() == embedTag
757 || token->name() == objectTag) {
758 if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
759 return;
760 }
// <applet>/<marquee>/<object>: inserted, followed by a marker on the active
// formatting elements list; frameset-ok is cleared.
761 if (token->name() == appletTag
762 || token->name() == marqueeTag
763 || token->name() == objectTag) {
764 m_tree.reconstructTheActiveFormattingElements();
765 m_tree.insertHTMLElement(token);
766 m_tree.activeFormattingElements()->appendMarker();
767 m_framesetOk = false;
768 return;
769 }
// <table>: outside quirks mode a p in button scope is closed first; the mode
// becomes InTableMode.
770 if (token->name() == tableTag) {
771 if (!m_tree.inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
772 processFakeEndTag(pTag);
773 m_tree.insertHTMLElement(token);
774 m_framesetOk = false;
775 setInsertionMode(InTableMode);
776 return;
777 }
// <image>: a parse error; the token is renamed to img and intentionally
// continues into the next block.
778 if (token->name() == imageTag) {
779 parseError(token);
780 // Apparently we're not supposed to ask.
781 token->setName(imgTag.localName());
782 // Note the fall through to the imgTag handling below!
783 }
// Tags inserted as self-closing elements that also clear frameset-ok.
784 if (token->name() == areaTag
785 || token->name() == brTag
786 || token->name() == embedTag
787 || token->name() == imgTag
788 || token->name() == keygenTag
789 || token->name() == wbrTag) {
790 m_tree.reconstructTheActiveFormattingElements();
791 m_tree.insertSelfClosingHTMLElement(token);
792 m_framesetOk = false;
793 return;
794 }
// <input>: frameset-ok is cleared unless the type attribute is present and
// equals "hidden" ignoring case. The attribute is looked up before insertion.
795 if (token->name() == inputTag) {
796 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
797 m_tree.reconstructTheActiveFormattingElements();
798 m_tree.insertSelfClosingHTMLElement(token);
799 if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
800 m_framesetOk = false;
801 return;
802 }
// Self-closing tags that leave frameset-ok untouched; menuitem is only
// recognized here when the contextMenu runtime feature is enabled.
803 if ((RuntimeEnabledFeatures::contextMenuEnabled() && token->name() == menuitemTag)
804 || token->name() == paramTag
805 || token->name() == sourceTag
806 || token->name() == trackTag) {
807 m_tree.insertSelfClosingHTMLElement(token);
808 return;
809 }
// <hr>: closes a p in button scope, then inserted as self-closing.
810 if (token->name() == hrTag) {
811 processFakePEndTagIfPInButtonScope();
812 m_tree.insertSelfClosingHTMLElement(token);
813 m_framesetOk = false;
814 return;
815 }
// <textarea>: switches the tokenizer (when there is one) to RCDATAState,
// remembers the current insertion mode as the original one and enters
// TextMode.
816 if (token->name() == textareaTag) {
817 m_tree.insertHTMLElement(token);
818 m_shouldSkipLeadingNewline = true;
819 if (m_parser->tokenizer())
820 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
821 m_originalInsertionMode = m_insertionMode;
822 m_framesetOk = false;
823 setInsertionMode(TextMode);
824 return;
825 }
// <xmp>, <iframe>, <noembed> (only when plugins are enabled) and <noscript>
// (only when script is enabled) are handed to processGenericRawTextStartTag().
826 if (token->name() == xmpTag) {
827 processFakePEndTagIfPInButtonScope();
828 m_tree.reconstructTheActiveFormattingElements();
829 m_framesetOk = false;
830 processGenericRawTextStartTag(token);
831 return;
832 }
833 if (token->name() == iframeTag) {
834 m_framesetOk = false;
835 processGenericRawTextStartTag(token);
836 return;
837 }
838 if (token->name() == noembedTag && m_options.pluginsEnabled) {
839 processGenericRawTextStartTag(token);
840 return;
841 }
842 if (token->name() == noscriptTag && m_options.scriptEnabled) {
843 processGenericRawTextStartTag(token);
844 return;
845 }
// <select>: the next mode is InSelectInTableMode when the current mode is one
// of the table-related modes listed below, InSelectMode otherwise.
846 if (token->name() == selectTag) {
847 m_tree.reconstructTheActiveFormattingElements();
848 m_tree.insertHTMLElement(token);
849 m_framesetOk = false;
850 if (m_insertionMode == InTableMode
851 || m_insertionMode == InCaptionMode
852 || m_insertionMode == InColumnGroupMode
853 || m_insertionMode == InTableBodyMode
854 || m_insertionMode == InRowMode
855 || m_insertionMode == InCellMode)
856 setInsertionMode(InSelectInTableMode);
857 else
858 setInsertionMode(InSelectMode);
859 return;
860 }
// <optgroup>/<option>: an option that is the current node is closed first.
861 if (token->name() == optgroupTag || token->name() == optionTag) {
862 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
863 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
864 processEndTag(&endOption);
865 }
866 m_tree.reconstructTheActiveFormattingElements();
867 m_tree.insertHTMLElement(token);
868 return;
869 }
// <rb>/<rtc>: with ruby in scope, implied end tags are generated; it is a
// parse error if the current node is then not ruby.
870 if (token->name() == rbTag || token->name() == rtcTag) {
871 if (m_tree.openElements()->inScope(rubyTag.localName())) {
872 m_tree.generateImpliedEndTags();
873 if (!m_tree.currentStackItem()->hasTagName(rubyTag))
874 parseError(token);
875 }
876 m_tree.insertHTMLElement(token);
877 return;
878 }
// <rt>/<rp>: like rb/rtc, but implied end tags exclude rtc and the current
// node may be either ruby or rtc without a parse error.
879 if (token->name() == rtTag || token->name() == rpTag) {
880 if (m_tree.openElements()->inScope(rubyTag.localName())) {
881 m_tree.generateImpliedEndTagsWithExclusion(rtcTag.localName());
882 if (!m_tree.currentStackItem()->hasTagName(rubyTag) && !m_tree.currentStackItem()->hasTagName(rtcTag))
883 parseError(token);
884 }
885 m_tree.insertHTMLElement(token);
886 return;
887 }
// <math> and <svg>: attribute names are adjusted, then the element is
// inserted as a foreign element in the MathML or SVG namespace respectively.
888 if (token->name() == MathMLNames::mathTag.localName()) {
889 m_tree.reconstructTheActiveFormattingElements();
890 adjustMathMLAttributes(token);
891 adjustForeignAttributes(token);
892 m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
893 return;
894 }
895 if (token->name() == SVGNames::svgTag.localName()) {
896 m_tree.reconstructTheActiveFormattingElements();
897 adjustSVGAttributes(token);
898 adjustForeignAttributes(token);
899 m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
900 return;
901 }
// Table-structure tags, <frame> and <head> are parse errors here and the
// token is dropped.
902 if (isCaptionColOrColgroupTag(token->name())
903 || token->name() == frameTag
904 || token->name() == headTag
905 || isTableBodyContextTag(token->name())
906 || isTableCellContextTag(token->name())
907 || token->name() == trTag) {
908 parseError(token);
909 return;
910 }
// <template>: handled by a dedicated helper.
911 if (token->name() == templateTag) {
912 processTemplateStartTag(token);
913 return;
914 }
// Any other start tag: generic insertion.
915 m_tree.reconstructTheActiveFormattingElements();
916 m_tree.insertHTMLElement(token);
917 }
918
processTemplateStartTag(AtomicHTMLToken * token)919 void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken* token)
920 {
921 m_tree.activeFormattingElements()->appendMarker();
922 m_tree.insertHTMLElement(token);
923 m_templateInsertionModes.append(TemplateContentsMode);
924 setInsertionMode(TemplateContentsMode);
925 }
926
processTemplateEndTag(AtomicHTMLToken * token)927 bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken* token)
928 {
929 ASSERT(token->name() == templateTag.localName());
930 if (!m_tree.openElements()->hasTemplateInHTMLScope()) {
931 ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && isHTMLTemplateElement(m_fragmentContext.contextElement())));
932 parseError(token);
933 return false;
934 }
935 m_tree.generateImpliedEndTags();
936 if (!m_tree.currentStackItem()->hasTagName(templateTag))
937 parseError(token);
938 m_tree.openElements()->popUntilPopped(templateTag);
939 m_tree.activeFormattingElements()->clearToLastMarker();
940 m_templateInsertionModes.removeLast();
941 resetInsertionModeAppropriately();
942 return true;
943 }
944
processEndOfFileForInTemplateContents(AtomicHTMLToken * token)945 bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken* token)
946 {
947 AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag.localName());
948 if (!processTemplateEndTag(&endTemplate))
949 return false;
950
951 processEndOfFile(token);
952 return true;
953 }
954
processColgroupEndTagForInColumnGroup()955 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
956 {
957 if (m_tree.currentIsRootNode() || isHTMLTemplateElement(*m_tree.currentNode())) {
958 ASSERT(isParsingFragmentOrTemplateContents());
959 // FIXME: parse error
960 return false;
961 }
962 m_tree.openElements()->pop();
963 setInsertionMode(InTableMode);
964 return true;
965 }
966
967 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
adjustedCurrentStackItem() const968 HTMLStackItem* HTMLTreeBuilder::adjustedCurrentStackItem() const
969 {
970 ASSERT(!m_tree.isEmpty());
971 if (isParsingFragment() && m_tree.openElements()->hasOnlyOneElement())
972 return m_fragmentContext.contextElementStackItem();
973
974 return m_tree.currentStackItem();
975 }
976
977 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()978 void HTMLTreeBuilder::closeTheCell()
979 {
980 ASSERT(insertionMode() == InCellMode);
981 if (m_tree.openElements()->inTableScope(tdTag)) {
982 ASSERT(!m_tree.openElements()->inTableScope(thTag));
983 processFakeEndTag(tdTag);
984 return;
985 }
986 ASSERT(m_tree.openElements()->inTableScope(thTag));
987 processFakeEndTag(thTag);
988 ASSERT(insertionMode() == InRowMode);
989 }
990
processStartTagForInTable(AtomicHTMLToken * token)991 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken* token)
992 {
993 ASSERT(token->type() == HTMLToken::StartTag);
994 if (token->name() == captionTag) {
995 m_tree.openElements()->popUntilTableScopeMarker();
996 m_tree.activeFormattingElements()->appendMarker();
997 m_tree.insertHTMLElement(token);
998 setInsertionMode(InCaptionMode);
999 return;
1000 }
1001 if (token->name() == colgroupTag) {
1002 m_tree.openElements()->popUntilTableScopeMarker();
1003 m_tree.insertHTMLElement(token);
1004 setInsertionMode(InColumnGroupMode);
1005 return;
1006 }
1007 if (token->name() == colTag) {
1008 processFakeStartTag(colgroupTag);
1009 ASSERT(InColumnGroupMode);
1010 processStartTag(token);
1011 return;
1012 }
1013 if (isTableBodyContextTag(token->name())) {
1014 m_tree.openElements()->popUntilTableScopeMarker();
1015 m_tree.insertHTMLElement(token);
1016 setInsertionMode(InTableBodyMode);
1017 return;
1018 }
1019 if (isTableCellContextTag(token->name())
1020 || token->name() == trTag) {
1021 processFakeStartTag(tbodyTag);
1022 ASSERT(insertionMode() == InTableBodyMode);
1023 processStartTag(token);
1024 return;
1025 }
1026 if (token->name() == tableTag) {
1027 parseError(token);
1028 if (!processTableEndTagForInTable()) {
1029 ASSERT(isParsingFragmentOrTemplateContents());
1030 return;
1031 }
1032 processStartTag(token);
1033 return;
1034 }
1035 if (token->name() == styleTag || token->name() == scriptTag) {
1036 processStartTagForInHead(token);
1037 return;
1038 }
1039 if (token->name() == inputTag) {
1040 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
1041 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1042 parseError(token);
1043 m_tree.insertSelfClosingHTMLElement(token);
1044 return;
1045 }
1046 // Fall through to "anything else" case.
1047 }
1048 if (token->name() == formTag) {
1049 parseError(token);
1050 if (m_tree.form())
1051 return;
1052 m_tree.insertHTMLFormElement(token, true);
1053 m_tree.openElements()->pop();
1054 return;
1055 }
1056 if (token->name() == templateTag) {
1057 processTemplateStartTag(token);
1058 return;
1059 }
1060 parseError(token);
1061 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1062 processStartTagForInBody(token);
1063 }
1064
processStartTag(AtomicHTMLToken * token)1065 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token)
1066 {
1067 ASSERT(token->type() == HTMLToken::StartTag);
1068 switch (insertionMode()) {
1069 case InitialMode:
1070 ASSERT(insertionMode() == InitialMode);
1071 defaultForInitial();
1072 // Fall through.
1073 case BeforeHTMLMode:
1074 ASSERT(insertionMode() == BeforeHTMLMode);
1075 if (token->name() == htmlTag) {
1076 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1077 setInsertionMode(BeforeHeadMode);
1078 return;
1079 }
1080 defaultForBeforeHTML();
1081 // Fall through.
1082 case BeforeHeadMode:
1083 ASSERT(insertionMode() == BeforeHeadMode);
1084 if (token->name() == htmlTag) {
1085 processHtmlStartTagForInBody(token);
1086 return;
1087 }
1088 if (token->name() == headTag) {
1089 m_tree.insertHTMLHeadElement(token);
1090 setInsertionMode(InHeadMode);
1091 return;
1092 }
1093 defaultForBeforeHead();
1094 // Fall through.
1095 case InHeadMode:
1096 ASSERT(insertionMode() == InHeadMode);
1097 if (processStartTagForInHead(token))
1098 return;
1099 defaultForInHead();
1100 // Fall through.
1101 case AfterHeadMode:
1102 ASSERT(insertionMode() == AfterHeadMode);
1103 if (token->name() == htmlTag) {
1104 processHtmlStartTagForInBody(token);
1105 return;
1106 }
1107 if (token->name() == bodyTag) {
1108 m_framesetOk = false;
1109 m_tree.insertHTMLBodyElement(token);
1110 setInsertionMode(InBodyMode);
1111 return;
1112 }
1113 if (token->name() == framesetTag) {
1114 m_tree.insertHTMLElement(token);
1115 setInsertionMode(InFramesetMode);
1116 return;
1117 }
1118 if (token->name() == baseTag
1119 || token->name() == basefontTag
1120 || token->name() == bgsoundTag
1121 || token->name() == linkTag
1122 || token->name() == metaTag
1123 || token->name() == noframesTag
1124 || token->name() == scriptTag
1125 || token->name() == styleTag
1126 || token->name() == templateTag
1127 || token->name() == titleTag) {
1128 parseError(token);
1129 ASSERT(m_tree.head());
1130 m_tree.openElements()->pushHTMLHeadElement(m_tree.headStackItem());
1131 processStartTagForInHead(token);
1132 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1133 return;
1134 }
1135 if (token->name() == headTag) {
1136 parseError(token);
1137 return;
1138 }
1139 defaultForAfterHead();
1140 // Fall through
1141 case InBodyMode:
1142 ASSERT(insertionMode() == InBodyMode);
1143 processStartTagForInBody(token);
1144 break;
1145 case InTableMode:
1146 ASSERT(insertionMode() == InTableMode);
1147 processStartTagForInTable(token);
1148 break;
1149 case InCaptionMode:
1150 ASSERT(insertionMode() == InCaptionMode);
1151 if (isCaptionColOrColgroupTag(token->name())
1152 || isTableBodyContextTag(token->name())
1153 || isTableCellContextTag(token->name())
1154 || token->name() == trTag) {
1155 parseError(token);
1156 if (!processCaptionEndTagForInCaption()) {
1157 ASSERT(isParsingFragment());
1158 return;
1159 }
1160 processStartTag(token);
1161 return;
1162 }
1163 processStartTagForInBody(token);
1164 break;
1165 case InColumnGroupMode:
1166 ASSERT(insertionMode() == InColumnGroupMode);
1167 if (token->name() == htmlTag) {
1168 processHtmlStartTagForInBody(token);
1169 return;
1170 }
1171 if (token->name() == colTag) {
1172 m_tree.insertSelfClosingHTMLElement(token);
1173 return;
1174 }
1175 if (token->name() == templateTag) {
1176 processTemplateStartTag(token);
1177 return;
1178 }
1179 if (!processColgroupEndTagForInColumnGroup()) {
1180 ASSERT(isParsingFragmentOrTemplateContents());
1181 return;
1182 }
1183 processStartTag(token);
1184 break;
1185 case InTableBodyMode:
1186 ASSERT(insertionMode() == InTableBodyMode);
1187 if (token->name() == trTag) {
1188 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1189 m_tree.insertHTMLElement(token);
1190 setInsertionMode(InRowMode);
1191 return;
1192 }
1193 if (isTableCellContextTag(token->name())) {
1194 parseError(token);
1195 processFakeStartTag(trTag);
1196 ASSERT(insertionMode() == InRowMode);
1197 processStartTag(token);
1198 return;
1199 }
1200 if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) {
1201 // FIXME: This is slow.
1202 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1203 ASSERT(isParsingFragmentOrTemplateContents());
1204 parseError(token);
1205 return;
1206 }
1207 m_tree.openElements()->popUntilTableBodyScopeMarker();
1208 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1209 processFakeEndTag(m_tree.currentStackItem()->localName());
1210 processStartTag(token);
1211 return;
1212 }
1213 processStartTagForInTable(token);
1214 break;
1215 case InRowMode:
1216 ASSERT(insertionMode() == InRowMode);
1217 if (isTableCellContextTag(token->name())) {
1218 m_tree.openElements()->popUntilTableRowScopeMarker();
1219 m_tree.insertHTMLElement(token);
1220 setInsertionMode(InCellMode);
1221 m_tree.activeFormattingElements()->appendMarker();
1222 return;
1223 }
1224 if (token->name() == trTag
1225 || isCaptionColOrColgroupTag(token->name())
1226 || isTableBodyContextTag(token->name())) {
1227 if (!processTrEndTagForInRow()) {
1228 ASSERT(isParsingFragmentOrTemplateContents());
1229 return;
1230 }
1231 ASSERT(insertionMode() == InTableBodyMode);
1232 processStartTag(token);
1233 return;
1234 }
1235 processStartTagForInTable(token);
1236 break;
1237 case InCellMode:
1238 ASSERT(insertionMode() == InCellMode);
1239 if (isCaptionColOrColgroupTag(token->name())
1240 || isTableCellContextTag(token->name())
1241 || token->name() == trTag
1242 || isTableBodyContextTag(token->name())) {
1243 // FIXME: This could be more efficient.
1244 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1245 ASSERT(isParsingFragment());
1246 parseError(token);
1247 return;
1248 }
1249 closeTheCell();
1250 processStartTag(token);
1251 return;
1252 }
1253 processStartTagForInBody(token);
1254 break;
1255 case AfterBodyMode:
1256 case AfterAfterBodyMode:
1257 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1258 if (token->name() == htmlTag) {
1259 processHtmlStartTagForInBody(token);
1260 return;
1261 }
1262 setInsertionMode(InBodyMode);
1263 processStartTag(token);
1264 break;
1265 case InHeadNoscriptMode:
1266 ASSERT(insertionMode() == InHeadNoscriptMode);
1267 if (token->name() == htmlTag) {
1268 processHtmlStartTagForInBody(token);
1269 return;
1270 }
1271 if (token->name() == basefontTag
1272 || token->name() == bgsoundTag
1273 || token->name() == linkTag
1274 || token->name() == metaTag
1275 || token->name() == noframesTag
1276 || token->name() == styleTag) {
1277 bool didProcess = processStartTagForInHead(token);
1278 ASSERT_UNUSED(didProcess, didProcess);
1279 return;
1280 }
1281 if (token->name() == htmlTag || token->name() == noscriptTag) {
1282 parseError(token);
1283 return;
1284 }
1285 defaultForInHeadNoscript();
1286 processToken(token);
1287 break;
1288 case InFramesetMode:
1289 ASSERT(insertionMode() == InFramesetMode);
1290 if (token->name() == htmlTag) {
1291 processHtmlStartTagForInBody(token);
1292 return;
1293 }
1294 if (token->name() == framesetTag) {
1295 m_tree.insertHTMLElement(token);
1296 return;
1297 }
1298 if (token->name() == frameTag) {
1299 m_tree.insertSelfClosingHTMLElement(token);
1300 return;
1301 }
1302 if (token->name() == noframesTag) {
1303 processStartTagForInHead(token);
1304 return;
1305 }
1306 if (token->name() == templateTag) {
1307 processTemplateStartTag(token);
1308 return;
1309 }
1310 parseError(token);
1311 break;
1312 case AfterFramesetMode:
1313 case AfterAfterFramesetMode:
1314 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1315 if (token->name() == htmlTag) {
1316 processHtmlStartTagForInBody(token);
1317 return;
1318 }
1319 if (token->name() == noframesTag) {
1320 processStartTagForInHead(token);
1321 return;
1322 }
1323 parseError(token);
1324 break;
1325 case InSelectInTableMode:
1326 ASSERT(insertionMode() == InSelectInTableMode);
1327 if (token->name() == captionTag
1328 || token->name() == tableTag
1329 || isTableBodyContextTag(token->name())
1330 || token->name() == trTag
1331 || isTableCellContextTag(token->name())) {
1332 parseError(token);
1333 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1334 processEndTag(&endSelect);
1335 processStartTag(token);
1336 return;
1337 }
1338 // Fall through
1339 case InSelectMode:
1340 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1341 if (token->name() == htmlTag) {
1342 processHtmlStartTagForInBody(token);
1343 return;
1344 }
1345 if (token->name() == optionTag) {
1346 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1347 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1348 processEndTag(&endOption);
1349 }
1350 m_tree.insertHTMLElement(token);
1351 return;
1352 }
1353 if (token->name() == optgroupTag) {
1354 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1355 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1356 processEndTag(&endOption);
1357 }
1358 if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
1359 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1360 processEndTag(&endOptgroup);
1361 }
1362 m_tree.insertHTMLElement(token);
1363 return;
1364 }
1365 if (token->name() == selectTag) {
1366 parseError(token);
1367 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1368 processEndTag(&endSelect);
1369 return;
1370 }
1371 if (token->name() == inputTag
1372 || token->name() == keygenTag
1373 || token->name() == textareaTag) {
1374 parseError(token);
1375 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1376 ASSERT(isParsingFragment());
1377 return;
1378 }
1379 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1380 processEndTag(&endSelect);
1381 processStartTag(token);
1382 return;
1383 }
1384 if (token->name() == scriptTag) {
1385 bool didProcess = processStartTagForInHead(token);
1386 ASSERT_UNUSED(didProcess, didProcess);
1387 return;
1388 }
1389 if (token->name() == templateTag) {
1390 processTemplateStartTag(token);
1391 return;
1392 }
1393 break;
1394 case InTableTextMode:
1395 defaultForInTableText();
1396 processStartTag(token);
1397 break;
1398 case TextMode:
1399 ASSERT_NOT_REACHED();
1400 break;
1401 case TemplateContentsMode:
1402 if (token->name() == templateTag) {
1403 processTemplateStartTag(token);
1404 return;
1405 }
1406
1407 if (token->name() == linkTag
1408 || token->name() == scriptTag
1409 || token->name() == styleTag
1410 || token->name() == metaTag) {
1411 processStartTagForInHead(token);
1412 return;
1413 }
1414
1415 InsertionMode insertionMode = TemplateContentsMode;
1416 if (token->name() == frameTag)
1417 insertionMode = InFramesetMode;
1418 else if (token->name() == colTag)
1419 insertionMode = InColumnGroupMode;
1420 else if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name()))
1421 insertionMode = InTableMode;
1422 else if (token->name() == trTag)
1423 insertionMode = InTableBodyMode;
1424 else if (isTableCellContextTag(token->name()))
1425 insertionMode = InRowMode;
1426 else
1427 insertionMode = InBodyMode;
1428
1429 ASSERT(insertionMode != TemplateContentsMode);
1430 ASSERT(m_templateInsertionModes.last() == TemplateContentsMode);
1431 m_templateInsertionModes.last() = insertionMode;
1432 setInsertionMode(insertionMode);
1433
1434 processStartTag(token);
1435 break;
1436 }
1437 }
1438
processHtmlStartTagForInBody(AtomicHTMLToken * token)1439 void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken* token)
1440 {
1441 parseError(token);
1442 if (m_tree.openElements()->hasTemplateInHTMLScope()) {
1443 ASSERT(isParsingTemplateContents());
1444 return;
1445 }
1446 m_tree.insertHTMLHtmlStartTagInBody(token);
1447 }
1448
processBodyEndTagForInBody(AtomicHTMLToken * token)1449 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken* token)
1450 {
1451 ASSERT(token->type() == HTMLToken::EndTag);
1452 ASSERT(token->name() == bodyTag);
1453 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1454 parseError(token);
1455 return false;
1456 }
1457 notImplemented(); // Emit a more specific parse error based on stack contents.
1458 setInsertionMode(AfterBodyMode);
1459 return true;
1460 }
1461
processAnyOtherEndTagForInBody(AtomicHTMLToken * token)1462 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token)
1463 {
1464 ASSERT(token->type() == HTMLToken::EndTag);
1465 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1466 while (1) {
1467 RefPtrWillBeRawPtr<HTMLStackItem> item = record->stackItem();
1468 if (item->matchesHTMLTag(token->name())) {
1469 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1470 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1471 parseError(token);
1472 m_tree.openElements()->popUntilPopped(item->element());
1473 return;
1474 }
1475 if (item->isSpecialNode()) {
1476 parseError(token);
1477 return;
1478 }
1479 record = record->next();
1480 }
1481 }
1482
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
//
// Runs the adoption agency algorithm for the formatting-element end tag
// |token|. The numbered comments below refer to the steps of that algorithm.
// Each outer iteration looks up the closest in-scope active formatting element
// named like the token and either finishes (returns) or restructures the tree
// around the element's "furthest block" and loops again, up to
// outerIterationLimit times.
void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token)
{
    // The adoption agency algorithm is N^2. We limit the number of iterations
    // to stop from hanging the whole browser. This limit is specified in the
    // adoption agency algorithm:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
    static const int outerIterationLimit = 8;
    static const int innerIterationLimit = 3;

    // 1, 2, 3 and 16 are covered by the for() loop.
    for (int i = 0; i < outerIterationLimit; ++i) {
        // 4.
        Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token->name());
        // 4.a
        // No such formatting element: treat the token like any other end tag.
        if (!formattingElement)
            return processAnyOtherEndTagForInBody(token);
        // 4.c
        // The element is on the stack of open elements but not in scope:
        // parse error, ignore the token.
        if ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement)) {
            parseError(token);
            notImplemented(); // Check the stack of open elements for a more specific parse error.
            return;
        }
        // 4.b
        // The element is not on the stack of open elements at all: parse
        // error, drop it from the active formatting elements.
        HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
        if (!formattingElementRecord) {
            parseError(token);
            m_tree.activeFormattingElements()->remove(formattingElement);
            return;
        }
        // 4.d
        if (formattingElement != m_tree.currentElement())
            parseError(token);
        // 5.
        HTMLElementStack::ElementRecord* furthestBlock = m_tree.openElements()->furthestBlockForFormattingElement(formattingElement);
        // 6.
        // No furthest block: simply pop through the formatting element and
        // remove it from the active formatting elements.
        if (!furthestBlock) {
            m_tree.openElements()->popUntilPopped(formattingElement);
            m_tree.activeFormattingElements()->remove(formattingElement);
            return;
        }
        // 7.
        ASSERT(furthestBlock->isAbove(formattingElementRecord));
        RefPtrWillBeRawPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem();
        // 8.
        HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
        // 9.
        HTMLElementStack::ElementRecord* node = furthestBlock;
        HTMLElementStack::ElementRecord* nextNode = node->next();
        HTMLElementStack::ElementRecord* lastNode = furthestBlock;
        // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
        // Note: this counter shadows the outer loop's |i|.
        for (int i = 0; i < innerIterationLimit; ++i) {
            // 9.4
            node = nextNode;
            ASSERT(node);
            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5.
            // 9.5
            // Not an active formatting element: drop it from the stack of
            // open elements and move on to the saved next record.
            if (!m_tree.activeFormattingElements()->contains(node->element())) {
                m_tree.openElements()->remove(node->element());
                node = 0;
                continue;
            }
            // 9.6
            if (node == formattingElementRecord)
                break;
            // 9.7
            // Replace the element with a fresh one created from its saved
            // token, in both the formatting list entry and the stack record.
            RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get());

            HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
            nodeEntry->replaceElement(newItem);
            node->replaceElement(newItem.release());

            // 9.8
            if (lastNode == furthestBlock)
                bookmark.moveToAfter(nodeEntry);
            // 9.9
            m_tree.reparent(node, lastNode);
            // 9.10
            lastNode = node;
        }
        // 10.
        m_tree.insertAlreadyParsedChild(commonAncestor.get(), lastNode);
        // 11.
        RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get());
        // 12.
        m_tree.takeAllChildren(newItem.get(), furthestBlock);
        // 13.
        m_tree.reparent(furthestBlock, newItem.get());
        // 14.
        m_tree.activeFormattingElements()->swapTo(formattingElement, newItem, bookmark);
        // 15.
        m_tree.openElements()->remove(formattingElement);
        m_tree.openElements()->insertAbove(newItem, furthestBlock);
    }
}
1578
resetInsertionModeAppropriately()1579 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1580 {
1581 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1582 bool last = false;
1583 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1584 while (1) {
1585 RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
1586 if (item->node() == m_tree.openElements()->rootNode()) {
1587 last = true;
1588 if (isParsingFragment())
1589 item = m_fragmentContext.contextElementStackItem();
1590 }
1591 if (item->hasTagName(templateTag))
1592 return setInsertionMode(m_templateInsertionModes.last());
1593 if (item->hasTagName(selectTag)) {
1594 if (!last) {
1595 while (item->node() != m_tree.openElements()->rootNode() && !item->hasTagName(templateTag)) {
1596 nodeRecord = nodeRecord->next();
1597 item = nodeRecord->stackItem();
1598 if (item->hasTagName(tableTag))
1599 return setInsertionMode(InSelectInTableMode);
1600 }
1601 }
1602 return setInsertionMode(InSelectMode);
1603 }
1604 if (item->hasTagName(tdTag) || item->hasTagName(thTag))
1605 return setInsertionMode(InCellMode);
1606 if (item->hasTagName(trTag))
1607 return setInsertionMode(InRowMode);
1608 if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag))
1609 return setInsertionMode(InTableBodyMode);
1610 if (item->hasTagName(captionTag))
1611 return setInsertionMode(InCaptionMode);
1612 if (item->hasTagName(colgroupTag)) {
1613 return setInsertionMode(InColumnGroupMode);
1614 }
1615 if (item->hasTagName(tableTag))
1616 return setInsertionMode(InTableMode);
1617 if (item->hasTagName(headTag)) {
1618 if (!m_fragmentContext.fragment() || m_fragmentContext.contextElement() != item->node())
1619 return setInsertionMode(InHeadMode);
1620 return setInsertionMode(InBodyMode);
1621 }
1622 if (item->hasTagName(bodyTag))
1623 return setInsertionMode(InBodyMode);
1624 if (item->hasTagName(framesetTag)) {
1625 return setInsertionMode(InFramesetMode);
1626 }
1627 if (item->hasTagName(htmlTag)) {
1628 if (m_tree.headStackItem())
1629 return setInsertionMode(AfterHeadMode);
1630
1631 ASSERT(isParsingFragment());
1632 return setInsertionMode(BeforeHeadMode);
1633 }
1634 if (last) {
1635 ASSERT(isParsingFragment());
1636 return setInsertionMode(InBodyMode);
1637 }
1638 nodeRecord = nodeRecord->next();
1639 }
1640 }
1641
processEndTagForInTableBody(AtomicHTMLToken * token)1642 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken* token)
1643 {
1644 ASSERT(token->type() == HTMLToken::EndTag);
1645 if (isTableBodyContextTag(token->name())) {
1646 if (!m_tree.openElements()->inTableScope(token->name())) {
1647 parseError(token);
1648 return;
1649 }
1650 m_tree.openElements()->popUntilTableBodyScopeMarker();
1651 m_tree.openElements()->pop();
1652 setInsertionMode(InTableMode);
1653 return;
1654 }
1655 if (token->name() == tableTag) {
1656 // FIXME: This is slow.
1657 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1658 ASSERT(isParsingFragmentOrTemplateContents());
1659 parseError(token);
1660 return;
1661 }
1662 m_tree.openElements()->popUntilTableBodyScopeMarker();
1663 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1664 processFakeEndTag(m_tree.currentStackItem()->localName());
1665 processEndTag(token);
1666 return;
1667 }
1668 if (token->name() == bodyTag
1669 || isCaptionColOrColgroupTag(token->name())
1670 || token->name() == htmlTag
1671 || isTableCellContextTag(token->name())
1672 || token->name() == trTag) {
1673 parseError(token);
1674 return;
1675 }
1676 processEndTagForInTable(token);
1677 }
1678
processEndTagForInRow(AtomicHTMLToken * token)1679 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken* token)
1680 {
1681 ASSERT(token->type() == HTMLToken::EndTag);
1682 if (token->name() == trTag) {
1683 processTrEndTagForInRow();
1684 return;
1685 }
1686 if (token->name() == tableTag) {
1687 if (!processTrEndTagForInRow()) {
1688 ASSERT(isParsingFragmentOrTemplateContents());
1689 return;
1690 }
1691 ASSERT(insertionMode() == InTableBodyMode);
1692 processEndTag(token);
1693 return;
1694 }
1695 if (isTableBodyContextTag(token->name())) {
1696 if (!m_tree.openElements()->inTableScope(token->name())) {
1697 parseError(token);
1698 return;
1699 }
1700 processFakeEndTag(trTag);
1701 ASSERT(insertionMode() == InTableBodyMode);
1702 processEndTag(token);
1703 return;
1704 }
1705 if (token->name() == bodyTag
1706 || isCaptionColOrColgroupTag(token->name())
1707 || token->name() == htmlTag
1708 || isTableCellContextTag(token->name())) {
1709 parseError(token);
1710 return;
1711 }
1712 processEndTagForInTable(token);
1713 }
1714
processEndTagForInCell(AtomicHTMLToken * token)1715 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken* token)
1716 {
1717 ASSERT(token->type() == HTMLToken::EndTag);
1718 if (isTableCellContextTag(token->name())) {
1719 if (!m_tree.openElements()->inTableScope(token->name())) {
1720 parseError(token);
1721 return;
1722 }
1723 m_tree.generateImpliedEndTags();
1724 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1725 parseError(token);
1726 m_tree.openElements()->popUntilPopped(token->name());
1727 m_tree.activeFormattingElements()->clearToLastMarker();
1728 setInsertionMode(InRowMode);
1729 return;
1730 }
1731 if (token->name() == bodyTag
1732 || isCaptionColOrColgroupTag(token->name())
1733 || token->name() == htmlTag) {
1734 parseError(token);
1735 return;
1736 }
1737 if (token->name() == tableTag
1738 || token->name() == trTag
1739 || isTableBodyContextTag(token->name())) {
1740 if (!m_tree.openElements()->inTableScope(token->name())) {
1741 ASSERT(isTableBodyContextTag(token->name()) || m_tree.openElements()->inTableScope(templateTag) || isParsingFragment());
1742 parseError(token);
1743 return;
1744 }
1745 closeTheCell();
1746 processEndTag(token);
1747 return;
1748 }
1749 processEndTagForInBody(token);
1750 }
1751
processEndTagForInBody(AtomicHTMLToken * token)1752 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token)
1753 {
1754 ASSERT(token->type() == HTMLToken::EndTag);
1755 if (token->name() == bodyTag) {
1756 processBodyEndTagForInBody(token);
1757 return;
1758 }
1759 if (token->name() == htmlTag) {
1760 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1761 if (processBodyEndTagForInBody(&endBody))
1762 processEndTag(token);
1763 return;
1764 }
1765 if (token->name() == addressTag
1766 || token->name() == articleTag
1767 || token->name() == asideTag
1768 || token->name() == blockquoteTag
1769 || token->name() == buttonTag
1770 || token->name() == centerTag
1771 || token->name() == detailsTag
1772 || token->name() == dirTag
1773 || token->name() == divTag
1774 || token->name() == dlTag
1775 || token->name() == fieldsetTag
1776 || token->name() == figcaptionTag
1777 || token->name() == figureTag
1778 || token->name() == footerTag
1779 || token->name() == headerTag
1780 || token->name() == hgroupTag
1781 || token->name() == listingTag
1782 || token->name() == mainTag
1783 || token->name() == menuTag
1784 || token->name() == navTag
1785 || token->name() == olTag
1786 || token->name() == preTag
1787 || token->name() == sectionTag
1788 || token->name() == summaryTag
1789 || token->name() == ulTag) {
1790 if (!m_tree.openElements()->inScope(token->name())) {
1791 parseError(token);
1792 return;
1793 }
1794 m_tree.generateImpliedEndTags();
1795 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1796 parseError(token);
1797 m_tree.openElements()->popUntilPopped(token->name());
1798 return;
1799 }
1800 if (token->name() == formTag) {
1801 RefPtrWillBeRawPtr<Element> node = m_tree.takeForm();
1802 if (!node || !m_tree.openElements()->inScope(node.get())) {
1803 parseError(token);
1804 return;
1805 }
1806 m_tree.generateImpliedEndTags();
1807 if (m_tree.currentElement() != node.get())
1808 parseError(token);
1809 m_tree.openElements()->remove(node.get());
1810 }
1811 if (token->name() == pTag) {
1812 if (!m_tree.openElements()->inButtonScope(token->name())) {
1813 parseError(token);
1814 processFakeStartTag(pTag);
1815 ASSERT(m_tree.openElements()->inScope(token->name()));
1816 processEndTag(token);
1817 return;
1818 }
1819 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1820 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1821 parseError(token);
1822 m_tree.openElements()->popUntilPopped(token->name());
1823 return;
1824 }
1825 if (token->name() == liTag) {
1826 if (!m_tree.openElements()->inListItemScope(token->name())) {
1827 parseError(token);
1828 return;
1829 }
1830 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1831 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1832 parseError(token);
1833 m_tree.openElements()->popUntilPopped(token->name());
1834 return;
1835 }
1836 if (token->name() == ddTag
1837 || token->name() == dtTag) {
1838 if (!m_tree.openElements()->inScope(token->name())) {
1839 parseError(token);
1840 return;
1841 }
1842 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1843 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1844 parseError(token);
1845 m_tree.openElements()->popUntilPopped(token->name());
1846 return;
1847 }
1848 if (isNumberedHeaderTag(token->name())) {
1849 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1850 parseError(token);
1851 return;
1852 }
1853 m_tree.generateImpliedEndTags();
1854 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1855 parseError(token);
1856 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1857 return;
1858 }
1859 if (isFormattingTag(token->name())) {
1860 callTheAdoptionAgency(token);
1861 return;
1862 }
1863 if (token->name() == appletTag
1864 || token->name() == marqueeTag
1865 || token->name() == objectTag) {
1866 if (!m_tree.openElements()->inScope(token->name())) {
1867 parseError(token);
1868 return;
1869 }
1870 m_tree.generateImpliedEndTags();
1871 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1872 parseError(token);
1873 m_tree.openElements()->popUntilPopped(token->name());
1874 m_tree.activeFormattingElements()->clearToLastMarker();
1875 return;
1876 }
1877 if (token->name() == brTag) {
1878 parseError(token);
1879 processFakeStartTag(brTag);
1880 return;
1881 }
1882 if (token->name() == templateTag) {
1883 processTemplateEndTag(token);
1884 return;
1885 }
1886 processAnyOtherEndTagForInBody(token);
1887 }
1888
processCaptionEndTagForInCaption()1889 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1890 {
1891 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1892 ASSERT(isParsingFragment());
1893 // FIXME: parse error
1894 return false;
1895 }
1896 m_tree.generateImpliedEndTags();
1897 // FIXME: parse error if (!m_tree.currentStackItem()->hasTagName(captionTag))
1898 m_tree.openElements()->popUntilPopped(captionTag.localName());
1899 m_tree.activeFormattingElements()->clearToLastMarker();
1900 setInsertionMode(InTableMode);
1901 return true;
1902 }
1903
processTrEndTagForInRow()1904 bool HTMLTreeBuilder::processTrEndTagForInRow()
1905 {
1906 if (!m_tree.openElements()->inTableScope(trTag)) {
1907 ASSERT(isParsingFragmentOrTemplateContents());
1908 // FIXME: parse error
1909 return false;
1910 }
1911 m_tree.openElements()->popUntilTableRowScopeMarker();
1912 ASSERT(m_tree.currentStackItem()->hasTagName(trTag));
1913 m_tree.openElements()->pop();
1914 setInsertionMode(InTableBodyMode);
1915 return true;
1916 }
1917
processTableEndTagForInTable()1918 bool HTMLTreeBuilder::processTableEndTagForInTable()
1919 {
1920 if (!m_tree.openElements()->inTableScope(tableTag)) {
1921 ASSERT(isParsingFragmentOrTemplateContents());
1922 // FIXME: parse error.
1923 return false;
1924 }
1925 m_tree.openElements()->popUntilPopped(tableTag.localName());
1926 resetInsertionModeAppropriately();
1927 return true;
1928 }
1929
processEndTagForInTable(AtomicHTMLToken * token)1930 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken* token)
1931 {
1932 ASSERT(token->type() == HTMLToken::EndTag);
1933 if (token->name() == tableTag) {
1934 processTableEndTagForInTable();
1935 return;
1936 }
1937 if (token->name() == bodyTag
1938 || isCaptionColOrColgroupTag(token->name())
1939 || token->name() == htmlTag
1940 || isTableBodyContextTag(token->name())
1941 || isTableCellContextTag(token->name())
1942 || token->name() == trTag) {
1943 parseError(token);
1944 return;
1945 }
1946 parseError(token);
1947 // Is this redirection necessary here?
1948 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1949 processEndTagForInBody(token);
1950 }
1951
// Routes an end-tag token to the handling for the current insertion mode.
//
// The early modes (InitialMode through AfterHeadMode) either drop the token
// with a parse error or run that mode's default action and then deliberately
// fall through into the next case; the ASSERT at the top of each case states
// the insertion mode expected at that point. Several cases call
// processEndTag() again on the same token after an action that is expected to
// change the insertion mode, so the token is reprocessed under the new mode.
void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::EndTag);
    switch (insertionMode()) {
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        // Only </head>, </body>, </html> and </br> trigger the default action;
        // any other end tag is reported and dropped.
        if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        // Same filter as BeforeHTMLMode.
        if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        // FIXME: This case should be broken out into processEndTagForInHead,
        // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
        // but because the logic falls through to AfterHeadMode, that gets a little messy.
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
        if (token->name() == headTag) {
            m_tree.openElements()->popHTMLHeadElement();
            setInsertionMode(AfterHeadMode);
            return;
        }
        // Only </body>, </html> and </br> continue past this point.
        if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForAfterHead();
        // Fall through
    case InBodyMode:
        ASSERT(insertionMode() == InBodyMode);
        processEndTagForInBody(token);
        break;
    case InTableMode:
        ASSERT(insertionMode() == InTableMode);
        processEndTagForInTable(token);
        break;
    case InCaptionMode:
        ASSERT(insertionMode() == InCaptionMode);
        if (token->name() == captionTag) {
            processCaptionEndTagForInCaption();
            return;
        }
        if (token->name() == tableTag) {
            // </table> inside a caption: close the caption first, then
            // reprocess the same token (the helper switches to InTableMode on
            // success).
            parseError(token);
            if (!processCaptionEndTagForInCaption()) {
                ASSERT(isParsingFragment());
                return;
            }
            processEndTag(token);
            return;
        }
        // These end tags are reported and dropped while inside a caption.
        if (token->name() == bodyTag
            || token->name() == colTag
            || token->name() == colgroupTag
            || token->name() == htmlTag
            || isTableBodyContextTag(token->name())
            || isTableCellContextTag(token->name())
            || token->name() == trTag) {
            parseError(token);
            return;
        }
        processEndTagForInBody(token);
        break;
    case InColumnGroupMode:
        ASSERT(insertionMode() == InColumnGroupMode);
        if (token->name() == colgroupTag) {
            processColgroupEndTagForInColumnGroup();
            return;
        }
        if (token->name() == colTag) {
            parseError(token);
            return;
        }
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
        // Any other end tag: try to close the colgroup and, if that worked,
        // reprocess the token.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        processEndTag(token);
        break;
    case InRowMode:
        ASSERT(insertionMode() == InRowMode);
        processEndTagForInRow(token);
        break;
    case InCellMode:
        ASSERT(insertionMode() == InCellMode);
        processEndTagForInCell(token);
        break;
    case InTableBodyMode:
        ASSERT(insertionMode() == InTableBodyMode);
        processEndTagForInTableBody(token);
        break;
    case AfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode);
        if (token->name() == htmlTag) {
            if (isParsingFragment()) {
                parseError(token);
                return;
            }
            setInsertionMode(AfterAfterBodyMode);
            return;
        }
        // Fall through.
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // Report the token, switch back to InBodyMode and reprocess it there.
        parseError(token);
        setInsertionMode(InBodyMode);
        processEndTag(token);
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        if (token->name() == noscriptTag) {
            ASSERT(m_tree.currentStackItem()->hasTagName(noscriptTag));
            m_tree.openElements()->pop();
            ASSERT(m_tree.currentStackItem()->hasTagName(headTag));
            setInsertionMode(InHeadMode);
            return;
        }
        // Only </br> continues past this point.
        if (token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHeadNoscript();
        processToken(token);
        break;
    case TextMode:
        if (token->name() == scriptTag) {
            // Pause ourselves so that parsing stops until the script can be processed by the caller.
            ASSERT(m_tree.currentStackItem()->hasTagName(scriptTag));
            if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
                m_scriptToProcess = m_tree.currentElement();
            m_tree.openElements()->pop();
            setInsertionMode(m_originalInsertionMode);

            if (m_parser->tokenizer()) {
                // We must set the tokenizer's state to
                // DataState explicitly if the tokenizer didn't have a chance to.
                ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_options.useThreading);
                m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
            }
            return;
        }
        // Any other end tag in TextMode pops the current element and restores
        // the insertion mode that was saved when TextMode was entered.
        m_tree.openElements()->pop();
        setInsertionMode(m_originalInsertionMode);
        break;
    case InFramesetMode:
        ASSERT(insertionMode() == InFramesetMode);
        if (token->name() == framesetTag) {
            // </frameset> is ignored when the current node is the root node
            // or a template is in HTML scope.
            bool ignoreFramesetForFragmentParsing = m_tree.currentIsRootNode();
            ignoreFramesetForFragmentParsing = ignoreFramesetForFragmentParsing || m_tree.openElements()->hasTemplateInHTMLScope();
            if (ignoreFramesetForFragmentParsing) {
                ASSERT(isParsingFragmentOrTemplateContents());
                parseError(token);
                return;
            }
            m_tree.openElements()->pop();
            // Leave frameset mode only once the outermost frameset was closed.
            if (!isParsingFragment() && !m_tree.currentStackItem()->hasTagName(framesetTag))
                setInsertionMode(AfterFramesetMode);
            return;
        }
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
        // Other end tags are not acted upon in this mode.
        break;
    case AfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode);
        if (token->name() == htmlTag) {
            setInsertionMode(AfterAfterFramesetMode);
            return;
        }
        // Fall through.
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        parseError(token);
        break;
    case InSelectInTableMode:
        ASSERT(insertionMode() == InSelectInTableMode);
        if (token->name() == captionTag
            || token->name() == tableTag
            || isTableBodyContextTag(token->name())
            || token->name() == trTag
            || isTableCellContextTag(token->name())) {
            parseError(token);
            // When the matching table element is open, close the select with
            // a synthesized </select> and then reprocess the original token.
            if (m_tree.openElements()->inTableScope(token->name())) {
                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
                processEndTag(&endSelect);
                processEndTag(token);
            }
            return;
        }
        // Fall through.
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        if (token->name() == optgroupTag) {
            // An <option> sitting directly on top of an <optgroup> is closed
            // first so that the optgroup becomes the current node.
            if (m_tree.currentStackItem()->hasTagName(optionTag) && m_tree.oneBelowTop() && m_tree.oneBelowTop()->hasTagName(optgroupTag))
                processFakeEndTag(optionTag);
            if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token->name() == optionTag) {
            if (m_tree.currentStackItem()->hasTagName(optionTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token->name() == selectTag) {
            if (!m_tree.openElements()->inSelectScope(token->name())) {
                ASSERT(isParsingFragment());
                parseError(token);
                return;
            }
            m_tree.openElements()->popUntilPopped(selectTag.localName());
            resetInsertionModeAppropriately();
            return;
        }
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
        // Other end tags are not acted upon in this mode.
        break;
    case InTableTextMode:
        // Flush the pending table characters (which restores the saved
        // insertion mode), then reprocess the token.
        defaultForInTableText();
        processEndTag(token);
        break;
    case TemplateContentsMode:
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
        // Other end tags are not acted upon in this mode.
        break;
    }
}
2218
processComment(AtomicHTMLToken * token)2219 void HTMLTreeBuilder::processComment(AtomicHTMLToken* token)
2220 {
2221 ASSERT(token->type() == HTMLToken::Comment);
2222 if (m_insertionMode == InitialMode
2223 || m_insertionMode == BeforeHTMLMode
2224 || m_insertionMode == AfterAfterBodyMode
2225 || m_insertionMode == AfterAfterFramesetMode) {
2226 m_tree.insertCommentOnDocument(token);
2227 return;
2228 }
2229 if (m_insertionMode == AfterBodyMode) {
2230 m_tree.insertCommentOnHTMLHtmlElement(token);
2231 return;
2232 }
2233 if (m_insertionMode == InTableTextMode) {
2234 defaultForInTableText();
2235 processComment(token);
2236 return;
2237 }
2238 m_tree.insertComment(token);
2239 }
2240
processCharacter(AtomicHTMLToken * token)2241 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken* token)
2242 {
2243 ASSERT(token->type() == HTMLToken::Character);
2244 CharacterTokenBuffer buffer(token);
2245 processCharacterBuffer(buffer);
2246 }
2247
// Consumes the characters in |buffer| according to the current insertion
// mode.
//
// The early modes (InitialMode through AfterHeadMode) strip or insert leading
// whitespace, return if nothing is left, and otherwise run that mode's
// default action and fall through to the next case. Modes that have to change
// state before the remaining characters can be handled jump back to
// ReprocessBuffer so the rest of the buffer is processed under the new mode.
void HTMLTreeBuilder::processCharacterBuffer(CharacterTokenBuffer& buffer)
{
ReprocessBuffer:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
    // Note that this logic is different than the generic \r\n collapsing
    // handled in the input stream preprocessor. This logic is here as an
    // "authoring convenience" so folks can write:
    //
    // <pre>
    // lorem ipsum
    // lorem ipsum
    // </pre>
    //
    // without getting an extra newline at the start of their <pre> element.
    if (m_shouldSkipLeadingNewline) {
        m_shouldSkipLeadingNewline = false;
        buffer.skipAtMostOneLeadingNewline();
        if (buffer.isEmpty())
            return;
    }

    switch (insertionMode()) {
    case InitialMode: {
        ASSERT(insertionMode() == InitialMode);
        // Leading whitespace is discarded in this mode.
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForInitial();
        // Fall through.
    }
    case BeforeHTMLMode: {
        ASSERT(insertionMode() == BeforeHTMLMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHTML();
        // Fall through.
    }
    case BeforeHeadMode: {
        ASSERT(insertionMode() == BeforeHeadMode);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHead();
        // Fall through.
    }
    case InHeadMode: {
        ASSERT(insertionMode() == InHeadMode);
        // From here on leading whitespace is inserted as a text node rather
        // than discarded.
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHead();
        // Fall through.
    }
    case AfterHeadMode: {
        ASSERT(insertionMode() == AfterHeadMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForAfterHead();
        // Fall through.
    }
    case InBodyMode:
    case InCaptionMode:
    case TemplateContentsMode:
    case InCellMode: {
        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode || insertionMode() == TemplateContentsMode);
        processCharacterBufferForInBody(buffer);
        break;
    }
    case InTableMode:
    case InTableBodyMode:
    case InRowMode: {
        ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
        ASSERT(m_pendingTableCharacters.isEmpty());
        // When the current node is a table-structure element, remember the
        // current mode and start buffering characters in InTableTextMode.
        if (m_tree.currentStackItem()->isElementNode()
            && (m_tree.currentStackItem()->hasTagName(tableTag)
                || m_tree.currentStackItem()->hasTagName(tbodyTag)
                || m_tree.currentStackItem()->hasTagName(tfootTag)
                || m_tree.currentStackItem()->hasTagName(theadTag)
                || m_tree.currentStackItem()->hasTagName(trTag))) {
            m_originalInsertionMode = m_insertionMode;
            setInsertionMode(InTableTextMode);
            // Note that we fall through to the InTableTextMode case below.
        } else {
            // Otherwise use the in-body rules with foster-parent redirection
            // active for the duration of the call.
            HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
            processCharacterBufferForInBody(buffer);
            break;
        }
        // Fall through.
    }
    case InTableTextMode: {
        // Accumulate the characters; defaultForInTableText() emits them later.
        buffer.giveRemainingTo(m_pendingTableCharacters);
        break;
    }
    case InColumnGroupMode: {
        ASSERT(insertionMode() == InColumnGroupMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            // The spec tells us to drop these characters on the floor.
            buffer.skipLeadingNonWhitespace();
            if (buffer.isEmpty())
                return;
        }
        // Handle whatever is left of the buffer from the top.
        goto ReprocessBuffer;
    }
    case AfterBodyMode:
    case AfterAfterBodyMode: {
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // FIXME: parse error
        setInsertionMode(InBodyMode);
        goto ReprocessBuffer;
    }
    case TextMode: {
        ASSERT(insertionMode() == TextMode);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InHeadNoscriptMode: {
        ASSERT(insertionMode() == InHeadNoscriptMode);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        // Non-whitespace remains: run the default action for this mode, then
        // reprocess the rest of the buffer.
        defaultForInHeadNoscript();
        goto ReprocessBuffer;
    }
    case InFramesetMode:
    case AfterFramesetMode: {
        ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        // Only the whitespace taken from the remaining buffer is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    case InSelectInTableMode:
    case InSelectMode: {
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case AfterAfterFramesetMode: {
        // Like the frameset modes above, but the active formatting elements
        // are reconstructed before the whitespace is inserted.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty()) {
            m_tree.reconstructTheActiveFormattingElements();
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        }
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    }
}
2413
processCharacterBufferForInBody(CharacterTokenBuffer & buffer)2414 void HTMLTreeBuilder::processCharacterBufferForInBody(CharacterTokenBuffer& buffer)
2415 {
2416 m_tree.reconstructTheActiveFormattingElements();
2417 const String& characters = buffer.takeRemaining();
2418 m_tree.insertTextNode(characters);
2419 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2420 m_framesetOk = false;
2421 }
2422
// Handles the end-of-file token for the current insertion mode.
//
// Cases that `break` out of the switch reach m_tree.processEndOfFile() at the
// bottom. Cases that `return` skip it here: either they have re-entered
// processEndOfFile() after changing state, or template-contents handling
// reported that it took care of the token, or (InColumnGroupMode at the root
// node) nothing further is done.
void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::EndOfFile);
    switch (insertionMode()) {
    // The early modes run their default actions one after another, falling
    // through until the in-body handling is reached.
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        defaultForAfterHead();
        // Fall through
    case InBodyMode:
    case InCellMode:
    case InCaptionMode:
    case InRowMode:
        ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode || insertionMode() == TemplateContentsMode);
        notImplemented(); // Emit parse error based on what elements are still open.
        // With template insertion modes pending, let the template-contents
        // handling try first; it returns true when nothing more is needed here.
        if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
            return;
        break;
    case AfterBodyMode:
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        // Run the default action for this mode, then handle EOF again.
        defaultForInHeadNoscript();
        processEndOfFile(token);
        return;
    case AfterFramesetMode:
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        break;
    case InColumnGroupMode:
        if (m_tree.currentIsRootNode()) {
            ASSERT(isParsingFragment());
            return; // FIXME: Should we break here instead of returning?
        }
        ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || isHTMLTemplateElement(m_tree.currentNode()));
        processColgroupEndTagForInColumnGroup();
        // Fall through
    case InFramesetMode:
    case InTableMode:
    case InTableBodyMode:
    case InSelectInTableMode:
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode || insertionMode() == InColumnGroupMode);
        // Reaching EOF with anything but the root node current is an error.
        if (m_tree.currentNode() != m_tree.openElements()->rootNode())
            parseError(token);
        if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
            return;
        break;
    case InTableTextMode:
        // Flush the pending table characters, then handle EOF again under the
        // restored insertion mode.
        defaultForInTableText();
        processEndOfFile(token);
        return;
    case TextMode:
        // EOF inside a text-mode element: report it, pop the element, restore
        // the saved mode and handle EOF again there.
        parseError(token);
        if (m_tree.currentStackItem()->hasTagName(scriptTag))
            notImplemented(); // mark the script element as "already started".
        m_tree.openElements()->pop();
        ASSERT(m_originalInsertionMode != TextMode);
        setInsertionMode(m_originalInsertionMode);
        processEndOfFile(token);
        return;
    case TemplateContentsMode:
        if (processEndOfFileForInTemplateContents(token))
            return;
        break;
    }
    m_tree.processEndOfFile();
}
2508
defaultForInitial()2509 void HTMLTreeBuilder::defaultForInitial()
2510 {
2511 notImplemented();
2512 m_tree.setDefaultCompatibilityMode();
2513 // FIXME: parse error
2514 setInsertionMode(BeforeHTMLMode);
2515 }
2516
defaultForBeforeHTML()2517 void HTMLTreeBuilder::defaultForBeforeHTML()
2518 {
2519 AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2520 m_tree.insertHTMLHtmlStartTagBeforeHTML(&startHTML);
2521 setInsertionMode(BeforeHeadMode);
2522 }
2523
defaultForBeforeHead()2524 void HTMLTreeBuilder::defaultForBeforeHead()
2525 {
2526 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2527 processStartTag(&startHead);
2528 }
2529
defaultForInHead()2530 void HTMLTreeBuilder::defaultForInHead()
2531 {
2532 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2533 processEndTag(&endHead);
2534 }
2535
defaultForInHeadNoscript()2536 void HTMLTreeBuilder::defaultForInHeadNoscript()
2537 {
2538 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2539 processEndTag(&endNoscript);
2540 }
2541
defaultForAfterHead()2542 void HTMLTreeBuilder::defaultForAfterHead()
2543 {
2544 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2545 processStartTag(&startBody);
2546 m_framesetOk = true;
2547 }
2548
defaultForInTableText()2549 void HTMLTreeBuilder::defaultForInTableText()
2550 {
2551 String characters = m_pendingTableCharacters.toString();
2552 m_pendingTableCharacters.clear();
2553 if (!isAllWhitespace(characters)) {
2554 // FIXME: parse error
2555 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2556 m_tree.reconstructTheActiveFormattingElements();
2557 m_tree.insertTextNode(characters, NotAllWhitespace);
2558 m_framesetOk = false;
2559 setInsertionMode(m_originalInsertionMode);
2560 return;
2561 }
2562 m_tree.insertTextNode(characters);
2563 setInsertionMode(m_originalInsertionMode);
2564 }
2565
processStartTagForInHead(AtomicHTMLToken * token)2566 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken* token)
2567 {
2568 ASSERT(token->type() == HTMLToken::StartTag);
2569 if (token->name() == htmlTag) {
2570 processHtmlStartTagForInBody(token);
2571 return true;
2572 }
2573 if (token->name() == baseTag
2574 || token->name() == basefontTag
2575 || token->name() == bgsoundTag
2576 || token->name() == commandTag
2577 || token->name() == linkTag
2578 || token->name() == metaTag) {
2579 m_tree.insertSelfClosingHTMLElement(token);
2580 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2581 return true;
2582 }
2583 if (token->name() == titleTag) {
2584 processGenericRCDATAStartTag(token);
2585 return true;
2586 }
2587 if (token->name() == noscriptTag) {
2588 if (m_options.scriptEnabled) {
2589 processGenericRawTextStartTag(token);
2590 return true;
2591 }
2592 m_tree.insertHTMLElement(token);
2593 setInsertionMode(InHeadNoscriptMode);
2594 return true;
2595 }
2596 if (token->name() == noframesTag || token->name() == styleTag) {
2597 processGenericRawTextStartTag(token);
2598 return true;
2599 }
2600 if (token->name() == scriptTag) {
2601 processScriptStartTag(token);
2602 return true;
2603 }
2604 if (token->name() == templateTag) {
2605 processTemplateStartTag(token);
2606 return true;
2607 }
2608 if (token->name() == headTag) {
2609 parseError(token);
2610 return true;
2611 }
2612 return false;
2613 }
2614
processGenericRCDATAStartTag(AtomicHTMLToken * token)2615 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
2616 {
2617 ASSERT(token->type() == HTMLToken::StartTag);
2618 m_tree.insertHTMLElement(token);
2619 if (m_parser->tokenizer())
2620 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2621 m_originalInsertionMode = m_insertionMode;
2622 setInsertionMode(TextMode);
2623 }
2624
processGenericRawTextStartTag(AtomicHTMLToken * token)2625 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
2626 {
2627 ASSERT(token->type() == HTMLToken::StartTag);
2628 m_tree.insertHTMLElement(token);
2629 if (m_parser->tokenizer())
2630 m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2631 m_originalInsertionMode = m_insertionMode;
2632 setInsertionMode(TextMode);
2633 }
2634
processScriptStartTag(AtomicHTMLToken * token)2635 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
2636 {
2637 ASSERT(token->type() == HTMLToken::StartTag);
2638 m_tree.insertScriptElement(token);
2639 if (m_parser->tokenizer())
2640 m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2641 m_originalInsertionMode = m_insertionMode;
2642
2643 TextPosition position = m_parser->textPosition();
2644
2645 m_scriptToProcessStartPosition = position;
2646
2647 setInsertionMode(TextMode);
2648 }
2649
2650 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
shouldProcessTokenInForeignContent(AtomicHTMLToken * token)2651 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken* token)
2652 {
2653 if (m_tree.isEmpty())
2654 return false;
2655 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2656
2657 if (adjustedCurrentNode->isInHTMLNamespace())
2658 return false;
2659 if (HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode)) {
2660 if (token->type() == HTMLToken::StartTag
2661 && token->name() != MathMLNames::mglyphTag
2662 && token->name() != MathMLNames::malignmarkTag)
2663 return false;
2664 if (token->type() == HTMLToken::Character)
2665 return false;
2666 }
2667 if (adjustedCurrentNode->hasTagName(MathMLNames::annotation_xmlTag)
2668 && token->type() == HTMLToken::StartTag
2669 && token->name() == SVGNames::svgTag)
2670 return false;
2671 if (HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)) {
2672 if (token->type() == HTMLToken::StartTag)
2673 return false;
2674 if (token->type() == HTMLToken::Character)
2675 return false;
2676 }
2677 if (token->type() == HTMLToken::EndOfFile)
2678 return false;
2679 return true;
2680 }
2681
processTokenInForeignContent(AtomicHTMLToken * token)2682 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken* token)
2683 {
2684 if (token->type() == HTMLToken::Character) {
2685 const String& characters = token->characters();
2686 m_tree.insertTextNode(characters);
2687 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2688 m_framesetOk = false;
2689 return;
2690 }
2691
2692 m_tree.flush(FlushAlways);
2693 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2694
2695 switch (token->type()) {
2696 case HTMLToken::Uninitialized:
2697 ASSERT_NOT_REACHED();
2698 break;
2699 case HTMLToken::DOCTYPE:
2700 parseError(token);
2701 break;
2702 case HTMLToken::StartTag: {
2703 if (token->name() == bTag
2704 || token->name() == bigTag
2705 || token->name() == blockquoteTag
2706 || token->name() == bodyTag
2707 || token->name() == brTag
2708 || token->name() == centerTag
2709 || token->name() == codeTag
2710 || token->name() == ddTag
2711 || token->name() == divTag
2712 || token->name() == dlTag
2713 || token->name() == dtTag
2714 || token->name() == emTag
2715 || token->name() == embedTag
2716 || isNumberedHeaderTag(token->name())
2717 || token->name() == headTag
2718 || token->name() == hrTag
2719 || token->name() == iTag
2720 || token->name() == imgTag
2721 || token->name() == liTag
2722 || token->name() == listingTag
2723 || token->name() == menuTag
2724 || token->name() == metaTag
2725 || token->name() == nobrTag
2726 || token->name() == olTag
2727 || token->name() == pTag
2728 || token->name() == preTag
2729 || token->name() == rubyTag
2730 || token->name() == sTag
2731 || token->name() == smallTag
2732 || token->name() == spanTag
2733 || token->name() == strongTag
2734 || token->name() == strikeTag
2735 || token->name() == subTag
2736 || token->name() == supTag
2737 || token->name() == tableTag
2738 || token->name() == ttTag
2739 || token->name() == uTag
2740 || token->name() == ulTag
2741 || token->name() == varTag
2742 || (token->name() == fontTag && (token->getAttributeItem(colorAttr) || token->getAttributeItem(faceAttr) || token->getAttributeItem(sizeAttr)))) {
2743 parseError(token);
2744 m_tree.openElements()->popUntilForeignContentScopeMarker();
2745 processStartTag(token);
2746 return;
2747 }
2748 const AtomicString& currentNamespace = adjustedCurrentNode->namespaceURI();
2749 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2750 adjustMathMLAttributes(token);
2751 if (currentNamespace == SVGNames::svgNamespaceURI) {
2752 adjustSVGTagNameCase(token);
2753 adjustSVGAttributes(token);
2754 }
2755 adjustForeignAttributes(token);
2756 m_tree.insertForeignElement(token, currentNamespace);
2757 break;
2758 }
2759 case HTMLToken::EndTag: {
2760 if (adjustedCurrentNode->namespaceURI() == SVGNames::svgNamespaceURI)
2761 adjustSVGTagNameCase(token);
2762
2763 if (token->name() == SVGNames::scriptTag && m_tree.currentStackItem()->hasTagName(SVGNames::scriptTag)) {
2764 if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2765 m_scriptToProcess = m_tree.currentElement();
2766 m_tree.openElements()->pop();
2767 return;
2768 }
2769 if (!m_tree.currentStackItem()->isInHTMLNamespace()) {
2770 // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2771 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2772 if (!nodeRecord->stackItem()->hasLocalName(token->name()))
2773 parseError(token);
2774 while (1) {
2775 if (nodeRecord->stackItem()->hasLocalName(token->name())) {
2776 m_tree.openElements()->popUntilPopped(nodeRecord->element());
2777 return;
2778 }
2779 nodeRecord = nodeRecord->next();
2780
2781 if (nodeRecord->stackItem()->isInHTMLNamespace())
2782 break;
2783 }
2784 }
2785 // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2786 processEndTag(token);
2787 break;
2788 }
2789 case HTMLToken::Comment:
2790 m_tree.insertComment(token);
2791 break;
2792 case HTMLToken::Character:
2793 case HTMLToken::EndOfFile:
2794 ASSERT_NOT_REACHED();
2795 break;
2796 }
2797 }
2798
finished()2799 void HTMLTreeBuilder::finished()
2800 {
2801 if (isParsingFragment())
2802 return;
2803
2804 ASSERT(m_templateInsertionModes.isEmpty());
2805 ASSERT(m_isAttached);
2806 // Warning, this may detach the parser. Do not do anything else after this.
2807 m_tree.finishedParsing();
2808 }
2809
parseError(AtomicHTMLToken *)2810 void HTMLTreeBuilder::parseError(AtomicHTMLToken*)
2811 {
2812 }
2813
2814 } // namespace blink
2815