1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "core/html/parser/HTMLTreeBuilder.h"
29
30 #include "bindings/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/MathMLNames.h"
33 #include "core/SVGNames.h"
34 #include "core/XLinkNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/XMLNames.h"
37 #include "core/dom/DocumentFragment.h"
38 #include "core/dom/ElementTraversal.h"
39 #include "core/html/HTMLDocument.h"
40 #include "core/html/HTMLFormElement.h"
41 #include "core/html/parser/AtomicHTMLToken.h"
42 #include "core/html/parser/HTMLDocumentParser.h"
43 #include "core/html/parser/HTMLParserIdioms.h"
44 #include "core/html/parser/HTMLStackItem.h"
45 #include "core/html/parser/HTMLToken.h"
46 #include "core/html/parser/HTMLTokenizer.h"
47 #include "platform/NotImplemented.h"
48 #include "platform/text/PlatformLocale.h"
49 #include "wtf/MainThread.h"
50 #include "wtf/unicode/CharacterNames.h"
51
52 namespace WebCore {
53
54 using namespace HTMLNames;
55
56 namespace {
57
isHTMLSpaceOrReplacementCharacter(UChar character)58 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
59 {
60 return isHTMLSpace<UChar>(character) || character == replacementCharacter;
61 }
62
63 }
64
uninitializedPositionValue1()65 static TextPosition uninitializedPositionValue1()
66 {
67 return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
68 }
69
isAllWhitespace(const String & string)70 static inline bool isAllWhitespace(const String& string)
71 {
72 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
73 }
74
isAllWhitespaceOrReplacementCharacters(const String & string)75 static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
76 {
77 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
78 }
79
isNumberedHeaderTag(const AtomicString & tagName)80 static bool isNumberedHeaderTag(const AtomicString& tagName)
81 {
82 return tagName == h1Tag
83 || tagName == h2Tag
84 || tagName == h3Tag
85 || tagName == h4Tag
86 || tagName == h5Tag
87 || tagName == h6Tag;
88 }
89
isCaptionColOrColgroupTag(const AtomicString & tagName)90 static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
91 {
92 return tagName == captionTag
93 || tagName == colTag
94 || tagName == colgroupTag;
95 }
96
isTableCellContextTag(const AtomicString & tagName)97 static bool isTableCellContextTag(const AtomicString& tagName)
98 {
99 return tagName == thTag || tagName == tdTag;
100 }
101
isTableBodyContextTag(const AtomicString & tagName)102 static bool isTableBodyContextTag(const AtomicString& tagName)
103 {
104 return tagName == tbodyTag
105 || tagName == tfootTag
106 || tagName == theadTag;
107 }
108
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)109 static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
110 {
111 return tagName == bTag
112 || tagName == bigTag
113 || tagName == codeTag
114 || tagName == emTag
115 || tagName == fontTag
116 || tagName == iTag
117 || tagName == sTag
118 || tagName == smallTag
119 || tagName == strikeTag
120 || tagName == strongTag
121 || tagName == ttTag
122 || tagName == uTag;
123 }
124
isNonAnchorFormattingTag(const AtomicString & tagName)125 static bool isNonAnchorFormattingTag(const AtomicString& tagName)
126 {
127 return tagName == nobrTag
128 || isNonAnchorNonNobrFormattingTag(tagName);
129 }
130
131 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)132 static bool isFormattingTag(const AtomicString& tagName)
133 {
134 return tagName == aTag || isNonAnchorFormattingTag(tagName);
135 }
136
closestFormAncestor(Element & element)137 static HTMLFormElement* closestFormAncestor(Element& element)
138 {
139 ASSERT(isMainThread());
140 return Traversal<HTMLFormElement>::firstAncestorOrSelf(element);
141 }
142
143 class HTMLTreeBuilder::CharacterTokenBuffer {
144 WTF_MAKE_NONCOPYABLE(CharacterTokenBuffer);
145 public:
CharacterTokenBuffer(AtomicHTMLToken * token)146 explicit CharacterTokenBuffer(AtomicHTMLToken* token)
147 : m_characters(token->characters().impl())
148 , m_current(0)
149 , m_end(token->characters().length())
150 {
151 ASSERT(!isEmpty());
152 }
153
CharacterTokenBuffer(const String & characters)154 explicit CharacterTokenBuffer(const String& characters)
155 : m_characters(characters.impl())
156 , m_current(0)
157 , m_end(characters.length())
158 {
159 ASSERT(!isEmpty());
160 }
161
~CharacterTokenBuffer()162 ~CharacterTokenBuffer()
163 {
164 ASSERT(isEmpty());
165 }
166
isEmpty() const167 bool isEmpty() const { return m_current == m_end; }
168
skipAtMostOneLeadingNewline()169 void skipAtMostOneLeadingNewline()
170 {
171 ASSERT(!isEmpty());
172 if ((*m_characters)[m_current] == '\n')
173 ++m_current;
174 }
175
skipLeadingWhitespace()176 void skipLeadingWhitespace()
177 {
178 skipLeading<isHTMLSpace<UChar> >();
179 }
180
takeLeadingWhitespace()181 String takeLeadingWhitespace()
182 {
183 return takeLeading<isHTMLSpace<UChar> >();
184 }
185
skipLeadingNonWhitespace()186 void skipLeadingNonWhitespace()
187 {
188 skipLeading<isNotHTMLSpace<UChar> >();
189 }
190
takeRemaining()191 String takeRemaining()
192 {
193 ASSERT(!isEmpty());
194 unsigned start = m_current;
195 m_current = m_end;
196 // Notice that substring is smart enough to return *this when start == 0.
197 return String(m_characters->substring(start, m_end - start));
198 }
199
giveRemainingTo(StringBuilder & recipient)200 void giveRemainingTo(StringBuilder& recipient)
201 {
202 if (m_characters->is8Bit())
203 recipient.append(m_characters->characters8() + m_current, m_end - m_current);
204 else
205 recipient.append(m_characters->characters16() + m_current, m_end - m_current);
206 m_current = m_end;
207 }
208
takeRemainingWhitespace()209 String takeRemainingWhitespace()
210 {
211 ASSERT(!isEmpty());
212 const unsigned start = m_current;
213 m_current = m_end; // One way or another, we're taking everything!
214
215 unsigned length = 0;
216 for (unsigned i = start; i < m_end; ++i) {
217 if (isHTMLSpace<UChar>((*m_characters)[i]))
218 ++length;
219 }
220 // Returning the null string when there aren't any whitespace
221 // characters is slightly cleaner semantically because we don't want
222 // to insert a text node (as opposed to inserting an empty text node).
223 if (!length)
224 return String();
225 if (length == start - m_end) // It's all whitespace.
226 return String(m_characters->substring(start, start - m_end));
227
228 StringBuilder result;
229 result.reserveCapacity(length);
230 for (unsigned i = start; i < m_end; ++i) {
231 UChar c = (*m_characters)[i];
232 if (isHTMLSpace<UChar>(c))
233 result.append(c);
234 }
235
236 return result.toString();
237 }
238
239 private:
240 template<bool characterPredicate(UChar)>
skipLeading()241 void skipLeading()
242 {
243 ASSERT(!isEmpty());
244 while (characterPredicate((*m_characters)[m_current])) {
245 if (++m_current == m_end)
246 return;
247 }
248 }
249
250 template<bool characterPredicate(UChar)>
takeLeading()251 String takeLeading()
252 {
253 ASSERT(!isEmpty());
254 const unsigned start = m_current;
255 skipLeading<characterPredicate>();
256 if (start == m_current)
257 return String();
258 return String(m_characters->substring(start, m_current - start));
259 }
260
261 RefPtr<StringImpl> m_characters;
262 unsigned m_current;
263 unsigned m_end;
264 };
265
// Constructs a tree builder that builds into |document|. The builder starts
// in InitialMode with framesetOk set and no script position recorded. The
// unnamed bool parameter is ignored.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool, const HTMLParserOptions& options)
    : m_framesetOk(true)
#ifndef NDEBUG
    , m_isAttached(true)
#endif
    , m_tree(document, parserContentPolicy)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_shouldSkipLeadingNewline(false)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_options(options)
{
}
280
// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
//
// Constructs a tree builder for fragment parsing: content is built into
// |fragment| in the context of |contextElement|, which must be non-null.
// Must be called on the main thread (asserted).
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
    : m_framesetOk(true)
#ifndef NDEBUG
    , m_isAttached(true)
#endif
    , m_fragmentContext(fragment, contextElement)
    , m_tree(fragment, parserContentPolicy)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_shouldSkipLeadingNewline(false)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_options(options)
{
    ASSERT(isMainThread());
    ASSERT(contextElement);

    // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
    // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
    // and instead use the DocumentFragment as a root node.
    m_tree.openElements()->pushRootNode(HTMLStackItem::create(fragment, HTMLStackItem::ItemForDocumentFragmentNode));

    // A <template> context element starts with one template insertion mode
    // already on the stack.
    if (isHTMLTemplateElement(*contextElement))
        m_templateInsertionModes.append(TemplateContentsMode);

    // Pick the real starting insertion mode, then seed the form pointer with
    // the nearest form at or above the context element.
    resetInsertionModeAppropriately();
    m_tree.setForm(closestFormAncestor(*contextElement));
}
312
// Nothing to release explicitly; the body is intentionally empty.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
316
// Forwards the fragment context, the construction site, the parser and the
// pending script to |visitor|'s trace().
void HTMLTreeBuilder::trace(Visitor* visitor)
{
    visitor->trace(m_fragmentContext);
    visitor->trace(m_tree);
    visitor->trace(m_parser);
    visitor->trace(m_scriptToProcess);
}
324
// Detaches the construction site. In debug builds this also clears
// m_isAttached.
void HTMLTreeBuilder::detach()
{
#ifndef NDEBUG
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_isAttached = false;
#endif
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}
336
// Default context: no fragment is set (the non-fragment case).
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
    : m_fragment(nullptr)
{
}
341
// Fragment-parsing context: remembers |fragment| and wraps |contextElement|
// in a stack item flagged ItemForContextElement. |fragment| must not have
// any children yet (asserted).
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement)
    : m_fragment(fragment)
{
    ASSERT(!fragment->hasChildren());
    m_contextElementStackItem = HTMLStackItem::create(contextElement, HTMLStackItem::ItemForContextElement);
}
348
// Nothing to release explicitly; the body is intentionally empty.
HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}
352
// Forwards the fragment and the context element's stack item to |visitor|'s
// trace().
void HTMLTreeBuilder::FragmentParsingContext::trace(Visitor* visitor)
{
    visitor->trace(m_fragment);
    visitor->trace(m_contextElementStackItem);
}
358
// Hands the pending script element over to the caller. The script's recorded
// start position is written to |scriptStartPosition|, and the stored position
// is reset to the uninitialized sentinel. A script must be pending and the
// construction site must have no pending tasks (both asserted).
PassRefPtrWillBeRawPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
{
    ASSERT(m_scriptToProcess);
    ASSERT(!m_tree.hasPendingTasks());
    // Unpause ourselves, callers may pause us again when processing the script.
    // The HTML5 spec is written as though scripts are executed inside the tree
    // builder. We pause the parser to exit the tree builder, and then resume
    // before running scripts.
    scriptStartPosition = m_scriptToProcessStartPosition;
    m_scriptToProcessStartPosition = uninitializedPositionValue1();
    return m_scriptToProcess.release();
}
371
constructTree(AtomicHTMLToken * token)372 void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
373 {
374 if (shouldProcessTokenInForeignContent(token))
375 processTokenInForeignContent(token);
376 else
377 processToken(token);
378
379 if (m_parser->tokenizer()) {
380 bool inForeignContent = false;
381 if (!m_tree.isEmpty()) {
382 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
383 inForeignContent = !adjustedCurrentNode->isInHTMLNamespace()
384 && !HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)
385 && !HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode);
386 }
387
388 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
389 m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
390 }
391
392 m_tree.executeQueuedTasks();
393 // We might be detached now.
394 }
395
processToken(AtomicHTMLToken * token)396 void HTMLTreeBuilder::processToken(AtomicHTMLToken* token)
397 {
398 if (token->type() == HTMLToken::Character) {
399 processCharacter(token);
400 return;
401 }
402
403 // Any non-character token needs to cause us to flush any pending text immediately.
404 // NOTE: flush() can cause any queued tasks to execute, possibly re-entering the parser.
405 m_tree.flush();
406 m_shouldSkipLeadingNewline = false;
407
408 switch (token->type()) {
409 case HTMLToken::Uninitialized:
410 case HTMLToken::Character:
411 ASSERT_NOT_REACHED();
412 break;
413 case HTMLToken::DOCTYPE:
414 processDoctypeToken(token);
415 break;
416 case HTMLToken::StartTag:
417 processStartTag(token);
418 break;
419 case HTMLToken::EndTag:
420 processEndTag(token);
421 break;
422 case HTMLToken::Comment:
423 processComment(token);
424 break;
425 case HTMLToken::EndOfFile:
426 processEndOfFile(token);
427 break;
428 }
429 }
430
processDoctypeToken(AtomicHTMLToken * token)431 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken* token)
432 {
433 ASSERT(token->type() == HTMLToken::DOCTYPE);
434 if (m_insertionMode == InitialMode) {
435 m_tree.insertDoctype(token);
436 setInsertionMode(BeforeHTMLMode);
437 return;
438 }
439 if (m_insertionMode == InTableTextMode) {
440 defaultForInTableText();
441 processDoctypeToken(token);
442 return;
443 }
444 parseError(token);
445 }
446
processFakeStartTag(const QualifiedName & tagName,const Vector<Attribute> & attributes)447 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
448 {
449 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
450 AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
451 processStartTag(&fakeToken);
452 }
453
processFakeEndTag(const AtomicString & tagName)454 void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
455 {
456 AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
457 processEndTag(&fakeToken);
458 }
459
processFakeEndTag(const QualifiedName & tagName)460 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
461 {
462 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
463 processFakeEndTag(tagName.localName());
464 }
465
processFakePEndTagIfPInButtonScope()466 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
467 {
468 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
469 return;
470 AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
471 processEndTag(&endP);
472 }
473
474 namespace {
475
isLi(const HTMLStackItem * item)476 bool isLi(const HTMLStackItem* item)
477 {
478 return item->hasTagName(liTag);
479 }
480
isDdOrDt(const HTMLStackItem * item)481 bool isDdOrDt(const HTMLStackItem* item)
482 {
483 return item->hasTagName(ddTag)
484 || item->hasTagName(dtTag);
485 }
486
487 }
488
489 template <bool shouldClose(const HTMLStackItem*)>
processCloseWhenNestedTag(AtomicHTMLToken * token)490 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token)
491 {
492 m_framesetOk = false;
493 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
494 while (1) {
495 RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
496 if (shouldClose(item.get())) {
497 ASSERT(item->isElementNode());
498 processFakeEndTag(item->localName());
499 break;
500 }
501 if (item->isSpecialNode() && !item->hasTagName(addressTag) && !item->hasTagName(divTag) && !item->hasTagName(pTag))
502 break;
503 nodeRecord = nodeRecord->next();
504 }
505 processFakePEndTagIfPInButtonScope();
506 m_tree.insertHTMLElement(token);
507 }
508
509 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
510
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,const QualifiedName * const * names,size_t length)511 static void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, const QualifiedName* const* names, size_t length)
512 {
513 for (size_t i = 0; i < length; ++i) {
514 const QualifiedName& name = *names[i];
515 const AtomicString& localName = name.localName();
516 AtomicString loweredLocalName = localName.lower();
517 if (loweredLocalName != localName)
518 map->add(loweredLocalName, name);
519 }
520 }
521
adjustSVGTagNameCase(AtomicHTMLToken * token)522 static void adjustSVGTagNameCase(AtomicHTMLToken* token)
523 {
524 static PrefixedNameToQualifiedNameMap* caseMap = 0;
525 if (!caseMap) {
526 caseMap = new PrefixedNameToQualifiedNameMap;
527 OwnPtr<const QualifiedName*[]> svgTags = SVGNames::getSVGTags();
528 mapLoweredLocalNameToName(caseMap, svgTags.get(), SVGNames::SVGTagsCount);
529 }
530
531 const QualifiedName& casedName = caseMap->get(token->name());
532 if (casedName.localName().isNull())
533 return;
534 token->setName(casedName.localName());
535 }
536
537 template<PassOwnPtr<const QualifiedName*[]> getAttrs(), unsigned length>
adjustAttributes(AtomicHTMLToken * token)538 static void adjustAttributes(AtomicHTMLToken* token)
539 {
540 static PrefixedNameToQualifiedNameMap* caseMap = 0;
541 if (!caseMap) {
542 caseMap = new PrefixedNameToQualifiedNameMap;
543 OwnPtr<const QualifiedName*[]> attrs = getAttrs();
544 mapLoweredLocalNameToName(caseMap, attrs.get(), length);
545 }
546
547 for (unsigned i = 0; i < token->attributes().size(); ++i) {
548 Attribute& tokenAttribute = token->attributes().at(i);
549 const QualifiedName& casedName = caseMap->get(tokenAttribute.localName());
550 if (!casedName.localName().isNull())
551 tokenAttribute.parserSetName(casedName);
552 }
553 }
554
// Applies adjustAttributes() to |token| using the SVG attribute name table.
static void adjustSVGAttributes(AtomicHTMLToken* token)
{
    adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
}
559
// Applies adjustAttributes() to |token| using the MathML attribute name table.
static void adjustMathMLAttributes(AtomicHTMLToken* token)
{
    adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
}
564
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,const QualifiedName * const * names,size_t length)565 static void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, const QualifiedName* const* names, size_t length)
566 {
567 for (size_t i = 0; i < length; ++i) {
568 const QualifiedName* name = names[i];
569 const AtomicString& localName = name->localName();
570 AtomicString prefixColonLocalName = prefix + ':' + localName;
571 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
572 map->add(prefixColonLocalName, nameWithPrefix);
573 }
574 }
575
adjustForeignAttributes(AtomicHTMLToken * token)576 static void adjustForeignAttributes(AtomicHTMLToken* token)
577 {
578 static PrefixedNameToQualifiedNameMap* map = 0;
579 if (!map) {
580 map = new PrefixedNameToQualifiedNameMap;
581
582 OwnPtr<const QualifiedName*[]> attrs = XLinkNames::getXLinkAttrs();
583 addNamesWithPrefix(map, xlinkAtom, attrs.get(), XLinkNames::XLinkAttrsCount);
584
585 OwnPtr<const QualifiedName*[]> xmlAttrs = XMLNames::getXMLAttrs();
586 addNamesWithPrefix(map, xmlAtom, xmlAttrs.get(), XMLNames::XMLAttrsCount);
587
588 map->add(WTF::xmlnsAtom, XMLNSNames::xmlnsAttr);
589 map->add("xmlns:xlink", QualifiedName(xmlnsAtom, xlinkAtom, XMLNSNames::xmlnsNamespaceURI));
590 }
591
592 for (unsigned i = 0; i < token->attributes().size(); ++i) {
593 Attribute& tokenAttribute = token->attributes().at(i);
594 const QualifiedName& name = map->get(tokenAttribute.localName());
595 if (!name.localName().isNull())
596 tokenAttribute.parserSetName(name);
597 }
598 }
599
// Handles a start tag using the "in body" rules. The tag name is tested
// against a sequence of groups; the first matching group handles the token
// and returns. Two groups fall through on purpose: the plugin check for
// applet/embed/object, and the image -> img rename. A tag that matches no
// group is inserted as an ordinary HTML element after the active formatting
// elements have been reconstructed.
void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    // <html>
    if (token->name() == htmlTag) {
        processHtmlStartTagForInBody(token);
        return;
    }
    // Tags that are handled by the "in head" start-tag rules.
    if (token->name() == baseTag
        || token->name() == basefontTag
        || token->name() == bgsoundTag
        || token->name() == commandTag
        || token->name() == linkTag
        || token->name() == metaTag
        || token->name() == noframesTag
        || token->name() == scriptTag
        || token->name() == styleTag
        || token->name() == titleTag) {
        bool didProcess = processStartTagForInHead(token);
        ASSERT_UNUSED(didProcess, didProcess);
        return;
    }
    // <body>: always a parse error; ignored when the second stack element is
    // not a body, when the stack has a single element, or when a template is
    // in HTML scope.
    if (token->name() == bodyTag) {
        parseError(token);
        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement() || m_tree.openElements()->hasTemplateInHTMLScope()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        m_framesetOk = false;
        m_tree.insertHTMLBodyStartTagInBody(token);
        return;
    }
    // <frameset>: always a parse error; honoured only while framesetOk is
    // still set, in which case the body is removed and replaced.
    if (token->name() == framesetTag) {
        parseError(token);
        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        if (!m_framesetOk)
            return;
        m_tree.openElements()->bodyElement()->remove(ASSERT_NO_EXCEPTION);
        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
        m_tree.openElements()->popHTMLBodyElement();
        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
        m_tree.insertHTMLElement(token);
        setInsertionMode(InFramesetMode);
        return;
    }
    // Tags that close an open p in button scope and are then inserted.
    if (token->name() == addressTag
        || token->name() == articleTag
        || token->name() == asideTag
        || token->name() == blockquoteTag
        || token->name() == centerTag
        || token->name() == detailsTag
        || token->name() == dirTag
        || token->name() == divTag
        || token->name() == dlTag
        || token->name() == fieldsetTag
        || token->name() == figcaptionTag
        || token->name() == figureTag
        || token->name() == footerTag
        || token->name() == headerTag
        || token->name() == hgroupTag
        || token->name() == mainTag
        || token->name() == menuTag
        || token->name() == navTag
        || token->name() == olTag
        || token->name() == pTag
        || token->name() == sectionTag
        || token->name() == summaryTag
        || token->name() == ulTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        return;
    }
    // <h1>..<h6>: a header directly inside another header is a parse error
    // and pops the outer one.
    if (isNumberedHeaderTag(token->name())) {
        processFakePEndTagIfPInButtonScope();
        if (m_tree.currentStackItem()->isNumberedHeaderElement()) {
            parseError(token);
            m_tree.openElements()->pop();
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    // <pre>, <listing>: also arm skipping of one leading newline.
    if (token->name() == preTag || token->name() == listingTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        m_shouldSkipLeadingNewline = true;
        m_framesetOk = false;
        return;
    }
    // <form>: a parse error, and ignored, when the tree already has a form.
    if (token->name() == formTag) {
        if (m_tree.form()) {
            parseError(token);
            return;
        }
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLFormElement(token);
        return;
    }
    // <li>
    if (token->name() == liTag) {
        processCloseWhenNestedTag<isLi>(token);
        return;
    }
    // <dd>, <dt>
    if (token->name() == ddTag || token->name() == dtTag) {
        processCloseWhenNestedTag<isDdOrDt>(token);
        return;
    }
    // <plaintext>: switches the tokenizer (when there is one) to PLAINTEXT.
    if (token->name() == plaintextTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        if (m_parser->tokenizer())
            m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
        return;
    }
    // <button>: a button already in scope is closed first (parse error) and
    // the token is then reprocessed.
    if (token->name() == buttonTag) {
        if (m_tree.openElements()->inScope(buttonTag)) {
            parseError(token);
            processFakeEndTag(buttonTag);
            processStartTag(token); // FIXME: Could we just fall through here?
            return;
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    // <a>: an a element still in the active formatting list is closed
    // (parse error) and removed from both lists before the new one is
    // inserted.
    if (token->name() == aTag) {
        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
        if (activeATag) {
            parseError(token);
            processFakeEndTag(aTag);
            m_tree.activeFormattingElements()->remove(activeATag);
            if (m_tree.openElements()->contains(activeATag))
                m_tree.openElements()->remove(activeATag);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    // Formatting tags other than a and nobr.
    if (isNonAnchorNonNobrFormattingTag(token->name())) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    // <nobr>: a nobr already in scope is closed first (parse error).
    if (token->name() == nobrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        if (m_tree.openElements()->inScope(nobrTag)) {
            parseError(token);
            processFakeEndTag(nobrTag);
            m_tree.reconstructTheActiveFormattingElements();
        }
        m_tree.insertFormattingElement(token);
        return;
    }
    // Plugin tags are dropped when the content policy disallows plugins.
    // Otherwise control falls through to the groups below.
    if (token->name() == appletTag
        || token->name() == embedTag
        || token->name() == objectTag) {
        if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
            return;
    }
    // <applet>, <marquee>, <object>: inserted, followed by a marker in the
    // active formatting elements.
    if (token->name() == appletTag
        || token->name() == marqueeTag
        || token->name() == objectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_tree.activeFormattingElements()->appendMarker();
        m_framesetOk = false;
        return;
    }
    // <table>: outside quirks mode, an open p in button scope is closed first.
    if (token->name() == tableTag) {
        if (!m_tree.inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
            processFakeEndTag(pTag);
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        setInsertionMode(InTableMode);
        return;
    }
    // <image>: parse error; the token is renamed to img and handled below.
    if (token->name() == imageTag) {
        parseError(token);
        // Apparently we're not supposed to ask.
        token->setName(imgTag.localName());
        // Note the fall through to the imgTag handling below!
    }
    // Self-closing elements that also clear framesetOk.
    if (token->name() == areaTag
        || token->name() == brTag
        || token->name() == embedTag
        || token->name() == imgTag
        || token->name() == keygenTag
        || token->name() == wbrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    // <input>: framesetOk is left alone only for type=hidden (compared
    // case-insensitively).
    if (token->name() == inputTag) {
        Attribute* typeAttribute = token->getAttributeItem(typeAttr);
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertSelfClosingHTMLElement(token);
        if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
            m_framesetOk = false;
        return;
    }
    // <param>, <source>, <track>: self-closing, no other side effects.
    if (token->name() == paramTag
        || token->name() == sourceTag
        || token->name() == trackTag) {
        m_tree.insertSelfClosingHTMLElement(token);
        return;
    }
    // <hr>
    if (token->name() == hrTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    // <textarea>: switches the tokenizer (when there is one) to RCDATA and
    // the builder to TextMode, remembering the mode to return to.
    if (token->name() == textareaTag) {
        m_tree.insertHTMLElement(token);
        m_shouldSkipLeadingNewline = true;
        if (m_parser->tokenizer())
            m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
        m_originalInsertionMode = m_insertionMode;
        m_framesetOk = false;
        setInsertionMode(TextMode);
        return;
    }
    // <xmp>
    if (token->name() == xmpTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.reconstructTheActiveFormattingElements();
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    // <iframe>
    if (token->name() == iframeTag) {
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    // <noembed> is raw text only when plugins are enabled; otherwise it
    // reaches the generic handling at the end.
    if (token->name() == noembedTag && m_options.pluginsEnabled) {
        processGenericRawTextStartTag(token);
        return;
    }
    // <noscript> is raw text only when scripting is enabled; otherwise it
    // reaches the generic handling at the end.
    if (token->name() == noscriptTag && m_options.scriptEnabled) {
        processGenericRawTextStartTag(token);
        return;
    }
    // <select>: the next mode depends on whether we are inside table content.
    if (token->name() == selectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        if (m_insertionMode == InTableMode
            || m_insertionMode == InCaptionMode
            || m_insertionMode == InColumnGroupMode
            || m_insertionMode == InTableBodyMode
            || m_insertionMode == InRowMode
            || m_insertionMode == InCellMode)
            setInsertionMode(InSelectInTableMode);
        else
            setInsertionMode(InSelectMode);
        return;
    }
    // <optgroup>, <option>: an option that is the current node is closed first.
    if (token->name() == optgroupTag || token->name() == optionTag) {
        if (m_tree.currentStackItem()->hasTagName(optionTag)) {
            AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
            processEndTag(&endOption);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        return;
    }
    // <rp>, <rt>: with a ruby in scope, implied end tags are generated and it
    // is a parse error if the current node is then not the ruby.
    if (token->name() == rpTag || token->name() == rtTag) {
        if (m_tree.openElements()->inScope(rubyTag.localName())) {
            m_tree.generateImpliedEndTags();
            if (!m_tree.currentStackItem()->hasTagName(rubyTag))
                parseError(token);
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    // <math>: inserted as a foreign element in the MathML namespace.
    if (token->name() == MathMLNames::mathTag.localName()) {
        m_tree.reconstructTheActiveFormattingElements();
        adjustMathMLAttributes(token);
        adjustForeignAttributes(token);
        m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
        return;
    }
    // <svg>: inserted as a foreign element in the SVG namespace.
    if (token->name() == SVGNames::svgTag.localName()) {
        m_tree.reconstructTheActiveFormattingElements();
        adjustSVGAttributes(token);
        adjustForeignAttributes(token);
        m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
        return;
    }
    // Table-structure tags, <frame> and <head> are parse errors here and are
    // ignored.
    if (isCaptionColOrColgroupTag(token->name())
        || token->name() == frameTag
        || token->name() == headTag
        || isTableBodyContextTag(token->name())
        || isTableCellContextTag(token->name())
        || token->name() == trTag) {
        parseError(token);
        return;
    }
    // <template>
    if (token->name() == templateTag) {
        processTemplateStartTag(token);
        return;
    }
    // Any other start tag: an ordinary HTML element.
    m_tree.reconstructTheActiveFormattingElements();
    m_tree.insertHTMLElement(token);
}
907
processTemplateStartTag(AtomicHTMLToken * token)908 void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken* token)
909 {
910 m_tree.activeFormattingElements()->appendMarker();
911 m_tree.insertHTMLElement(token);
912 m_templateInsertionModes.append(TemplateContentsMode);
913 setInsertionMode(TemplateContentsMode);
914 }
915
processTemplateEndTag(AtomicHTMLToken * token)916 bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken* token)
917 {
918 ASSERT(token->name() == templateTag.localName());
919 if (!m_tree.openElements()->hasTemplateInHTMLScope()) {
920 ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && isHTMLTemplateElement(m_fragmentContext.contextElement())));
921 parseError(token);
922 return false;
923 }
924 m_tree.generateImpliedEndTags();
925 if (!m_tree.currentStackItem()->hasTagName(templateTag))
926 parseError(token);
927 m_tree.openElements()->popUntilPopped(templateTag);
928 m_tree.activeFormattingElements()->clearToLastMarker();
929 m_templateInsertionModes.removeLast();
930 resetInsertionModeAppropriately();
931 return true;
932 }
933
processEndOfFileForInTemplateContents(AtomicHTMLToken * token)934 bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken* token)
935 {
936 AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag.localName());
937 if (!processTemplateEndTag(&endTemplate))
938 return false;
939
940 processEndOfFile(token);
941 return true;
942 }
943
processColgroupEndTagForInColumnGroup()944 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
945 {
946 if (m_tree.currentIsRootNode() || isHTMLTemplateElement(*m_tree.currentNode())) {
947 ASSERT(isParsingFragmentOrTemplateContents());
948 // FIXME: parse error
949 return false;
950 }
951 m_tree.openElements()->pop();
952 setInsertionMode(InTableMode);
953 return true;
954 }
955
956 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
adjustedCurrentStackItem() const957 HTMLStackItem* HTMLTreeBuilder::adjustedCurrentStackItem() const
958 {
959 ASSERT(!m_tree.isEmpty());
960 if (isParsingFragment() && m_tree.openElements()->hasOnlyOneElement())
961 return m_fragmentContext.contextElementStackItem();
962
963 return m_tree.currentStackItem();
964 }
965
966 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()967 void HTMLTreeBuilder::closeTheCell()
968 {
969 ASSERT(insertionMode() == InCellMode);
970 if (m_tree.openElements()->inTableScope(tdTag)) {
971 ASSERT(!m_tree.openElements()->inTableScope(thTag));
972 processFakeEndTag(tdTag);
973 return;
974 }
975 ASSERT(m_tree.openElements()->inTableScope(thTag));
976 processFakeEndTag(thTag);
977 ASSERT(insertionMode() == InRowMode);
978 }
979
processStartTagForInTable(AtomicHTMLToken * token)980 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken* token)
981 {
982 ASSERT(token->type() == HTMLToken::StartTag);
983 if (token->name() == captionTag) {
984 m_tree.openElements()->popUntilTableScopeMarker();
985 m_tree.activeFormattingElements()->appendMarker();
986 m_tree.insertHTMLElement(token);
987 setInsertionMode(InCaptionMode);
988 return;
989 }
990 if (token->name() == colgroupTag) {
991 m_tree.openElements()->popUntilTableScopeMarker();
992 m_tree.insertHTMLElement(token);
993 setInsertionMode(InColumnGroupMode);
994 return;
995 }
996 if (token->name() == colTag) {
997 processFakeStartTag(colgroupTag);
998 ASSERT(InColumnGroupMode);
999 processStartTag(token);
1000 return;
1001 }
1002 if (isTableBodyContextTag(token->name())) {
1003 m_tree.openElements()->popUntilTableScopeMarker();
1004 m_tree.insertHTMLElement(token);
1005 setInsertionMode(InTableBodyMode);
1006 return;
1007 }
1008 if (isTableCellContextTag(token->name())
1009 || token->name() == trTag) {
1010 processFakeStartTag(tbodyTag);
1011 ASSERT(insertionMode() == InTableBodyMode);
1012 processStartTag(token);
1013 return;
1014 }
1015 if (token->name() == tableTag) {
1016 parseError(token);
1017 if (!processTableEndTagForInTable()) {
1018 ASSERT(isParsingFragmentOrTemplateContents());
1019 return;
1020 }
1021 processStartTag(token);
1022 return;
1023 }
1024 if (token->name() == styleTag || token->name() == scriptTag) {
1025 processStartTagForInHead(token);
1026 return;
1027 }
1028 if (token->name() == inputTag) {
1029 Attribute* typeAttribute = token->getAttributeItem(typeAttr);
1030 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1031 parseError(token);
1032 m_tree.insertSelfClosingHTMLElement(token);
1033 return;
1034 }
1035 // Fall through to "anything else" case.
1036 }
1037 if (token->name() == formTag) {
1038 parseError(token);
1039 if (m_tree.form())
1040 return;
1041 m_tree.insertHTMLFormElement(token, true);
1042 m_tree.openElements()->pop();
1043 return;
1044 }
1045 if (token->name() == templateTag) {
1046 processTemplateStartTag(token);
1047 return;
1048 }
1049 parseError(token);
1050 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1051 processStartTagForInBody(token);
1052 }
1053
processStartTag(AtomicHTMLToken * token)1054 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token)
1055 {
1056 ASSERT(token->type() == HTMLToken::StartTag);
1057 switch (insertionMode()) {
1058 case InitialMode:
1059 ASSERT(insertionMode() == InitialMode);
1060 defaultForInitial();
1061 // Fall through.
1062 case BeforeHTMLMode:
1063 ASSERT(insertionMode() == BeforeHTMLMode);
1064 if (token->name() == htmlTag) {
1065 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1066 setInsertionMode(BeforeHeadMode);
1067 return;
1068 }
1069 defaultForBeforeHTML();
1070 // Fall through.
1071 case BeforeHeadMode:
1072 ASSERT(insertionMode() == BeforeHeadMode);
1073 if (token->name() == htmlTag) {
1074 processHtmlStartTagForInBody(token);
1075 return;
1076 }
1077 if (token->name() == headTag) {
1078 m_tree.insertHTMLHeadElement(token);
1079 setInsertionMode(InHeadMode);
1080 return;
1081 }
1082 defaultForBeforeHead();
1083 // Fall through.
1084 case InHeadMode:
1085 ASSERT(insertionMode() == InHeadMode);
1086 if (processStartTagForInHead(token))
1087 return;
1088 defaultForInHead();
1089 // Fall through.
1090 case AfterHeadMode:
1091 ASSERT(insertionMode() == AfterHeadMode);
1092 if (token->name() == htmlTag) {
1093 processHtmlStartTagForInBody(token);
1094 return;
1095 }
1096 if (token->name() == bodyTag) {
1097 m_framesetOk = false;
1098 m_tree.insertHTMLBodyElement(token);
1099 setInsertionMode(InBodyMode);
1100 return;
1101 }
1102 if (token->name() == framesetTag) {
1103 m_tree.insertHTMLElement(token);
1104 setInsertionMode(InFramesetMode);
1105 return;
1106 }
1107 if (token->name() == baseTag
1108 || token->name() == basefontTag
1109 || token->name() == bgsoundTag
1110 || token->name() == linkTag
1111 || token->name() == metaTag
1112 || token->name() == noframesTag
1113 || token->name() == scriptTag
1114 || token->name() == styleTag
1115 || token->name() == templateTag
1116 || token->name() == titleTag) {
1117 parseError(token);
1118 ASSERT(m_tree.head());
1119 m_tree.openElements()->pushHTMLHeadElement(m_tree.headStackItem());
1120 processStartTagForInHead(token);
1121 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1122 return;
1123 }
1124 if (token->name() == headTag) {
1125 parseError(token);
1126 return;
1127 }
1128 defaultForAfterHead();
1129 // Fall through
1130 case InBodyMode:
1131 ASSERT(insertionMode() == InBodyMode);
1132 processStartTagForInBody(token);
1133 break;
1134 case InTableMode:
1135 ASSERT(insertionMode() == InTableMode);
1136 processStartTagForInTable(token);
1137 break;
1138 case InCaptionMode:
1139 ASSERT(insertionMode() == InCaptionMode);
1140 if (isCaptionColOrColgroupTag(token->name())
1141 || isTableBodyContextTag(token->name())
1142 || isTableCellContextTag(token->name())
1143 || token->name() == trTag) {
1144 parseError(token);
1145 if (!processCaptionEndTagForInCaption()) {
1146 ASSERT(isParsingFragment());
1147 return;
1148 }
1149 processStartTag(token);
1150 return;
1151 }
1152 processStartTagForInBody(token);
1153 break;
1154 case InColumnGroupMode:
1155 ASSERT(insertionMode() == InColumnGroupMode);
1156 if (token->name() == htmlTag) {
1157 processHtmlStartTagForInBody(token);
1158 return;
1159 }
1160 if (token->name() == colTag) {
1161 m_tree.insertSelfClosingHTMLElement(token);
1162 return;
1163 }
1164 if (token->name() == templateTag) {
1165 processTemplateStartTag(token);
1166 return;
1167 }
1168 if (!processColgroupEndTagForInColumnGroup()) {
1169 ASSERT(isParsingFragmentOrTemplateContents());
1170 return;
1171 }
1172 processStartTag(token);
1173 break;
1174 case InTableBodyMode:
1175 ASSERT(insertionMode() == InTableBodyMode);
1176 if (token->name() == trTag) {
1177 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1178 m_tree.insertHTMLElement(token);
1179 setInsertionMode(InRowMode);
1180 return;
1181 }
1182 if (isTableCellContextTag(token->name())) {
1183 parseError(token);
1184 processFakeStartTag(trTag);
1185 ASSERT(insertionMode() == InRowMode);
1186 processStartTag(token);
1187 return;
1188 }
1189 if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) {
1190 // FIXME: This is slow.
1191 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1192 ASSERT(isParsingFragmentOrTemplateContents());
1193 parseError(token);
1194 return;
1195 }
1196 m_tree.openElements()->popUntilTableBodyScopeMarker();
1197 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1198 processFakeEndTag(m_tree.currentStackItem()->localName());
1199 processStartTag(token);
1200 return;
1201 }
1202 processStartTagForInTable(token);
1203 break;
1204 case InRowMode:
1205 ASSERT(insertionMode() == InRowMode);
1206 if (isTableCellContextTag(token->name())) {
1207 m_tree.openElements()->popUntilTableRowScopeMarker();
1208 m_tree.insertHTMLElement(token);
1209 setInsertionMode(InCellMode);
1210 m_tree.activeFormattingElements()->appendMarker();
1211 return;
1212 }
1213 if (token->name() == trTag
1214 || isCaptionColOrColgroupTag(token->name())
1215 || isTableBodyContextTag(token->name())) {
1216 if (!processTrEndTagForInRow()) {
1217 ASSERT(isParsingFragmentOrTemplateContents());
1218 return;
1219 }
1220 ASSERT(insertionMode() == InTableBodyMode);
1221 processStartTag(token);
1222 return;
1223 }
1224 processStartTagForInTable(token);
1225 break;
1226 case InCellMode:
1227 ASSERT(insertionMode() == InCellMode);
1228 if (isCaptionColOrColgroupTag(token->name())
1229 || isTableCellContextTag(token->name())
1230 || token->name() == trTag
1231 || isTableBodyContextTag(token->name())) {
1232 // FIXME: This could be more efficient.
1233 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1234 ASSERT(isParsingFragment());
1235 parseError(token);
1236 return;
1237 }
1238 closeTheCell();
1239 processStartTag(token);
1240 return;
1241 }
1242 processStartTagForInBody(token);
1243 break;
1244 case AfterBodyMode:
1245 case AfterAfterBodyMode:
1246 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1247 if (token->name() == htmlTag) {
1248 processHtmlStartTagForInBody(token);
1249 return;
1250 }
1251 setInsertionMode(InBodyMode);
1252 processStartTag(token);
1253 break;
1254 case InHeadNoscriptMode:
1255 ASSERT(insertionMode() == InHeadNoscriptMode);
1256 if (token->name() == htmlTag) {
1257 processHtmlStartTagForInBody(token);
1258 return;
1259 }
1260 if (token->name() == basefontTag
1261 || token->name() == bgsoundTag
1262 || token->name() == linkTag
1263 || token->name() == metaTag
1264 || token->name() == noframesTag
1265 || token->name() == styleTag) {
1266 bool didProcess = processStartTagForInHead(token);
1267 ASSERT_UNUSED(didProcess, didProcess);
1268 return;
1269 }
1270 if (token->name() == htmlTag || token->name() == noscriptTag) {
1271 parseError(token);
1272 return;
1273 }
1274 defaultForInHeadNoscript();
1275 processToken(token);
1276 break;
1277 case InFramesetMode:
1278 ASSERT(insertionMode() == InFramesetMode);
1279 if (token->name() == htmlTag) {
1280 processHtmlStartTagForInBody(token);
1281 return;
1282 }
1283 if (token->name() == framesetTag) {
1284 m_tree.insertHTMLElement(token);
1285 return;
1286 }
1287 if (token->name() == frameTag) {
1288 m_tree.insertSelfClosingHTMLElement(token);
1289 return;
1290 }
1291 if (token->name() == noframesTag) {
1292 processStartTagForInHead(token);
1293 return;
1294 }
1295 if (token->name() == templateTag) {
1296 processTemplateStartTag(token);
1297 return;
1298 }
1299 parseError(token);
1300 break;
1301 case AfterFramesetMode:
1302 case AfterAfterFramesetMode:
1303 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1304 if (token->name() == htmlTag) {
1305 processHtmlStartTagForInBody(token);
1306 return;
1307 }
1308 if (token->name() == noframesTag) {
1309 processStartTagForInHead(token);
1310 return;
1311 }
1312 parseError(token);
1313 break;
1314 case InSelectInTableMode:
1315 ASSERT(insertionMode() == InSelectInTableMode);
1316 if (token->name() == captionTag
1317 || token->name() == tableTag
1318 || isTableBodyContextTag(token->name())
1319 || token->name() == trTag
1320 || isTableCellContextTag(token->name())) {
1321 parseError(token);
1322 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1323 processEndTag(&endSelect);
1324 processStartTag(token);
1325 return;
1326 }
1327 // Fall through
1328 case InSelectMode:
1329 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1330 if (token->name() == htmlTag) {
1331 processHtmlStartTagForInBody(token);
1332 return;
1333 }
1334 if (token->name() == optionTag) {
1335 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1336 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1337 processEndTag(&endOption);
1338 }
1339 m_tree.insertHTMLElement(token);
1340 return;
1341 }
1342 if (token->name() == optgroupTag) {
1343 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
1344 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1345 processEndTag(&endOption);
1346 }
1347 if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
1348 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1349 processEndTag(&endOptgroup);
1350 }
1351 m_tree.insertHTMLElement(token);
1352 return;
1353 }
1354 if (token->name() == selectTag) {
1355 parseError(token);
1356 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1357 processEndTag(&endSelect);
1358 return;
1359 }
1360 if (token->name() == inputTag
1361 || token->name() == keygenTag
1362 || token->name() == textareaTag) {
1363 parseError(token);
1364 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1365 ASSERT(isParsingFragment());
1366 return;
1367 }
1368 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1369 processEndTag(&endSelect);
1370 processStartTag(token);
1371 return;
1372 }
1373 if (token->name() == scriptTag) {
1374 bool didProcess = processStartTagForInHead(token);
1375 ASSERT_UNUSED(didProcess, didProcess);
1376 return;
1377 }
1378 if (token->name() == templateTag) {
1379 processTemplateStartTag(token);
1380 return;
1381 }
1382 break;
1383 case InTableTextMode:
1384 defaultForInTableText();
1385 processStartTag(token);
1386 break;
1387 case TextMode:
1388 ASSERT_NOT_REACHED();
1389 break;
1390 case TemplateContentsMode:
1391 if (token->name() == templateTag) {
1392 processTemplateStartTag(token);
1393 return;
1394 }
1395
1396 if (token->name() == linkTag
1397 || token->name() == scriptTag
1398 || token->name() == styleTag
1399 || token->name() == metaTag) {
1400 processStartTagForInHead(token);
1401 return;
1402 }
1403
1404 InsertionMode insertionMode = TemplateContentsMode;
1405 if (token->name() == frameTag)
1406 insertionMode = InFramesetMode;
1407 else if (token->name() == colTag)
1408 insertionMode = InColumnGroupMode;
1409 else if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name()))
1410 insertionMode = InTableMode;
1411 else if (token->name() == trTag)
1412 insertionMode = InTableBodyMode;
1413 else if (isTableCellContextTag(token->name()))
1414 insertionMode = InRowMode;
1415 else
1416 insertionMode = InBodyMode;
1417
1418 ASSERT(insertionMode != TemplateContentsMode);
1419 ASSERT(m_templateInsertionModes.last() == TemplateContentsMode);
1420 m_templateInsertionModes.last() = insertionMode;
1421 setInsertionMode(insertionMode);
1422
1423 processStartTag(token);
1424 break;
1425 }
1426 }
1427
processHtmlStartTagForInBody(AtomicHTMLToken * token)1428 void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken* token)
1429 {
1430 parseError(token);
1431 if (m_tree.openElements()->hasTemplateInHTMLScope()) {
1432 ASSERT(isParsingTemplateContents());
1433 return;
1434 }
1435 m_tree.insertHTMLHtmlStartTagInBody(token);
1436 }
1437
processBodyEndTagForInBody(AtomicHTMLToken * token)1438 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken* token)
1439 {
1440 ASSERT(token->type() == HTMLToken::EndTag);
1441 ASSERT(token->name() == bodyTag);
1442 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1443 parseError(token);
1444 return false;
1445 }
1446 notImplemented(); // Emit a more specific parse error based on stack contents.
1447 setInsertionMode(AfterBodyMode);
1448 return true;
1449 }
1450
processAnyOtherEndTagForInBody(AtomicHTMLToken * token)1451 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token)
1452 {
1453 ASSERT(token->type() == HTMLToken::EndTag);
1454 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1455 while (1) {
1456 RefPtrWillBeRawPtr<HTMLStackItem> item = record->stackItem();
1457 if (item->matchesHTMLTag(token->name())) {
1458 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1459 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1460 parseError(token);
1461 m_tree.openElements()->popUntilPopped(item->element());
1462 return;
1463 }
1464 if (item->isSpecialNode()) {
1465 parseError(token);
1466 return;
1467 }
1468 record = record->next();
1469 }
1470 }
1471
1472 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
callTheAdoptionAgency(AtomicHTMLToken * token)1473 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token)
1474 {
1475 // The adoption agency algorithm is N^2. We limit the number of iterations
1476 // to stop from hanging the whole browser. This limit is specified in the
1477 // adoption agency algorithm:
1478 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1479 static const int outerIterationLimit = 8;
1480 static const int innerIterationLimit = 3;
1481
1482 // 1, 2, 3 and 16 are covered by the for() loop.
1483 for (int i = 0; i < outerIterationLimit; ++i) {
1484 // 4.
1485 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token->name());
1486 // 4.a
1487 if (!formattingElement)
1488 return processAnyOtherEndTagForInBody(token);
1489 // 4.c
1490 if ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement)) {
1491 parseError(token);
1492 notImplemented(); // Check the stack of open elements for a more specific parse error.
1493 return;
1494 }
1495 // 4.b
1496 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1497 if (!formattingElementRecord) {
1498 parseError(token);
1499 m_tree.activeFormattingElements()->remove(formattingElement);
1500 return;
1501 }
1502 // 4.d
1503 if (formattingElement != m_tree.currentElement())
1504 parseError(token);
1505 // 5.
1506 HTMLElementStack::ElementRecord* furthestBlock = m_tree.openElements()->furthestBlockForFormattingElement(formattingElement);
1507 // 6.
1508 if (!furthestBlock) {
1509 m_tree.openElements()->popUntilPopped(formattingElement);
1510 m_tree.activeFormattingElements()->remove(formattingElement);
1511 return;
1512 }
1513 // 7.
1514 ASSERT(furthestBlock->isAbove(formattingElementRecord));
1515 RefPtrWillBeRawPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem();
1516 // 8.
1517 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1518 // 9.
1519 HTMLElementStack::ElementRecord* node = furthestBlock;
1520 HTMLElementStack::ElementRecord* nextNode = node->next();
1521 HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1522 // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
1523 for (int i = 0; i < innerIterationLimit; ++i) {
1524 // 9.4
1525 node = nextNode;
1526 ASSERT(node);
1527 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5.
1528 // 9.5
1529 if (!m_tree.activeFormattingElements()->contains(node->element())) {
1530 m_tree.openElements()->remove(node->element());
1531 node = 0;
1532 continue;
1533 }
1534 // 9.6
1535 if (node == formattingElementRecord)
1536 break;
1537 // 9.7
1538 RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get());
1539
1540 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1541 nodeEntry->replaceElement(newItem);
1542 node->replaceElement(newItem.release());
1543
1544 // 9.8
1545 if (lastNode == furthestBlock)
1546 bookmark.moveToAfter(nodeEntry);
1547 // 9.9
1548 m_tree.reparent(node, lastNode);
1549 // 9.10
1550 lastNode = node;
1551 }
1552 // 10.
1553 m_tree.insertAlreadyParsedChild(commonAncestor.get(), lastNode);
1554 // 11.
1555 RefPtrWillBeRawPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get());
1556 // 12.
1557 m_tree.takeAllChildren(newItem.get(), furthestBlock);
1558 // 13.
1559 m_tree.reparent(furthestBlock, newItem.get());
1560 // 14.
1561 m_tree.activeFormattingElements()->swapTo(formattingElement, newItem, bookmark);
1562 // 15.
1563 m_tree.openElements()->remove(formattingElement);
1564 m_tree.openElements()->insertAbove(newItem, furthestBlock);
1565 }
1566 }
1567
resetInsertionModeAppropriately()1568 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1569 {
1570 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1571 bool last = false;
1572 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1573 while (1) {
1574 RefPtrWillBeRawPtr<HTMLStackItem> item = nodeRecord->stackItem();
1575 if (item->node() == m_tree.openElements()->rootNode()) {
1576 last = true;
1577 if (isParsingFragment())
1578 item = m_fragmentContext.contextElementStackItem();
1579 }
1580 if (item->hasTagName(templateTag))
1581 return setInsertionMode(m_templateInsertionModes.last());
1582 if (item->hasTagName(selectTag)) {
1583 if (!last) {
1584 while (item->node() != m_tree.openElements()->rootNode() && !item->hasTagName(templateTag)) {
1585 nodeRecord = nodeRecord->next();
1586 item = nodeRecord->stackItem();
1587 if (item->hasTagName(tableTag))
1588 return setInsertionMode(InSelectInTableMode);
1589 }
1590 }
1591 return setInsertionMode(InSelectMode);
1592 }
1593 if (item->hasTagName(tdTag) || item->hasTagName(thTag))
1594 return setInsertionMode(InCellMode);
1595 if (item->hasTagName(trTag))
1596 return setInsertionMode(InRowMode);
1597 if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag))
1598 return setInsertionMode(InTableBodyMode);
1599 if (item->hasTagName(captionTag))
1600 return setInsertionMode(InCaptionMode);
1601 if (item->hasTagName(colgroupTag)) {
1602 return setInsertionMode(InColumnGroupMode);
1603 }
1604 if (item->hasTagName(tableTag))
1605 return setInsertionMode(InTableMode);
1606 if (item->hasTagName(headTag)) {
1607 if (!m_fragmentContext.fragment() || m_fragmentContext.contextElement() != item->node())
1608 return setInsertionMode(InHeadMode);
1609 return setInsertionMode(InBodyMode);
1610 }
1611 if (item->hasTagName(bodyTag))
1612 return setInsertionMode(InBodyMode);
1613 if (item->hasTagName(framesetTag)) {
1614 return setInsertionMode(InFramesetMode);
1615 }
1616 if (item->hasTagName(htmlTag)) {
1617 if (m_tree.headStackItem())
1618 return setInsertionMode(AfterHeadMode);
1619
1620 ASSERT(isParsingFragment());
1621 return setInsertionMode(BeforeHeadMode);
1622 }
1623 if (last) {
1624 ASSERT(isParsingFragment());
1625 return setInsertionMode(InBodyMode);
1626 }
1627 nodeRecord = nodeRecord->next();
1628 }
1629 }
1630
processEndTagForInTableBody(AtomicHTMLToken * token)1631 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken* token)
1632 {
1633 ASSERT(token->type() == HTMLToken::EndTag);
1634 if (isTableBodyContextTag(token->name())) {
1635 if (!m_tree.openElements()->inTableScope(token->name())) {
1636 parseError(token);
1637 return;
1638 }
1639 m_tree.openElements()->popUntilTableBodyScopeMarker();
1640 m_tree.openElements()->pop();
1641 setInsertionMode(InTableMode);
1642 return;
1643 }
1644 if (token->name() == tableTag) {
1645 // FIXME: This is slow.
1646 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) {
1647 ASSERT(isParsingFragmentOrTemplateContents());
1648 parseError(token);
1649 return;
1650 }
1651 m_tree.openElements()->popUntilTableBodyScopeMarker();
1652 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName()));
1653 processFakeEndTag(m_tree.currentStackItem()->localName());
1654 processEndTag(token);
1655 return;
1656 }
1657 if (token->name() == bodyTag
1658 || isCaptionColOrColgroupTag(token->name())
1659 || token->name() == htmlTag
1660 || isTableCellContextTag(token->name())
1661 || token->name() == trTag) {
1662 parseError(token);
1663 return;
1664 }
1665 processEndTagForInTable(token);
1666 }
1667
processEndTagForInRow(AtomicHTMLToken * token)1668 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken* token)
1669 {
1670 ASSERT(token->type() == HTMLToken::EndTag);
1671 if (token->name() == trTag) {
1672 processTrEndTagForInRow();
1673 return;
1674 }
1675 if (token->name() == tableTag) {
1676 if (!processTrEndTagForInRow()) {
1677 ASSERT(isParsingFragmentOrTemplateContents());
1678 return;
1679 }
1680 ASSERT(insertionMode() == InTableBodyMode);
1681 processEndTag(token);
1682 return;
1683 }
1684 if (isTableBodyContextTag(token->name())) {
1685 if (!m_tree.openElements()->inTableScope(token->name())) {
1686 parseError(token);
1687 return;
1688 }
1689 processFakeEndTag(trTag);
1690 ASSERT(insertionMode() == InTableBodyMode);
1691 processEndTag(token);
1692 return;
1693 }
1694 if (token->name() == bodyTag
1695 || isCaptionColOrColgroupTag(token->name())
1696 || token->name() == htmlTag
1697 || isTableCellContextTag(token->name())) {
1698 parseError(token);
1699 return;
1700 }
1701 processEndTagForInTable(token);
1702 }
1703
processEndTagForInCell(AtomicHTMLToken * token)1704 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken* token)
1705 {
1706 ASSERT(token->type() == HTMLToken::EndTag);
1707 if (isTableCellContextTag(token->name())) {
1708 if (!m_tree.openElements()->inTableScope(token->name())) {
1709 parseError(token);
1710 return;
1711 }
1712 m_tree.generateImpliedEndTags();
1713 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1714 parseError(token);
1715 m_tree.openElements()->popUntilPopped(token->name());
1716 m_tree.activeFormattingElements()->clearToLastMarker();
1717 setInsertionMode(InRowMode);
1718 return;
1719 }
1720 if (token->name() == bodyTag
1721 || isCaptionColOrColgroupTag(token->name())
1722 || token->name() == htmlTag) {
1723 parseError(token);
1724 return;
1725 }
1726 if (token->name() == tableTag
1727 || token->name() == trTag
1728 || isTableBodyContextTag(token->name())) {
1729 if (!m_tree.openElements()->inTableScope(token->name())) {
1730 ASSERT(isTableBodyContextTag(token->name()) || m_tree.openElements()->inTableScope(templateTag) || isParsingFragment());
1731 parseError(token);
1732 return;
1733 }
1734 closeTheCell();
1735 processEndTag(token);
1736 return;
1737 }
1738 processEndTagForInBody(token);
1739 }
1740
processEndTagForInBody(AtomicHTMLToken * token)1741 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token)
1742 {
1743 ASSERT(token->type() == HTMLToken::EndTag);
1744 if (token->name() == bodyTag) {
1745 processBodyEndTagForInBody(token);
1746 return;
1747 }
1748 if (token->name() == htmlTag) {
1749 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1750 if (processBodyEndTagForInBody(&endBody))
1751 processEndTag(token);
1752 return;
1753 }
1754 if (token->name() == addressTag
1755 || token->name() == articleTag
1756 || token->name() == asideTag
1757 || token->name() == blockquoteTag
1758 || token->name() == buttonTag
1759 || token->name() == centerTag
1760 || token->name() == detailsTag
1761 || token->name() == dirTag
1762 || token->name() == divTag
1763 || token->name() == dlTag
1764 || token->name() == fieldsetTag
1765 || token->name() == figcaptionTag
1766 || token->name() == figureTag
1767 || token->name() == footerTag
1768 || token->name() == headerTag
1769 || token->name() == hgroupTag
1770 || token->name() == listingTag
1771 || token->name() == mainTag
1772 || token->name() == menuTag
1773 || token->name() == navTag
1774 || token->name() == olTag
1775 || token->name() == preTag
1776 || token->name() == sectionTag
1777 || token->name() == summaryTag
1778 || token->name() == ulTag) {
1779 if (!m_tree.openElements()->inScope(token->name())) {
1780 parseError(token);
1781 return;
1782 }
1783 m_tree.generateImpliedEndTags();
1784 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1785 parseError(token);
1786 m_tree.openElements()->popUntilPopped(token->name());
1787 return;
1788 }
1789 if (token->name() == formTag) {
1790 RefPtrWillBeRawPtr<Element> node = m_tree.takeForm();
1791 if (!node || !m_tree.openElements()->inScope(node.get())) {
1792 parseError(token);
1793 return;
1794 }
1795 m_tree.generateImpliedEndTags();
1796 if (m_tree.currentElement() != node.get())
1797 parseError(token);
1798 m_tree.openElements()->remove(node.get());
1799 }
1800 if (token->name() == pTag) {
1801 if (!m_tree.openElements()->inButtonScope(token->name())) {
1802 parseError(token);
1803 processFakeStartTag(pTag);
1804 ASSERT(m_tree.openElements()->inScope(token->name()));
1805 processEndTag(token);
1806 return;
1807 }
1808 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1809 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1810 parseError(token);
1811 m_tree.openElements()->popUntilPopped(token->name());
1812 return;
1813 }
1814 if (token->name() == liTag) {
1815 if (!m_tree.openElements()->inListItemScope(token->name())) {
1816 parseError(token);
1817 return;
1818 }
1819 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1820 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1821 parseError(token);
1822 m_tree.openElements()->popUntilPopped(token->name());
1823 return;
1824 }
1825 if (token->name() == ddTag
1826 || token->name() == dtTag) {
1827 if (!m_tree.openElements()->inScope(token->name())) {
1828 parseError(token);
1829 return;
1830 }
1831 m_tree.generateImpliedEndTagsWithExclusion(token->name());
1832 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1833 parseError(token);
1834 m_tree.openElements()->popUntilPopped(token->name());
1835 return;
1836 }
1837 if (isNumberedHeaderTag(token->name())) {
1838 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1839 parseError(token);
1840 return;
1841 }
1842 m_tree.generateImpliedEndTags();
1843 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1844 parseError(token);
1845 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1846 return;
1847 }
1848 if (isFormattingTag(token->name())) {
1849 callTheAdoptionAgency(token);
1850 return;
1851 }
1852 if (token->name() == appletTag
1853 || token->name() == marqueeTag
1854 || token->name() == objectTag) {
1855 if (!m_tree.openElements()->inScope(token->name())) {
1856 parseError(token);
1857 return;
1858 }
1859 m_tree.generateImpliedEndTags();
1860 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name()))
1861 parseError(token);
1862 m_tree.openElements()->popUntilPopped(token->name());
1863 m_tree.activeFormattingElements()->clearToLastMarker();
1864 return;
1865 }
1866 if (token->name() == brTag) {
1867 parseError(token);
1868 processFakeStartTag(brTag);
1869 return;
1870 }
1871 if (token->name() == templateTag) {
1872 processTemplateEndTag(token);
1873 return;
1874 }
1875 processAnyOtherEndTagForInBody(token);
1876 }
1877
processCaptionEndTagForInCaption()1878 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
1879 {
1880 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
1881 ASSERT(isParsingFragment());
1882 // FIXME: parse error
1883 return false;
1884 }
1885 m_tree.generateImpliedEndTags();
1886 // FIXME: parse error if (!m_tree.currentStackItem()->hasTagName(captionTag))
1887 m_tree.openElements()->popUntilPopped(captionTag.localName());
1888 m_tree.activeFormattingElements()->clearToLastMarker();
1889 setInsertionMode(InTableMode);
1890 return true;
1891 }
1892
processTrEndTagForInRow()1893 bool HTMLTreeBuilder::processTrEndTagForInRow()
1894 {
1895 if (!m_tree.openElements()->inTableScope(trTag)) {
1896 ASSERT(isParsingFragmentOrTemplateContents());
1897 // FIXME: parse error
1898 return false;
1899 }
1900 m_tree.openElements()->popUntilTableRowScopeMarker();
1901 ASSERT(m_tree.currentStackItem()->hasTagName(trTag));
1902 m_tree.openElements()->pop();
1903 setInsertionMode(InTableBodyMode);
1904 return true;
1905 }
1906
processTableEndTagForInTable()1907 bool HTMLTreeBuilder::processTableEndTagForInTable()
1908 {
1909 if (!m_tree.openElements()->inTableScope(tableTag)) {
1910 ASSERT(isParsingFragmentOrTemplateContents());
1911 // FIXME: parse error.
1912 return false;
1913 }
1914 m_tree.openElements()->popUntilPopped(tableTag.localName());
1915 resetInsertionModeAppropriately();
1916 return true;
1917 }
1918
processEndTagForInTable(AtomicHTMLToken * token)1919 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken* token)
1920 {
1921 ASSERT(token->type() == HTMLToken::EndTag);
1922 if (token->name() == tableTag) {
1923 processTableEndTagForInTable();
1924 return;
1925 }
1926 if (token->name() == bodyTag
1927 || isCaptionColOrColgroupTag(token->name())
1928 || token->name() == htmlTag
1929 || isTableBodyContextTag(token->name())
1930 || isTableCellContextTag(token->name())
1931 || token->name() == trTag) {
1932 parseError(token);
1933 return;
1934 }
1935 parseError(token);
1936 // Is this redirection necessary here?
1937 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1938 processEndTagForInBody(token);
1939 }
1940
processEndTag(AtomicHTMLToken * token)1941 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
1942 {
1943 ASSERT(token->type() == HTMLToken::EndTag);
1944 switch (insertionMode()) {
1945 case InitialMode:
1946 ASSERT(insertionMode() == InitialMode);
1947 defaultForInitial();
1948 // Fall through.
1949 case BeforeHTMLMode:
1950 ASSERT(insertionMode() == BeforeHTMLMode);
1951 if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1952 parseError(token);
1953 return;
1954 }
1955 defaultForBeforeHTML();
1956 // Fall through.
1957 case BeforeHeadMode:
1958 ASSERT(insertionMode() == BeforeHeadMode);
1959 if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1960 parseError(token);
1961 return;
1962 }
1963 defaultForBeforeHead();
1964 // Fall through.
1965 case InHeadMode:
1966 ASSERT(insertionMode() == InHeadMode);
1967 // FIXME: This case should be broken out into processEndTagForInHead,
1968 // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
1969 // but because the logic falls through to AfterHeadMode, that gets a little messy.
1970 if (token->name() == templateTag) {
1971 processTemplateEndTag(token);
1972 return;
1973 }
1974 if (token->name() == headTag) {
1975 m_tree.openElements()->popHTMLHeadElement();
1976 setInsertionMode(AfterHeadMode);
1977 return;
1978 }
1979 if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1980 parseError(token);
1981 return;
1982 }
1983 defaultForInHead();
1984 // Fall through.
1985 case AfterHeadMode:
1986 ASSERT(insertionMode() == AfterHeadMode);
1987 if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
1988 parseError(token);
1989 return;
1990 }
1991 defaultForAfterHead();
1992 // Fall through
1993 case InBodyMode:
1994 ASSERT(insertionMode() == InBodyMode);
1995 processEndTagForInBody(token);
1996 break;
1997 case InTableMode:
1998 ASSERT(insertionMode() == InTableMode);
1999 processEndTagForInTable(token);
2000 break;
2001 case InCaptionMode:
2002 ASSERT(insertionMode() == InCaptionMode);
2003 if (token->name() == captionTag) {
2004 processCaptionEndTagForInCaption();
2005 return;
2006 }
2007 if (token->name() == tableTag) {
2008 parseError(token);
2009 if (!processCaptionEndTagForInCaption()) {
2010 ASSERT(isParsingFragment());
2011 return;
2012 }
2013 processEndTag(token);
2014 return;
2015 }
2016 if (token->name() == bodyTag
2017 || token->name() == colTag
2018 || token->name() == colgroupTag
2019 || token->name() == htmlTag
2020 || isTableBodyContextTag(token->name())
2021 || isTableCellContextTag(token->name())
2022 || token->name() == trTag) {
2023 parseError(token);
2024 return;
2025 }
2026 processEndTagForInBody(token);
2027 break;
2028 case InColumnGroupMode:
2029 ASSERT(insertionMode() == InColumnGroupMode);
2030 if (token->name() == colgroupTag) {
2031 processColgroupEndTagForInColumnGroup();
2032 return;
2033 }
2034 if (token->name() == colTag) {
2035 parseError(token);
2036 return;
2037 }
2038 if (token->name() == templateTag) {
2039 processTemplateEndTag(token);
2040 return;
2041 }
2042 if (!processColgroupEndTagForInColumnGroup()) {
2043 ASSERT(isParsingFragmentOrTemplateContents());
2044 return;
2045 }
2046 processEndTag(token);
2047 break;
2048 case InRowMode:
2049 ASSERT(insertionMode() == InRowMode);
2050 processEndTagForInRow(token);
2051 break;
2052 case InCellMode:
2053 ASSERT(insertionMode() == InCellMode);
2054 processEndTagForInCell(token);
2055 break;
2056 case InTableBodyMode:
2057 ASSERT(insertionMode() == InTableBodyMode);
2058 processEndTagForInTableBody(token);
2059 break;
2060 case AfterBodyMode:
2061 ASSERT(insertionMode() == AfterBodyMode);
2062 if (token->name() == htmlTag) {
2063 if (isParsingFragment()) {
2064 parseError(token);
2065 return;
2066 }
2067 setInsertionMode(AfterAfterBodyMode);
2068 return;
2069 }
2070 // Fall through.
2071 case AfterAfterBodyMode:
2072 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2073 parseError(token);
2074 setInsertionMode(InBodyMode);
2075 processEndTag(token);
2076 break;
2077 case InHeadNoscriptMode:
2078 ASSERT(insertionMode() == InHeadNoscriptMode);
2079 if (token->name() == noscriptTag) {
2080 ASSERT(m_tree.currentStackItem()->hasTagName(noscriptTag));
2081 m_tree.openElements()->pop();
2082 ASSERT(m_tree.currentStackItem()->hasTagName(headTag));
2083 setInsertionMode(InHeadMode);
2084 return;
2085 }
2086 if (token->name() != brTag) {
2087 parseError(token);
2088 return;
2089 }
2090 defaultForInHeadNoscript();
2091 processToken(token);
2092 break;
2093 case TextMode:
2094 if (token->name() == scriptTag) {
2095 // Pause ourselves so that parsing stops until the script can be processed by the caller.
2096 ASSERT(m_tree.currentStackItem()->hasTagName(scriptTag));
2097 if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2098 m_scriptToProcess = m_tree.currentElement();
2099 m_tree.openElements()->pop();
2100 setInsertionMode(m_originalInsertionMode);
2101
2102 if (m_parser->tokenizer()) {
2103 // We must set the tokenizer's state to
2104 // DataState explicitly if the tokenizer didn't have a chance to.
2105 ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_options.useThreading);
2106 m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
2107 }
2108 return;
2109 }
2110 m_tree.openElements()->pop();
2111 setInsertionMode(m_originalInsertionMode);
2112 break;
2113 case InFramesetMode:
2114 ASSERT(insertionMode() == InFramesetMode);
2115 if (token->name() == framesetTag) {
2116 bool ignoreFramesetForFragmentParsing = m_tree.currentIsRootNode();
2117 ignoreFramesetForFragmentParsing = ignoreFramesetForFragmentParsing || m_tree.openElements()->hasTemplateInHTMLScope();
2118 if (ignoreFramesetForFragmentParsing) {
2119 ASSERT(isParsingFragmentOrTemplateContents());
2120 parseError(token);
2121 return;
2122 }
2123 m_tree.openElements()->pop();
2124 if (!isParsingFragment() && !m_tree.currentStackItem()->hasTagName(framesetTag))
2125 setInsertionMode(AfterFramesetMode);
2126 return;
2127 }
2128 if (token->name() == templateTag) {
2129 processTemplateEndTag(token);
2130 return;
2131 }
2132 break;
2133 case AfterFramesetMode:
2134 ASSERT(insertionMode() == AfterFramesetMode);
2135 if (token->name() == htmlTag) {
2136 setInsertionMode(AfterAfterFramesetMode);
2137 return;
2138 }
2139 // Fall through.
2140 case AfterAfterFramesetMode:
2141 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2142 parseError(token);
2143 break;
2144 case InSelectInTableMode:
2145 ASSERT(insertionMode() == InSelectInTableMode);
2146 if (token->name() == captionTag
2147 || token->name() == tableTag
2148 || isTableBodyContextTag(token->name())
2149 || token->name() == trTag
2150 || isTableCellContextTag(token->name())) {
2151 parseError(token);
2152 if (m_tree.openElements()->inTableScope(token->name())) {
2153 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2154 processEndTag(&endSelect);
2155 processEndTag(token);
2156 }
2157 return;
2158 }
2159 // Fall through.
2160 case InSelectMode:
2161 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2162 if (token->name() == optgroupTag) {
2163 if (m_tree.currentStackItem()->hasTagName(optionTag) && m_tree.oneBelowTop() && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2164 processFakeEndTag(optionTag);
2165 if (m_tree.currentStackItem()->hasTagName(optgroupTag)) {
2166 m_tree.openElements()->pop();
2167 return;
2168 }
2169 parseError(token);
2170 return;
2171 }
2172 if (token->name() == optionTag) {
2173 if (m_tree.currentStackItem()->hasTagName(optionTag)) {
2174 m_tree.openElements()->pop();
2175 return;
2176 }
2177 parseError(token);
2178 return;
2179 }
2180 if (token->name() == selectTag) {
2181 if (!m_tree.openElements()->inSelectScope(token->name())) {
2182 ASSERT(isParsingFragment());
2183 parseError(token);
2184 return;
2185 }
2186 m_tree.openElements()->popUntilPopped(selectTag.localName());
2187 resetInsertionModeAppropriately();
2188 return;
2189 }
2190 if (token->name() == templateTag) {
2191 processTemplateEndTag(token);
2192 return;
2193 }
2194 break;
2195 case InTableTextMode:
2196 defaultForInTableText();
2197 processEndTag(token);
2198 break;
2199 case TemplateContentsMode:
2200 if (token->name() == templateTag) {
2201 processTemplateEndTag(token);
2202 return;
2203 }
2204 break;
2205 }
2206 }
2207
processComment(AtomicHTMLToken * token)2208 void HTMLTreeBuilder::processComment(AtomicHTMLToken* token)
2209 {
2210 ASSERT(token->type() == HTMLToken::Comment);
2211 if (m_insertionMode == InitialMode
2212 || m_insertionMode == BeforeHTMLMode
2213 || m_insertionMode == AfterAfterBodyMode
2214 || m_insertionMode == AfterAfterFramesetMode) {
2215 m_tree.insertCommentOnDocument(token);
2216 return;
2217 }
2218 if (m_insertionMode == AfterBodyMode) {
2219 m_tree.insertCommentOnHTMLHtmlElement(token);
2220 return;
2221 }
2222 if (m_insertionMode == InTableTextMode) {
2223 defaultForInTableText();
2224 processComment(token);
2225 return;
2226 }
2227 m_tree.insertComment(token);
2228 }
2229
processCharacter(AtomicHTMLToken * token)2230 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken* token)
2231 {
2232 ASSERT(token->type() == HTMLToken::Character);
2233 CharacterTokenBuffer buffer(token);
2234 processCharacterBuffer(buffer);
2235 }
2236
processCharacterBuffer(CharacterTokenBuffer & buffer)2237 void HTMLTreeBuilder::processCharacterBuffer(CharacterTokenBuffer& buffer)
2238 {
2239 ReprocessBuffer:
2240 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
2241 // Note that this logic is different than the generic \r\n collapsing
2242 // handled in the input stream preprocessor. This logic is here as an
2243 // "authoring convenience" so folks can write:
2244 //
2245 // <pre>
2246 // lorem ipsum
2247 // lorem ipsum
2248 // </pre>
2249 //
2250 // without getting an extra newline at the start of their <pre> element.
2251 if (m_shouldSkipLeadingNewline) {
2252 m_shouldSkipLeadingNewline = false;
2253 buffer.skipAtMostOneLeadingNewline();
2254 if (buffer.isEmpty())
2255 return;
2256 }
2257
2258 switch (insertionMode()) {
2259 case InitialMode: {
2260 ASSERT(insertionMode() == InitialMode);
2261 buffer.skipLeadingWhitespace();
2262 if (buffer.isEmpty())
2263 return;
2264 defaultForInitial();
2265 // Fall through.
2266 }
2267 case BeforeHTMLMode: {
2268 ASSERT(insertionMode() == BeforeHTMLMode);
2269 buffer.skipLeadingWhitespace();
2270 if (buffer.isEmpty())
2271 return;
2272 defaultForBeforeHTML();
2273 // Fall through.
2274 }
2275 case BeforeHeadMode: {
2276 ASSERT(insertionMode() == BeforeHeadMode);
2277 buffer.skipLeadingWhitespace();
2278 if (buffer.isEmpty())
2279 return;
2280 defaultForBeforeHead();
2281 // Fall through.
2282 }
2283 case InHeadMode: {
2284 ASSERT(insertionMode() == InHeadMode);
2285 String leadingWhitespace = buffer.takeLeadingWhitespace();
2286 if (!leadingWhitespace.isEmpty())
2287 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2288 if (buffer.isEmpty())
2289 return;
2290 defaultForInHead();
2291 // Fall through.
2292 }
2293 case AfterHeadMode: {
2294 ASSERT(insertionMode() == AfterHeadMode);
2295 String leadingWhitespace = buffer.takeLeadingWhitespace();
2296 if (!leadingWhitespace.isEmpty())
2297 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2298 if (buffer.isEmpty())
2299 return;
2300 defaultForAfterHead();
2301 // Fall through.
2302 }
2303 case InBodyMode:
2304 case InCaptionMode:
2305 case TemplateContentsMode:
2306 case InCellMode: {
2307 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode || insertionMode() == TemplateContentsMode);
2308 processCharacterBufferForInBody(buffer);
2309 break;
2310 }
2311 case InTableMode:
2312 case InTableBodyMode:
2313 case InRowMode: {
2314 ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2315 ASSERT(m_pendingTableCharacters.isEmpty());
2316 if (m_tree.currentStackItem()->isElementNode()
2317 && (m_tree.currentStackItem()->hasTagName(tableTag)
2318 || m_tree.currentStackItem()->hasTagName(tbodyTag)
2319 || m_tree.currentStackItem()->hasTagName(tfootTag)
2320 || m_tree.currentStackItem()->hasTagName(theadTag)
2321 || m_tree.currentStackItem()->hasTagName(trTag))) {
2322 m_originalInsertionMode = m_insertionMode;
2323 setInsertionMode(InTableTextMode);
2324 // Note that we fall through to the InTableTextMode case below.
2325 } else {
2326 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2327 processCharacterBufferForInBody(buffer);
2328 break;
2329 }
2330 // Fall through.
2331 }
2332 case InTableTextMode: {
2333 buffer.giveRemainingTo(m_pendingTableCharacters);
2334 break;
2335 }
2336 case InColumnGroupMode: {
2337 ASSERT(insertionMode() == InColumnGroupMode);
2338 String leadingWhitespace = buffer.takeLeadingWhitespace();
2339 if (!leadingWhitespace.isEmpty())
2340 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2341 if (buffer.isEmpty())
2342 return;
2343 if (!processColgroupEndTagForInColumnGroup()) {
2344 ASSERT(isParsingFragmentOrTemplateContents());
2345 // The spec tells us to drop these characters on the floor.
2346 buffer.skipLeadingNonWhitespace();
2347 if (buffer.isEmpty())
2348 return;
2349 }
2350 goto ReprocessBuffer;
2351 }
2352 case AfterBodyMode:
2353 case AfterAfterBodyMode: {
2354 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2355 // FIXME: parse error
2356 setInsertionMode(InBodyMode);
2357 goto ReprocessBuffer;
2358 }
2359 case TextMode: {
2360 ASSERT(insertionMode() == TextMode);
2361 m_tree.insertTextNode(buffer.takeRemaining());
2362 break;
2363 }
2364 case InHeadNoscriptMode: {
2365 ASSERT(insertionMode() == InHeadNoscriptMode);
2366 String leadingWhitespace = buffer.takeLeadingWhitespace();
2367 if (!leadingWhitespace.isEmpty())
2368 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2369 if (buffer.isEmpty())
2370 return;
2371 defaultForInHeadNoscript();
2372 goto ReprocessBuffer;
2373 }
2374 case InFramesetMode:
2375 case AfterFramesetMode: {
2376 ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2377 String leadingWhitespace = buffer.takeRemainingWhitespace();
2378 if (!leadingWhitespace.isEmpty())
2379 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2380 // FIXME: We should generate a parse error if we skipped over any
2381 // non-whitespace characters.
2382 break;
2383 }
2384 case InSelectInTableMode:
2385 case InSelectMode: {
2386 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2387 m_tree.insertTextNode(buffer.takeRemaining());
2388 break;
2389 }
2390 case AfterAfterFramesetMode: {
2391 String leadingWhitespace = buffer.takeRemainingWhitespace();
2392 if (!leadingWhitespace.isEmpty()) {
2393 m_tree.reconstructTheActiveFormattingElements();
2394 m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
2395 }
2396 // FIXME: We should generate a parse error if we skipped over any
2397 // non-whitespace characters.
2398 break;
2399 }
2400 }
2401 }
2402
processCharacterBufferForInBody(CharacterTokenBuffer & buffer)2403 void HTMLTreeBuilder::processCharacterBufferForInBody(CharacterTokenBuffer& buffer)
2404 {
2405 m_tree.reconstructTheActiveFormattingElements();
2406 const String& characters = buffer.takeRemaining();
2407 m_tree.insertTextNode(characters);
2408 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2409 m_framesetOk = false;
2410 }
2411
processEndOfFile(AtomicHTMLToken * token)2412 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token)
2413 {
2414 ASSERT(token->type() == HTMLToken::EndOfFile);
2415 switch (insertionMode()) {
2416 case InitialMode:
2417 ASSERT(insertionMode() == InitialMode);
2418 defaultForInitial();
2419 // Fall through.
2420 case BeforeHTMLMode:
2421 ASSERT(insertionMode() == BeforeHTMLMode);
2422 defaultForBeforeHTML();
2423 // Fall through.
2424 case BeforeHeadMode:
2425 ASSERT(insertionMode() == BeforeHeadMode);
2426 defaultForBeforeHead();
2427 // Fall through.
2428 case InHeadMode:
2429 ASSERT(insertionMode() == InHeadMode);
2430 defaultForInHead();
2431 // Fall through.
2432 case AfterHeadMode:
2433 ASSERT(insertionMode() == AfterHeadMode);
2434 defaultForAfterHead();
2435 // Fall through
2436 case InBodyMode:
2437 case InCellMode:
2438 case InCaptionMode:
2439 case InRowMode:
2440 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode || insertionMode() == TemplateContentsMode);
2441 notImplemented(); // Emit parse error based on what elements are still open.
2442 if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
2443 return;
2444 break;
2445 case AfterBodyMode:
2446 case AfterAfterBodyMode:
2447 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2448 break;
2449 case InHeadNoscriptMode:
2450 ASSERT(insertionMode() == InHeadNoscriptMode);
2451 defaultForInHeadNoscript();
2452 processEndOfFile(token);
2453 return;
2454 case AfterFramesetMode:
2455 case AfterAfterFramesetMode:
2456 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2457 break;
2458 case InColumnGroupMode:
2459 if (m_tree.currentIsRootNode()) {
2460 ASSERT(isParsingFragment());
2461 return; // FIXME: Should we break here instead of returning?
2462 }
2463 ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || isHTMLTemplateElement(m_tree.currentNode()));
2464 processColgroupEndTagForInColumnGroup();
2465 // Fall through
2466 case InFramesetMode:
2467 case InTableMode:
2468 case InTableBodyMode:
2469 case InSelectInTableMode:
2470 case InSelectMode:
2471 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode || insertionMode() == InColumnGroupMode);
2472 if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2473 parseError(token);
2474 if (!m_templateInsertionModes.isEmpty() && processEndOfFileForInTemplateContents(token))
2475 return;
2476 break;
2477 case InTableTextMode:
2478 defaultForInTableText();
2479 processEndOfFile(token);
2480 return;
2481 case TextMode:
2482 parseError(token);
2483 if (m_tree.currentStackItem()->hasTagName(scriptTag))
2484 notImplemented(); // mark the script element as "already started".
2485 m_tree.openElements()->pop();
2486 ASSERT(m_originalInsertionMode != TextMode);
2487 setInsertionMode(m_originalInsertionMode);
2488 processEndOfFile(token);
2489 return;
2490 case TemplateContentsMode:
2491 if (processEndOfFileForInTemplateContents(token))
2492 return;
2493 break;
2494 }
2495 m_tree.processEndOfFile();
2496 }
2497
defaultForInitial()2498 void HTMLTreeBuilder::defaultForInitial()
2499 {
2500 notImplemented();
2501 m_tree.setDefaultCompatibilityMode();
2502 // FIXME: parse error
2503 setInsertionMode(BeforeHTMLMode);
2504 }
2505
defaultForBeforeHTML()2506 void HTMLTreeBuilder::defaultForBeforeHTML()
2507 {
2508 AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2509 m_tree.insertHTMLHtmlStartTagBeforeHTML(&startHTML);
2510 setInsertionMode(BeforeHeadMode);
2511 }
2512
defaultForBeforeHead()2513 void HTMLTreeBuilder::defaultForBeforeHead()
2514 {
2515 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2516 processStartTag(&startHead);
2517 }
2518
defaultForInHead()2519 void HTMLTreeBuilder::defaultForInHead()
2520 {
2521 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2522 processEndTag(&endHead);
2523 }
2524
defaultForInHeadNoscript()2525 void HTMLTreeBuilder::defaultForInHeadNoscript()
2526 {
2527 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2528 processEndTag(&endNoscript);
2529 }
2530
defaultForAfterHead()2531 void HTMLTreeBuilder::defaultForAfterHead()
2532 {
2533 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2534 processStartTag(&startBody);
2535 m_framesetOk = true;
2536 }
2537
defaultForInTableText()2538 void HTMLTreeBuilder::defaultForInTableText()
2539 {
2540 String characters = m_pendingTableCharacters.toString();
2541 m_pendingTableCharacters.clear();
2542 if (!isAllWhitespace(characters)) {
2543 // FIXME: parse error
2544 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2545 m_tree.reconstructTheActiveFormattingElements();
2546 m_tree.insertTextNode(characters, NotAllWhitespace);
2547 m_framesetOk = false;
2548 setInsertionMode(m_originalInsertionMode);
2549 return;
2550 }
2551 m_tree.insertTextNode(characters);
2552 setInsertionMode(m_originalInsertionMode);
2553 }
2554
processStartTagForInHead(AtomicHTMLToken * token)2555 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken* token)
2556 {
2557 ASSERT(token->type() == HTMLToken::StartTag);
2558 if (token->name() == htmlTag) {
2559 processHtmlStartTagForInBody(token);
2560 return true;
2561 }
2562 if (token->name() == baseTag
2563 || token->name() == basefontTag
2564 || token->name() == bgsoundTag
2565 || token->name() == commandTag
2566 || token->name() == linkTag
2567 || token->name() == metaTag) {
2568 m_tree.insertSelfClosingHTMLElement(token);
2569 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2570 return true;
2571 }
2572 if (token->name() == titleTag) {
2573 processGenericRCDATAStartTag(token);
2574 return true;
2575 }
2576 if (token->name() == noscriptTag) {
2577 if (m_options.scriptEnabled) {
2578 processGenericRawTextStartTag(token);
2579 return true;
2580 }
2581 m_tree.insertHTMLElement(token);
2582 setInsertionMode(InHeadNoscriptMode);
2583 return true;
2584 }
2585 if (token->name() == noframesTag || token->name() == styleTag) {
2586 processGenericRawTextStartTag(token);
2587 return true;
2588 }
2589 if (token->name() == scriptTag) {
2590 processScriptStartTag(token);
2591 return true;
2592 }
2593 if (token->name() == templateTag) {
2594 processTemplateStartTag(token);
2595 return true;
2596 }
2597 if (token->name() == headTag) {
2598 parseError(token);
2599 return true;
2600 }
2601 return false;
2602 }
2603
processGenericRCDATAStartTag(AtomicHTMLToken * token)2604 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
2605 {
2606 ASSERT(token->type() == HTMLToken::StartTag);
2607 m_tree.insertHTMLElement(token);
2608 if (m_parser->tokenizer())
2609 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2610 m_originalInsertionMode = m_insertionMode;
2611 setInsertionMode(TextMode);
2612 }
2613
processGenericRawTextStartTag(AtomicHTMLToken * token)2614 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
2615 {
2616 ASSERT(token->type() == HTMLToken::StartTag);
2617 m_tree.insertHTMLElement(token);
2618 if (m_parser->tokenizer())
2619 m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2620 m_originalInsertionMode = m_insertionMode;
2621 setInsertionMode(TextMode);
2622 }
2623
processScriptStartTag(AtomicHTMLToken * token)2624 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
2625 {
2626 ASSERT(token->type() == HTMLToken::StartTag);
2627 m_tree.insertScriptElement(token);
2628 if (m_parser->tokenizer())
2629 m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2630 m_originalInsertionMode = m_insertionMode;
2631
2632 TextPosition position = m_parser->textPosition();
2633
2634 m_scriptToProcessStartPosition = position;
2635
2636 setInsertionMode(TextMode);
2637 }
2638
2639 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
shouldProcessTokenInForeignContent(AtomicHTMLToken * token)2640 bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken* token)
2641 {
2642 if (m_tree.isEmpty())
2643 return false;
2644 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2645
2646 if (adjustedCurrentNode->isInHTMLNamespace())
2647 return false;
2648 if (HTMLElementStack::isMathMLTextIntegrationPoint(adjustedCurrentNode)) {
2649 if (token->type() == HTMLToken::StartTag
2650 && token->name() != MathMLNames::mglyphTag
2651 && token->name() != MathMLNames::malignmarkTag)
2652 return false;
2653 if (token->type() == HTMLToken::Character)
2654 return false;
2655 }
2656 if (adjustedCurrentNode->hasTagName(MathMLNames::annotation_xmlTag)
2657 && token->type() == HTMLToken::StartTag
2658 && token->name() == SVGNames::svgTag)
2659 return false;
2660 if (HTMLElementStack::isHTMLIntegrationPoint(adjustedCurrentNode)) {
2661 if (token->type() == HTMLToken::StartTag)
2662 return false;
2663 if (token->type() == HTMLToken::Character)
2664 return false;
2665 }
2666 if (token->type() == HTMLToken::EndOfFile)
2667 return false;
2668 return true;
2669 }
2670
processTokenInForeignContent(AtomicHTMLToken * token)2671 void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken* token)
2672 {
2673 if (token->type() == HTMLToken::Character) {
2674 const String& characters = token->characters();
2675 m_tree.insertTextNode(characters);
2676 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2677 m_framesetOk = false;
2678 return;
2679 }
2680
2681 m_tree.flush();
2682 HTMLStackItem* adjustedCurrentNode = adjustedCurrentStackItem();
2683
2684 switch (token->type()) {
2685 case HTMLToken::Uninitialized:
2686 ASSERT_NOT_REACHED();
2687 break;
2688 case HTMLToken::DOCTYPE:
2689 parseError(token);
2690 break;
2691 case HTMLToken::StartTag: {
2692 if (token->name() == bTag
2693 || token->name() == bigTag
2694 || token->name() == blockquoteTag
2695 || token->name() == bodyTag
2696 || token->name() == brTag
2697 || token->name() == centerTag
2698 || token->name() == codeTag
2699 || token->name() == ddTag
2700 || token->name() == divTag
2701 || token->name() == dlTag
2702 || token->name() == dtTag
2703 || token->name() == emTag
2704 || token->name() == embedTag
2705 || isNumberedHeaderTag(token->name())
2706 || token->name() == headTag
2707 || token->name() == hrTag
2708 || token->name() == iTag
2709 || token->name() == imgTag
2710 || token->name() == liTag
2711 || token->name() == listingTag
2712 || token->name() == menuTag
2713 || token->name() == metaTag
2714 || token->name() == nobrTag
2715 || token->name() == olTag
2716 || token->name() == pTag
2717 || token->name() == preTag
2718 || token->name() == rubyTag
2719 || token->name() == sTag
2720 || token->name() == smallTag
2721 || token->name() == spanTag
2722 || token->name() == strongTag
2723 || token->name() == strikeTag
2724 || token->name() == subTag
2725 || token->name() == supTag
2726 || token->name() == tableTag
2727 || token->name() == ttTag
2728 || token->name() == uTag
2729 || token->name() == ulTag
2730 || token->name() == varTag
2731 || (token->name() == fontTag && (token->getAttributeItem(colorAttr) || token->getAttributeItem(faceAttr) || token->getAttributeItem(sizeAttr)))) {
2732 parseError(token);
2733 m_tree.openElements()->popUntilForeignContentScopeMarker();
2734 processStartTag(token);
2735 return;
2736 }
2737 const AtomicString& currentNamespace = adjustedCurrentNode->namespaceURI();
2738 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
2739 adjustMathMLAttributes(token);
2740 if (currentNamespace == SVGNames::svgNamespaceURI) {
2741 adjustSVGTagNameCase(token);
2742 adjustSVGAttributes(token);
2743 }
2744 adjustForeignAttributes(token);
2745 m_tree.insertForeignElement(token, currentNamespace);
2746 break;
2747 }
2748 case HTMLToken::EndTag: {
2749 if (adjustedCurrentNode->namespaceURI() == SVGNames::svgNamespaceURI)
2750 adjustSVGTagNameCase(token);
2751
2752 if (token->name() == SVGNames::scriptTag && m_tree.currentStackItem()->hasTagName(SVGNames::scriptTag)) {
2753 if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
2754 m_scriptToProcess = m_tree.currentElement();
2755 m_tree.openElements()->pop();
2756 return;
2757 }
2758 if (!m_tree.currentStackItem()->isInHTMLNamespace()) {
2759 // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2760 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2761 if (!nodeRecord->stackItem()->hasLocalName(token->name()))
2762 parseError(token);
2763 while (1) {
2764 if (nodeRecord->stackItem()->hasLocalName(token->name())) {
2765 m_tree.openElements()->popUntilPopped(nodeRecord->element());
2766 return;
2767 }
2768 nodeRecord = nodeRecord->next();
2769
2770 if (nodeRecord->stackItem()->isInHTMLNamespace())
2771 break;
2772 }
2773 }
2774 // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
2775 processEndTag(token);
2776 break;
2777 }
2778 case HTMLToken::Comment:
2779 m_tree.insertComment(token);
2780 break;
2781 case HTMLToken::Character:
2782 case HTMLToken::EndOfFile:
2783 ASSERT_NOT_REACHED();
2784 break;
2785 }
2786 }
2787
finished()2788 void HTMLTreeBuilder::finished()
2789 {
2790 if (isParsingFragment())
2791 return;
2792
2793 ASSERT(m_templateInsertionModes.isEmpty());
2794 ASSERT(m_isAttached);
2795 // Warning, this may detach the parser. Do not do anything else after this.
2796 m_tree.finishedParsing();
2797 }
2798
parseError(AtomicHTMLToken *)2799 void HTMLTreeBuilder::parseError(AtomicHTMLToken*)
2800 {
2801 }
2802
2803 } // namespace WebCore
2804