• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "core/html/parser/HTMLConstructionSite.h"
29 
30 #include "core/HTMLElementFactory.h"
31 #include "core/HTMLNames.h"
32 #include "core/dom/Comment.h"
33 #include "core/dom/DocumentFragment.h"
34 #include "core/dom/DocumentType.h"
35 #include "core/dom/Element.h"
36 #include "core/dom/ScriptLoader.h"
37 #include "core/dom/Text.h"
38 #include "core/frame/LocalFrame.h"
39 #include "core/html/HTMLFormElement.h"
40 #include "core/html/HTMLHtmlElement.h"
41 #include "core/html/HTMLPlugInElement.h"
42 #include "core/html/HTMLScriptElement.h"
43 #include "core/html/HTMLTemplateElement.h"
44 #include "core/html/parser/AtomicHTMLToken.h"
45 #include "core/html/parser/HTMLParserIdioms.h"
46 #include "core/html/parser/HTMLStackItem.h"
47 #include "core/html/parser/HTMLToken.h"
48 #include "core/loader/FrameLoader.h"
49 #include "core/loader/FrameLoaderClient.h"
50 #include "core/svg/SVGScriptElement.h"
51 #include "platform/NotImplemented.h"
52 #include "platform/text/TextBreakIterator.h"
53 #include <limits>
54 
55 namespace blink {
56 
57 using namespace HTMLNames;
58 
// Depth threshold for the stack of open elements. Once the stack is deeper
// than this, attachLater() inserts new nodes under the intended parent's own
// parent (when it has one) instead of under the intended parent.
59 static const unsigned maximumHTMLParserDOMTreeDepth = 512;
60 
setAttributes(Element * element,AtomicHTMLToken * token,ParserContentPolicy parserContentPolicy)61 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
62 {
63     if (!scriptingContentIsAllowed(parserContentPolicy))
64         element->stripScriptingAttributes(token->attributes());
65     element->parserSetAttributes(token->attributes());
66 }
67 
hasImpliedEndTag(const HTMLStackItem * item)68 static bool hasImpliedEndTag(const HTMLStackItem* item)
69 {
70     return item->hasTagName(ddTag)
71         || item->hasTagName(dtTag)
72         || item->hasTagName(liTag)
73         || item->hasTagName(optionTag)
74         || item->hasTagName(optgroupTag)
75         || item->hasTagName(pTag)
76         || item->hasTagName(rbTag)
77         || item->hasTagName(rpTag)
78         || item->hasTagName(rtTag)
79         || item->hasTagName(rtcTag);
80 }
81 
shouldUseLengthLimit(const ContainerNode & node)82 static bool shouldUseLengthLimit(const ContainerNode& node)
83 {
84     return !isHTMLScriptElement(node)
85         && !isHTMLStyleElement(node)
86         && !isSVGScriptElement(node);
87 }
88 
textLengthLimitForContainer(const ContainerNode & node)89 static unsigned textLengthLimitForContainer(const ContainerNode& node)
90 {
91     return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
92 }
93 
isAllWhitespace(const String & string)94 static inline bool isAllWhitespace(const String& string)
95 {
96     return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
97 }
98 
insert(HTMLConstructionSiteTask & task)99 static inline void insert(HTMLConstructionSiteTask& task)
100 {
101     if (isHTMLTemplateElement(*task.parent))
102         task.parent = toHTMLTemplateElement(task.parent.get())->content();
103 
104     if (ContainerNode* parent = task.child->parentNode())
105         parent->parserRemoveChild(*task.child);
106 
107     if (task.nextChild)
108         task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
109     else
110         task.parent->parserAppendChild(task.child.get());
111 }
112 
executeInsertTask(HTMLConstructionSiteTask & task)113 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
114 {
115     ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
116 
117     insert(task);
118 
119     if (task.child->isElementNode()) {
120         Element& child = toElement(*task.child);
121         child.beginParsingChildren();
122         if (task.selfClosing)
123             child.finishParsingChildren();
124     }
125 }
126 
executeInsertTextTask(HTMLConstructionSiteTask & task)127 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
128 {
129     ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
130     ASSERT(task.child->isTextNode());
131 
132     // Merge text nodes into previous ones if possible:
133     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
134     Text* newText = toText(task.child.get());
135     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
136     if (previousChild && previousChild->isTextNode()) {
137         Text* previousText = toText(previousChild);
138         unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
139         if (previousText->length() + newText->length() < lengthLimit) {
140             previousText->parserAppendData(newText->data());
141             return;
142         }
143     }
144 
145     insert(task);
146 }
147 
executeReparentTask(HTMLConstructionSiteTask & task)148 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
149 {
150     ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
151 
152     if (ContainerNode* parent = task.child->parentNode())
153         parent->parserRemoveChild(*task.child);
154 
155     task.parent->parserAppendChild(task.child);
156 }
157 
executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask & task)158 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
159 {
160     ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
161 
162     insert(task);
163 }
164 
executeTakeAllChildrenTask(HTMLConstructionSiteTask & task)165 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
166 {
167     ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
168 
169     task.parent->parserTakeAllChildrenFrom(*task.oldParent());
170 }
171 
executeTask(HTMLConstructionSiteTask & task)172 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
173 {
174     ASSERT(m_taskQueue.isEmpty());
175     if (task.operation == HTMLConstructionSiteTask::Insert)
176         return executeInsertTask(task);
177 
178     if (task.operation == HTMLConstructionSiteTask::InsertText)
179         return executeInsertTextTask(task);
180 
181     // All the cases below this point are only used by the adoption agency.
182 
183     if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
184         return executeInsertAlreadyParsedChildTask(task);
185 
186     if (task.operation == HTMLConstructionSiteTask::Reparent)
187         return executeReparentTask(task);
188 
189     if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
190         return executeTakeAllChildrenTask(task);
191 
192     ASSERT_NOT_REACHED();
193 }
194 
195 // This is only needed for TextDocuments where we might have text nodes
196 // approaching the default length limit (~64k) and we don't want to
197 // break a text node in the middle of a combining character.
findBreakIndexBetween(const StringBuilder & string,unsigned currentPosition,unsigned proposedBreakIndex)198 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
199 {
200     ASSERT(currentPosition < proposedBreakIndex);
201     ASSERT(proposedBreakIndex <= string.length());
202     // The end of the string is always a valid break.
203     if (proposedBreakIndex == string.length())
204         return proposedBreakIndex;
205 
206     // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
207     if (string.is8Bit())
208         return proposedBreakIndex;
209 
210     const UChar* breakSearchCharacters = string.characters16() + currentPosition;
211     // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
212     unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
213     NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
214 
215     if (it.isBreak(proposedBreakIndex - currentPosition))
216         return proposedBreakIndex;
217 
218     int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
219     if (adjustedBreakIndexInSubstring > 0)
220         return currentPosition + adjustedBreakIndexInSubstring;
221     // We failed to find a breakable point, let the caller figure out what to do.
222     return 0;
223 }
224 
// Whitespace-only strings are likely to repeat, so they are routed through
// AtomicString to share a single string per distinct value. Any other string
// is returned unchanged.
static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
{
    bool treatAsWhitespace = (whitespaceMode == AllWhitespace);
    if (!treatAsWhitespace && whitespaceMode == WhitespaceUnknown)
        treatAsWhitespace = isAllWhitespace(string);

    if (!treatAsWhitespace)
        return string;

    return AtomicString(string).string();
}
233 
flushPendingText(FlushMode mode)234 void HTMLConstructionSite::flushPendingText(FlushMode mode)
235 {
236     if (m_pendingText.isEmpty())
237         return;
238 
239     if (mode == FlushIfAtTextLimit
240         && !shouldUseLengthLimit(*m_pendingText.parent))
241         return;
242 
243     PendingText pendingText;
244     // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
245     m_pendingText.swap(pendingText);
246     ASSERT(m_pendingText.isEmpty());
247 
248     // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
249     // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
250     unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
251 
252     unsigned currentPosition = 0;
253     const StringBuilder& string = pendingText.stringBuilder;
254     while (currentPosition < string.length()) {
255         unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
256         unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
257         ASSERT(breakIndex <= string.length());
258         String substring = string.substring(currentPosition, breakIndex - currentPosition);
259         substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
260 
261         HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
262         task.parent = pendingText.parent;
263         task.nextChild = pendingText.nextChild;
264         task.child = Text::create(task.parent->document(), substring);
265         queueTask(task);
266 
267         ASSERT(breakIndex > currentPosition);
268         ASSERT(breakIndex - currentPosition == substring.length());
269         ASSERT(toText(task.child.get())->length() == substring.length());
270         currentPosition = breakIndex;
271     }
272 }
273 
queueTask(const HTMLConstructionSiteTask & task)274 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
275 {
276     flushPendingText(FlushAlways);
277     ASSERT(m_pendingText.isEmpty());
278     m_taskQueue.append(task);
279 }
280 
attachLater(ContainerNode * parent,PassRefPtrWillBeRawPtr<Node> prpChild,bool selfClosing)281 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
282 {
283     ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
284     ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
285 
286     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
287     task.parent = parent;
288     task.child = prpChild;
289     task.selfClosing = selfClosing;
290 
291     if (shouldFosterParent()) {
292         fosterParent(task.child);
293         return;
294     }
295 
296     // Add as a sibling of the parent if we have reached the maximum depth allowed.
297     if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
298         task.parent = task.parent->parentNode();
299 
300     ASSERT(task.parent);
301     queueTask(task);
302 }
303 
executeQueuedTasks()304 void HTMLConstructionSite::executeQueuedTasks()
305 {
306     // This has no affect on pendingText, and we may have pendingText
307     // remaining after executing all other queued tasks.
308     const size_t size = m_taskQueue.size();
309     if (!size)
310         return;
311 
312     // Copy the task queue into a local variable in case executeTask
313     // re-enters the parser.
314     TaskQueue queue;
315     queue.swap(m_taskQueue);
316 
317     for (size_t i = 0; i < size; ++i)
318         executeTask(queue[i]);
319 
320     // We might be detached now.
321 }
322 
HTMLConstructionSite(Document * document,ParserContentPolicy parserContentPolicy)323 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
324     : m_document(document)
325     , m_attachmentRoot(document)
326     , m_parserContentPolicy(parserContentPolicy)
327     , m_isParsingFragment(false)
328     , m_redirectAttachToFosterParent(false)
329     , m_inQuirksMode(document->inQuirksMode())
330 {
331     ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
332 }
333 
HTMLConstructionSite(DocumentFragment * fragment,ParserContentPolicy parserContentPolicy)334 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
335     : m_document(&fragment->document())
336     , m_attachmentRoot(fragment)
337     , m_parserContentPolicy(parserContentPolicy)
338     , m_isParsingFragment(true)
339     , m_redirectAttachToFosterParent(false)
340     , m_inQuirksMode(fragment->document().inQuirksMode())
341 {
342     ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
343 }
344 
~HTMLConstructionSite()345 HTMLConstructionSite::~HTMLConstructionSite()
346 {
347     // Depending on why we're being destroyed it might be OK
348     // to forget queued tasks, but currently we don't expect to.
349     ASSERT(m_taskQueue.isEmpty());
350     // Currently we assume that text will never be the last token in the
351     // document and that we'll always queue some additional task to cause it to flush.
352     ASSERT(m_pendingText.isEmpty());
353 }
354 
trace(Visitor * visitor)355 void HTMLConstructionSite::trace(Visitor* visitor)
356 {
357     visitor->trace(m_document);
358     visitor->trace(m_attachmentRoot);
359     visitor->trace(m_head);
360     visitor->trace(m_form);
361     visitor->trace(m_openElements);
362     visitor->trace(m_activeFormattingElements);
363     visitor->trace(m_taskQueue);
364     visitor->trace(m_pendingText);
365 }
366 
detach()367 void HTMLConstructionSite::detach()
368 {
369     // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
370     // text that really should have made it into the DOM earlier, but there
371     // doesn't seem to be a nice way to do that.
372     m_pendingText.discard();
373     m_document = nullptr;
374     m_attachmentRoot = nullptr;
375 }
376 
setForm(HTMLFormElement * form)377 void HTMLConstructionSite::setForm(HTMLFormElement* form)
378 {
379     // This method should only be needed for HTMLTreeBuilder in the fragment case.
380     ASSERT(!m_form);
381     m_form = form;
382 }
383 
takeForm()384 PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
385 {
386     return m_form.release();
387 }
388 
dispatchDocumentElementAvailableIfNeeded()389 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
390 {
391     ASSERT(m_document);
392     if (m_document->frame() && !m_isParsingFragment)
393         m_document->frame()->loader().dispatchDocumentElementAvailable();
394 }
395 
insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken * token)396 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
397 {
398     ASSERT(m_document);
399     RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
400     setAttributes(element.get(), token, m_parserContentPolicy);
401     attachLater(m_attachmentRoot, element);
402     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
403 
404     executeQueuedTasks();
405     element->insertedByParser();
406     dispatchDocumentElementAvailableIfNeeded();
407 }
408 
mergeAttributesFromTokenIntoElement(AtomicHTMLToken * token,Element * element)409 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
410 {
411     if (token->attributes().isEmpty())
412         return;
413 
414     for (unsigned i = 0; i < token->attributes().size(); ++i) {
415         const Attribute& tokenAttribute = token->attributes().at(i);
416         if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) == kNotFound)
417             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
418     }
419 }
420 
insertHTMLHtmlStartTagInBody(AtomicHTMLToken * token)421 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
422 {
423     // Fragments do not have a root HTML element, so any additional HTML elements
424     // encountered during fragment parsing should be ignored.
425     if (m_isParsingFragment)
426         return;
427 
428     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
429 }
430 
insertHTMLBodyStartTagInBody(AtomicHTMLToken * token)431 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
432 {
433     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
434 }
435 
setDefaultCompatibilityMode()436 void HTMLConstructionSite::setDefaultCompatibilityMode()
437 {
438     if (m_isParsingFragment)
439         return;
440     setCompatibilityMode(Document::QuirksMode);
441 }
442 
setCompatibilityMode(Document::CompatibilityMode mode)443 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
444 {
445     m_inQuirksMode = (mode == Document::QuirksMode);
446     m_document->setCompatibilityMode(mode);
447 }
448 
setCompatibilityModeFromDoctype(const String & name,const String & publicId,const String & systemId)449 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
450 {
451     // There are three possible compatibility modes:
452     // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
453     // be omitted from numbers.
454     // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
455     // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
456 
457     // Check for Quirks Mode.
458     if (name != "html"
459         || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
460         || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
461         || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
462         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
463         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
464         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
465         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
466         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
467         || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
468         || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
469         || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
470         || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
471         || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
472         || publicId.startsWith("-//IETF//DTD HTML 3//", false)
473         || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
474         || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
475         || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
476         || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
477         || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
478         || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
479         || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
480         || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
481         || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
482         || publicId.startsWith("-//IETF//DTD HTML//", false)
483         || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
484         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
485         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
486         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
487         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
488         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
489         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
490         || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
491         || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
492         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
493         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
494         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
495         || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
496         || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
497         || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
498         || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
499         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
500         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
501         || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
502         || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
503         || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
504         || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
505         || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
506         || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
507         || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
508         || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
509         || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
510         || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
511         || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
512         || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
513         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
514         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
515         || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
516         || equalIgnoringCase(publicId, "HTML")
517         || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
518         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
519         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
520         setCompatibilityMode(Document::QuirksMode);
521         return;
522     }
523 
524     // Check for Limited Quirks Mode.
525     if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
526         || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
527         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
528         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
529         setCompatibilityMode(Document::LimitedQuirksMode);
530         return;
531     }
532 
533     // Otherwise we are No Quirks Mode.
534     setCompatibilityMode(Document::NoQuirksMode);
535 }
536 
processEndOfFile()537 void HTMLConstructionSite::processEndOfFile()
538 {
539     ASSERT(currentNode());
540     flush(FlushAlways);
541     openElements()->popAll();
542 }
543 
finishedParsing()544 void HTMLConstructionSite::finishedParsing()
545 {
546     // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
547     ASSERT(m_taskQueue.isEmpty());
548     flush(FlushAlways);
549     m_document->finishedParsing();
550 }
551 
insertDoctype(AtomicHTMLToken * token)552 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
553 {
554     ASSERT(token->type() == HTMLToken::DOCTYPE);
555 
556     const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
557     const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
558     RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
559     attachLater(m_attachmentRoot, doctype.release());
560 
561     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
562     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
563     // because context-less fragments can determine their own quirks mode, and thus change
564     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
565     // in a fragment, as changing the owning document's compatibility mode would be wrong.
566     ASSERT(!m_isParsingFragment);
567     if (m_isParsingFragment)
568         return;
569 
570     if (token->forceQuirks())
571         setCompatibilityMode(Document::QuirksMode);
572     else {
573         setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
574     }
575 }
576 
insertComment(AtomicHTMLToken * token)577 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
578 {
579     ASSERT(token->type() == HTMLToken::Comment);
580     attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
581 }
582 
insertCommentOnDocument(AtomicHTMLToken * token)583 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
584 {
585     ASSERT(token->type() == HTMLToken::Comment);
586     ASSERT(m_document);
587     attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
588 }
589 
insertCommentOnHTMLHtmlElement(AtomicHTMLToken * token)590 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
591 {
592     ASSERT(token->type() == HTMLToken::Comment);
593     ContainerNode* parent = m_openElements.rootNode();
594     attachLater(parent, Comment::create(parent->document(), token->comment()));
595 }
596 
insertHTMLHeadElement(AtomicHTMLToken * token)597 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
598 {
599     ASSERT(!shouldFosterParent());
600     m_head = HTMLStackItem::create(createHTMLElement(token), token);
601     attachLater(currentNode(), m_head->element());
602     m_openElements.pushHTMLHeadElement(m_head);
603 }
604 
insertHTMLBodyElement(AtomicHTMLToken * token)605 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
606 {
607     ASSERT(!shouldFosterParent());
608     RefPtrWillBeRawPtr<HTMLElement> body = createHTMLElement(token);
609     attachLater(currentNode(), body);
610     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
611     if (LocalFrame* frame = m_document->frame())
612         frame->loader().client()->dispatchWillInsertBody();
613 }
614 
insertHTMLFormElement(AtomicHTMLToken * token,bool isDemoted)615 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
616 {
617     RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
618     ASSERT(isHTMLFormElement(element));
619     m_form = static_pointer_cast<HTMLFormElement>(element.release());
620     m_form->setDemoted(isDemoted);
621     attachLater(currentNode(), m_form.get());
622     m_openElements.push(HTMLStackItem::create(m_form.get(), token));
623 }
624 
insertHTMLElement(AtomicHTMLToken * token)625 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
626 {
627     RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
628     attachLater(currentNode(), element);
629     m_openElements.push(HTMLStackItem::create(element.release(), token));
630 }
631 
insertSelfClosingHTMLElement(AtomicHTMLToken * token)632 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
633 {
634     ASSERT(token->type() == HTMLToken::StartTag);
635     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
636     // but self-closing elements are never in the element stack so the stack
637     // doesn't get a chance to tell them that we're done parsing their children.
638     attachLater(currentNode(), createHTMLElement(token), true);
639     // FIXME: Do we want to acknowledge the token's self-closing flag?
640     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
641 }
642 
insertFormattingElement(AtomicHTMLToken * token)643 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
644 {
645     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
646     // Possible active formatting elements include:
647     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
648     insertHTMLElement(token);
649     m_activeFormattingElements.append(currentElementRecord()->stackItem());
650 }
651 
insertScriptElement(AtomicHTMLToken * token)652 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
653 {
654     // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
655     // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
656     // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
657     // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
658     // those flags or effects thereof.
659     const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
660     const bool alreadyStarted = m_isParsingFragment && parserInserted;
661     RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
662     setAttributes(element.get(), token, m_parserContentPolicy);
663     if (scriptingContentIsAllowed(m_parserContentPolicy))
664         attachLater(currentNode(), element);
665     m_openElements.push(HTMLStackItem::create(element.release(), token));
666 }
667 
insertForeignElement(AtomicHTMLToken * token,const AtomicString & namespaceURI)668 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
669 {
670     ASSERT(token->type() == HTMLToken::StartTag);
671     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
672 
673     RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
674     if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
675         attachLater(currentNode(), element, token->selfClosing());
676     if (!token->selfClosing())
677         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
678 }
679 
insertTextNode(const String & string,WhitespaceMode whitespaceMode)680 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
681 {
682     HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
683     dummyTask.parent = currentNode();
684 
685     if (shouldFosterParent())
686         findFosterSite(dummyTask);
687 
688     // FIXME: This probably doesn't need to be done both here and in insert(Task).
689     if (isHTMLTemplateElement(*dummyTask.parent))
690         dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
691 
692     // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
693     // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
694     // In either case we have to flush the pending text into the task queue before making more.
695     if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
696         flushPendingText(FlushAlways);
697     m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
698 }
699 
reparent(HTMLElementStack::ElementRecord * newParent,HTMLElementStack::ElementRecord * child)700 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
701 {
702     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
703     task.parent = newParent->node();
704     task.child = child->node();
705     queueTask(task);
706 }
707 
reparent(HTMLElementStack::ElementRecord * newParent,HTMLStackItem * child)708 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
709 {
710     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
711     task.parent = newParent->node();
712     task.child = child->node();
713     queueTask(task);
714 }
715 
insertAlreadyParsedChild(HTMLStackItem * newParent,HTMLElementStack::ElementRecord * child)716 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
717 {
718     if (newParent->causesFosterParenting()) {
719         fosterParent(child->node());
720         return;
721     }
722 
723     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
724     task.parent = newParent->node();
725     task.child = child->node();
726     queueTask(task);
727 }
728 
takeAllChildren(HTMLStackItem * newParent,HTMLElementStack::ElementRecord * oldParent)729 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
730 {
731     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
732     task.parent = newParent->node();
733     task.child = oldParent->node();
734     queueTask(task);
735 }
736 
createElement(AtomicHTMLToken * token,const AtomicString & namespaceURI)737 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
738 {
739     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
740     RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
741     setAttributes(element.get(), token, m_parserContentPolicy);
742     return element.release();
743 }
744 
ownerDocumentForCurrentNode()745 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
746 {
747     if (isHTMLTemplateElement(*currentNode()))
748         return toHTMLTemplateElement(currentElement())->content()->document();
749     return currentNode()->document();
750 }
751 
createHTMLElement(AtomicHTMLToken * token)752 PassRefPtrWillBeRawPtr<HTMLElement> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
753 {
754     Document& document = ownerDocumentForCurrentNode();
755     // Only associate the element with the current form if we're creating the new element
756     // in a document with a browsing context (rather than in <template> contents).
757     HTMLFormElement* form = document.frame() ? m_form.get() : 0;
758     // FIXME: This can't use HTMLConstructionSite::createElement because we
759     // have to pass the current form element.  We should rework form association
760     // to occur after construction to allow better code sharing here.
761     RefPtrWillBeRawPtr<HTMLElement> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
762     setAttributes(element.get(), token, m_parserContentPolicy);
763     return element.release();
764 }
765 
createElementFromSavedToken(HTMLStackItem * item)766 PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
767 {
768     RefPtrWillBeRawPtr<Element> element;
769     // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
770     AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
771     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
772         element = createHTMLElement(&fakeToken);
773     else
774         element = createElement(&fakeToken, item->namespaceURI());
775     return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
776 }
777 
indexOfFirstUnopenFormattingElement(unsigned & firstUnopenElementIndex) const778 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
779 {
780     if (m_activeFormattingElements.isEmpty())
781         return false;
782     unsigned index = m_activeFormattingElements.size();
783     do {
784         --index;
785         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
786         if (entry.isMarker() || m_openElements.contains(entry.element())) {
787             firstUnopenElementIndex = index + 1;
788             return firstUnopenElementIndex < m_activeFormattingElements.size();
789         }
790     } while (index);
791     firstUnopenElementIndex = index;
792     return true;
793 }
794 
reconstructTheActiveFormattingElements()795 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
796 {
797     unsigned firstUnopenElementIndex;
798     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
799         return;
800 
801     unsigned unopenEntryIndex = firstUnopenElementIndex;
802     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
803     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
804         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
805         RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
806         attachLater(currentNode(), reconstructed->node());
807         m_openElements.push(reconstructed);
808         unopenedEntry.replaceElement(reconstructed.release());
809     }
810 }
811 
generateImpliedEndTagsWithExclusion(const AtomicString & tagName)812 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
813 {
814     while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
815         m_openElements.pop();
816 }
817 
generateImpliedEndTags()818 void HTMLConstructionSite::generateImpliedEndTags()
819 {
820     while (hasImpliedEndTag(currentStackItem()))
821         m_openElements.pop();
822 }
823 
inQuirksMode()824 bool HTMLConstructionSite::inQuirksMode()
825 {
826     return m_inQuirksMode;
827 }
828 
findFosterSite(HTMLConstructionSiteTask & task)829 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
830 {
831     // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
832     HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
833     if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
834         task.parent = lastTemplateElement->element();
835         return;
836     }
837 
838     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
839     if (lastTableElementRecord) {
840         Element* lastTableElement = lastTableElementRecord->element();
841         ContainerNode* parent;
842         if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
843             parent = lastTableElementRecord->next()->element();
844         else
845             parent = lastTableElement->parentNode();
846 
847         // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
848         // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
849         if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
850             task.parent = parent;
851             task.nextChild = lastTableElement;
852             return;
853         }
854         task.parent = lastTableElementRecord->next()->element();
855         return;
856     }
857     // Fragment case
858     task.parent = m_openElements.rootNode(); // DocumentFragment
859 }
860 
shouldFosterParent() const861 bool HTMLConstructionSite::shouldFosterParent() const
862 {
863     return m_redirectAttachToFosterParent
864         && currentStackItem()->isElementNode()
865         && currentStackItem()->causesFosterParenting();
866 }
867 
fosterParent(PassRefPtrWillBeRawPtr<Node> node)868 void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
869 {
870     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
871     findFosterSite(task);
872     task.child = node;
873     ASSERT(task.parent);
874     queueTask(task);
875 }
876 
trace(Visitor * visitor)877 void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
878 {
879     visitor->trace(parent);
880     visitor->trace(nextChild);
881 }
882 
883 
884 }
885