1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "core/html/parser/HTMLConstructionSite.h"
29
30 #include "core/HTMLElementFactory.h"
31 #include "core/HTMLNames.h"
32 #include "core/dom/Comment.h"
33 #include "core/dom/DocumentFragment.h"
34 #include "core/dom/DocumentType.h"
35 #include "core/dom/Element.h"
36 #include "core/dom/ScriptLoader.h"
37 #include "core/dom/Text.h"
38 #include "core/frame/LocalFrame.h"
39 #include "core/html/HTMLFormElement.h"
40 #include "core/html/HTMLHtmlElement.h"
41 #include "core/html/HTMLPlugInElement.h"
42 #include "core/html/HTMLScriptElement.h"
43 #include "core/html/HTMLTemplateElement.h"
44 #include "core/html/parser/AtomicHTMLToken.h"
45 #include "core/html/parser/HTMLParserIdioms.h"
46 #include "core/html/parser/HTMLStackItem.h"
47 #include "core/html/parser/HTMLToken.h"
48 #include "core/loader/FrameLoader.h"
49 #include "core/loader/FrameLoaderClient.h"
50 #include "core/svg/SVGScriptElement.h"
51 #include "platform/NotImplemented.h"
52 #include "platform/text/TextBreakIterator.h"
53 #include <limits>
54
55 namespace blink {
56
57 using namespace HTMLNames;
58
// Open-element stack depth above which attachLater() stops nesting: once the
// stack is deeper than this, a new node is attached to the requested parent's
// own parent (when it has one) instead of to the requested parent.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
60
setAttributes(Element * element,AtomicHTMLToken * token,ParserContentPolicy parserContentPolicy)61 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
62 {
63 if (!scriptingContentIsAllowed(parserContentPolicy))
64 element->stripScriptingAttributes(token->attributes());
65 element->parserSetAttributes(token->attributes());
66 }
67
hasImpliedEndTag(const HTMLStackItem * item)68 static bool hasImpliedEndTag(const HTMLStackItem* item)
69 {
70 return item->hasTagName(ddTag)
71 || item->hasTagName(dtTag)
72 || item->hasTagName(liTag)
73 || item->hasTagName(optionTag)
74 || item->hasTagName(optgroupTag)
75 || item->hasTagName(pTag)
76 || item->hasTagName(rbTag)
77 || item->hasTagName(rpTag)
78 || item->hasTagName(rtTag)
79 || item->hasTagName(rtcTag);
80 }
81
shouldUseLengthLimit(const ContainerNode & node)82 static bool shouldUseLengthLimit(const ContainerNode& node)
83 {
84 return !isHTMLScriptElement(node)
85 && !isHTMLStyleElement(node)
86 && !isSVGScriptElement(node);
87 }
88
textLengthLimitForContainer(const ContainerNode & node)89 static unsigned textLengthLimitForContainer(const ContainerNode& node)
90 {
91 return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
92 }
93
isAllWhitespace(const String & string)94 static inline bool isAllWhitespace(const String& string)
95 {
96 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
97 }
98
insert(HTMLConstructionSiteTask & task)99 static inline void insert(HTMLConstructionSiteTask& task)
100 {
101 if (isHTMLTemplateElement(*task.parent))
102 task.parent = toHTMLTemplateElement(task.parent.get())->content();
103
104 if (ContainerNode* parent = task.child->parentNode())
105 parent->parserRemoveChild(*task.child);
106
107 if (task.nextChild)
108 task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
109 else
110 task.parent->parserAppendChild(task.child.get());
111 }
112
executeInsertTask(HTMLConstructionSiteTask & task)113 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
114 {
115 ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
116
117 insert(task);
118
119 if (task.child->isElementNode()) {
120 Element& child = toElement(*task.child);
121 child.beginParsingChildren();
122 if (task.selfClosing)
123 child.finishParsingChildren();
124 }
125 }
126
executeInsertTextTask(HTMLConstructionSiteTask & task)127 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
128 {
129 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
130 ASSERT(task.child->isTextNode());
131
132 // Merge text nodes into previous ones if possible:
133 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
134 Text* newText = toText(task.child.get());
135 Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
136 if (previousChild && previousChild->isTextNode()) {
137 Text* previousText = toText(previousChild);
138 unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
139 if (previousText->length() + newText->length() < lengthLimit) {
140 previousText->parserAppendData(newText->data());
141 return;
142 }
143 }
144
145 insert(task);
146 }
147
executeReparentTask(HTMLConstructionSiteTask & task)148 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
149 {
150 ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
151
152 if (ContainerNode* parent = task.child->parentNode())
153 parent->parserRemoveChild(*task.child);
154
155 task.parent->parserAppendChild(task.child);
156 }
157
executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask & task)158 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
159 {
160 ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
161
162 insert(task);
163 }
164
executeTakeAllChildrenTask(HTMLConstructionSiteTask & task)165 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
166 {
167 ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
168
169 task.parent->parserTakeAllChildrenFrom(*task.oldParent());
170 }
171
executeTask(HTMLConstructionSiteTask & task)172 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
173 {
174 ASSERT(m_taskQueue.isEmpty());
175 if (task.operation == HTMLConstructionSiteTask::Insert)
176 return executeInsertTask(task);
177
178 if (task.operation == HTMLConstructionSiteTask::InsertText)
179 return executeInsertTextTask(task);
180
181 // All the cases below this point are only used by the adoption agency.
182
183 if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
184 return executeInsertAlreadyParsedChildTask(task);
185
186 if (task.operation == HTMLConstructionSiteTask::Reparent)
187 return executeReparentTask(task);
188
189 if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
190 return executeTakeAllChildrenTask(task);
191
192 ASSERT_NOT_REACHED();
193 }
194
195 // This is only needed for TextDocuments where we might have text nodes
196 // approaching the default length limit (~64k) and we don't want to
197 // break a text node in the middle of a combining character.
findBreakIndexBetween(const StringBuilder & string,unsigned currentPosition,unsigned proposedBreakIndex)198 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
199 {
200 ASSERT(currentPosition < proposedBreakIndex);
201 ASSERT(proposedBreakIndex <= string.length());
202 // The end of the string is always a valid break.
203 if (proposedBreakIndex == string.length())
204 return proposedBreakIndex;
205
206 // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
207 if (string.is8Bit())
208 return proposedBreakIndex;
209
210 const UChar* breakSearchCharacters = string.characters16() + currentPosition;
211 // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
212 unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
213 NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
214
215 if (it.isBreak(proposedBreakIndex - currentPosition))
216 return proposedBreakIndex;
217
218 int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
219 if (adjustedBreakIndexInSubstring > 0)
220 return currentPosition + adjustedBreakIndexInSubstring;
221 // We failed to find a breakable point, let the caller figure out what to do.
222 return 0;
223 }
224
// Strings composed entirely of whitespace are likely to be repeated, so they
// are routed through AtomicString to share a single string for each. The
// string is scanned only when |whitespaceMode| is WhitespaceUnknown; any other
// string is returned unchanged.
static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
{
    bool shouldAtomize = (whitespaceMode == AllWhitespace);
    if (!shouldAtomize && whitespaceMode == WhitespaceUnknown)
        shouldAtomize = isAllWhitespace(string);

    if (!shouldAtomize)
        return string;

    return AtomicString(string).string();
}
233
flushPendingText(FlushMode mode)234 void HTMLConstructionSite::flushPendingText(FlushMode mode)
235 {
236 if (m_pendingText.isEmpty())
237 return;
238
239 if (mode == FlushIfAtTextLimit
240 && !shouldUseLengthLimit(*m_pendingText.parent))
241 return;
242
243 PendingText pendingText;
244 // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
245 m_pendingText.swap(pendingText);
246 ASSERT(m_pendingText.isEmpty());
247
248 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
249 // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
250 unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
251
252 unsigned currentPosition = 0;
253 const StringBuilder& string = pendingText.stringBuilder;
254 while (currentPosition < string.length()) {
255 unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
256 unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
257 ASSERT(breakIndex <= string.length());
258 String substring = string.substring(currentPosition, breakIndex - currentPosition);
259 substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
260
261 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
262 task.parent = pendingText.parent;
263 task.nextChild = pendingText.nextChild;
264 task.child = Text::create(task.parent->document(), substring);
265 queueTask(task);
266
267 ASSERT(breakIndex > currentPosition);
268 ASSERT(breakIndex - currentPosition == substring.length());
269 ASSERT(toText(task.child.get())->length() == substring.length());
270 currentPosition = breakIndex;
271 }
272 }
273
queueTask(const HTMLConstructionSiteTask & task)274 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
275 {
276 flushPendingText(FlushAlways);
277 ASSERT(m_pendingText.isEmpty());
278 m_taskQueue.append(task);
279 }
280
attachLater(ContainerNode * parent,PassRefPtrWillBeRawPtr<Node> prpChild,bool selfClosing)281 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
282 {
283 ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
284 ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
285
286 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
287 task.parent = parent;
288 task.child = prpChild;
289 task.selfClosing = selfClosing;
290
291 if (shouldFosterParent()) {
292 fosterParent(task.child);
293 return;
294 }
295
296 // Add as a sibling of the parent if we have reached the maximum depth allowed.
297 if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
298 task.parent = task.parent->parentNode();
299
300 ASSERT(task.parent);
301 queueTask(task);
302 }
303
executeQueuedTasks()304 void HTMLConstructionSite::executeQueuedTasks()
305 {
306 // This has no affect on pendingText, and we may have pendingText
307 // remaining after executing all other queued tasks.
308 const size_t size = m_taskQueue.size();
309 if (!size)
310 return;
311
312 // Copy the task queue into a local variable in case executeTask
313 // re-enters the parser.
314 TaskQueue queue;
315 queue.swap(m_taskQueue);
316
317 for (size_t i = 0; i < size; ++i)
318 executeTask(queue[i]);
319
320 // We might be detached now.
321 }
322
HTMLConstructionSite(Document * document,ParserContentPolicy parserContentPolicy)323 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
324 : m_document(document)
325 , m_attachmentRoot(document)
326 , m_parserContentPolicy(parserContentPolicy)
327 , m_isParsingFragment(false)
328 , m_redirectAttachToFosterParent(false)
329 , m_inQuirksMode(document->inQuirksMode())
330 {
331 ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
332 }
333
HTMLConstructionSite(DocumentFragment * fragment,ParserContentPolicy parserContentPolicy)334 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
335 : m_document(&fragment->document())
336 , m_attachmentRoot(fragment)
337 , m_parserContentPolicy(parserContentPolicy)
338 , m_isParsingFragment(true)
339 , m_redirectAttachToFosterParent(false)
340 , m_inQuirksMode(fragment->document().inQuirksMode())
341 {
342 ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
343 }
344
~HTMLConstructionSite()345 HTMLConstructionSite::~HTMLConstructionSite()
346 {
347 // Depending on why we're being destroyed it might be OK
348 // to forget queued tasks, but currently we don't expect to.
349 ASSERT(m_taskQueue.isEmpty());
350 // Currently we assume that text will never be the last token in the
351 // document and that we'll always queue some additional task to cause it to flush.
352 ASSERT(m_pendingText.isEmpty());
353 }
354
trace(Visitor * visitor)355 void HTMLConstructionSite::trace(Visitor* visitor)
356 {
357 visitor->trace(m_document);
358 visitor->trace(m_attachmentRoot);
359 visitor->trace(m_head);
360 visitor->trace(m_form);
361 visitor->trace(m_openElements);
362 visitor->trace(m_activeFormattingElements);
363 visitor->trace(m_taskQueue);
364 visitor->trace(m_pendingText);
365 }
366
detach()367 void HTMLConstructionSite::detach()
368 {
369 // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
370 // text that really should have made it into the DOM earlier, but there
371 // doesn't seem to be a nice way to do that.
372 m_pendingText.discard();
373 m_document = nullptr;
374 m_attachmentRoot = nullptr;
375 }
376
setForm(HTMLFormElement * form)377 void HTMLConstructionSite::setForm(HTMLFormElement* form)
378 {
379 // This method should only be needed for HTMLTreeBuilder in the fragment case.
380 ASSERT(!m_form);
381 m_form = form;
382 }
383
takeForm()384 PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
385 {
386 return m_form.release();
387 }
388
dispatchDocumentElementAvailableIfNeeded()389 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
390 {
391 ASSERT(m_document);
392 if (m_document->frame() && !m_isParsingFragment)
393 m_document->frame()->loader().dispatchDocumentElementAvailable();
394 }
395
insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken * token)396 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
397 {
398 ASSERT(m_document);
399 RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
400 setAttributes(element.get(), token, m_parserContentPolicy);
401 attachLater(m_attachmentRoot, element);
402 m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
403
404 executeQueuedTasks();
405 element->insertedByParser();
406 dispatchDocumentElementAvailableIfNeeded();
407 }
408
mergeAttributesFromTokenIntoElement(AtomicHTMLToken * token,Element * element)409 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
410 {
411 if (token->attributes().isEmpty())
412 return;
413
414 for (unsigned i = 0; i < token->attributes().size(); ++i) {
415 const Attribute& tokenAttribute = token->attributes().at(i);
416 if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) == kNotFound)
417 element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
418 }
419 }
420
insertHTMLHtmlStartTagInBody(AtomicHTMLToken * token)421 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
422 {
423 // Fragments do not have a root HTML element, so any additional HTML elements
424 // encountered during fragment parsing should be ignored.
425 if (m_isParsingFragment)
426 return;
427
428 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
429 }
430
insertHTMLBodyStartTagInBody(AtomicHTMLToken * token)431 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
432 {
433 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
434 }
435
setDefaultCompatibilityMode()436 void HTMLConstructionSite::setDefaultCompatibilityMode()
437 {
438 if (m_isParsingFragment)
439 return;
440 setCompatibilityMode(Document::QuirksMode);
441 }
442
setCompatibilityMode(Document::CompatibilityMode mode)443 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
444 {
445 m_inQuirksMode = (mode == Document::QuirksMode);
446 m_document->setCompatibilityMode(mode);
447 }
448
setCompatibilityModeFromDoctype(const String & name,const String & publicId,const String & systemId)449 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
450 {
451 // There are three possible compatibility modes:
452 // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
453 // be omitted from numbers.
454 // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
455 // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
456
457 // Check for Quirks Mode.
458 if (name != "html"
459 || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
460 || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
461 || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
462 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
463 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
464 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
465 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
466 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
467 || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
468 || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
469 || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
470 || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
471 || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
472 || publicId.startsWith("-//IETF//DTD HTML 3//", false)
473 || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
474 || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
475 || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
476 || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
477 || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
478 || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
479 || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
480 || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
481 || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
482 || publicId.startsWith("-//IETF//DTD HTML//", false)
483 || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
484 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
485 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
486 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
487 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
488 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
489 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
490 || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
491 || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
492 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
493 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
494 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
495 || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
496 || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
497 || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
498 || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
499 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
500 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
501 || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
502 || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
503 || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
504 || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
505 || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
506 || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
507 || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
508 || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
509 || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
510 || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
511 || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
512 || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
513 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
514 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
515 || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
516 || equalIgnoringCase(publicId, "HTML")
517 || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
518 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
519 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
520 setCompatibilityMode(Document::QuirksMode);
521 return;
522 }
523
524 // Check for Limited Quirks Mode.
525 if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
526 || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
527 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
528 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
529 setCompatibilityMode(Document::LimitedQuirksMode);
530 return;
531 }
532
533 // Otherwise we are No Quirks Mode.
534 setCompatibilityMode(Document::NoQuirksMode);
535 }
536
processEndOfFile()537 void HTMLConstructionSite::processEndOfFile()
538 {
539 ASSERT(currentNode());
540 flush(FlushAlways);
541 openElements()->popAll();
542 }
543
finishedParsing()544 void HTMLConstructionSite::finishedParsing()
545 {
546 // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
547 ASSERT(m_taskQueue.isEmpty());
548 flush(FlushAlways);
549 m_document->finishedParsing();
550 }
551
insertDoctype(AtomicHTMLToken * token)552 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
553 {
554 ASSERT(token->type() == HTMLToken::DOCTYPE);
555
556 const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
557 const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
558 RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
559 attachLater(m_attachmentRoot, doctype.release());
560
561 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
562 // never occurs. However, if we ever chose to support such, this code is subtly wrong,
563 // because context-less fragments can determine their own quirks mode, and thus change
564 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code
565 // in a fragment, as changing the owning document's compatibility mode would be wrong.
566 ASSERT(!m_isParsingFragment);
567 if (m_isParsingFragment)
568 return;
569
570 if (token->forceQuirks())
571 setCompatibilityMode(Document::QuirksMode);
572 else {
573 setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
574 }
575 }
576
insertComment(AtomicHTMLToken * token)577 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
578 {
579 ASSERT(token->type() == HTMLToken::Comment);
580 attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
581 }
582
insertCommentOnDocument(AtomicHTMLToken * token)583 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
584 {
585 ASSERT(token->type() == HTMLToken::Comment);
586 ASSERT(m_document);
587 attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
588 }
589
insertCommentOnHTMLHtmlElement(AtomicHTMLToken * token)590 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
591 {
592 ASSERT(token->type() == HTMLToken::Comment);
593 ContainerNode* parent = m_openElements.rootNode();
594 attachLater(parent, Comment::create(parent->document(), token->comment()));
595 }
596
insertHTMLHeadElement(AtomicHTMLToken * token)597 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
598 {
599 ASSERT(!shouldFosterParent());
600 m_head = HTMLStackItem::create(createHTMLElement(token), token);
601 attachLater(currentNode(), m_head->element());
602 m_openElements.pushHTMLHeadElement(m_head);
603 }
604
insertHTMLBodyElement(AtomicHTMLToken * token)605 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
606 {
607 ASSERT(!shouldFosterParent());
608 RefPtrWillBeRawPtr<HTMLElement> body = createHTMLElement(token);
609 attachLater(currentNode(), body);
610 m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
611 if (LocalFrame* frame = m_document->frame())
612 frame->loader().client()->dispatchWillInsertBody();
613 }
614
insertHTMLFormElement(AtomicHTMLToken * token,bool isDemoted)615 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
616 {
617 RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
618 ASSERT(isHTMLFormElement(element));
619 m_form = static_pointer_cast<HTMLFormElement>(element.release());
620 m_form->setDemoted(isDemoted);
621 attachLater(currentNode(), m_form.get());
622 m_openElements.push(HTMLStackItem::create(m_form.get(), token));
623 }
624
insertHTMLElement(AtomicHTMLToken * token)625 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
626 {
627 RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
628 attachLater(currentNode(), element);
629 m_openElements.push(HTMLStackItem::create(element.release(), token));
630 }
631
insertSelfClosingHTMLElement(AtomicHTMLToken * token)632 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
633 {
634 ASSERT(token->type() == HTMLToken::StartTag);
635 // Normally HTMLElementStack is responsible for calling finishParsingChildren,
636 // but self-closing elements are never in the element stack so the stack
637 // doesn't get a chance to tell them that we're done parsing their children.
638 attachLater(currentNode(), createHTMLElement(token), true);
639 // FIXME: Do we want to acknowledge the token's self-closing flag?
640 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
641 }
642
insertFormattingElement(AtomicHTMLToken * token)643 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
644 {
645 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
646 // Possible active formatting elements include:
647 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
648 insertHTMLElement(token);
649 m_activeFormattingElements.append(currentElementRecord()->stackItem());
650 }
651
insertScriptElement(AtomicHTMLToken * token)652 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
653 {
654 // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
655 // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
656 // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
657 // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
658 // those flags or effects thereof.
659 const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
660 const bool alreadyStarted = m_isParsingFragment && parserInserted;
661 RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
662 setAttributes(element.get(), token, m_parserContentPolicy);
663 if (scriptingContentIsAllowed(m_parserContentPolicy))
664 attachLater(currentNode(), element);
665 m_openElements.push(HTMLStackItem::create(element.release(), token));
666 }
667
insertForeignElement(AtomicHTMLToken * token,const AtomicString & namespaceURI)668 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
669 {
670 ASSERT(token->type() == HTMLToken::StartTag);
671 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
672
673 RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
674 if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
675 attachLater(currentNode(), element, token->selfClosing());
676 if (!token->selfClosing())
677 m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
678 }
679
insertTextNode(const String & string,WhitespaceMode whitespaceMode)680 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
681 {
682 HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
683 dummyTask.parent = currentNode();
684
685 if (shouldFosterParent())
686 findFosterSite(dummyTask);
687
688 // FIXME: This probably doesn't need to be done both here and in insert(Task).
689 if (isHTMLTemplateElement(*dummyTask.parent))
690 dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
691
692 // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
693 // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
694 // In either case we have to flush the pending text into the task queue before making more.
695 if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent || m_pendingText.nextChild != dummyTask.nextChild))
696 flushPendingText(FlushAlways);
697 m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
698 }
699
reparent(HTMLElementStack::ElementRecord * newParent,HTMLElementStack::ElementRecord * child)700 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
701 {
702 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
703 task.parent = newParent->node();
704 task.child = child->node();
705 queueTask(task);
706 }
707
reparent(HTMLElementStack::ElementRecord * newParent,HTMLStackItem * child)708 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
709 {
710 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
711 task.parent = newParent->node();
712 task.child = child->node();
713 queueTask(task);
714 }
715
insertAlreadyParsedChild(HTMLStackItem * newParent,HTMLElementStack::ElementRecord * child)716 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
717 {
718 if (newParent->causesFosterParenting()) {
719 fosterParent(child->node());
720 return;
721 }
722
723 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
724 task.parent = newParent->node();
725 task.child = child->node();
726 queueTask(task);
727 }
728
takeAllChildren(HTMLStackItem * newParent,HTMLElementStack::ElementRecord * oldParent)729 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
730 {
731 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
732 task.parent = newParent->node();
733 task.child = oldParent->node();
734 queueTask(task);
735 }
736
createElement(AtomicHTMLToken * token,const AtomicString & namespaceURI)737 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
738 {
739 QualifiedName tagName(nullAtom, token->name(), namespaceURI);
740 RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
741 setAttributes(element.get(), token, m_parserContentPolicy);
742 return element.release();
743 }
744
ownerDocumentForCurrentNode()745 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
746 {
747 if (isHTMLTemplateElement(*currentNode()))
748 return toHTMLTemplateElement(currentElement())->content()->document();
749 return currentNode()->document();
750 }
751
createHTMLElement(AtomicHTMLToken * token)752 PassRefPtrWillBeRawPtr<HTMLElement> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
753 {
754 Document& document = ownerDocumentForCurrentNode();
755 // Only associate the element with the current form if we're creating the new element
756 // in a document with a browsing context (rather than in <template> contents).
757 HTMLFormElement* form = document.frame() ? m_form.get() : 0;
758 // FIXME: This can't use HTMLConstructionSite::createElement because we
759 // have to pass the current form element. We should rework form association
760 // to occur after construction to allow better code sharing here.
761 RefPtrWillBeRawPtr<HTMLElement> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
762 setAttributes(element.get(), token, m_parserContentPolicy);
763 return element.release();
764 }
765
createElementFromSavedToken(HTMLStackItem * item)766 PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
767 {
768 RefPtrWillBeRawPtr<Element> element;
769 // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
770 AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
771 if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
772 element = createHTMLElement(&fakeToken);
773 else
774 element = createElement(&fakeToken, item->namespaceURI());
775 return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
776 }
777
indexOfFirstUnopenFormattingElement(unsigned & firstUnopenElementIndex) const778 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
779 {
780 if (m_activeFormattingElements.isEmpty())
781 return false;
782 unsigned index = m_activeFormattingElements.size();
783 do {
784 --index;
785 const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
786 if (entry.isMarker() || m_openElements.contains(entry.element())) {
787 firstUnopenElementIndex = index + 1;
788 return firstUnopenElementIndex < m_activeFormattingElements.size();
789 }
790 } while (index);
791 firstUnopenElementIndex = index;
792 return true;
793 }
794
reconstructTheActiveFormattingElements()795 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
796 {
797 unsigned firstUnopenElementIndex;
798 if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
799 return;
800
801 unsigned unopenEntryIndex = firstUnopenElementIndex;
802 ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
803 for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
804 HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
805 RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
806 attachLater(currentNode(), reconstructed->node());
807 m_openElements.push(reconstructed);
808 unopenedEntry.replaceElement(reconstructed.release());
809 }
810 }
811
generateImpliedEndTagsWithExclusion(const AtomicString & tagName)812 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
813 {
814 while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
815 m_openElements.pop();
816 }
817
generateImpliedEndTags()818 void HTMLConstructionSite::generateImpliedEndTags()
819 {
820 while (hasImpliedEndTag(currentStackItem()))
821 m_openElements.pop();
822 }
823
// Returns the quirks-mode flag stored in m_inQuirksMode.
bool HTMLConstructionSite::inQuirksMode()
{
    return m_inQuirksMode;
}
828
findFosterSite(HTMLConstructionSiteTask & task)829 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
830 {
831 // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
832 HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
833 if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
834 task.parent = lastTemplateElement->element();
835 return;
836 }
837
838 HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
839 if (lastTableElementRecord) {
840 Element* lastTableElement = lastTableElementRecord->element();
841 ContainerNode* parent;
842 if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
843 parent = lastTableElementRecord->next()->element();
844 else
845 parent = lastTableElement->parentNode();
846
847 // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
848 // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
849 if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
850 task.parent = parent;
851 task.nextChild = lastTableElement;
852 return;
853 }
854 task.parent = lastTableElementRecord->next()->element();
855 return;
856 }
857 // Fragment case
858 task.parent = m_openElements.rootNode(); // DocumentFragment
859 }
860
shouldFosterParent() const861 bool HTMLConstructionSite::shouldFosterParent() const
862 {
863 return m_redirectAttachToFosterParent
864 && currentStackItem()->isElementNode()
865 && currentStackItem()->causesFosterParenting();
866 }
867
fosterParent(PassRefPtrWillBeRawPtr<Node> node)868 void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
869 {
870 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
871 findFosterSite(task);
872 task.child = node;
873 ASSERT(task.parent);
874 queueTask(task);
875 }
876
// Reports the two node references held by the pending text, |parent| and
// |nextChild|, to |visitor|.
// NOTE(review): presumably part of garbage-collection tracing — confirm against Visitor.
void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
{
    visitor->trace(parent);
    visitor->trace(nextChild);
}
882
883
884 }
885