• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29 
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Element.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLToken.h"
42 #include "HTMLTokenizer.h"
43 #include "LocalizedStrings.h"
44 #if ENABLE(MATHML)
45 #include "MathMLNames.h"
46 #endif
47 #include "NotImplemented.h"
48 #if ENABLE(SVG)
49 #include "SVGNames.h"
50 #endif
51 #include "ScriptController.h"
52 #include "Settings.h"
53 #include "Text.h"
54 #include <wtf/UnusedParam.h>
55 
56 namespace WebCore {
57 
58 using namespace HTMLNames;
59 
60 namespace {
61 
hasImpliedEndTag(ContainerNode * node)62 bool hasImpliedEndTag(ContainerNode* node)
63 {
64     return node->hasTagName(ddTag)
65         || node->hasTagName(dtTag)
66         || node->hasTagName(liTag)
67         || node->hasTagName(optionTag)
68         || node->hasTagName(optgroupTag)
69         || node->hasTagName(pTag)
70         || node->hasTagName(rpTag)
71         || node->hasTagName(rtTag);
72 }
73 
causesFosterParenting(const QualifiedName & tagName)74 bool causesFosterParenting(const QualifiedName& tagName)
75 {
76     return tagName == tableTag
77         || tagName == tbodyTag
78         || tagName == tfootTag
79         || tagName == theadTag
80         || tagName == trTag;
81 }
82 
83 } // namespace
84 
85 template<typename ChildType>
attach(ContainerNode * rawParent,PassRefPtr<ChildType> prpChild)86 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild)
87 {
88     RefPtr<ChildType> child = prpChild;
89     RefPtr<ContainerNode> parent = rawParent;
90 
91     // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
92     // redirection to the foster parent but HTMLConstructionSite::attachAtSite
93     // doesn't. It feels like we're missing a concept somehow.
94     if (shouldFosterParent()) {
95         fosterParent(child.get());
96         ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
97         return child.release();
98     }
99 
100     parent->parserAddChild(child);
101 
102     // An event handler (DOM Mutation, beforeload, et al.) could have removed
103     // the child, in which case we shouldn't try attaching it.
104     if (!child->parentNode())
105         return child.release();
106 
107     if (parent->attached() && !child->attached())
108         child->attach();
109     return child.release();
110 }
111 
attachAtSite(const AttachmentSite & site,PassRefPtr<Node> prpChild)112 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
113 {
114     // FIXME: It's unfortunate that we need to hold a reference to child
115     // here to call attach().  We should investigate whether we can rely on
116     // |site.parent| to hold a ref at this point.
117     RefPtr<Node> child = prpChild;
118 
119     if (site.nextChild)
120         site.parent->parserInsertBefore(child, site.nextChild);
121     else
122         site.parent->parserAddChild(child);
123 
124     // JavaScript run from beforeload (or DOM Mutation or event handlers)
125     // might have removed the child, in which case we should not attach it.
126     if (child->parentNode() && site.parent->attached() && !child->attached())
127         child->attach();
128 }
129 
HTMLConstructionSite(Document * document)130 HTMLConstructionSite::HTMLConstructionSite(Document* document)
131     : m_document(document)
132     , m_attachmentRoot(document)
133     , m_fragmentScriptingPermission(FragmentScriptingAllowed)
134     , m_isParsingFragment(false)
135     , m_redirectAttachToFosterParent(false)
136 {
137 }
138 
HTMLConstructionSite(DocumentFragment * fragment,FragmentScriptingPermission scriptingPermission)139 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
140     : m_document(fragment->document())
141     , m_attachmentRoot(fragment)
142     , m_fragmentScriptingPermission(scriptingPermission)
143     , m_isParsingFragment(true)
144     , m_redirectAttachToFosterParent(false)
145 {
146 }
147 
~HTMLConstructionSite()148 HTMLConstructionSite::~HTMLConstructionSite()
149 {
150 }
151 
detach()152 void HTMLConstructionSite::detach()
153 {
154     m_document = 0;
155     m_attachmentRoot = 0;
156 }
157 
setForm(HTMLFormElement * form)158 void HTMLConstructionSite::setForm(HTMLFormElement* form)
159 {
160     // This method should only be needed for HTMLTreeBuilder in the fragment case.
161     ASSERT(!m_form);
162     m_form = form;
163 }
164 
takeForm()165 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
166 {
167     return m_form.release();
168 }
169 
dispatchDocumentElementAvailableIfNeeded()170 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
171 {
172     ASSERT(m_document);
173     if (m_document->frame() && !m_isParsingFragment)
174         m_document->frame()->loader()->dispatchDocumentElementAvailable();
175 }
176 
insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken & token)177 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
178 {
179     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
180     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
181     m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
182 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
183     element->insertedByParser();
184 #endif
185     dispatchDocumentElementAvailableIfNeeded();
186 }
187 
mergeAttributesFromTokenIntoElement(AtomicHTMLToken & token,Element * element)188 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
189 {
190     if (!token.attributes())
191         return;
192 
193     NamedNodeMap* attributes = element->attributes(false);
194     for (unsigned i = 0; i < token.attributes()->length(); ++i) {
195         Attribute* attribute = token.attributes()->attributeItem(i);
196         if (!attributes->getAttributeItem(attribute->name()))
197             element->setAttribute(attribute->name(), attribute->value());
198     }
199 }
200 
insertHTMLHtmlStartTagInBody(AtomicHTMLToken & token)201 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
202 {
203     // FIXME: parse error
204 
205     // Fragments do not have a root HTML element, so any additional HTML elements
206     // encountered during fragment parsing should be ignored.
207     if (m_isParsingFragment)
208         return;
209 
210     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
211 }
212 
insertHTMLBodyStartTagInBody(AtomicHTMLToken & token)213 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
214 {
215     // FIXME: parse error
216     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
217 }
218 
insertDoctype(AtomicHTMLToken & token)219 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
220 {
221     ASSERT(token.type() == HTMLToken::DOCTYPE);
222     attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
223 
224     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
225     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
226     // because context-less fragments can determine their own quirks mode, and thus change
227     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
228     // in a fragment, as changing the owning document's compatibility mode would be wrong.
229     ASSERT(!m_isParsingFragment);
230     if (m_isParsingFragment)
231         return;
232 
233     if (token.forceQuirks())
234         m_document->setCompatibilityMode(Document::QuirksMode);
235     else
236         m_document->setCompatibilityModeFromDoctype();
237 }
238 
insertComment(AtomicHTMLToken & token)239 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
240 {
241     ASSERT(token.type() == HTMLToken::Comment);
242     attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
243 }
244 
insertCommentOnDocument(AtomicHTMLToken & token)245 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
246 {
247     ASSERT(token.type() == HTMLToken::Comment);
248     attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
249 }
250 
insertCommentOnHTMLHtmlElement(AtomicHTMLToken & token)251 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
252 {
253     ASSERT(token.type() == HTMLToken::Comment);
254     ContainerNode* parent = m_openElements.rootNode();
255     attach(parent, Comment::create(parent->document(), token.comment()));
256 }
257 
attachToCurrent(PassRefPtr<Element> child)258 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
259 {
260     return attach(currentNode(), child);
261 }
262 
insertHTMLHeadElement(AtomicHTMLToken & token)263 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
264 {
265     ASSERT(!shouldFosterParent());
266     m_head = attachToCurrent(createHTMLElement(token));
267     m_openElements.pushHTMLHeadElement(m_head);
268 }
269 
insertHTMLBodyElement(AtomicHTMLToken & token)270 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
271 {
272     ASSERT(!shouldFosterParent());
273     m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
274 }
275 
insertHTMLFormElement(AtomicHTMLToken & token,bool isDemoted)276 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
277 {
278     RefPtr<Element> element = createHTMLElement(token);
279     ASSERT(element->hasTagName(formTag));
280     RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
281     form->setDemoted(isDemoted);
282     m_openElements.push(attachToCurrent(form.release()));
283     ASSERT(currentElement()->isHTMLElement());
284     ASSERT(currentElement()->hasTagName(formTag));
285     m_form = static_cast<HTMLFormElement*>(currentElement());
286 }
287 
insertHTMLElement(AtomicHTMLToken & token)288 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
289 {
290     m_openElements.push(attachToCurrent(createHTMLElement(token)));
291 }
292 
insertSelfClosingHTMLElement(AtomicHTMLToken & token)293 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
294 {
295     ASSERT(token.type() == HTMLToken::StartTag);
296     RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
297     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
298     // but self-closing elements are never in the element stack so the stack
299     // doesn't get a chance to tell them that we're done parsing their children.
300     element->finishParsingChildren();
301     // FIXME: Do we want to acknowledge the token's self-closing flag?
302     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
303 }
304 
insertFormattingElement(AtomicHTMLToken & token)305 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
306 {
307     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
308     // Possible active formatting elements include:
309     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
310     insertHTMLElement(token);
311     m_activeFormattingElements.append(currentElement());
312 }
313 
insertScriptElement(AtomicHTMLToken & token)314 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
315 {
316     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
317     if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
318         element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
319     m_openElements.push(attachToCurrent(element.release()));
320 }
321 
insertForeignElement(AtomicHTMLToken & token,const AtomicString & namespaceURI)322 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
323 {
324     ASSERT(token.type() == HTMLToken::StartTag);
325     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
326 
327     RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
328     if (!token.selfClosing())
329         m_openElements.push(element);
330 }
331 
insertTextNode(const String & characters)332 void HTMLConstructionSite::insertTextNode(const String& characters)
333 {
334     AttachmentSite site;
335     site.parent = currentNode();
336     site.nextChild = 0;
337     if (shouldFosterParent())
338         findFosterSite(site);
339 
340     unsigned currentPosition = 0;
341 
342     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
343     // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
344 
345     Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
346     if (previousChild && previousChild->isTextNode()) {
347         // FIXME: We're only supposed to append to this text node if it
348         // was the last text node inserted by the parser.
349         CharacterData* textNode = static_cast<CharacterData*>(previousChild);
350         currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
351     }
352 
353     while (currentPosition < characters.length()) {
354         RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition);
355         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
356         if (!textNode->length())
357             textNode = Text::create(site.parent->document(), characters.substring(currentPosition));
358 
359         currentPosition += textNode->length();
360         ASSERT(currentPosition <= characters.length());
361         attachAtSite(site, textNode.release());
362     }
363 }
364 
createElement(AtomicHTMLToken & token,const AtomicString & namespaceURI)365 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
366 {
367     QualifiedName tagName(nullAtom, token.name(), namespaceURI);
368     RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
369     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
370     return element.release();
371 }
372 
createHTMLElement(AtomicHTMLToken & token)373 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
374 {
375     QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
376     // FIXME: This can't use HTMLConstructionSite::createElement because we
377     // have to pass the current form element.  We should rework form association
378     // to occur after construction to allow better code sharing here.
379     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
380     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
381     ASSERT(element->isHTMLElement());
382     return element.release();
383 }
384 
createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord * record)385 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
386 {
387     return createHTMLElementFromSavedElement(record->element());
388 }
389 
390 namespace {
391 
cloneAttributes(Element * element)392 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
393 {
394     NamedNodeMap* attributes = element->attributes(true);
395     if (!attributes)
396         return 0;
397 
398     RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
399     for (size_t i = 0; i < attributes->length(); ++i) {
400         Attribute* attribute = attributes->attributeItem(i);
401         RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
402         newAttributes->addAttribute(clone);
403     }
404     return newAttributes.release();
405 }
406 
407 }
408 
createHTMLElementFromSavedElement(Element * element)409 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
410 {
411     // FIXME: This method is wrong.  We should be using the original token.
412     // Using an Element* causes us to fail examples like this:
413     // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
414     // When reconstructTheActiveFormattingElements calls this method to open
415     // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
416     // spec implies it should be "1".  Minefield matches the HTML5 spec here.
417 
418     ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
419     AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
420     return createHTMLElement(fakeToken);
421 }
422 
indexOfFirstUnopenFormattingElement(unsigned & firstUnopenElementIndex) const423 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
424 {
425     if (m_activeFormattingElements.isEmpty())
426         return false;
427     unsigned index = m_activeFormattingElements.size();
428     do {
429         --index;
430         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
431         if (entry.isMarker() || m_openElements.contains(entry.element())) {
432             firstUnopenElementIndex = index + 1;
433             return firstUnopenElementIndex < m_activeFormattingElements.size();
434         }
435     } while (index);
436     firstUnopenElementIndex = index;
437     return true;
438 }
439 
reconstructTheActiveFormattingElements()440 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
441 {
442     unsigned firstUnopenElementIndex;
443     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
444         return;
445 
446     unsigned unopenEntryIndex = firstUnopenElementIndex;
447     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
448     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
449         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
450         RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
451         m_openElements.push(attachToCurrent(reconstructed.release()));
452         unopenedEntry.replaceElement(currentElement());
453     }
454 }
455 
generateImpliedEndTagsWithExclusion(const AtomicString & tagName)456 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
457 {
458     while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
459         m_openElements.pop();
460 }
461 
generateImpliedEndTags()462 void HTMLConstructionSite::generateImpliedEndTags()
463 {
464     while (hasImpliedEndTag(currentNode()))
465         m_openElements.pop();
466 }
467 
findFosterSite(AttachmentSite & site)468 void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
469 {
470     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
471     if (lastTableElementRecord) {
472         Element* lastTableElement = lastTableElementRecord->element();
473         if (ContainerNode* parent = lastTableElement->parentNode()) {
474             site.parent = parent;
475             site.nextChild = lastTableElement;
476             return;
477         }
478         site.parent = lastTableElementRecord->next()->element();
479         site.nextChild = 0;
480         return;
481     }
482     // Fragment case
483     site.parent = m_openElements.rootNode(); // DocumentFragment
484     site.nextChild = 0;
485 }
486 
shouldFosterParent() const487 bool HTMLConstructionSite::shouldFosterParent() const
488 {
489     return m_redirectAttachToFosterParent
490         && currentNode()->isElementNode()
491         && causesFosterParenting(currentElement()->tagQName());
492 }
493 
fosterParent(Node * node)494 void HTMLConstructionSite::fosterParent(Node* node)
495 {
496     AttachmentSite site;
497     findFosterSite(site);
498     attachAtSite(site, node);
499 }
500 
501 }
502