1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "core/editing/MarkupAccumulator.h"
29
30 #include "HTMLNames.h"
31 #include "XLinkNames.h"
32 #include "XMLNSNames.h"
33 #include "XMLNames.h"
34 #include "core/dom/CDATASection.h"
35 #include "core/dom/Comment.h"
36 #include "core/dom/DocumentFragment.h"
37 #include "core/dom/DocumentType.h"
38 #include "core/dom/ProcessingInstruction.h"
39 #include "core/editing/Editor.h"
40 #include "core/html/HTMLElement.h"
41 #include "core/html/HTMLTemplateElement.h"
42 #include "platform/weborigin/KURL.h"
43 #include "wtf/unicode/CharacterNames.h"
44
45 namespace WebCore {
46
47 using namespace HTMLNames;
48
appendCharactersReplacingEntities(StringBuilder & result,const String & source,unsigned offset,unsigned length,EntityMask entityMask)49 void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask)
50 {
51 DEFINE_STATIC_LOCAL(const String, ampReference, ("&"));
52 DEFINE_STATIC_LOCAL(const String, ltReference, ("<"));
53 DEFINE_STATIC_LOCAL(const String, gtReference, (">"));
54 DEFINE_STATIC_LOCAL(const String, quotReference, ("""));
55 DEFINE_STATIC_LOCAL(const String, nbspReference, (" "));
56
57 static const EntityDescription entityMaps[] = {
58 { '&', ampReference, EntityAmp },
59 { '<', ltReference, EntityLt },
60 { '>', gtReference, EntityGt },
61 { '"', quotReference, EntityQuot },
62 { noBreakSpace, nbspReference, EntityNbsp },
63 };
64
65 if (!(offset + length))
66 return;
67
68 ASSERT(offset + length <= source.length());
69
70 if (source.is8Bit()) {
71 const LChar* text = source.characters8() + offset;
72
73 size_t positionAfterLastEntity = 0;
74 for (size_t i = 0; i < length; ++i) {
75 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) {
76 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
77 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
78 result.append(entityMaps[entityIndex].reference);
79 positionAfterLastEntity = i + 1;
80 break;
81 }
82 }
83 }
84 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
85 } else {
86 const UChar* text = source.characters16() + offset;
87
88 size_t positionAfterLastEntity = 0;
89 for (size_t i = 0; i < length; ++i) {
90 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) {
91 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
92 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
93 result.append(entityMaps[entityIndex].reference);
94 positionAfterLastEntity = i + 1;
95 break;
96 }
97 }
98 }
99 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
100 }
101 }
102
MarkupAccumulator(Vector<Node * > * nodes,EAbsoluteURLs resolveUrlsMethod,const Range * range)103 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range)
104 : m_nodes(nodes)
105 , m_range(range)
106 , m_resolveURLsMethod(resolveUrlsMethod)
107 {
108 }
109
~MarkupAccumulator()110 MarkupAccumulator::~MarkupAccumulator()
111 {
112 }
113
serializeNodes(Node * targetNode,EChildrenOnly childrenOnly)114 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly)
115 {
116 return serializeNodes(targetNode, childrenOnly, 0);
117 }
118
serializeNodes(Node * targetNode,EChildrenOnly childrenOnly,Vector<QualifiedName> * tagNamesToSkip)119 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip)
120 {
121 serializeNodesWithNamespaces(targetNode, childrenOnly, 0, tagNamesToSkip);
122 return m_markup.toString();
123 }
124
serializeNodesWithNamespaces(Node * targetNode,EChildrenOnly childrenOnly,const Namespaces * namespaces,Vector<QualifiedName> * tagNamesToSkip)125 void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip)
126 {
127 if (tagNamesToSkip) {
128 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) {
129 if (targetNode->hasTagName(tagNamesToSkip->at(i)))
130 return;
131 }
132 }
133
134 Namespaces namespaceHash;
135 if (namespaces)
136 namespaceHash = *namespaces;
137
138 if (!childrenOnly)
139 appendStartTag(targetNode, &namespaceHash);
140
141 if (!(targetNode->document().isHTMLDocument() && elementCannotHaveEndTag(targetNode))) {
142 Node* current = targetNode->hasTagName(templateTag) ? toHTMLTemplateElement(targetNode)->content()->firstChild() : targetNode->firstChild();
143 for ( ; current; current = current->nextSibling())
144 serializeNodesWithNamespaces(current, IncludeNode, &namespaceHash, tagNamesToSkip);
145 }
146
147 if (!childrenOnly)
148 appendEndTag(targetNode);
149 }
150
// Returns |urlString| completed against the element's document when the
// accumulator's URL policy asks for it, otherwise returns |urlString| unchanged:
// - ResolveAllURLs: always complete;
// - ResolveNonLocalURLs: complete unless the document URL is a local file;
// - DoNotResolveURLs (or any other value): never complete.
String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const
{
    bool shouldResolve = false;
    if (m_resolveURLsMethod == ResolveAllURLs)
        shouldResolve = true;
    else if (m_resolveURLsMethod == ResolveNonLocalURLs)
        shouldResolve = !element->document().url().isLocalFile();

    if (!shouldResolve)
        return urlString;
    return element->document().completeURL(urlString).string();
}
167
appendString(const String & string)168 void MarkupAccumulator::appendString(const String& string)
169 {
170 m_markup.append(string);
171 }
172
appendStartTag(Node * node,Namespaces * namespaces)173 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
174 {
175 appendStartMarkup(m_markup, node, namespaces);
176 if (m_nodes)
177 m_nodes->append(node);
178 }
179
appendEndTag(Node * node)180 void MarkupAccumulator::appendEndTag(Node* node)
181 {
182 appendEndMarkup(m_markup, node);
183 }
184
totalLength(const Vector<String> & strings)185 size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
186 {
187 size_t length = 0;
188 for (size_t i = 0; i < strings.size(); ++i)
189 length += strings[i].length();
190 return length;
191 }
192
concatenateMarkup(StringBuilder & result)193 void MarkupAccumulator::concatenateMarkup(StringBuilder& result)
194 {
195 result.append(m_markup);
196 }
197
appendAttributeValue(StringBuilder & result,const String & attribute,bool documentIsHTML)198 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
199 {
200 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
201 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
202 }
203
appendCustomAttributes(StringBuilder &,Element *,Namespaces *)204 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*)
205 {
206 }
207
appendQuotedURLAttributeValue(StringBuilder & result,const Element * element,const Attribute & attribute)208 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute)
209 {
210 ASSERT(element->isURLAttribute(attribute));
211 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
212 UChar quoteChar = '"';
213 String strippedURLString = resolvedURLString.stripWhiteSpace();
214 if (protocolIsJavaScript(strippedURLString)) {
215 // minimal escaping for javascript urls
216 if (strippedURLString.contains('"')) {
217 if (strippedURLString.contains('\''))
218 strippedURLString.replaceWithLiteral('"', """);
219 else
220 quoteChar = '\'';
221 }
222 result.append(quoteChar);
223 result.append(strippedURLString);
224 result.append(quoteChar);
225 return;
226 }
227
228 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
229 result.append(quoteChar);
230 appendAttributeValue(result, resolvedURLString, false);
231 result.append(quoteChar);
232 }
233
appendNodeValue(StringBuilder & result,const Node * node,const Range * range,EntityMask entityMask)234 void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask)
235 {
236 const String str = node->nodeValue();
237 unsigned length = str.length();
238 unsigned start = 0;
239
240 if (range) {
241 if (node == range->endContainer())
242 length = range->endOffset();
243 if (node == range->startContainer()) {
244 start = range->startOffset();
245 length -= start;
246 }
247 }
248
249 appendCharactersReplacingEntities(result, str, start, length, entityMask);
250 }
251
shouldAddNamespaceElement(const Element * element)252 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
253 {
254 // Don't add namespace attribute if it is already defined for this elem.
255 const AtomicString& prefix = element->prefix();
256 if (prefix.isEmpty())
257 return !element->hasAttribute(xmlnsAtom);
258
259 DEFINE_STATIC_LOCAL(String, xmlnsWithColon, ("xmlns:"));
260 return !element->hasAttribute(xmlnsWithColon + prefix);
261 }
262
shouldAddNamespaceAttribute(const Attribute & attribute,Namespaces & namespaces)263 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
264 {
265 // Don't add namespace attributes twice
266 if (attribute.name() == XMLNSNames::xmlnsAttr) {
267 namespaces.set(emptyAtom.impl(), attribute.value().impl());
268 return false;
269 }
270
271 QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
272 if (attribute.name() == xmlnsPrefixAttr) {
273 namespaces.set(attribute.localName().impl(), attribute.value().impl());
274 return false;
275 }
276
277 return true;
278 }
279
appendNamespace(StringBuilder & result,const AtomicString & prefix,const AtomicString & namespaceURI,Namespaces & namespaces)280 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
281 {
282 if (namespaceURI.isEmpty())
283 return;
284
285 // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key
286 StringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl();
287 StringImpl* foundNS = namespaces.get(pre);
288 if (foundNS != namespaceURI.impl()) {
289 namespaces.set(pre, namespaceURI.impl());
290 result.append(' ');
291 result.append(xmlnsAtom.string());
292 if (!prefix.isEmpty()) {
293 result.append(':');
294 result.append(prefix);
295 }
296
297 result.append('=');
298 result.append('"');
299 appendAttributeValue(result, namespaceURI, false);
300 result.append('"');
301 }
302 }
303
entityMaskForText(Text * text) const304 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
305 {
306 if (!text->document().isHTMLDocument())
307 return EntityMaskInPCDATA;
308
309 const QualifiedName* parentName = 0;
310 if (text->parentElement())
311 parentName = &(text->parentElement())->tagQName();
312
313 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
314 return EntityMaskInCDATA;
315 return EntityMaskInHTMLPCDATA;
316 }
317
appendText(StringBuilder & result,Text * text)318 void MarkupAccumulator::appendText(StringBuilder& result, Text* text)
319 {
320 appendNodeValue(result, text, m_range, entityMaskForText(text));
321 }
322
appendComment(StringBuilder & result,const String & comment)323 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment)
324 {
325 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
326 result.appendLiteral("<!--");
327 result.append(comment);
328 result.appendLiteral("-->");
329 }
330
appendXMLDeclaration(StringBuilder & result,const Document * document)331 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document)
332 {
333 if (!document->hasXMLDeclaration())
334 return;
335
336 result.appendLiteral("<?xml version=\"");
337 result.append(document->xmlVersion());
338 const String& encoding = document->xmlEncoding();
339 if (!encoding.isEmpty()) {
340 result.appendLiteral("\" encoding=\"");
341 result.append(encoding);
342 }
343 if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) {
344 result.appendLiteral("\" standalone=\"");
345 if (document->xmlStandalone())
346 result.appendLiteral("yes");
347 else
348 result.appendLiteral("no");
349 }
350
351 result.appendLiteral("\"?>");
352 }
353
appendDocumentType(StringBuilder & result,const DocumentType * n)354 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n)
355 {
356 if (n->name().isEmpty())
357 return;
358
359 result.appendLiteral("<!DOCTYPE ");
360 result.append(n->name());
361 if (!n->publicId().isEmpty()) {
362 result.appendLiteral(" PUBLIC \"");
363 result.append(n->publicId());
364 result.append('"');
365 if (!n->systemId().isEmpty()) {
366 result.append(' ');
367 result.append('"');
368 result.append(n->systemId());
369 result.append('"');
370 }
371 } else if (!n->systemId().isEmpty()) {
372 result.appendLiteral(" SYSTEM \"");
373 result.append(n->systemId());
374 result.append('"');
375 }
376 if (!n->internalSubset().isEmpty()) {
377 result.append(' ');
378 result.append('[');
379 result.append(n->internalSubset());
380 result.append(']');
381 }
382 result.append('>');
383 }
384
appendProcessingInstruction(StringBuilder & result,const String & target,const String & data)385 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data)
386 {
387 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>".
388 result.append('<');
389 result.append('?');
390 result.append(target);
391 result.append(' ');
392 result.append(data);
393 result.append('?');
394 result.append('>');
395 }
396
appendElement(StringBuilder & result,Element * element,Namespaces * namespaces)397 void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces)
398 {
399 appendOpenTag(result, element, namespaces);
400
401 if (element->hasAttributes()) {
402 unsigned length = element->attributeCount();
403 for (unsigned int i = 0; i < length; i++)
404 appendAttribute(result, element, *element->attributeItem(i), namespaces);
405 }
406
407 // Give an opportunity to subclasses to add their own attributes.
408 appendCustomAttributes(result, element, namespaces);
409
410 appendCloseTag(result, element);
411 }
412
appendOpenTag(StringBuilder & result,Element * element,Namespaces * namespaces)413 void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces)
414 {
415 result.append('<');
416 result.append(element->nodeNamePreservingCase());
417 if (!element->document().isHTMLDocument() && namespaces && shouldAddNamespaceElement(element))
418 appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces);
419 }
420
appendCloseTag(StringBuilder & result,Element * element)421 void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element)
422 {
423 if (shouldSelfClose(element)) {
424 if (element->isHTMLElement())
425 result.append(' '); // XHTML 1.0 <-> HTML compatibility.
426 result.append('/');
427 }
428 result.append('>');
429 }
430
attributeIsInSerializedNamespace(const Attribute & attribute)431 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute)
432 {
433 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI
434 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI
435 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI;
436 }
437
appendAttribute(StringBuilder & result,Element * element,const Attribute & attribute,Namespaces * namespaces)438 void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces)
439 {
440 bool documentIsHTML = element->document().isHTMLDocument();
441
442 result.append(' ');
443
444 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute))
445 result.append(attribute.name().localName());
446 else {
447 QualifiedName prefixedName = attribute.name();
448 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) {
449 if (!attribute.prefix())
450 prefixedName.setPrefix(xlinkAtom);
451 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) {
452 if (!attribute.prefix())
453 prefixedName.setPrefix(xmlAtom);
454 } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) {
455 if (attribute.name() != XMLNSNames::xmlnsAttr && !attribute.prefix())
456 prefixedName.setPrefix(xmlnsAtom);
457 }
458 result.append(prefixedName.toString());
459 }
460
461 result.append('=');
462
463 if (element->isURLAttribute(attribute))
464 appendQuotedURLAttributeValue(result, element, attribute);
465 else {
466 result.append('"');
467 appendAttributeValue(result, attribute.value(), documentIsHTML);
468 result.append('"');
469 }
470
471 if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces))
472 appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces);
473 }
474
appendCDATASection(StringBuilder & result,const String & section)475 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section)
476 {
477 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
478 result.appendLiteral("<![CDATA[");
479 result.append(section);
480 result.appendLiteral("]]>");
481 }
482
appendStartMarkup(StringBuilder & result,const Node * node,Namespaces * namespaces)483 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces)
484 {
485 switch (node->nodeType()) {
486 case Node::TEXT_NODE:
487 appendText(result, toText(const_cast<Node*>(node)));
488 break;
489 case Node::COMMENT_NODE:
490 appendComment(result, toComment(node)->data());
491 break;
492 case Node::DOCUMENT_NODE:
493 appendXMLDeclaration(result, toDocument(node));
494 break;
495 case Node::DOCUMENT_FRAGMENT_NODE:
496 break;
497 case Node::DOCUMENT_TYPE_NODE:
498 appendDocumentType(result, toDocumentType(node));
499 break;
500 case Node::PROCESSING_INSTRUCTION_NODE:
501 appendProcessingInstruction(result, toProcessingInstruction(node)->target(), toProcessingInstruction(node)->data());
502 break;
503 case Node::ELEMENT_NODE:
504 appendElement(result, toElement(const_cast<Node*>(node)), namespaces);
505 break;
506 case Node::CDATA_SECTION_NODE:
507 appendCDATASection(result, toCDATASection(node)->data());
508 break;
509 case Node::ATTRIBUTE_NODE:
510 case Node::ENTITY_NODE:
511 case Node::NOTATION_NODE:
512 case Node::XPATH_NAMESPACE_NODE:
513 ASSERT_NOT_REACHED();
514 break;
515 }
516 }
517
518 // Rules of self-closure
519 // 1. No elements in HTML documents use the self-closing syntax.
520 // 2. Elements w/ children never self-close because they use a separate end tag.
521 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
522 // 4. Other elements self-close.
shouldSelfClose(const Node * node)523 bool MarkupAccumulator::shouldSelfClose(const Node* node)
524 {
525 if (node->document().isHTMLDocument())
526 return false;
527 if (node->hasChildNodes())
528 return false;
529 if (node->isHTMLElement() && !elementCannotHaveEndTag(node))
530 return false;
531 return true;
532 }
533
elementCannotHaveEndTag(const Node * node)534 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
535 {
536 if (!node->isHTMLElement())
537 return false;
538
539 // FIXME: ieForbidsInsertHTML may not be the right function to call here
540 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
541 // or createContextualFragment. It does not necessarily align with
542 // which elements should be serialized w/o end tags.
543 return toHTMLElement(node)->ieForbidsInsertHTML();
544 }
545
appendEndMarkup(StringBuilder & result,const Node * node)546 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node)
547 {
548 if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node)))
549 return;
550
551 result.append('<');
552 result.append('/');
553 result.append(toElement(node)->nodeNamePreservingCase());
554 result.append('>');
555 }
556
557 }
558