• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "config.h"
32 #include "core/page/PageSerializer.h"
33 
34 #include "core/HTMLNames.h"
35 #include "core/InputTypeNames.h"
36 #include "core/css/CSSFontFaceRule.h"
37 #include "core/css/CSSFontFaceSrcValue.h"
38 #include "core/css/CSSImageValue.h"
39 #include "core/css/CSSImportRule.h"
40 #include "core/css/CSSStyleDeclaration.h"
41 #include "core/css/CSSStyleRule.h"
42 #include "core/css/CSSValueList.h"
43 #include "core/css/StylePropertySet.h"
44 #include "core/css/StyleRule.h"
45 #include "core/css/StyleSheetContents.h"
46 #include "core/dom/Document.h"
47 #include "core/dom/Element.h"
48 #include "core/dom/Text.h"
49 #include "core/editing/MarkupAccumulator.h"
50 #include "core/fetch/FontResource.h"
51 #include "core/fetch/ImageResource.h"
52 #include "core/frame/LocalFrame.h"
53 #include "core/html/HTMLFrameOwnerElement.h"
54 #include "core/html/HTMLImageElement.h"
55 #include "core/html/HTMLInputElement.h"
56 #include "core/html/HTMLLinkElement.h"
57 #include "core/html/HTMLMetaElement.h"
58 #include "core/html/HTMLStyleElement.h"
59 #include "core/html/parser/HTMLParserIdioms.h"
60 #include "core/page/Page.h"
61 #include "core/rendering/RenderImage.h"
62 #include "core/rendering/style/StyleFetchedImage.h"
63 #include "core/rendering/style/StyleImage.h"
64 #include "platform/SerializedResource.h"
65 #include "platform/graphics/Image.h"
66 #include "wtf/text/CString.h"
67 #include "wtf/text/StringBuilder.h"
68 #include "wtf/text/TextEncoding.h"
69 #include "wtf/text/WTFString.h"
70 
71 namespace blink {
72 
isCharsetSpecifyingNode(const Node & node)73 static bool isCharsetSpecifyingNode(const Node& node)
74 {
75     if (!isHTMLMetaElement(node))
76         return false;
77 
78     const HTMLMetaElement& element = toHTMLMetaElement(node);
79     HTMLAttributeList attributeList;
80     AttributeCollection attributes = element.attributes();
81     AttributeCollection::iterator end = attributes.end();
82     for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) {
83         // FIXME: We should deal appropriately with the attribute if they have a namespace.
84         attributeList.append(std::make_pair(it->name().localName(), it->value().string()));
85     }
86     WTF::TextEncoding textEncoding = encodingFromMetaAttributes(attributeList);
87     return textEncoding.isValid();
88 }
89 
shouldIgnoreElement(const Element & element)90 static bool shouldIgnoreElement(const Element& element)
91 {
92     return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element);
93 }
94 
frameOwnerURLAttributeName(const HTMLFrameOwnerElement & frameOwner)95 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner)
96 {
97     // FIXME: We should support all frame owners including applets.
98     return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr;
99 }
100 
101 class SerializerMarkupAccumulator FINAL : public MarkupAccumulator {
102 public:
103     SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node> >*);
104     virtual ~SerializerMarkupAccumulator();
105 
106 protected:
107     virtual void appendText(StringBuilder& out, Text&) OVERRIDE;
108     virtual void appendElement(StringBuilder& out, Element&, Namespaces*) OVERRIDE;
109     virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) OVERRIDE;
110     virtual void appendEndTag(const Element&) OVERRIDE;
111 
112 private:
113     PageSerializer* m_serializer;
114     const Document& m_document;
115 };
116 
SerializerMarkupAccumulator(PageSerializer * serializer,const Document & document,WillBeHeapVector<RawPtrWillBeMember<Node>> * nodes)117 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node> >* nodes)
118     : MarkupAccumulator(nodes, ResolveAllURLs, nullptr)
119     , m_serializer(serializer)
120     , m_document(document)
121 {
122 }
123 
~SerializerMarkupAccumulator()124 SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
125 {
126 }
127 
appendText(StringBuilder & out,Text & text)128 void SerializerMarkupAccumulator::appendText(StringBuilder& out, Text& text)
129 {
130     Element* parent = text.parentElement();
131     if (parent && !shouldIgnoreElement(*parent))
132         MarkupAccumulator::appendText(out, text);
133 }
134 
appendElement(StringBuilder & out,Element & element,Namespaces * namespaces)135 void SerializerMarkupAccumulator::appendElement(StringBuilder& out, Element& element, Namespaces* namespaces)
136 {
137     if (!shouldIgnoreElement(element))
138         MarkupAccumulator::appendElement(out, element, namespaces);
139 
140     if (isHTMLHeadElement(element)) {
141         out.appendLiteral("<meta charset=\"");
142         out.append(m_document.charset());
143         out.appendLiteral("\">");
144     }
145 
146     // FIXME: For object (plugins) tags and video tag we could replace them by an image of their current contents.
147 }
148 
appendCustomAttributes(StringBuilder & out,const Element & element,Namespaces * namespaces)149 void SerializerMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
150 {
151     if (!element.isFrameOwnerElement())
152         return;
153 
154     const HTMLFrameOwnerElement& frameOwner = toHTMLFrameOwnerElement(element);
155     Frame* frame = frameOwner.contentFrame();
156     // FIXME: RemoteFrames not currently supported here.
157     if (!frame || !frame->isLocalFrame())
158         return;
159 
160     KURL url = toLocalFrame(frame)->document()->url();
161     if (url.isValid() && !url.protocolIsAbout())
162         return;
163 
164     // We need to give a fake location to blank frames so they can be referenced by the serialized frame.
165     url = m_serializer->urlForBlankFrame(toLocalFrame(frame));
166     appendAttribute(out, element, Attribute(frameOwnerURLAttributeName(frameOwner), AtomicString(url.string())), namespaces);
167 }
168 
appendEndTag(const Element & element)169 void SerializerMarkupAccumulator::appendEndTag(const Element& element)
170 {
171     if (!shouldIgnoreElement(element))
172         MarkupAccumulator::appendEndTag(element);
173 }
174 
PageSerializer(Vector<SerializedResource> * resources)175 PageSerializer::PageSerializer(Vector<SerializedResource>* resources)
176     : m_resources(resources)
177     , m_blankFrameCounter(0)
178 {
179 }
180 
serialize(Page * page)181 void PageSerializer::serialize(Page* page)
182 {
183     serializeFrame(page->deprecatedLocalMainFrame());
184 }
185 
serializeFrame(LocalFrame * frame)186 void PageSerializer::serializeFrame(LocalFrame* frame)
187 {
188     ASSERT(frame->document());
189     Document& document = *frame->document();
190     KURL url = document.url();
191     // FIXME: This probably wants isAboutBlankURL? to exclude other about: urls (like about:srcdoc)?
192     if (!url.isValid() || url.protocolIsAbout()) {
193         // For blank frames we generate a fake URL so they can be referenced by their containing frame.
194         url = urlForBlankFrame(frame);
195     }
196 
197     if (m_resourceURLs.contains(url)) {
198         // FIXME: We could have 2 frame with the same URL but which were dynamically changed and have now
199         // different content. So we should serialize both and somehow rename the frame src in the containing
200         // frame. Arg!
201         return;
202     }
203 
204     WTF::TextEncoding textEncoding(document.charset());
205     if (!textEncoding.isValid()) {
206         // FIXME: iframes used as images trigger this. We should deal with them correctly.
207         return;
208     }
209 
210     WillBeHeapVector<RawPtrWillBeMember<Node> > serializedNodes;
211     SerializerMarkupAccumulator accumulator(this, document, &serializedNodes);
212     String text = accumulator.serializeNodes(document, IncludeNode);
213     CString frameHTML = textEncoding.normalizeAndEncode(text, WTF::EntitiesForUnencodables);
214     m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
215     m_resourceURLs.add(url);
216 
217     for (WillBeHeapVector<RawPtrWillBeMember<Node> >::iterator iter = serializedNodes.begin(); iter != serializedNodes.end(); ++iter) {
218         ASSERT(*iter);
219         Node& node = **iter;
220         if (!node.isElementNode())
221             continue;
222 
223         Element& element = toElement(node);
224         // We have to process in-line style as it might contain some resources (typically background images).
225         if (element.isStyledElement())
226             retrieveResourcesForProperties(element.inlineStyle(), document);
227 
228         if (isHTMLImageElement(element)) {
229             HTMLImageElement& imageElement = toHTMLImageElement(element);
230             KURL url = document.completeURL(imageElement.getAttribute(HTMLNames::srcAttr));
231             ImageResource* cachedImage = imageElement.cachedImage();
232             addImageToResources(cachedImage, imageElement.renderer(), url);
233         } else if (isHTMLInputElement(element)) {
234             HTMLInputElement& inputElement = toHTMLInputElement(element);
235             if (inputElement.type() == InputTypeNames::image && inputElement.hasImageLoader()) {
236                 KURL url = inputElement.src();
237                 ImageResource* cachedImage = inputElement.imageLoader()->image();
238                 addImageToResources(cachedImage, inputElement.renderer(), url);
239             }
240         } else if (isHTMLLinkElement(element)) {
241             HTMLLinkElement& linkElement = toHTMLLinkElement(element);
242             if (CSSStyleSheet* sheet = linkElement.sheet()) {
243                 KURL url = document.completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
244                 serializeCSSStyleSheet(*sheet, url);
245                 ASSERT(m_resourceURLs.contains(url));
246             }
247         } else if (isHTMLStyleElement(element)) {
248             HTMLStyleElement& styleElement = toHTMLStyleElement(element);
249             if (CSSStyleSheet* sheet = styleElement.sheet())
250                 serializeCSSStyleSheet(*sheet, KURL());
251         }
252     }
253 
254     for (Frame* childFrame = frame->tree().firstChild(); childFrame; childFrame = childFrame->tree().nextSibling()) {
255         if (childFrame->isLocalFrame())
256             serializeFrame(toLocalFrame(childFrame));
257     }
258 }
259 
serializeCSSStyleSheet(CSSStyleSheet & styleSheet,const KURL & url)260 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KURL& url)
261 {
262     StringBuilder cssText;
263     for (unsigned i = 0; i < styleSheet.length(); ++i) {
264         CSSRule* rule = styleSheet.item(i);
265         String itemText = rule->cssText();
266         if (!itemText.isEmpty()) {
267             cssText.append(itemText);
268             if (i < styleSheet.length() - 1)
269                 cssText.appendLiteral("\n\n");
270         }
271         ASSERT(styleSheet.ownerDocument());
272         Document& document = *styleSheet.ownerDocument();
273         // Some rules have resources associated with them that we need to retrieve.
274         if (rule->type() == CSSRule::IMPORT_RULE) {
275             CSSImportRule* importRule = toCSSImportRule(rule);
276             KURL importURL = document.completeURL(importRule->href());
277             if (m_resourceURLs.contains(importURL))
278                 continue;
279             if (importRule->styleSheet())
280                 serializeCSSStyleSheet(*importRule->styleSheet(), importURL);
281         } else if (rule->type() == CSSRule::FONT_FACE_RULE) {
282             retrieveResourcesForProperties(&toCSSFontFaceRule(rule)->styleRule()->properties(), document);
283         } else if (rule->type() == CSSRule::STYLE_RULE) {
284             retrieveResourcesForProperties(&toCSSStyleRule(rule)->styleRule()->properties(), document);
285         }
286     }
287 
288     if (url.isValid() && !m_resourceURLs.contains(url)) {
289         // FIXME: We should check whether a charset has been specified and if none was found add one.
290         WTF::TextEncoding textEncoding(styleSheet.contents()->charset());
291         ASSERT(textEncoding.isValid());
292         String textString = cssText.toString();
293         CString text = textEncoding.normalizeAndEncode(textString, WTF::EntitiesForUnencodables);
294         m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
295         m_resourceURLs.add(url);
296     }
297 }
298 
shouldAddURL(const KURL & url)299 bool PageSerializer::shouldAddURL(const KURL& url)
300 {
301     return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData();
302 }
303 
addToResources(Resource * resource,PassRefPtr<SharedBuffer> data,const KURL & url)304 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
305 {
306     if (!data) {
307         WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
308         return;
309     }
310 
311     String mimeType = resource->response().mimeType();
312     m_resources->append(SerializedResource(url, mimeType, data));
313     m_resourceURLs.add(url);
314 }
315 
addImageToResources(ImageResource * image,RenderObject * imageRenderer,const KURL & url)316 void PageSerializer::addImageToResources(ImageResource* image, RenderObject* imageRenderer, const KURL& url)
317 {
318     if (!shouldAddURL(url))
319         return;
320 
321     if (!image || image->image() == Image::nullImage() || image->errorOccurred())
322         return;
323 
324     RefPtr<SharedBuffer> data = imageRenderer ? image->imageForRenderer(imageRenderer)->data() : 0;
325     if (!data)
326         data = image->image()->data();
327 
328     addToResources(image, data, url);
329 }
330 
addFontToResources(FontResource * font)331 void PageSerializer::addFontToResources(FontResource* font)
332 {
333     if (!font || !shouldAddURL(font->url()) || !font->isLoaded() || !font->resourceBuffer()) {
334         return;
335     }
336     RefPtr<SharedBuffer> data(font->resourceBuffer());
337 
338     addToResources(font, data, font->url());
339 }
340 
retrieveResourcesForProperties(const StylePropertySet * styleDeclaration,Document & document)341 void PageSerializer::retrieveResourcesForProperties(const StylePropertySet* styleDeclaration, Document& document)
342 {
343     if (!styleDeclaration)
344         return;
345 
346     // The background-image and list-style-image (for ul or ol) are the CSS properties
347     // that make use of images. We iterate to make sure we include any other
348     // image properties there might be.
349     unsigned propertyCount = styleDeclaration->propertyCount();
350     for (unsigned i = 0; i < propertyCount; ++i) {
351         RefPtrWillBeRawPtr<CSSValue> cssValue = styleDeclaration->propertyAt(i).value();
352         retrieveResourcesForCSSValue(cssValue.get(), document);
353     }
354 }
355 
retrieveResourcesForCSSValue(CSSValue * cssValue,Document & document)356 void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document& document)
357 {
358     if (cssValue->isImageValue()) {
359         CSSImageValue* imageValue = toCSSImageValue(cssValue);
360         StyleImage* styleImage = imageValue->cachedOrPendingImage();
361         // Non cached-images are just place-holders and do not contain data.
362         if (!styleImage || !styleImage->isImageResource())
363             return;
364 
365         addImageToResources(styleImage->cachedImage(), 0, styleImage->cachedImage()->url());
366     } else if (cssValue->isFontFaceSrcValue()) {
367         CSSFontFaceSrcValue* fontFaceSrcValue = toCSSFontFaceSrcValue(cssValue);
368         if (fontFaceSrcValue->isLocal()) {
369             return;
370         }
371 
372         addFontToResources(fontFaceSrcValue->fetch(&document));
373     } else if (cssValue->isValueList()) {
374         CSSValueList* cssValueList = toCSSValueList(cssValue);
375         for (unsigned i = 0; i < cssValueList->length(); i++)
376             retrieveResourcesForCSSValue(cssValueList->item(i), document);
377     }
378 }
379 
urlForBlankFrame(LocalFrame * frame)380 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame)
381 {
382     HashMap<LocalFrame*, KURL>::iterator iter = m_blankFrameURLs.find(frame);
383     if (iter != m_blankFrameURLs.end())
384         return iter->value;
385     String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
386     KURL fakeURL(ParsedURLString, url);
387     m_blankFrameURLs.add(frame, fakeURL);
388 
389     return fakeURL;
390 }
391 
392 }
393