• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #ifndef WebPageSerializer_h
32 #define WebPageSerializer_h
33 
34 #include "../platform/WebCString.h"
35 #include "../platform/WebCommon.h"
36 #include "../platform/WebURL.h"
37 #include "WebFrame.h"
38 
39 namespace blink {
40 
41 class WebPageSerializerClient;
42 class WebString;
43 class WebView;
44 template <typename T> class WebVector;
45 
46 // Produces HTML data by serializing all frames of the current page, using
47 // lists of all the resource links that have a local copy.
48 class WebPageSerializer {
49 public:
50     struct Resource { // Element type of the vector filled by serialize(WebView*, ...).
51         WebURL url; // NOTE(review): presumably the URL the resource was loaded from — confirm.
52         WebCString mimeType; // NOTE(review): presumably the MIME type of |data| — confirm.
53         WebCString data; // NOTE(review): presumably the resource's contents — confirm.
54     };
55 
56     // Serializes every frame of the WebView, retrieves the page's resources
57     // (such as images and CSS) and adds them to the passed vector.
58     // The first resource in that vector is the top frame's contents.
59     // Note that this also strips out any script tag or link to JavaScript.
60     BLINK_EXPORT static void serialize(WebView*, WebVector<Resource>*);
61 
62     // Serializes the WebView contents to an MHTML representation.
63     BLINK_EXPORT static WebCString serializeToMHTML(WebView*);
64 
65     // Similar to serializeToMHTML, but uses binary encoding for the MHTML parts.
66     // This results in a smaller MHTML file, but other browsers might not support it.
67     BLINK_EXPORT static WebCString serializeToMHTMLUsingBinaryEncoding(WebView*);
68 
69     // IMPORTANT:
70     // The API below is an older implementation of page serialization that
71     // will be removed soon.
72 
73 
74     // Finds all frames and serializes them to HTML data.
75     // The generated HTML data is saved temporarily in a data buffer, and the
76     // WebPageSerializerClient is called sequentially each time that buffer is full.
77     //
78     // Returns false if no available frame has been serialized; otherwise
79     // returns true.
80     //
81     // The parameter frame specifies which frame needs to be serialized.
82     // The parameter recursive specifies whether all sub-frames of the
83     // specified frame need to be serialized as well.
84     // The parameter client is a pointer to the WebPageSerializerClient
85     // interface, which provides a sink that receives the individual chunks
86     // of data to be saved.
87     // The parameter links contains the original URLs of all saved links.
88     // The parameter localPaths contains the corresponding local file paths of
89     // all saved links, matching the links vector one to one.
90     // The parameter localDirectoryName is the relative path of the directory
91     // that contains all saved auxiliary files, including sub-frames and resources.
92     BLINK_EXPORT static bool serialize(
93         WebLocalFrame*,
94         bool recursive,
95         WebPageSerializerClient*,
96         const WebVector<WebURL>& links,
97         const WebVector<WebString>& localPaths,
98         const WebString& localDirectoryName);
99 
100     // Retrieves all the resources for the passed view, including the main frame
101     // and sub-frames. Returns true if all resources were retrieved successfully.
102     // NOTE(review): resources and frames look like output vectors — confirm.
103     BLINK_EXPORT static bool retrieveAllResources(WebView*,
104                                                    const WebVector<WebCString>& supportedSchemes,
105                                                    WebVector<WebURL>* resources,
106                                                    WebVector<WebURL>* frames);
107 
108     // FIXME: The following are exposed here for unit testing purposes only.
109     // Consider changing the unit tests instead.
110 
111     // Generates the META tag for the charset declaration.
112     BLINK_EXPORT static WebString generateMetaCharsetDeclaration(const WebString& charset);
113     // Generates the MOTW (Mark of the Web) declaration.
114     BLINK_EXPORT static WebString generateMarkOfTheWebDeclaration(const WebURL&);
115     // Generates the default base tag declaration.
116     BLINK_EXPORT static WebString generateBaseTagDeclaration(const WebString& baseTarget);
117 };
118 
119 } // namespace blink
120 
121 #endif
122