• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #ifndef WebPageSerializer_h
32 #define WebPageSerializer_h
33 
34 #include "../platform/WebCString.h"
35 #include "../platform/WebCommon.h"
36 #include "../platform/WebURL.h"
37 #include "WebFrame.h"
38 
39 namespace blink {
40 class WebPageSerializerClient; // Used only as a pointer parameter below.
41 class WebString; // Used only in function parameter and return types below.
42 class WebView; // Used only as a pointer parameter below.
43 template <typename T> class WebVector; // Used only via pointer or reference below.
44 
45 // Static entry points for serializing the frames of a page (as HTML or
46 // MHTML) and for retrieving the URLs of the page's resources and frames.
47 class WebPageSerializer {
48 public:
49     struct Resource { // Element type of the vector filled in by serialize(WebView*, ...).
50         WebURL url;
51         WebCString mimeType;
52         WebCString data;
53     };
54 
55     // Serializes all the frames of the WebView, retrieves the page's
56     // resources (such as images and CSS) and adds them to the passed vector.
57     // The first resource in that vector holds the contents of the top frame.
58     // Note that script tags and links to JavaScript are stripped out as well.
59     BLINK_EXPORT static void serialize(WebView*, WebVector<Resource>*);
60 
61     // Serializes the WebView contents to an MHTML representation.
62     BLINK_EXPORT static WebCString serializeToMHTML(WebView*);
63 
64     // Similar to serializeToMHTML, but the MHTML parts use binary encoding.
65     // The resulting MHTML file is smaller, but other browsers might not support it.
66     BLINK_EXPORT static WebCString serializeToMHTMLUsingBinaryEncoding(WebView*);
67 
68     // IMPORTANT:
69     // The API below is an older page-serialization implementation that
70     // will be removed soon.
71 
72 
73     // Finds all frames and serializes them to HTML data. The generated HTML
74     // is saved temporarily in a data buffer, and WebPageSerializerClient is
75     // called sequentially each time that data buffer is full.
76     //
77     // Returns false if no available frame has been serialized, otherwise
78     // returns true.
79     //
80     // The frame parameter specifies which frame needs to be serialized.
81     // The recursive parameter specifies whether all sub-frames of the
82     // specified frame need to be serialized as well.
83     // The client parameter is a pointer to the WebPageSerializerClient
84     // interface, which provides a sink that receives the individual chunks
85     // of data to be saved.
86     // The links parameter contains the original URLs of all saved links.
87     // The localPaths parameter contains the corresponding local file paths
88     // of all saved links, matched one-to-one with the entries of links.
89     // The localDirectoryName parameter is the relative path of the directory
90     // that contains all saved auxiliary files, including sub-frames and resources.
91     BLINK_EXPORT static bool serialize(
92         WebLocalFrame*,
93         bool recursive,
94         WebPageSerializerClient*,
95         const WebVector<WebURL>& links,
96         const WebVector<WebString>& localPaths,
97         const WebString& localDirectoryName);
98 
99     // Retrieves all the resources for the passed view, including the main
100     // frame and sub-frames. Returns true if all resources were retrieved
101     // successfully.
102     BLINK_EXPORT static bool retrieveAllResources(WebView*,
103                                                    const WebVector<WebCString>& supportedSchemes,
104                                                    WebVector<WebURL>* resources,
105                                                    WebVector<WebURL>* frames);
106 
107     // FIXME: The following are here for unit testing purposes. Consider
108     // changing the unit tests instead.
109 
110     // Generates the META charset declaration for the given charset.
111     BLINK_EXPORT static WebString generateMetaCharsetDeclaration(const WebString& charset);
112     // Generates the Mark of the Web (MOTW) declaration for the given URL.
113     BLINK_EXPORT static WebString generateMarkOfTheWebDeclaration(const WebURL&);
114     // Generates the default base tag declaration for the given base target.
115     BLINK_EXPORT static WebString generateBaseTagDeclaration(const WebString& baseTarget);
116 };
117 
118 } // namespace blink
119 
120 #endif
121