1 /* 2 * Copyright (C) 2009 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #ifndef WebPageSerializerImpl_h 32 #define WebPageSerializerImpl_h 33 34 #include "PlatformString.h" 35 #include "StringBuilder.h" 36 #include "StringHash.h" 37 #include <wtf/HashMap.h> 38 #include <wtf/Vector.h> 39 40 #include "WebEntities.h" 41 #include "WebPageSerializer.h" 42 #include "WebPageSerializerClient.h" 43 #include "WebString.h" 44 #include "WebURL.h" 45 46 namespace WebCore { 47 class Document; 48 class Element; 49 class Node; 50 class String; 51 class TextEncoding; 52 } 53 54 namespace WebKit { 55 class WebFrameImpl; 56 57 // Get html data by serializing all frames of current page with lists 58 // which contain all resource links that have local copy. 59 // contain all saved auxiliary files included all sub frames and resources. 60 // This function will find out all frames and serialize them to HTML data. 61 // We have a data buffer to temporary saving generated html data. We will 62 // sequentially call WebViewDelegate::SendSerializedHtmlData once the data 63 // buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData 64 // for getting more information. 65 class WebPageSerializerImpl { 66 public: 67 // Do serialization action. Return false means no available frame has been 68 // serialized, otherwise return true. 69 bool serialize(); 70 71 // The parameter specifies which frame need to be serialized. 72 // The parameter recursive_serialization specifies whether we need to 73 // serialize all sub frames of the specified frame or not. 74 // The parameter delegate specifies the pointer of interface 75 // DomSerializerDelegate provide sink interface which can receive the 76 // individual chunks of data to be saved. 77 // The parameter links contain original URLs of all saved links. 78 // The parameter local_paths contain corresponding local file paths of all 79 // saved links, which matched with vector:links one by one. 80 // The parameter local_directory_name is relative path of directory which 81 // contain all saved auxiliary files included all sub frames and resources. 82 WebPageSerializerImpl(WebFrame* frame, 83 bool recursive, 84 WebPageSerializerClient* client, 85 const WebVector<WebURL>& links, 86 const WebVector<WebString>& localPaths, 87 const WebString& localDirectoryName); 88 89 private: 90 // Specified frame which need to be serialized; 91 WebFrameImpl* m_specifiedWebFrameImpl; 92 // Pointer of WebPageSerializerClient 93 WebPageSerializerClient* m_client; 94 // This hash map is used to map resource URL of original link to its local 95 // file path. 96 typedef HashMap<WebCore::String, WebCore::String> LinkLocalPathMap; 97 // local_links_ include all pair of local resource path and corresponding 98 // original link. 99 LinkLocalPathMap m_localLinks; 100 // Data buffer for saving result of serialized DOM data. 101 WebCore::StringBuilder m_dataBuffer; 102 // Passing true to recursive_serialization_ indicates we will serialize not 103 // only the specified frame but also all sub-frames in the specific frame. 104 // Otherwise we only serialize the specified frame excluded all sub-frames. 105 bool m_recursiveSerialization; 106 // Flag indicates whether we have collected all frames which need to be 107 // serialized or not; 108 bool m_framesCollected; 109 // Local directory name of all local resource files. 110 WebCore::String m_localDirectoryName; 111 // Vector for saving all frames which need to be serialized. 112 Vector<WebFrameImpl*> m_frames; 113 114 // Web entities conversion maps. 115 WebEntities m_htmlEntities; 116 WebEntities m_xmlEntities; 117 118 struct SerializeDomParam { 119 // Frame URL of current processing document presented by GURL 120 const WebCore::KURL& currentFrameURL; 121 // Current using text encoding object. 122 const WebCore::TextEncoding& textEncoding; 123 124 // Document object of current frame. 125 WebCore::Document* doc; 126 // Local directory name of all local resource files. 127 const WebCore::String& directoryName; 128 129 // Flag indicates current doc is html document or not. It's a cache value 130 // of Document.isHTMLDocument(). 131 bool isHTMLDocument; 132 // Flag which indicate whether we have met document type declaration. 133 bool hasDoctype; 134 // Flag which indicate whether will process meta issue. 135 bool hasCheckedMeta; 136 // This meta element need to be skipped when serializing DOM. 137 const WebCore::Element* skipMetaElement; 138 // Flag indicates we are in script or style tag. 139 bool isInScriptOrStyleTag; 140 // Flag indicates whether we have written xml document declaration. 141 // It is only used in xml document 142 bool hasDocDeclaration; 143 // Flag indicates whether we have added additional contents before end tag. 144 // This flag will be re-assigned in each call of function 145 // PostActionAfterSerializeOpenTag and it could be changed in function 146 // PreActionBeforeSerializeEndTag if the function adds new contents into 147 // serialization stream. 148 bool hasAddedContentsBeforeEnd; 149 150 // Constructor. 151 SerializeDomParam(const WebCore::KURL& currentFrameURL, 152 const WebCore::TextEncoding& textEncoding, 153 WebCore::Document* doc, 154 const WebCore::String& directoryName); 155 }; 156 157 // Collect all target frames which need to be serialized. 158 void collectTargetFrames(); 159 // Before we begin serializing open tag of a element, we give the target 160 // element a chance to do some work prior to add some additional data. 161 WebCore::String preActionBeforeSerializeOpenTag(const WebCore::Element* element, 162 SerializeDomParam* param, 163 bool* needSkip); 164 // After we finish serializing open tag of a element, we give the target 165 // element a chance to do some post work to add some additional data. 166 WebCore::String postActionAfterSerializeOpenTag(const WebCore::Element* element, 167 SerializeDomParam* param); 168 // Before we begin serializing end tag of a element, we give the target 169 // element a chance to do some work prior to add some additional data. 170 WebCore::String preActionBeforeSerializeEndTag(const WebCore::Element* element, 171 SerializeDomParam* param, 172 bool* needSkip); 173 // After we finish serializing end tag of a element, we give the target 174 // element a chance to do some post work to add some additional data. 175 WebCore::String postActionAfterSerializeEndTag(const WebCore::Element* element, 176 SerializeDomParam* param); 177 // Save generated html content to data buffer. 178 void saveHTMLContentToBuffer(const WebCore::String& content, 179 SerializeDomParam* param); 180 // Flushes the content buffer by encoding and sending the content to the 181 // WebPageSerializerClient. Content is not flushed if the buffer is not full 182 // unless force is 1. 183 void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus status, 184 SerializeDomParam* param, 185 bool force); 186 // Serialize open tag of an specified element. 187 void openTagToString(const WebCore::Element* element, 188 SerializeDomParam* param); 189 // Serialize end tag of an specified element. 190 void endTagToString(const WebCore::Element* element, 191 SerializeDomParam* param); 192 // Build content for a specified node 193 void buildContentForNode(const WebCore::Node* node, 194 SerializeDomParam* param); 195 }; 196 197 } // namespace WebKit 198 199 #endif 200