1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 6 #define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 7 8 #include <queue> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/basictypes.h" 14 #include "base/containers/hash_tables.h" 15 #include "base/files/file_path.h" 16 #include "base/gtest_prod_util.h" 17 #include "base/memory/ref_counted.h" 18 #include "base/memory/weak_ptr.h" 19 #include "base/time/time.h" 20 #include "content/common/content_export.h" 21 #include "content/public/browser/download_item.h" 22 #include "content/public/browser/download_manager_delegate.h" 23 #include "content/public/browser/save_page_type.h" 24 #include "content/public/browser/web_contents_observer.h" 25 #include "content/public/common/referrer.h" 26 #include "net/base/net_errors.h" 27 #include "url/gurl.h" 28 29 class GURL; 30 31 namespace content { 32 class DownloadItemImpl; 33 class DownloadManagerImpl; 34 class WebContents; 35 class SaveFileManager; 36 class SaveItem; 37 class SavePackage; 38 struct SaveFileCreateInfo; 39 40 // The SavePackage object manages the process of saving a page as only-html or 41 // complete-html or MHTML and providing the information for displaying saving 42 // status. Saving page as only-html means means that we save web page to a 43 // single HTML file regardless internal sub resources and sub frames. Saving 44 // page as complete-html page means we save not only the main html file the user 45 // told it to save but also a directory for the auxiliary files such as all 46 // sub-frame html files, image files, css files and js files. Saving page as 47 // MHTML means the same thing as complete-html, but it uses the MHTML format to 48 // contain the html and all auxiliary files in a single text file. 49 // 50 // Each page saving job may include one or multiple files which need to be 51 // saved. Each file is represented by a SaveItem, and all SaveItems are owned 52 // by the SavePackage. SaveItems are created when a user initiates a page 53 // saving job, and exist for the duration of one contents's life time. 54 class CONTENT_EXPORT SavePackage 55 : public base::RefCountedThreadSafe<SavePackage>, 56 public WebContentsObserver, 57 public DownloadItem::Observer, 58 public base::SupportsWeakPtr<SavePackage> { 59 public: 60 enum WaitState { 61 // State when created but not initialized. 62 INITIALIZE = 0, 63 // State when after initializing, but not yet saving. 64 START_PROCESS, 65 // Waiting on a list of savable resources from the backend. 66 RESOURCES_LIST, 67 // Waiting for data sent from net IO or from file system. 68 NET_FILES, 69 // Waiting for html DOM data sent from render process. 70 HTML_DATA, 71 // Saving page finished successfully. 72 SUCCESSFUL, 73 // Failed to save page. 74 FAILED 75 }; 76 77 static const base::FilePath::CharType kDefaultHtmlExtension[]; 78 79 // Constructor for user initiated page saving. This constructor results in a 80 // SavePackage that will generate and sanitize a suggested name for the user 81 // in the "Save As" dialog box. 82 explicit SavePackage(WebContents* web_contents); 83 84 // This contructor is used only for testing. We can bypass the file and 85 // directory name generation / sanitization by providing well known paths 86 // better suited for tests. 87 SavePackage(WebContents* web_contents, 88 SavePageType save_type, 89 const base::FilePath& file_full_path, 90 const base::FilePath& directory_full_path); 91 92 // Initialize the SavePackage. Returns true if it initializes properly. Need 93 // to make sure that this method must be called in the UI thread because using 94 // g_browser_process on a non-UI thread can cause crashes during shutdown. 95 // |cb| will be called when the DownloadItem is created, before data is 96 // written to disk. 97 bool Init(const SavePackageDownloadCreatedCallback& cb); 98 99 // Cancel all in progress request, might be called by user or internal error. 100 void Cancel(bool user_action); 101 102 void Finish(); 103 104 // Notifications sent from the file thread to the UI thread. 105 void StartSave(const SaveFileCreateInfo* info); 106 bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success); 107 void SaveFinished(int32 save_id, int64 size, bool is_success); 108 void SaveFailed(const GURL& save_url); 109 void SaveCanceled(SaveItem* save_item); 110 111 // Rough percent complete, -1 means we don't know (since we didn't receive a 112 // total size). 113 int PercentComplete(); 114 canceled()115 bool canceled() const { return user_canceled_ || disk_error_occurred_; } finished()116 bool finished() const { return finished_; } save_type()117 SavePageType save_type() const { return save_type_; } contents_id()118 int contents_id() const { return contents_id_; } id()119 int id() const { return unique_id_; } 120 WebContents* web_contents() const; 121 122 void GetSaveInfo(); 123 124 private: 125 friend class base::RefCountedThreadSafe<SavePackage>; 126 127 void InitWithDownloadItem( 128 const SavePackageDownloadCreatedCallback& download_created_callback, 129 DownloadItemImpl* item); 130 131 // Callback for WebContents::GenerateMHTML(). 132 void OnMHTMLGenerated(int64 size); 133 134 // For testing only. 135 SavePackage(WebContents* web_contents, 136 const base::FilePath& file_full_path, 137 const base::FilePath& directory_full_path); 138 139 virtual ~SavePackage(); 140 141 // Notes from Init() above applies here as well. 142 void InternalInit(); 143 144 void Stop(); 145 void CheckFinish(); 146 void SaveNextFile(bool process_all_remainder_items); 147 void DoSavingProcess(); 148 149 // WebContentsObserver implementation. 150 virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE; 151 152 // DownloadItem::Observer implementation. 153 virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE; 154 155 // Update the download history of this item upon completion. 156 void FinalizeDownloadEntry(); 157 158 // Detach from DownloadManager. 159 void StopObservation(); 160 161 // Return max length of a path for a specific base directory. 162 // This is needed on POSIX, which restrict the length of file names in 163 // addition to the restriction on the length of path names. 164 // |base_dir| is assumed to be a directory name with no trailing slash. 165 static uint32 GetMaxPathLengthForDirectory(const base::FilePath& base_dir); 166 167 static bool GetSafePureFileName( 168 const base::FilePath& dir_path, 169 const base::FilePath::StringType& file_name_ext, 170 uint32 max_file_path_len, 171 base::FilePath::StringType* pure_file_name); 172 173 // Create a file name based on the response from the server. 174 bool GenerateFileName(const std::string& disposition, 175 const GURL& url, 176 bool need_html_ext, 177 base::FilePath::StringType* generated_name); 178 179 // Get all savable resource links from current web page, include main 180 // frame and sub-frame. 181 void GetAllSavableResourceLinksForCurrentPage(); 182 // Get html data by serializing all frames of current page with lists 183 // which contain all resource links that have local copy. 184 void GetSerializedHtmlDataForCurrentPageWithLocalLinks(); 185 186 // Look up SaveItem by save id from in progress map. 187 SaveItem* LookupItemInProcessBySaveId(int32 save_id); 188 189 // Remove SaveItem from in progress map and put it to saved map. 190 void PutInProgressItemToSavedMap(SaveItem* save_item); 191 192 // Retrieves the URL to be saved from the WebContents. 193 GURL GetUrlToBeSaved(); 194 195 void CreateDirectoryOnFileThread(const base::FilePath& website_save_dir, 196 const base::FilePath& download_save_dir, 197 bool skip_dir_check, 198 const std::string& mime_type, 199 const std::string& accept_langs); 200 void ContinueGetSaveInfo(const base::FilePath& suggested_path, 201 bool can_save_as_complete); 202 void OnPathPicked( 203 const base::FilePath& final_name, 204 SavePageType type, 205 const SavePackageDownloadCreatedCallback& cb); 206 void OnReceivedSavableResourceLinksForCurrentPage( 207 const std::vector<GURL>& resources_list, 208 const std::vector<Referrer>& referrers_list, 209 const std::vector<GURL>& frames_list); 210 211 void OnReceivedSerializedHtmlData(const GURL& frame_url, 212 const std::string& data, 213 int32 status); 214 215 typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap; 216 // in_progress_items_ is map of all saving job in in-progress state. 217 SaveUrlItemMap in_progress_items_; 218 // saved_failed_items_ is map of all saving job which are failed. 219 SaveUrlItemMap saved_failed_items_; 220 221 // The number of in process SaveItems. in_process_count()222 int in_process_count() const { 223 return static_cast<int>(in_progress_items_.size()); 224 } 225 226 // The number of all SaveItems which have completed, including success items 227 // and failed items. completed_count()228 int completed_count() const { 229 return static_cast<int>(saved_success_items_.size() + 230 saved_failed_items_.size()); 231 } 232 233 // The current speed in files per second. This is used to update the 234 // DownloadItem associated to this SavePackage. The files per second is 235 // presented by the DownloadItem to the UI as bytes per second, which is 236 // not correct but matches the way the total and received number of files is 237 // presented as the total and received bytes. 238 int64 CurrentSpeed() const; 239 240 // Helper function for preparing suggested name for the SaveAs Dialog. The 241 // suggested name is determined by the web document's title. 242 base::FilePath GetSuggestedNameForSaveAs( 243 bool can_save_as_complete, 244 const std::string& contents_mime_type, 245 const std::string& accept_langs); 246 247 // Ensures that the file name has a proper extension for HTML by adding ".htm" 248 // if necessary. 249 static base::FilePath EnsureHtmlExtension(const base::FilePath& name); 250 251 // Ensures that the file name has a proper extension for supported formats 252 // if necessary. 253 static base::FilePath EnsureMimeExtension(const base::FilePath& name, 254 const std::string& contents_mime_type); 255 256 // Returns extension for supported MIME types (for example, for "text/plain" 257 // it returns "txt"). 258 static const base::FilePath::CharType* ExtensionForMimeType( 259 const std::string& contents_mime_type); 260 261 typedef std::queue<SaveItem*> SaveItemQueue; 262 // A queue for items we are about to start saving. 263 SaveItemQueue waiting_item_queue_; 264 265 typedef base::hash_map<int32, SaveItem*> SavedItemMap; 266 // saved_success_items_ is map of all saving job which are successfully saved. 267 SavedItemMap saved_success_items_; 268 269 // Non-owning pointer for handling file writing on the file thread. 270 SaveFileManager* file_manager_; 271 272 // DownloadManager owns the DownloadItem and handles history and UI. 273 DownloadManagerImpl* download_manager_; 274 DownloadItemImpl* download_; 275 276 // The URL of the page the user wants to save. 277 GURL page_url_; 278 base::FilePath saved_main_file_path_; 279 base::FilePath saved_main_directory_path_; 280 281 // The title of the page the user wants to save. 282 base::string16 title_; 283 284 // Used to calculate package download speed (in files per second). 285 base::TimeTicks start_tick_; 286 287 // Indicates whether the actual saving job is finishing or not. 288 bool finished_; 289 290 // Indicates whether a call to Finish() has been scheduled. 291 bool mhtml_finishing_; 292 293 // Indicates whether user canceled the saving job. 294 bool user_canceled_; 295 296 // Indicates whether user get disk error. 297 bool disk_error_occurred_; 298 299 // Type about saving page as only-html or complete-html. 300 SavePageType save_type_; 301 302 // Number of all need to be saved resources. 303 size_t all_save_items_count_; 304 305 typedef std::set<base::FilePath::StringType, 306 bool (*)(const base::FilePath::StringType&, 307 const base::FilePath::StringType&)> FileNameSet; 308 // This set is used to eliminate duplicated file names in saving directory. 309 FileNameSet file_name_set_; 310 311 typedef base::hash_map<base::FilePath::StringType, uint32> FileNameCountMap; 312 // This map is used to track serial number for specified filename. 313 FileNameCountMap file_name_count_map_; 314 315 // Indicates current waiting state when SavePackage try to get something 316 // from outside. 317 WaitState wait_state_; 318 319 // Since for one contents, it can only have one SavePackage in same time. 320 // Now we actually use render_process_id as the contents's unique id. 321 const int contents_id_; 322 323 // Unique ID for this SavePackage. 324 const int unique_id_; 325 326 // Variables to record errors that happened so we can record them via 327 // UMA statistics. 328 bool wrote_to_completed_file_; 329 bool wrote_to_failed_file_; 330 331 friend class SavePackageTest; 332 FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames); 333 FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename); 334 335 DISALLOW_COPY_AND_ASSIGN(SavePackage); 336 }; 337 338 } // namespace content 339 340 #endif // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 341