// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
#define CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
#pragma once

#include <queue>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/file_path.h"
#include "base/gtest_prod_util.h"
#include "base/hash_tables.h"
#include "base/memory/ref_counted.h"
#include "base/task.h"
#include "chrome/browser/ui/shell_dialogs.h"
#include "content/browser/tab_contents/tab_contents_observer.h"
#include "googleurl/src/gurl.h"

class DownloadItem;
class DownloadManager;
class GURL;
class MessageLoop;
class PrefService;
class Profile;
struct SaveFileCreateInfo;
class SaveFileManager;
class SaveItem;
class SavePackage;
struct SavePackageParam;
class TabContents;

namespace base {
class Thread;
class Time;
}

namespace net {
class URLRequestContextGetter;
}


// The SavePackage object manages the process of saving a page as only-html or
// complete-html and providing the information for displaying saving status.
// Saving page as only-html means that we save web page to a single HTML
// file regardless of internal sub resources and sub frames.
// Saving page as complete-html page means we save not only the main html file
// the user told it to save but also a directory for the auxiliary files such
// as all sub-frame html files, image files, css files and js files.
//
// Each page saving job may include one or multiple files which need to be
// saved. Each file is represented by a SaveItem, and all SaveItems are owned
// by the SavePackage. SaveItems are created when a user initiates a page
// saving job, and exist for the duration of one tab's lifetime.
58 class SavePackage : public base::RefCountedThreadSafe<SavePackage>, 59 public TabContentsObserver, 60 public SelectFileDialog::Listener { 61 public: 62 enum SavePackageType { 63 // The value of the save type before its set by the user. 64 SAVE_TYPE_UNKNOWN = -1, 65 // User chose to save only the HTML of the page. 66 SAVE_AS_ONLY_HTML = 0, 67 // User chose to save complete-html page. 68 SAVE_AS_COMPLETE_HTML = 1 69 }; 70 71 enum WaitState { 72 // State when created but not initialized. 73 INITIALIZE = 0, 74 // State when after initializing, but not yet saving. 75 START_PROCESS, 76 // Waiting on a list of savable resources from the backend. 77 RESOURCES_LIST, 78 // Waiting for data sent from net IO or from file system. 79 NET_FILES, 80 // Waiting for html DOM data sent from render process. 81 HTML_DATA, 82 // Saving page finished successfully. 83 SUCCESSFUL, 84 // Failed to save page. 85 FAILED 86 }; 87 88 // Constructor for user initiated page saving. This constructor results in a 89 // SavePackage that will generate and sanitize a suggested name for the user 90 // in the "Save As" dialog box. 91 explicit SavePackage(TabContents* tab_contents); 92 93 // This contructor is used only for testing. We can bypass the file and 94 // directory name generation / sanitization by providing well known paths 95 // better suited for tests. 96 SavePackage(TabContents* tab_contents, 97 SavePackageType save_type, 98 const FilePath& file_full_path, 99 const FilePath& directory_full_path); 100 101 // Initialize the SavePackage. Returns true if it initializes properly. 102 // Need to make sure that this method must be called in the UI thread because 103 // using g_browser_process on a non-UI thread can cause crashes during 104 // shutdown. 105 bool Init(); 106 107 void Cancel(bool user_action); 108 109 void Finish(); 110 111 // Notifications sent from the file thread to the UI thread. 
112 void StartSave(const SaveFileCreateInfo* info); 113 bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success); 114 void SaveFinished(int32 save_id, int64 size, bool is_success); 115 void SaveFailed(const GURL& save_url); 116 void SaveCanceled(SaveItem* save_item); 117 118 // Rough percent complete, -1 means we don't know (since we didn't receive a 119 // total size). 120 int PercentComplete(); 121 122 // Show or Open a saved page via the Windows shell. 123 void ShowDownloadInShell(); 124 canceled()125 bool canceled() const { return user_canceled_ || disk_error_occurred_; } finished()126 bool finished() const { return finished_; } save_type()127 SavePackageType save_type() const { return save_type_; } tab_id()128 int tab_id() const { return tab_id_; } id()129 int id() const { return unique_id_; } 130 131 void GetSaveInfo(); 132 133 // Statics ------------------------------------------------------------------- 134 135 // Used to disable prompting the user for a directory/filename of the saved 136 // web page. This is available for testing. 137 static void SetShouldPromptUser(bool should_prompt); 138 139 // Check whether we can do the saving page operation for the specified URL. 140 static bool IsSavableURL(const GURL& url); 141 142 // Check whether we can do the saving page operation for the contents which 143 // have the specified MIME type. 144 static bool IsSavableContents(const std::string& contents_mime_type); 145 146 // SelectFileDialog::Listener ------------------------------------------------ 147 virtual void FileSelected(const FilePath& path, int index, void* params); 148 virtual void FileSelectionCanceled(void* params); 149 150 private: 151 friend class base::RefCountedThreadSafe<SavePackage>; 152 153 // For testing only. 154 SavePackage(TabContents* tab_contents, 155 const FilePath& file_full_path, 156 const FilePath& directory_full_path); 157 158 ~SavePackage(); 159 160 // Notes from Init() above applies here as well. 
161 void InternalInit(); 162 163 void Stop(); 164 void CheckFinish(); 165 void SaveNextFile(bool process_all_remainder_items); 166 void DoSavingProcess(); 167 168 // TabContentsObserver implementation. 169 virtual bool OnMessageReceived(const IPC::Message& message); 170 171 // Return max length of a path for a specific base directory. 172 // This is needed on POSIX, which restrict the length of file names in 173 // addition to the restriction on the length of path names. 174 // |base_dir| is assumed to be a directory name with no trailing slash. 175 static uint32 GetMaxPathLengthForDirectory(const FilePath& base_dir); 176 177 static bool GetSafePureFileName(const FilePath& dir_path, 178 const FilePath::StringType& file_name_ext, 179 uint32 max_file_path_len, 180 FilePath::StringType* pure_file_name); 181 182 // Create a file name based on the response from the server. 183 bool GenerateFileName(const std::string& disposition, 184 const GURL& url, 185 bool need_html_ext, 186 FilePath::StringType* generated_name); 187 188 // Get all savable resource links from current web page, include main 189 // frame and sub-frame. 190 void GetAllSavableResourceLinksForCurrentPage(); 191 // Get html data by serializing all frames of current page with lists 192 // which contain all resource links that have local copy. 193 void GetSerializedHtmlDataForCurrentPageWithLocalLinks(); 194 195 SaveItem* LookupItemInProcessBySaveId(int32 save_id); 196 void PutInProgressItemToSavedMap(SaveItem* save_item); 197 198 // Retrieves the URL to be saved from tab_contents_ variable. 
199 GURL GetUrlToBeSaved(); 200 201 void CreateDirectoryOnFileThread(const FilePath& website_save_dir, 202 const FilePath& download_save_dir, 203 const std::string& mime_type); 204 void ContinueGetSaveInfo(const FilePath& suggested_path, 205 bool can_save_as_complete); 206 void ContinueSave(const FilePath& final_name, int index); 207 208 void OnReceivedSavableResourceLinksForCurrentPage( 209 const std::vector<GURL>& resources_list, 210 const std::vector<GURL>& referrers_list, 211 const std::vector<GURL>& frames_list); 212 213 void OnReceivedSerializedHtmlData(const GURL& frame_url, 214 const std::string& data, 215 int32 status); 216 217 218 typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap; 219 // in_progress_items_ is map of all saving job in in-progress state. 220 SaveUrlItemMap in_progress_items_; 221 // saved_failed_items_ is map of all saving job which are failed. 222 SaveUrlItemMap saved_failed_items_; 223 224 // The number of in process SaveItems. in_process_count()225 int in_process_count() const { 226 return static_cast<int>(in_progress_items_.size()); 227 } 228 229 // The number of all SaveItems which have completed, including success items 230 // and failed items. completed_count()231 int completed_count() const { 232 return static_cast<int>(saved_success_items_.size() + 233 saved_failed_items_.size()); 234 } 235 236 // Retrieve the preference for the directory to save pages to. 237 static FilePath GetSaveDirPreference(PrefService* prefs); 238 239 // Helper function for preparing suggested name for the SaveAs Dialog. The 240 // suggested name is determined by the web document's title. 241 FilePath GetSuggestedNameForSaveAs( 242 bool can_save_as_complete, 243 const std::string& contents_mime_type); 244 245 // Ensures that the file name has a proper extension for HTML by adding ".htm" 246 // if necessary. 
247 static FilePath EnsureHtmlExtension(const FilePath& name); 248 249 // Ensures that the file name has a proper extension for supported formats 250 // if necessary. 251 static FilePath EnsureMimeExtension(const FilePath& name, 252 const std::string& contents_mime_type); 253 254 // Returns extension for supported MIME types (for example, for "text/plain" 255 // it returns "txt"). 256 static const FilePath::CharType* ExtensionForMimeType( 257 const std::string& contents_mime_type); 258 259 typedef std::queue<SaveItem*> SaveItemQueue; 260 // A queue for items we are about to start saving. 261 SaveItemQueue waiting_item_queue_; 262 263 typedef base::hash_map<int32, SaveItem*> SavedItemMap; 264 // saved_success_items_ is map of all saving job which are successfully saved. 265 SavedItemMap saved_success_items_; 266 267 // The request context which provides application-specific context for 268 // net::URLRequest instances. 269 scoped_refptr<net::URLRequestContextGetter> request_context_getter_; 270 271 // Non-owning pointer for handling file writing on the file thread. 272 SaveFileManager* file_manager_; 273 274 // We use a fake DownloadItem here in order to reuse the DownloadItemView. 275 // This class owns the pointer. 276 DownloadItem* download_; 277 278 // The URL of the page the user wants to save. 279 GURL page_url_; 280 FilePath saved_main_file_path_; 281 FilePath saved_main_directory_path_; 282 283 // The title of the page the user wants to save. 284 string16 title_; 285 286 // Indicates whether the actual saving job is finishing or not. 287 bool finished_; 288 289 // Indicates whether user canceled the saving job. 290 bool user_canceled_; 291 292 // Indicates whether user get disk error. 293 bool disk_error_occurred_; 294 295 // Type about saving page as only-html or complete-html. 296 SavePackageType save_type_; 297 298 // Number of all need to be saved resources. 
299 size_t all_save_items_count_; 300 301 typedef base::hash_set<FilePath::StringType> FileNameSet; 302 // This set is used to eliminate duplicated file names in saving directory. 303 FileNameSet file_name_set_; 304 305 typedef base::hash_map<FilePath::StringType, uint32> FileNameCountMap; 306 // This map is used to track serial number for specified filename. 307 FileNameCountMap file_name_count_map_; 308 309 // Indicates current waiting state when SavePackage try to get something 310 // from outside. 311 WaitState wait_state_; 312 313 // Since for one tab, it can only have one SavePackage in same time. 314 // Now we actually use render_process_id as tab's unique id. 315 const int tab_id_; 316 317 // Unique ID for this SavePackage. 318 const int unique_id_; 319 320 // For managing select file dialogs. 321 scoped_refptr<SelectFileDialog> select_file_dialog_; 322 323 friend class SavePackageTest; 324 FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames); 325 FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename); 326 327 ScopedRunnableMethodFactory<SavePackage> method_factory_; 328 329 DISALLOW_COPY_AND_ASSIGN(SavePackage); 330 }; 331 332 #endif // CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 333