• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
6 #define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
7 
8 #include <queue>
9 #include <set>
10 #include <string>
11 #include <vector>
12 
13 #include "base/basictypes.h"
14 #include "base/containers/hash_tables.h"
15 #include "base/files/file_path.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/time/time.h"
20 #include "content/common/content_export.h"
21 #include "content/public/browser/download_item.h"
22 #include "content/public/browser/download_manager_delegate.h"
23 #include "content/public/browser/save_page_type.h"
24 #include "content/public/browser/web_contents_observer.h"
25 #include "content/public/common/referrer.h"
26 #include "net/base/net_errors.h"
27 #include "url/gurl.h"
28 
29 class GURL;
30 
31 namespace content {
32 class DownloadItemImpl;
33 class DownloadManagerImpl;
34 class WebContents;
35 class SaveFileManager;
36 class SaveItem;
37 class SavePackage;
38 struct SaveFileCreateInfo;
39 
40 // The SavePackage object manages the process of saving a page as only-html or
41 // complete-html or MHTML and providing the information for displaying saving
42 // status.  Saving page as only-html means that we save the web page to a
43 // single HTML file regardless of internal sub resources and sub frames.  Saving
44 // page as complete-html page means we save not only the main html file the user
45 // told it to save but also a directory for the auxiliary files such as all
46 // sub-frame html files, image files, css files and js files.  Saving page as
47 // MHTML means the same thing as complete-html, but it uses the MHTML format to
48 // contain the html and all auxiliary files in a single text file.
49 //
50 // Each page saving job may include one or multiple files which need to be
51 // saved. Each file is represented by a SaveItem, and all SaveItems are owned
52 // by the SavePackage. SaveItems are created when a user initiates a page
53 // saving job, and exist for the duration of one contents' lifetime.
54 class CONTENT_EXPORT SavePackage
55     : public base::RefCountedThreadSafe<SavePackage>,
56       public WebContentsObserver,
57       public DownloadItem::Observer,
58       public base::SupportsWeakPtr<SavePackage> {
59  public:
     // The stage a save job is in or waiting on; the current value is held in
     // |wait_state_|.
60   enum WaitState {
61     // State when created but not initialized.
62     INITIALIZE = 0,
63     // State after initializing, but not yet saving.
64     START_PROCESS,
65     // Waiting on a list of savable resources from the backend.
66     RESOURCES_LIST,
67     // Waiting for data sent from net IO or from file system.
68     NET_FILES,
69     // Waiting for html DOM data sent from render process.
70     HTML_DATA,
71     // Saving page finished successfully.
72     SUCCESSFUL,
73     // Failed to save page.
74     FAILED
75   };
76 
     // Defined out of line; presumably the default extension used for HTML files
     // (see EnsureHtmlExtension()) -- TODO confirm against the definition.
77   static const base::FilePath::CharType kDefaultHtmlExtension[];
78 
79   // Constructor for user initiated page saving. This constructor results in a
80   // SavePackage that will generate and sanitize a suggested name for the user
81   // in the "Save As" dialog box.
82   explicit SavePackage(WebContents* web_contents);
83 
84   // This constructor is used only for testing. We can bypass the file and
85   // directory name generation / sanitization by providing well known paths
86   // better suited for tests.
87   SavePackage(WebContents* web_contents,
88               SavePageType save_type,
89               const base::FilePath& file_full_path,
90               const base::FilePath& directory_full_path);
91 
92   // Initializes the SavePackage. Returns true if it initializes properly.
93   // This method must be called on the UI thread because using
94   // g_browser_process on a non-UI thread can cause crashes during shutdown.
95   // |cb| will be called when the DownloadItem is created, before data is
96   // written to disk.
97   bool Init(const SavePackageDownloadCreatedCallback& cb);
98 
99   // Cancels all in-progress requests; may be called by user or internal error.
100   void Cancel(bool user_action);
101 
102   void Finish();
103 
104   // Notifications sent from the file thread to the UI thread.
105   void StartSave(const SaveFileCreateInfo* info);
106   bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
107   void SaveFinished(int32 save_id, int64 size, bool is_success);
108   void SaveFailed(const GURL& save_url);
109   void SaveCanceled(SaveItem* save_item);
110 
111   // Rough percent complete, -1 means we don't know (since we didn't receive a
112   // total size).
113   int PercentComplete();
114 
      // True once the user has canceled the job or a disk error has occurred.
canceled()115   bool canceled() const { return user_canceled_ || disk_error_occurred_; }
finished()116   bool finished() const { return finished_; }
save_type()117   SavePageType save_type() const { return save_type_; }
contents_id()118   int contents_id() const { return contents_id_; }
id()119   int id() const { return unique_id_; }
120   WebContents* web_contents() const;
121 
122   void GetSaveInfo();
123 
124  private:
125   friend class base::RefCountedThreadSafe<SavePackage>;
126 
127   void InitWithDownloadItem(
128       const SavePackageDownloadCreatedCallback& download_created_callback,
129       DownloadItemImpl* item);
130 
131   // Callback for WebContents::GenerateMHTML().
132   void OnMHTMLGenerated(int64 size);
133 
134   // For testing only.
135   SavePackage(WebContents* web_contents,
136               const base::FilePath& file_full_path,
137               const base::FilePath& directory_full_path);
138 
139   virtual ~SavePackage();
140 
141   // Notes from Init() above apply here as well.
142   void InternalInit();
143 
144   void Stop();
145   void CheckFinish();
146   void SaveNextFile(bool process_all_remainder_items);
147   void DoSavingProcess();
148 
149   // WebContentsObserver implementation.
150   virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
151 
152   // DownloadItem::Observer implementation.
153   virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE;
154 
155   // Update the download history of this item upon completion.
156   void FinalizeDownloadEntry();
157 
158   // Detach from DownloadManager.
159   void StopObservation();
160 
161   // Return max length of a path for a specific base directory.
162   // This is needed on POSIX, which restricts the length of file names in
163   // addition to the restriction on the length of path names.
164   // |base_dir| is assumed to be a directory name with no trailing slash.
165   static uint32 GetMaxPathLengthForDirectory(const base::FilePath& base_dir);
166 
167   static bool GetSafePureFileName(
168       const base::FilePath& dir_path,
169       const base::FilePath::StringType& file_name_ext,
170       uint32 max_file_path_len,
171       base::FilePath::StringType* pure_file_name);
172 
173   // Create a file name based on the response from the server.
174   bool GenerateFileName(const std::string& disposition,
175                         const GURL& url,
176                         bool need_html_ext,
177                         base::FilePath::StringType* generated_name);
178 
179   // Get all savable resource links from the current web page, including the
180   // main frame and sub-frames.
181   void GetAllSavableResourceLinksForCurrentPage();
182   // Get html data by serializing all frames of the current page with lists
183   // which contain all resource links that have a local copy.
184   void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
185 
186   // Look up SaveItem by save id from the in-progress map.
187   SaveItem* LookupItemInProcessBySaveId(int32 save_id);
188 
189   // Remove SaveItem from the in-progress map and put it into the saved map.
190   void PutInProgressItemToSavedMap(SaveItem* save_item);
191 
192   // Retrieves the URL to be saved from the WebContents.
193   GURL GetUrlToBeSaved();
194 
195   void CreateDirectoryOnFileThread(const base::FilePath& website_save_dir,
196                                    const base::FilePath& download_save_dir,
197                                    bool skip_dir_check,
198                                    const std::string& mime_type,
199                                    const std::string& accept_langs);
200   void ContinueGetSaveInfo(const base::FilePath& suggested_path,
201                            bool can_save_as_complete);
202   void OnPathPicked(
203       const base::FilePath& final_name,
204       SavePageType type,
205       const SavePackageDownloadCreatedCallback& cb);
206   void OnReceivedSavableResourceLinksForCurrentPage(
207       const std::vector<GURL>& resources_list,
208       const std::vector<Referrer>& referrers_list,
209       const std::vector<GURL>& frames_list);
210 
211   void OnReceivedSerializedHtmlData(const GURL& frame_url,
212                                     const std::string& data,
213                                     int32 status);
214 
215   typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
216   // in_progress_items_ is a map of all saving jobs in the in-progress state.
217   SaveUrlItemMap in_progress_items_;
218   // saved_failed_items_ is a map of all saving jobs which have failed.
219   SaveUrlItemMap saved_failed_items_;
220 
221   // The number of SaveItems currently in progress.
in_process_count()222   int in_process_count() const {
223     return static_cast<int>(in_progress_items_.size());
224   }
225 
226   // The number of all SaveItems which have completed, including success items
227   // and failed items.
completed_count()228   int completed_count() const {
229     return static_cast<int>(saved_success_items_.size() +
230                             saved_failed_items_.size());
231   }
232 
233   // The current speed in files per second. This is used to update the
234   // DownloadItem associated to this SavePackage. The files per second is
235   // presented by the DownloadItem to the UI as bytes per second, which is
236   // not correct but matches the way the total and received number of files is
237   // presented as the total and received bytes.
238   int64 CurrentSpeed() const;
239 
240   // Helper function for preparing suggested name for the SaveAs Dialog. The
241   // suggested name is determined by the web document's title.
242   base::FilePath GetSuggestedNameForSaveAs(
243       bool can_save_as_complete,
244       const std::string& contents_mime_type,
245       const std::string& accept_langs);
246 
247   // Ensures that the file name has a proper extension for HTML by adding ".htm"
248   // if necessary.
249   static base::FilePath EnsureHtmlExtension(const base::FilePath& name);
250 
251   // Ensures that the file name has a proper extension for supported formats
252   // if necessary.
253   static base::FilePath EnsureMimeExtension(const base::FilePath& name,
254       const std::string& contents_mime_type);
255 
256   // Returns extension for supported MIME types (for example, for "text/plain"
257   // it returns "txt").
258   static const base::FilePath::CharType* ExtensionForMimeType(
259       const std::string& contents_mime_type);
260 
261   typedef std::queue<SaveItem*> SaveItemQueue;
262   // A queue for items we are about to start saving.
263   SaveItemQueue waiting_item_queue_;
264 
265   typedef base::hash_map<int32, SaveItem*> SavedItemMap;
266   // saved_success_items_ is a map of all saving jobs which saved successfully.
267   SavedItemMap saved_success_items_;
268 
269   // Non-owning pointer for handling file writing on the file thread.
270   SaveFileManager* file_manager_;
271 
272   // DownloadManager owns the DownloadItem and handles history and UI.
273   DownloadManagerImpl* download_manager_;
274   DownloadItemImpl* download_;
275 
276   // The URL of the page the user wants to save.
277   GURL page_url_;
278   base::FilePath saved_main_file_path_;
279   base::FilePath saved_main_directory_path_;
280 
281   // The title of the page the user wants to save.
282   base::string16 title_;
283 
284   // Used to calculate package download speed (in files per second).
285   base::TimeTicks start_tick_;
286 
287   // Indicates whether the actual saving job is finishing or not.
288   bool finished_;
289 
290   // Indicates whether a call to Finish() has been scheduled.
291   bool mhtml_finishing_;
292 
293   // Indicates whether the user canceled the saving job.
294   bool user_canceled_;
295 
296   // Indicates whether a disk error occurred during the saving job.
297   bool disk_error_occurred_;
298 
299   // The kind of page save requested, e.g. only-html or complete-html.
300   SavePageType save_type_;
301 
302   // Total number of resources that need to be saved.
303   size_t all_save_items_count_;
304 
305   typedef std::set<base::FilePath::StringType,
306                    bool (*)(const base::FilePath::StringType&,
307                             const base::FilePath::StringType&)> FileNameSet;
308   // This set is used to eliminate duplicated file names in saving directory.
309   FileNameSet file_name_set_;
310 
311   typedef base::hash_map<base::FilePath::StringType, uint32> FileNameCountMap;
312   // This map is used to track serial number for specified filename.
313   FileNameCountMap file_name_count_map_;
314 
315   // Indicates the current waiting state when SavePackage tries to get
316   // something from outside.
317   WaitState wait_state_;
318 
319   // One contents can only have one SavePackage at a time, so for now we
320   // actually use render_process_id as the contents's unique id.
321   const int contents_id_;
322 
323   // Unique ID for this SavePackage.
324   const int unique_id_;
325 
326   // Variables to record errors that happened so we can record them via
327   // UMA statistics.
328   bool wrote_to_completed_file_;
329   bool wrote_to_failed_file_;
330 
331   friend class SavePackageTest;
332   FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
333   FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);
334 
335   DISALLOW_COPY_AND_ASSIGN(SavePackage);
336 };
337 
338 }  // namespace content
339 
340 #endif  // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
341