• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "content/browser/download/save_package.h"
6 
7 #include <algorithm>
8 
9 #include "base/bind.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/i18n/file_util_icu.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/sys_string_conversions.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/threading/thread.h"
21 #include "content/browser/download/download_item_impl.h"
22 #include "content/browser/download/download_manager_impl.h"
23 #include "content/browser/download/download_stats.h"
24 #include "content/browser/download/save_file.h"
25 #include "content/browser/download/save_file_manager.h"
26 #include "content/browser/download/save_item.h"
27 #include "content/browser/loader/resource_dispatcher_host_impl.h"
28 #include "content/browser/renderer_host/render_process_host_impl.h"
29 #include "content/browser/renderer_host/render_view_host_delegate.h"
30 #include "content/browser/renderer_host/render_view_host_impl.h"
31 #include "content/common/view_messages.h"
32 #include "content/public/browser/browser_context.h"
33 #include "content/public/browser/browser_thread.h"
34 #include "content/public/browser/content_browser_client.h"
35 #include "content/public/browser/download_manager_delegate.h"
36 #include "content/public/browser/navigation_entry.h"
37 #include "content/public/browser/notification_service.h"
38 #include "content/public/browser/notification_types.h"
39 #include "content/public/browser/resource_context.h"
40 #include "content/public/browser/web_contents.h"
41 #include "net/base/filename_util.h"
42 #include "net/base/io_buffer.h"
43 #include "net/base/mime_util.h"
44 #include "net/url_request/url_request_context.h"
45 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
46 #include "url/url_constants.h"
47 
48 using base::Time;
49 using blink::WebPageSerializerClient;
50 
51 namespace content {
52 namespace {
53 
54 // A counter for uniquely identifying each save package.
55 int g_save_package_id = 0;
56 
57 // Default name which will be used when we can not get proper name from
58 // resource URL.
59 const char kDefaultSaveName[] = "saved_resource";
60 
61 // Maximum number of file ordinal number. I think it's big enough for resolving
62 // name-conflict files which has same base file name.
63 const int32 kMaxFileOrdinalNumber = 9999;
64 
65 // Maximum length for file path. Since Windows have MAX_PATH limitation for
66 // file path, we need to make sure length of file path of every saved file
67 // is less than MAX_PATH
68 #if defined(OS_WIN)
69 const uint32 kMaxFilePathLength = MAX_PATH - 1;
70 #elif defined(OS_POSIX)
71 const uint32 kMaxFilePathLength = PATH_MAX - 1;
72 #endif
73 
74 // Maximum length for file ordinal number part. Since we only support the
75 // maximum 9999 for ordinal number, which means maximum file ordinal number part
76 // should be "(9998)", so the value is 6.
77 const uint32 kMaxFileOrdinalNumberPartLength = 6;
78 
79 // Strip current ordinal number, if any. Should only be used on pure
80 // file names, i.e. those stripped of their extensions.
81 // TODO(estade): improve this to not choke on alternate encodings.
StripOrdinalNumber(const base::FilePath::StringType & pure_file_name)82 base::FilePath::StringType StripOrdinalNumber(
83     const base::FilePath::StringType& pure_file_name) {
84   base::FilePath::StringType::size_type r_paren_index =
85       pure_file_name.rfind(FILE_PATH_LITERAL(')'));
86   base::FilePath::StringType::size_type l_paren_index =
87       pure_file_name.rfind(FILE_PATH_LITERAL('('));
88   if (l_paren_index >= r_paren_index)
89     return pure_file_name;
90 
91   for (base::FilePath::StringType::size_type i = l_paren_index + 1;
92        i != r_paren_index; ++i) {
93     if (!IsAsciiDigit(pure_file_name[i]))
94       return pure_file_name;
95   }
96 
97   return pure_file_name.substr(0, l_paren_index);
98 }
99 
// Returns true when contents with the given MIME type can be saved as
// complete-HTML. Only "text/html" and "application/xhtml+xml" qualify; the
// comparison is exact (case-sensitive, no parameters).
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
107 
108 // Request handle for SavePackage downloads. Currently doesn't support
109 // pause/resume/cancel, but returns a WebContents.
110 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
111  public:
SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)112   SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
113       : save_package_(save_package) {}
114 
115   // DownloadRequestHandleInterface
GetWebContents() const116   virtual WebContents* GetWebContents() const OVERRIDE {
117     return save_package_.get() ? save_package_->web_contents() : NULL;
118   }
GetDownloadManager() const119   virtual DownloadManager* GetDownloadManager() const OVERRIDE {
120     return NULL;
121   }
PauseRequest() const122   virtual void PauseRequest() const OVERRIDE {}
ResumeRequest() const123   virtual void ResumeRequest() const OVERRIDE {}
CancelRequest() const124   virtual void CancelRequest() const OVERRIDE {}
DebugString() const125   virtual std::string DebugString() const OVERRIDE {
126     return "SavePackage DownloadRequestHandle";
127   }
128 
129  private:
130   base::WeakPtr<SavePackage> save_package_;
131 };
132 
133 }  // namespace
134 
135 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
136     FILE_PATH_LITERAL("html");
137 
SavePackage(WebContents * web_contents,SavePageType save_type,const base::FilePath & file_full_path,const base::FilePath & directory_full_path)138 SavePackage::SavePackage(WebContents* web_contents,
139                          SavePageType save_type,
140                          const base::FilePath& file_full_path,
141                          const base::FilePath& directory_full_path)
142     : WebContentsObserver(web_contents),
143       file_manager_(NULL),
144       download_manager_(NULL),
145       download_(NULL),
146       page_url_(GetUrlToBeSaved()),
147       saved_main_file_path_(file_full_path),
148       saved_main_directory_path_(directory_full_path),
149       title_(web_contents->GetTitle()),
150       start_tick_(base::TimeTicks::Now()),
151       finished_(false),
152       mhtml_finishing_(false),
153       user_canceled_(false),
154       disk_error_occurred_(false),
155       save_type_(save_type),
156       all_save_items_count_(0),
157       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
158       wait_state_(INITIALIZE),
159       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
160       unique_id_(g_save_package_id++),
161       wrote_to_completed_file_(false),
162       wrote_to_failed_file_(false) {
163   DCHECK(page_url_.is_valid());
164   DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
165          (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
166          (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
167   DCHECK(!saved_main_file_path_.empty() &&
168          saved_main_file_path_.value().length() <= kMaxFilePathLength);
169   DCHECK(!saved_main_directory_path_.empty() &&
170          saved_main_directory_path_.value().length() < kMaxFilePathLength);
171   InternalInit();
172 }
173 
SavePackage(WebContents * web_contents)174 SavePackage::SavePackage(WebContents* web_contents)
175     : WebContentsObserver(web_contents),
176       file_manager_(NULL),
177       download_manager_(NULL),
178       download_(NULL),
179       page_url_(GetUrlToBeSaved()),
180       title_(web_contents->GetTitle()),
181       start_tick_(base::TimeTicks::Now()),
182       finished_(false),
183       mhtml_finishing_(false),
184       user_canceled_(false),
185       disk_error_occurred_(false),
186       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
187       all_save_items_count_(0),
188       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
189       wait_state_(INITIALIZE),
190       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
191       unique_id_(g_save_package_id++),
192       wrote_to_completed_file_(false),
193       wrote_to_failed_file_(false) {
194   DCHECK(page_url_.is_valid());
195   InternalInit();
196 }
197 
198 // This is for testing use. Set |finished_| as true because we don't want
199 // method Cancel to be be called in destructor in test mode.
200 // We also don't call InternalInit().
SavePackage(WebContents * web_contents,const base::FilePath & file_full_path,const base::FilePath & directory_full_path)201 SavePackage::SavePackage(WebContents* web_contents,
202                          const base::FilePath& file_full_path,
203                          const base::FilePath& directory_full_path)
204     : WebContentsObserver(web_contents),
205       file_manager_(NULL),
206       download_manager_(NULL),
207       download_(NULL),
208       saved_main_file_path_(file_full_path),
209       saved_main_directory_path_(directory_full_path),
210       start_tick_(base::TimeTicks::Now()),
211       finished_(true),
212       mhtml_finishing_(false),
213       user_canceled_(false),
214       disk_error_occurred_(false),
215       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
216       all_save_items_count_(0),
217       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
218       wait_state_(INITIALIZE),
219       contents_id_(0),
220       unique_id_(g_save_package_id++),
221       wrote_to_completed_file_(false),
222       wrote_to_failed_file_(false) {
223 }
224 
~SavePackage()225 SavePackage::~SavePackage() {
226   // Stop receiving saving job's updates
227   if (!finished_ && !canceled()) {
228     // Unexpected quit.
229     Cancel(true);
230   }
231 
232   // We should no longer be observing the DownloadItem at this point.
233   CHECK(!download_);
234 
235   DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
236                                    completed_count() +
237                                    in_process_count()));
238   // Free all SaveItems.
239   while (!waiting_item_queue_.empty()) {
240     // We still have some items which are waiting for start to save.
241     SaveItem* save_item = waiting_item_queue_.front();
242     waiting_item_queue_.pop();
243     delete save_item;
244   }
245 
246   STLDeleteValues(&saved_success_items_);
247   STLDeleteValues(&in_progress_items_);
248   STLDeleteValues(&saved_failed_items_);
249 
250   file_manager_ = NULL;
251 }
252 
GetUrlToBeSaved()253 GURL SavePackage::GetUrlToBeSaved() {
254   // Instead of using web_contents_.GetURL here, we use url() (which is the
255   // "real" url of the page) from the NavigationEntry because it reflects its
256   // origin rather than the displayed one (returned by GetURL) which may be
257   // different (like having "view-source:" on the front).
258   NavigationEntry* visible_entry =
259       web_contents()->GetController().GetVisibleEntry();
260   return visible_entry->GetURL();
261 }
262 
Cancel(bool user_action)263 void SavePackage::Cancel(bool user_action) {
264   if (!canceled()) {
265     if (user_action)
266       user_canceled_ = true;
267     else
268       disk_error_occurred_ = true;
269     Stop();
270   }
271   RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
272 }
273 
274 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
275 // cases, we need file_manager_ to be initialized, so we do this first.
InternalInit()276 void SavePackage::InternalInit() {
277   ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
278   if (!rdh) {
279     NOTREACHED();
280     return;
281   }
282 
283   file_manager_ = rdh->save_file_manager();
284   DCHECK(file_manager_);
285 
286   download_manager_ = static_cast<DownloadManagerImpl*>(
287       BrowserContext::GetDownloadManager(
288           web_contents()->GetBrowserContext()));
289   DCHECK(download_manager_);
290 
291   RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
292 }
293 
Init(const SavePackageDownloadCreatedCallback & download_created_callback)294 bool SavePackage::Init(
295     const SavePackageDownloadCreatedCallback& download_created_callback) {
296   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
297   // Set proper running state.
298   if (wait_state_ != INITIALIZE)
299     return false;
300 
301   wait_state_ = START_PROCESS;
302 
303   // Initialize the request context and resource dispatcher.
304   BrowserContext* browser_context = web_contents()->GetBrowserContext();
305   if (!browser_context) {
306     NOTREACHED();
307     return false;
308   }
309 
310   scoped_ptr<DownloadRequestHandleInterface> request_handle(
311       new SavePackageRequestHandle(AsWeakPtr()));
312   // The download manager keeps ownership but adds us as an observer.
313   download_manager_->CreateSavePackageDownloadItem(
314       saved_main_file_path_,
315       page_url_,
316       ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
317        "multipart/related" : "text/html"),
318       request_handle.Pass(),
319       base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
320                  download_created_callback));
321   return true;
322 }
323 
InitWithDownloadItem(const SavePackageDownloadCreatedCallback & download_created_callback,DownloadItemImpl * item)324 void SavePackage::InitWithDownloadItem(
325     const SavePackageDownloadCreatedCallback& download_created_callback,
326     DownloadItemImpl* item) {
327   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
328   DCHECK(item);
329   download_ = item;
330   download_->AddObserver(this);
331   // Confirm above didn't delete the tab out from under us.
332   if (!download_created_callback.is_null())
333     download_created_callback.Run(download_);
334 
335   // Check save type and process the save page job.
336   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
337     // Get directory
338     DCHECK(!saved_main_directory_path_.empty());
339     GetAllSavableResourceLinksForCurrentPage();
340   } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
341     web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
342         &SavePackage::OnMHTMLGenerated, this));
343   } else {
344     DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
345     wait_state_ = NET_FILES;
346     SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
347         SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
348         SaveFileCreateInfo::SAVE_FILE_FROM_NET;
349     SaveItem* save_item = new SaveItem(page_url_,
350                                        Referrer(),
351                                        this,
352                                        save_source);
353     // Add this item to waiting list.
354     waiting_item_queue_.push(save_item);
355     all_save_items_count_ = 1;
356     download_->SetTotalBytes(1);
357 
358     DoSavingProcess();
359   }
360 }
361 
OnMHTMLGenerated(int64 size)362 void SavePackage::OnMHTMLGenerated(int64 size) {
363   if (size <= 0) {
364     Cancel(false);
365     return;
366   }
367   wrote_to_completed_file_ = true;
368 
369   // Hack to avoid touching download_ after user cancel.
370   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
371   // with SavePackage flow.
372   if (download_->GetState() == DownloadItem::IN_PROGRESS) {
373     download_->SetTotalBytes(size);
374     download_->DestinationUpdate(size, 0, std::string());
375     // Must call OnAllDataSaved here in order for
376     // GDataDownloadObserver::ShouldUpload() to return true.
377     // ShouldCompleteDownload() may depend on the gdata uploader to finish.
378     download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
379   }
380 
381   if (!download_manager_->GetDelegate()) {
382     Finish();
383     return;
384   }
385 
386   if (download_manager_->GetDelegate()->ShouldCompleteDownload(
387           download_, base::Bind(&SavePackage::Finish, this))) {
388     Finish();
389   }
390 }
391 
392 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
393 // restricted by NAME_MAX. The maximum allowed path looks like:
394 // '/path/to/save_dir' + '/' + NAME_MAX.
GetMaxPathLengthForDirectory(const base::FilePath & base_dir)395 uint32 SavePackage::GetMaxPathLengthForDirectory(
396     const base::FilePath& base_dir) {
397 #if defined(OS_POSIX)
398   return std::min(kMaxFilePathLength,
399                   static_cast<uint32>(base_dir.value().length()) +
400                   NAME_MAX + 1);
401 #else
402   return kMaxFilePathLength;
403 #endif
404 }
405 
406 // File name is considered being consist of pure file name, dot and file
407 // extension name. File name might has no dot and file extension, or has
408 // multiple dot inside file name. The dot, which separates the pure file
409 // name and file extension name, is last dot in the whole file name.
410 // This function is for making sure the length of specified file path is not
411 // great than the specified maximum length of file path and getting safe pure
412 // file name part if the input pure file name is too long.
413 // The parameter |dir_path| specifies directory part of the specified
414 // file path. The parameter |file_name_ext| specifies file extension
415 // name part of the specified file path (including start dot). The parameter
416 // |max_file_path_len| specifies maximum length of the specified file path.
417 // The parameter |pure_file_name| input pure file name part of the specified
418 // file path. If the length of specified file path is great than
419 // |max_file_path_len|, the |pure_file_name| will output new pure file name
420 // part for making sure the length of specified file path is less than
421 // specified maximum length of file path. Return false if the function can
422 // not get a safe pure file name, otherwise it returns true.
GetSafePureFileName(const base::FilePath & dir_path,const base::FilePath::StringType & file_name_ext,uint32 max_file_path_len,base::FilePath::StringType * pure_file_name)423 bool SavePackage::GetSafePureFileName(
424     const base::FilePath& dir_path,
425     const base::FilePath::StringType& file_name_ext,
426     uint32 max_file_path_len,
427     base::FilePath::StringType* pure_file_name) {
428   DCHECK(!pure_file_name->empty());
429   int available_length = static_cast<int>(max_file_path_len -
430                                           dir_path.value().length() -
431                                           file_name_ext.length());
432   // Need an extra space for the separator.
433   if (!dir_path.EndsWithSeparator())
434     --available_length;
435 
436   // Plenty of room.
437   if (static_cast<int>(pure_file_name->length()) <= available_length)
438     return true;
439 
440   // Limited room. Truncate |pure_file_name| to fit.
441   if (available_length > 0) {
442     *pure_file_name = pure_file_name->substr(0, available_length);
443     return true;
444   }
445 
446   // Not enough room to even use a shortened |pure_file_name|.
447   pure_file_name->clear();
448   return false;
449 }
450 
451 // Generate name for saving resource.
GenerateFileName(const std::string & disposition,const GURL & url,bool need_html_ext,base::FilePath::StringType * generated_name)452 bool SavePackage::GenerateFileName(const std::string& disposition,
453                                    const GURL& url,
454                                    bool need_html_ext,
455                                    base::FilePath::StringType* generated_name) {
456   // TODO(jungshik): Figure out the referrer charset when having one
457   // makes sense and pass it to GenerateFileName.
458   base::FilePath file_path = net::GenerateFileName(url,
459                                                    disposition,
460                                                    std::string(),
461                                                    std::string(),
462                                                    std::string(),
463                                                    kDefaultSaveName);
464 
465   DCHECK(!file_path.empty());
466   base::FilePath::StringType pure_file_name =
467       file_path.RemoveExtension().BaseName().value();
468   base::FilePath::StringType file_name_ext = file_path.Extension();
469 
470   // If it is HTML resource, use ".html" as its extension.
471   if (need_html_ext) {
472     file_name_ext = FILE_PATH_LITERAL(".");
473     file_name_ext.append(kDefaultHtmlExtension);
474   }
475 
476   // Need to make sure the suggested file name is not too long.
477   uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
478 
479   // Get safe pure file name.
480   if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
481                            max_path, &pure_file_name))
482     return false;
483 
484   base::FilePath::StringType file_name = pure_file_name + file_name_ext;
485 
486   // Check whether we already have same name in a case insensitive manner.
487   FileNameSet::const_iterator iter = file_name_set_.find(file_name);
488   if (iter == file_name_set_.end()) {
489     file_name_set_.insert(file_name);
490   } else {
491     // Found same name, increase the ordinal number for the file name.
492     pure_file_name =
493         base::FilePath(*iter).RemoveExtension().BaseName().value();
494     base::FilePath::StringType base_file_name =
495         StripOrdinalNumber(pure_file_name);
496 
497     // We need to make sure the length of base file name plus maximum ordinal
498     // number path will be less than or equal to kMaxFilePathLength.
499     if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
500         max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
501       return false;
502 
503     // Prepare the new ordinal number.
504     uint32 ordinal_number;
505     FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
506     if (it == file_name_count_map_.end()) {
507       // First base-name-conflict resolving, use 1 as initial ordinal number.
508       file_name_count_map_[base_file_name] = 1;
509       ordinal_number = 1;
510     } else {
511       // We have met same base-name conflict, use latest ordinal number.
512       ordinal_number = it->second;
513     }
514 
515     if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
516       // Use a random file from temporary file.
517       base::FilePath temp_file;
518       base::CreateTemporaryFile(&temp_file);
519       file_name = temp_file.RemoveExtension().BaseName().value();
520       // Get safe pure file name.
521       if (!GetSafePureFileName(saved_main_directory_path_,
522                                base::FilePath::StringType(),
523                                max_path, &file_name))
524         return false;
525     } else {
526       for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
527         base::FilePath::StringType new_name = base_file_name +
528             base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
529         if (file_name_set_.find(new_name) == file_name_set_.end()) {
530           // Resolved name conflict.
531           file_name = new_name;
532           file_name_count_map_[base_file_name] = ++i;
533           break;
534         }
535       }
536     }
537 
538     file_name_set_.insert(file_name);
539   }
540 
541   DCHECK(!file_name.empty());
542   generated_name->assign(file_name);
543 
544   return true;
545 }
546 
547 // We have received a message from SaveFileManager about a new saving job. We
548 // create a SaveItem and store it in our in_progress list.
StartSave(const SaveFileCreateInfo * info)549 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
550   DCHECK(info && !info->url.is_empty());
551 
552   SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
553   if (it == in_progress_items_.end()) {
554     // If not found, we must have cancel action.
555     DCHECK(canceled());
556     return;
557   }
558   SaveItem* save_item = it->second;
559 
560   DCHECK(!saved_main_file_path_.empty());
561 
562   save_item->SetSaveId(info->save_id);
563   save_item->SetTotalBytes(info->total_bytes);
564 
565   // Determine the proper path for a saving job, by choosing either the default
566   // save directory, or prompting the user.
567   DCHECK(!save_item->has_final_name());
568   if (info->url != page_url_) {
569     base::FilePath::StringType generated_name;
570     // For HTML resource file, make sure it will have .htm as extension name,
571     // otherwise, when you open the saved page in Chrome again, download
572     // file manager will treat it as downloadable resource, and download it
573     // instead of opening it as HTML.
574     bool need_html_ext =
575         info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
576     if (!GenerateFileName(info->content_disposition,
577                           GURL(info->url),
578                           need_html_ext,
579                           &generated_name)) {
580       // We can not generate file name for this SaveItem, so we cancel the
581       // saving page job if the save source is from serialized DOM data.
582       // Otherwise, it means this SaveItem is sub-resource type, we treat it
583       // as an error happened on saving. We can ignore this type error for
584       // sub-resource links which will be resolved as absolute links instead
585       // of local links in final saved contents.
586       if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
587         Cancel(true);
588       else
589         SaveFinished(save_item->save_id(), 0, false);
590       return;
591     }
592 
593     // When saving page as only-HTML, we only have a SaveItem whose url
594     // must be page_url_.
595     DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
596     DCHECK(!saved_main_directory_path_.empty());
597 
598     // Now we get final name retrieved from GenerateFileName, we will use it
599     // rename the SaveItem.
600     base::FilePath final_name =
601         saved_main_directory_path_.Append(generated_name);
602     save_item->Rename(final_name);
603   } else {
604     // It is the main HTML file, use the name chosen by the user.
605     save_item->Rename(saved_main_file_path_);
606   }
607 
608   // If the save source is from file system, inform SaveFileManager to copy
609   // corresponding file to the file path which this SaveItem specifies.
610   if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
611     BrowserThread::PostTask(
612         BrowserThread::FILE, FROM_HERE,
613         base::Bind(&SaveFileManager::SaveLocalFile,
614                    file_manager_,
615                    save_item->url(),
616                    save_item->save_id(),
617                    contents_id()));
618     return;
619   }
620 
621   // Check whether we begin to require serialized HTML data.
622   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
623       wait_state_ == HTML_DATA) {
624     // Inform backend to serialize the all frames' DOM and send serialized
625     // HTML data back.
626     GetSerializedHtmlDataForCurrentPageWithLocalLinks();
627   }
628 }
629 
LookupItemInProcessBySaveId(int32 save_id)630 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
631   if (in_process_count()) {
632     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
633         it != in_progress_items_.end(); ++it) {
634       SaveItem* save_item = it->second;
635       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
636       if (save_item->save_id() == save_id)
637         return save_item;
638     }
639   }
640   return NULL;
641 }
642 
PutInProgressItemToSavedMap(SaveItem * save_item)643 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
644   SaveUrlItemMap::iterator it = in_progress_items_.find(
645       save_item->url().spec());
646   DCHECK(it != in_progress_items_.end());
647   DCHECK(save_item == it->second);
648   in_progress_items_.erase(it);
649 
650   if (save_item->success()) {
651     // Add it to saved_success_items_.
652     DCHECK(saved_success_items_.find(save_item->save_id()) ==
653            saved_success_items_.end());
654     saved_success_items_[save_item->save_id()] = save_item;
655   } else {
656     // Add it to saved_failed_items_.
657     DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
658            saved_failed_items_.end());
659     saved_failed_items_[save_item->url().spec()] = save_item;
660   }
661 }
662 
663 // Called for updating saving state.
UpdateSaveProgress(int32 save_id,int64 size,bool write_success)664 bool SavePackage::UpdateSaveProgress(int32 save_id,
665                                      int64 size,
666                                      bool write_success) {
667   // Because we might have canceled this saving job before,
668   // so we might not find corresponding SaveItem.
669   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
670   if (!save_item)
671     return false;
672 
673   save_item->Update(size);
674 
675   // If we got disk error, cancel whole save page job.
676   if (!write_success) {
677     // Cancel job with reason of disk error.
678     Cancel(false);
679   }
680   return true;
681 }
682 
683 // Stop all page saving jobs that are in progress and instruct the file thread
684 // to delete all saved  files.
Stop()685 void SavePackage::Stop() {
686   // If we haven't moved out of the initial state, there's nothing to cancel and
687   // there won't be valid pointers for file_manager_ or download_.
688   if (wait_state_ == INITIALIZE)
689     return;
690 
691   // When stopping, if it still has some items in in_progress, cancel them.
692   DCHECK(canceled());
693   if (in_process_count()) {
694     SaveUrlItemMap::iterator it = in_progress_items_.begin();
695     for (; it != in_progress_items_.end(); ++it) {
696       SaveItem* save_item = it->second;
697       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
698       save_item->Cancel();
699     }
700     // Remove all in progress item to saved map. For failed items, they will
701     // be put into saved_failed_items_, for successful item, they will be put
702     // into saved_success_items_.
703     while (in_process_count())
704       PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
705   }
706 
707   // This vector contains the save ids of the save files which SaveFileManager
708   // needs to remove from its save_file_map_.
709   SaveIDList save_ids;
710   for (SavedItemMap::iterator it = saved_success_items_.begin();
711       it != saved_success_items_.end(); ++it)
712     save_ids.push_back(it->first);
713   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
714       it != saved_failed_items_.end(); ++it)
715     save_ids.push_back(it->second->save_id());
716 
717   BrowserThread::PostTask(
718       BrowserThread::FILE, FROM_HERE,
719       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
720                  file_manager_,
721                  save_ids));
722 
723   finished_ = true;
724   wait_state_ = FAILED;
725 
726   // Inform the DownloadItem we have canceled whole save page job.
727   if (download_) {
728     download_->Cancel(false);
729     FinalizeDownloadEntry();
730   }
731 }
732 
CheckFinish()733 void SavePackage::CheckFinish() {
734   if (in_process_count() || finished_)
735     return;
736 
737   base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
738                         saved_success_items_.size() > 1) ?
739                         saved_main_directory_path_ : base::FilePath();
740 
741   // This vector contains the final names of all the successfully saved files
742   // along with their save ids. It will be passed to SaveFileManager to do the
743   // renaming job.
744   FinalNameList final_names;
745   for (SavedItemMap::iterator it = saved_success_items_.begin();
746       it != saved_success_items_.end(); ++it)
747     final_names.push_back(std::make_pair(it->first,
748                                          it->second->full_path()));
749 
750   BrowserThread::PostTask(
751       BrowserThread::FILE, FROM_HERE,
752       base::Bind(&SaveFileManager::RenameAllFiles,
753                  file_manager_,
754                  final_names,
755                  dir,
756                  web_contents()->GetRenderProcessHost()->GetID(),
757                  web_contents()->GetRenderViewHost()->GetRoutingID(),
758                  id()));
759 }
760 
761 // Successfully finished all items of this SavePackage.
Finish()762 void SavePackage::Finish() {
763   // User may cancel the job when we're moving files to the final directory.
764   if (canceled())
765     return;
766 
767   wait_state_ = SUCCESSFUL;
768   finished_ = true;
769 
770   // Record finish.
771   RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
772 
773   // Record any errors that occurred.
774   if (wrote_to_completed_file_) {
775     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
776   }
777 
778   if (wrote_to_failed_file_) {
779     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
780   }
781 
782   // This vector contains the save ids of the save files which SaveFileManager
783   // needs to remove from its save_file_map_.
784   SaveIDList save_ids;
785   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
786        it != saved_failed_items_.end(); ++it)
787     save_ids.push_back(it->second->save_id());
788 
789   BrowserThread::PostTask(
790       BrowserThread::FILE, FROM_HERE,
791       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
792                  file_manager_,
793                  save_ids));
794 
795   if (download_) {
796     // Hack to avoid touching download_ after user cancel.
797     // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
798     // with SavePackage flow.
799     if (download_->GetState() == DownloadItem::IN_PROGRESS) {
800       if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
801         download_->DestinationUpdate(
802             all_save_items_count_, CurrentSpeed(), std::string());
803         download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
804       }
805       download_->MarkAsComplete();
806     }
807     FinalizeDownloadEntry();
808   }
809 }
810 
811 // Called for updating end state.
SaveFinished(int32 save_id,int64 size,bool is_success)812 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
813   // Because we might have canceled this saving job before,
814   // so we might not find corresponding SaveItem. Just ignore it.
815   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
816   if (!save_item)
817     return;
818 
819   // Let SaveItem set end state.
820   save_item->Finish(size, is_success);
821   // Remove the associated save id and SavePackage.
822   file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
823 
824   PutInProgressItemToSavedMap(save_item);
825 
826   // Inform the DownloadItem to update UI.
827   // We use the received bytes as number of saved files.
828   // Hack to avoid touching download_ after user cancel.
829   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
830   // with SavePackage flow.
831   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
832     download_->DestinationUpdate(
833         completed_count(), CurrentSpeed(), std::string());
834   }
835 
836   if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
837       save_item->url() == page_url_ && !save_item->received_bytes()) {
838     // If size of main HTML page is 0, treat it as disk error.
839     Cancel(false);
840     return;
841   }
842 
843   if (canceled()) {
844     DCHECK(finished_);
845     return;
846   }
847 
848   // Continue processing the save page job.
849   DoSavingProcess();
850 
851   // Check whether we can successfully finish whole job.
852   CheckFinish();
853 }
854 
855 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
856 // save id -1 when it encounters error. Since in this case, save id will be
857 // -1, so we can only use URL to find which SaveItem is associated with
858 // this error.
859 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
860 // from serializing HTML data, then cancel saving page.
SaveFailed(const GURL & save_url)861 void SavePackage::SaveFailed(const GURL& save_url) {
862   SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
863   if (it == in_progress_items_.end()) {
864     NOTREACHED();  // Should not exist!
865     return;
866   }
867   SaveItem* save_item = it->second;
868 
869   save_item->Finish(0, false);
870 
871   PutInProgressItemToSavedMap(save_item);
872 
873   // Inform the DownloadItem to update UI.
874   // We use the received bytes as number of saved files.
875   // Hack to avoid touching download_ after user cancel.
876   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
877   // with SavePackage flow.
878   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
879     download_->DestinationUpdate(
880         completed_count(), CurrentSpeed(), std::string());
881   }
882 
883   if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
884       (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
885       (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
886     // We got error when saving page. Treat it as disk error.
887     Cancel(true);
888   }
889 
890   if (canceled()) {
891     DCHECK(finished_);
892     return;
893   }
894 
895   // Continue processing the save page job.
896   DoSavingProcess();
897 
898   CheckFinish();
899 }
900 
SaveCanceled(SaveItem * save_item)901 void SavePackage::SaveCanceled(SaveItem* save_item) {
902   // Call the RemoveSaveFile in UI thread.
903   file_manager_->RemoveSaveFile(save_item->save_id(),
904                                 save_item->url(),
905                                 this);
906   if (save_item->save_id() != -1)
907     BrowserThread::PostTask(
908         BrowserThread::FILE, FROM_HERE,
909         base::Bind(&SaveFileManager::CancelSave,
910                    file_manager_,
911                    save_item->save_id()));
912 }
913 
914 // Initiate a saving job of a specific URL. We send the request to
915 // SaveFileManager, which will dispatch it to different approach according to
916 // the save source. Parameter process_all_remaining_items indicates whether
917 // we need to save all remaining items.
SaveNextFile(bool process_all_remaining_items)918 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
919   DCHECK(web_contents());
920   DCHECK(waiting_item_queue_.size());
921 
922   do {
923     // Pop SaveItem from waiting list.
924     SaveItem* save_item = waiting_item_queue_.front();
925     waiting_item_queue_.pop();
926 
927     // Add the item to in_progress_items_.
928     SaveUrlItemMap::iterator it = in_progress_items_.find(
929         save_item->url().spec());
930     DCHECK(it == in_progress_items_.end());
931     in_progress_items_[save_item->url().spec()] = save_item;
932     save_item->Start();
933     file_manager_->SaveURL(save_item->url(),
934                            save_item->referrer(),
935                            web_contents()->GetRenderProcessHost()->GetID(),
936                            routing_id(),
937                            save_item->save_source(),
938                            save_item->full_path(),
939                            web_contents()->
940                                GetBrowserContext()->GetResourceContext(),
941                            this);
942   } while (process_all_remaining_items && waiting_item_queue_.size());
943 }
944 
945 // Calculate the percentage of whole save page job.
PercentComplete()946 int SavePackage::PercentComplete() {
947   if (!all_save_items_count_)
948     return 0;
949   else if (!in_process_count())
950     return 100;
951   else
952     return completed_count() / all_save_items_count_;
953 }
954 
CurrentSpeed() const955 int64 SavePackage::CurrentSpeed() const {
956   base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
957   int64 diff_ms = diff.InMilliseconds();
958   return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
959 }
960 
961 // Continue processing the save page job after one SaveItem has been
962 // finished.
DoSavingProcess()963 void SavePackage::DoSavingProcess() {
964   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
965     // We guarantee that images and JavaScripts must be downloaded first.
966     // So when finishing all those sub-resources, we will know which
967     // sub-resource's link can be replaced with local file path, which
968     // sub-resource's link need to be replaced with absolute URL which
969     // point to its internet address because it got error when saving its data.
970 
971     // Start a new SaveItem job if we still have job in waiting queue.
972     if (waiting_item_queue_.size()) {
973       DCHECK(wait_state_ == NET_FILES);
974       SaveItem* save_item = waiting_item_queue_.front();
975       if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
976         SaveNextFile(false);
977       } else if (!in_process_count()) {
978         // If there is no in-process SaveItem, it means all sub-resources
979         // have been processed. Now we need to start serializing HTML DOM
980         // for the current page to get the generated HTML data.
981         wait_state_ = HTML_DATA;
982         // All non-HTML resources have been finished, start all remaining
983         // HTML files.
984         SaveNextFile(true);
985       }
986     } else if (in_process_count()) {
987       // Continue asking for HTML data.
988       DCHECK(wait_state_ == HTML_DATA);
989     }
990   } else {
991     // Save as HTML only or MHTML.
992     DCHECK(wait_state_ == NET_FILES);
993     DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
994            (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
995     if (waiting_item_queue_.size()) {
996       DCHECK(all_save_items_count_ == waiting_item_queue_.size());
997       SaveNextFile(false);
998     }
999   }
1000 }
1001 
OnMessageReceived(const IPC::Message & message)1002 bool SavePackage::OnMessageReceived(const IPC::Message& message) {
1003   bool handled = true;
1004   IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
1005     IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
1006                         OnReceivedSavableResourceLinksForCurrentPage)
1007     IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
1008                         OnReceivedSerializedHtmlData)
1009     IPC_MESSAGE_UNHANDLED(handled = false)
1010   IPC_END_MESSAGE_MAP()
1011   return handled;
1012 }
1013 
1014 // After finishing all SaveItems which need to get data from net.
1015 // We collect all URLs which have local storage and send the
1016 // map:(originalURL:currentLocalPath) to render process (backend).
1017 // Then render process will serialize DOM and send data to us.
GetSerializedHtmlDataForCurrentPageWithLocalLinks()1018 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
1019   if (wait_state_ != HTML_DATA)
1020     return;
1021   std::vector<GURL> saved_links;
1022   std::vector<base::FilePath> saved_file_paths;
1023   int successful_started_items_count = 0;
1024 
1025   // Collect all saved items which have local storage.
1026   // First collect the status of all the resource files and check whether they
1027   // have created local files although they have not been completely saved.
1028   // If yes, the file can be saved. Otherwise, there is a disk error, so we
1029   // need to cancel the page saving job.
1030   for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1031        it != in_progress_items_.end(); ++it) {
1032     DCHECK(it->second->save_source() ==
1033            SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1034     if (it->second->has_final_name())
1035       successful_started_items_count++;
1036     saved_links.push_back(it->second->url());
1037     saved_file_paths.push_back(it->second->file_name());
1038   }
1039 
1040   // If not all file of HTML resource have been started, then wait.
1041   if (successful_started_items_count != in_process_count())
1042     return;
1043 
1044   // Collect all saved success items.
1045   for (SavedItemMap::iterator it = saved_success_items_.begin();
1046        it != saved_success_items_.end(); ++it) {
1047     DCHECK(it->second->has_final_name());
1048     saved_links.push_back(it->second->url());
1049     saved_file_paths.push_back(it->second->file_name());
1050   }
1051 
1052   // Get the relative directory name.
1053   base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1054 
1055   Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
1056       routing_id(), saved_links, saved_file_paths, relative_dir_name));
1057 }
1058 
1059 // Process the serialized HTML content data of a specified web page
1060 // retrieved from render process.
OnReceivedSerializedHtmlData(const GURL & frame_url,const std::string & data,int32 status)1061 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
1062                                                const std::string& data,
1063                                                int32 status) {
1064   WebPageSerializerClient::PageSerializationStatus flag =
1065       static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1066   // Check current state.
1067   if (wait_state_ != HTML_DATA)
1068     return;
1069 
1070   int id = contents_id();
1071   // If the all frames are finished saving, we need to close the
1072   // remaining SaveItems.
1073   if (flag == WebPageSerializerClient::AllFramesAreFinished) {
1074     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1075          it != in_progress_items_.end(); ++it) {
1076       VLOG(20) << " " << __FUNCTION__ << "()"
1077                << " save_id = " << it->second->save_id()
1078                << " url = \"" << it->second->url().spec() << "\"";
1079       BrowserThread::PostTask(
1080           BrowserThread::FILE, FROM_HERE,
1081           base::Bind(&SaveFileManager::SaveFinished,
1082                      file_manager_,
1083                      it->second->save_id(),
1084                      it->second->url(),
1085                      id,
1086                      true));
1087     }
1088     return;
1089   }
1090 
1091   SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
1092   if (it == in_progress_items_.end()) {
1093     for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1094       saved_it != saved_success_items_.end(); ++saved_it) {
1095       if (saved_it->second->url() == frame_url) {
1096         wrote_to_completed_file_ = true;
1097         break;
1098       }
1099     }
1100 
1101     it = saved_failed_items_.find(frame_url.spec());
1102     if (it != saved_failed_items_.end())
1103       wrote_to_failed_file_ = true;
1104 
1105     return;
1106   }
1107 
1108   SaveItem* save_item = it->second;
1109   DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1110 
1111   if (!data.empty()) {
1112     // Prepare buffer for saving HTML data.
1113     scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1114     memcpy(new_data->data(), data.data(), data.size());
1115 
1116     // Call write file functionality in file thread.
1117     BrowserThread::PostTask(
1118         BrowserThread::FILE, FROM_HERE,
1119         base::Bind(&SaveFileManager::UpdateSaveProgress,
1120                    file_manager_,
1121                    save_item->save_id(),
1122                    new_data,
1123                    static_cast<int>(data.size())));
1124   }
1125 
1126   // Current frame is completed saving, call finish in file thread.
1127   if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
1128     VLOG(20) << " " << __FUNCTION__ << "()"
1129              << " save_id = " << save_item->save_id()
1130              << " url = \"" << save_item->url().spec() << "\"";
1131     BrowserThread::PostTask(
1132         BrowserThread::FILE, FROM_HERE,
1133         base::Bind(&SaveFileManager::SaveFinished,
1134                    file_manager_,
1135                    save_item->save_id(),
1136                    save_item->url(),
1137                    id,
1138                    true));
1139   }
1140 }
1141 
1142 // Ask for all savable resource links from backend, include main frame and
1143 // sub-frame.
GetAllSavableResourceLinksForCurrentPage()1144 void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
1145   if (wait_state_ != START_PROCESS)
1146     return;
1147 
1148   wait_state_ = RESOURCES_LIST;
1149   Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
1150                                                             page_url_));
1151 }
1152 
1153 // Give backend the lists which contain all resource links that have local
1154 // storage, after which, render process will serialize DOM for generating
1155 // HTML data.
OnReceivedSavableResourceLinksForCurrentPage(const std::vector<GURL> & resources_list,const std::vector<Referrer> & referrers_list,const std::vector<GURL> & frames_list)1156 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
1157     const std::vector<GURL>& resources_list,
1158     const std::vector<Referrer>& referrers_list,
1159     const std::vector<GURL>& frames_list) {
1160   if (wait_state_ != RESOURCES_LIST)
1161     return;
1162 
1163   if (resources_list.size() != referrers_list.size())
1164     return;
1165 
1166   all_save_items_count_ = static_cast<int>(resources_list.size()) +
1167                            static_cast<int>(frames_list.size());
1168 
1169   // We use total bytes as the total number of files we want to save.
1170   // Hack to avoid touching download_ after user cancel.
1171   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1172   // with SavePackage flow.
1173   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1174     download_->SetTotalBytes(all_save_items_count_);
1175 
1176   if (all_save_items_count_) {
1177     // Put all sub-resources to wait list.
1178     for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1179       const GURL& u = resources_list[i];
1180       DCHECK(u.is_valid());
1181       SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
1182           SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
1183           SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1184       SaveItem* save_item = new SaveItem(u, referrers_list[i],
1185                                          this, save_source);
1186       waiting_item_queue_.push(save_item);
1187     }
1188     // Put all HTML resources to wait list.
1189     for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
1190       const GURL& u = frames_list[i];
1191       DCHECK(u.is_valid());
1192       SaveItem* save_item = new SaveItem(
1193           u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1194       waiting_item_queue_.push(save_item);
1195     }
1196     wait_state_ = NET_FILES;
1197     DoSavingProcess();
1198   } else {
1199     // No resource files need to be saved, treat it as user cancel.
1200     Cancel(true);
1201   }
1202 }
1203 
GetSuggestedNameForSaveAs(bool can_save_as_complete,const std::string & contents_mime_type,const std::string & accept_langs)1204 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
1205     bool can_save_as_complete,
1206     const std::string& contents_mime_type,
1207     const std::string& accept_langs) {
1208   base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
1209 
1210   // If the page's title matches its URL, use the URL. Try to use the last path
1211   // component or if there is none, the domain as the file name.
1212   // Normally we want to base the filename on the page title, or if it doesn't
1213   // exist, on the URL. It's not easy to tell if the page has no title, because
1214   // if the page has no title, WebContents::GetTitle() will return the page's
1215   // URL (adjusted for display purposes). Therefore, we convert the "title"
1216   // back to a URL, and if it matches the original page URL, we know the page
1217   // had no title (or had a title equal to its URL, which is fine to treat
1218   // similarly).
1219   if (title_ == net::FormatUrl(page_url_, accept_langs)) {
1220     std::string url_path;
1221     if (!page_url_.SchemeIs(url::kDataScheme)) {
1222       std::vector<std::string> url_parts;
1223       base::SplitString(page_url_.path(), '/', &url_parts);
1224       if (!url_parts.empty()) {
1225         for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
1226           url_path = url_parts[i];
1227           if (!url_path.empty())
1228             break;
1229         }
1230       }
1231       if (url_path.empty())
1232         url_path = page_url_.host();
1233     } else {
1234       url_path = "dataurl";
1235     }
1236     name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
1237   }
1238 
1239   // Ask user for getting final saving name.
1240   name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
1241                                              contents_mime_type);
1242   // Adjust extension for complete types.
1243   if (can_save_as_complete)
1244     name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
1245 
1246   base::FilePath::StringType file_name = name_with_proper_ext.value();
1247   base::i18n::ReplaceIllegalCharactersInPath(&file_name, ' ');
1248   return base::FilePath(file_name);
1249 }
1250 
// Returns |name| unchanged when its extension maps to a MIME type that can be
// saved as complete; otherwise returns |name| with "." and
// kDefaultHtmlExtension appended.
base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
  // Extension() includes the leading '.', which the MIME lookup does not
  // expect.
  base::FilePath::StringType bare_ext = name.Extension();
  if (!bare_ext.empty())
    bare_ext.erase(0, 1);

  std::string ext_mime_type;
  if (net::GetMimeTypeFromExtension(bare_ext, &ext_mime_type) &&
      CanSaveAsComplete(ext_mime_type)) {
    return name;
  }

  // The extension is missing or not suitable for HTML files: append one.
  return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
                        kDefaultHtmlExtension);
}
1265 
// Returns |name| with the extension suggested for |contents_mime_type|
// appended when such a suggestion exists and the current extension of |name|
// does not map to any known MIME type. Otherwise returns |name| unchanged.
base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
    const std::string& contents_mime_type) {
  // Drop the leading period from the current extension, if there is one.
  base::FilePath::StringType current_ext = name.Extension();
  if (!current_ext.empty())
    current_ext.erase(0, 1);

  const base::FilePath::StringType suggested_ext =
      ExtensionForMimeType(contents_mime_type);
  if (suggested_ext.empty())
    return name;

  // A recognized extension is kept as it is.
  std::string known_mime_type;
  if (net::GetMimeTypeFromExtension(current_ext, &known_mime_type))
    return name;

  // Extension is absent or needs to be updated.
  return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
                        suggested_ext);
}
1282 
ExtensionForMimeType(const std::string & contents_mime_type)1283 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
1284     const std::string& contents_mime_type) {
1285   static const struct {
1286     const base::FilePath::CharType *mime_type;
1287     const base::FilePath::CharType *suggested_extension;
1288   } extensions[] = {
1289     { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
1290     { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
1291     { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
1292     { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
1293     { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
1294   };
1295 #if defined(OS_POSIX)
1296   base::FilePath::StringType mime_type(contents_mime_type);
1297 #elif defined(OS_WIN)
1298   base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
1299 #endif  // OS_WIN
1300   for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) {
1301     if (mime_type == extensions[i].mime_type)
1302       return extensions[i].suggested_extension;
1303   }
1304   return FILE_PATH_LITERAL("");
1305 }
1306 
GetSaveInfo()1307 void SavePackage::GetSaveInfo() {
1308   // Can't use web_contents_ in the file thread, so get the data that we need
1309   // before calling to it.
1310   base::FilePath website_save_dir, download_save_dir;
1311   bool skip_dir_check = false;
1312   DCHECK(download_manager_);
1313   if (download_manager_->GetDelegate()) {
1314     download_manager_->GetDelegate()->GetSaveDir(
1315         web_contents()->GetBrowserContext(), &website_save_dir,
1316         &download_save_dir, &skip_dir_check);
1317   }
1318   std::string mime_type = web_contents()->GetContentsMimeType();
1319   std::string accept_languages =
1320       GetContentClient()->browser()->GetAcceptLangs(
1321           web_contents()->GetBrowserContext());
1322 
1323   BrowserThread::PostTask(
1324       BrowserThread::FILE, FROM_HERE,
1325       base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
1326           website_save_dir, download_save_dir, skip_dir_check,
1327           mime_type, accept_languages));
1328 }
1329 
CreateDirectoryOnFileThread(const base::FilePath & website_save_dir,const base::FilePath & download_save_dir,bool skip_dir_check,const std::string & mime_type,const std::string & accept_langs)1330 void SavePackage::CreateDirectoryOnFileThread(
1331     const base::FilePath& website_save_dir,
1332     const base::FilePath& download_save_dir,
1333     bool skip_dir_check,
1334     const std::string& mime_type,
1335     const std::string& accept_langs) {
1336   base::FilePath save_dir;
1337   // If the default html/websites save folder doesn't exist...
1338   // We skip the directory check for gdata directories on ChromeOS.
1339   if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
1340     // If the default download dir doesn't exist, create it.
1341     if (!base::DirectoryExists(download_save_dir)) {
1342       bool res = base::CreateDirectory(download_save_dir);
1343       DCHECK(res);
1344     }
1345     save_dir = download_save_dir;
1346   } else {
1347     // If it does exist, use the default save dir param.
1348     save_dir = website_save_dir;
1349   }
1350 
1351   bool can_save_as_complete = CanSaveAsComplete(mime_type);
1352   base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
1353       can_save_as_complete, mime_type, accept_langs);
1354   base::FilePath::StringType pure_file_name =
1355       suggested_filename.RemoveExtension().BaseName().value();
1356   base::FilePath::StringType file_name_ext = suggested_filename.Extension();
1357 
1358   // Need to make sure the suggested file name is not too long.
1359   uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
1360 
1361   if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
1362     save_dir = save_dir.Append(pure_file_name + file_name_ext);
1363   } else {
1364     // Cannot create a shorter filename. This will cause the save as operation
1365     // to fail unless the user pick a shorter name. Continuing even though it
1366     // will fail because returning means no save as popup for the user, which
1367     // is even more confusing. This case should be rare though.
1368     save_dir = save_dir.Append(suggested_filename);
1369   }
1370 
1371   BrowserThread::PostTask(
1372       BrowserThread::UI, FROM_HERE,
1373       base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
1374                  can_save_as_complete));
1375 }
1376 
ContinueGetSaveInfo(const base::FilePath & suggested_path,bool can_save_as_complete)1377 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
1378                                       bool can_save_as_complete) {
1379 
1380   // The WebContents which owns this SavePackage may have disappeared during
1381   // the UI->FILE->UI thread hop of
1382   // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
1383   if (!web_contents() || !download_manager_->GetDelegate())
1384     return;
1385 
1386   base::FilePath::StringType default_extension;
1387   if (can_save_as_complete)
1388     default_extension = kDefaultHtmlExtension;
1389 
1390   download_manager_->GetDelegate()->ChooseSavePath(
1391       web_contents(),
1392       suggested_path,
1393       default_extension,
1394       can_save_as_complete,
1395       base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
1396 }
1397 
OnPathPicked(const base::FilePath & final_name,SavePageType type,const SavePackageDownloadCreatedCallback & download_created_callback)1398 void SavePackage::OnPathPicked(
1399     const base::FilePath& final_name,
1400     SavePageType type,
1401     const SavePackageDownloadCreatedCallback& download_created_callback) {
1402   DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
1403          (type == SAVE_PAGE_TYPE_AS_MHTML) ||
1404          (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
1405   // Ensure the filename is safe.
1406   saved_main_file_path_ = final_name;
1407   // TODO(asanka): This call may block on IO and shouldn't be made
1408   // from the UI thread.  See http://crbug.com/61827.
1409   net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
1410                             &saved_main_file_path_);
1411 
1412   saved_main_directory_path_ = saved_main_file_path_.DirName();
1413   save_type_ = type;
1414   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
1415     // Make new directory for saving complete file.
1416     saved_main_directory_path_ = saved_main_directory_path_.Append(
1417         saved_main_file_path_.RemoveExtension().BaseName().value() +
1418         FILE_PATH_LITERAL("_files"));
1419   }
1420 
1421   Init(download_created_callback);
1422 }
1423 
StopObservation()1424 void SavePackage::StopObservation() {
1425   DCHECK(download_);
1426   DCHECK(download_manager_);
1427 
1428   download_->RemoveObserver(this);
1429   download_ = NULL;
1430   download_manager_ = NULL;
1431 }
1432 
OnDownloadDestroyed(DownloadItem * download)1433 void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
1434   StopObservation();
1435 }
1436 
FinalizeDownloadEntry()1437 void SavePackage::FinalizeDownloadEntry() {
1438   DCHECK(download_);
1439   DCHECK(download_manager_);
1440 
1441   download_manager_->OnSavePackageSuccessfullyFinished(download_);
1442   StopObservation();
1443 }
1444 
1445 }  // namespace content
1446