• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "content/browser/download/save_package.h"
6 
7 #include <algorithm>
8 
9 #include "base/bind.h"
10 #include "base/file_util.h"
11 #include "base/files/file_path.h"
12 #include "base/i18n/file_util_icu.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/sys_string_conversions.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/threading/thread.h"
21 #include "content/browser/download/download_item_impl.h"
22 #include "content/browser/download/download_manager_impl.h"
23 #include "content/browser/download/download_stats.h"
24 #include "content/browser/download/save_file.h"
25 #include "content/browser/download/save_file_manager.h"
26 #include "content/browser/download/save_item.h"
27 #include "content/browser/loader/resource_dispatcher_host_impl.h"
28 #include "content/browser/renderer_host/render_process_host_impl.h"
29 #include "content/browser/renderer_host/render_view_host_delegate.h"
30 #include "content/browser/renderer_host/render_view_host_impl.h"
31 #include "content/common/view_messages.h"
32 #include "content/public/browser/browser_context.h"
33 #include "content/public/browser/browser_thread.h"
34 #include "content/public/browser/content_browser_client.h"
35 #include "content/public/browser/download_manager_delegate.h"
36 #include "content/public/browser/navigation_entry.h"
37 #include "content/public/browser/notification_service.h"
38 #include "content/public/browser/notification_types.h"
39 #include "content/public/browser/resource_context.h"
40 #include "content/public/browser/web_contents.h"
41 #include "net/base/filename_util.h"
42 #include "net/base/io_buffer.h"
43 #include "net/base/mime_util.h"
44 #include "net/url_request/url_request_context.h"
45 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
46 #include "url/url_constants.h"
47 
48 using base::Time;
49 using blink::WebPageSerializerClient;
50 
51 namespace content {
52 namespace {
53 
54 // A counter for uniquely identifying each save package.
55 int g_save_package_id = 0;
56 
57 // Default name which will be used when we can not get proper name from
58 // resource URL.
59 const char kDefaultSaveName[] = "saved_resource";
60 
61 // Maximum number of file ordinal number. I think it's big enough for resolving
62 // name-conflict files which has same base file name.
63 const int32 kMaxFileOrdinalNumber = 9999;
64 
65 // Maximum length for file path. Since Windows have MAX_PATH limitation for
66 // file path, we need to make sure length of file path of every saved file
67 // is less than MAX_PATH
68 #if defined(OS_WIN)
69 const uint32 kMaxFilePathLength = MAX_PATH - 1;
70 #elif defined(OS_POSIX)
71 const uint32 kMaxFilePathLength = PATH_MAX - 1;
72 #endif
73 
74 // Maximum length for file ordinal number part. Since we only support the
75 // maximum 9999 for ordinal number, which means maximum file ordinal number part
76 // should be "(9998)", so the value is 6.
77 const uint32 kMaxFileOrdinalNumberPartLength = 6;
78 
79 // Strip current ordinal number, if any. Should only be used on pure
80 // file names, i.e. those stripped of their extensions.
81 // TODO(estade): improve this to not choke on alternate encodings.
StripOrdinalNumber(const base::FilePath::StringType & pure_file_name)82 base::FilePath::StringType StripOrdinalNumber(
83     const base::FilePath::StringType& pure_file_name) {
84   base::FilePath::StringType::size_type r_paren_index =
85       pure_file_name.rfind(FILE_PATH_LITERAL(')'));
86   base::FilePath::StringType::size_type l_paren_index =
87       pure_file_name.rfind(FILE_PATH_LITERAL('('));
88   if (l_paren_index >= r_paren_index)
89     return pure_file_name;
90 
91   for (base::FilePath::StringType::size_type i = l_paren_index + 1;
92        i != r_paren_index; ++i) {
93     if (!IsAsciiDigit(pure_file_name[i]))
94       return pure_file_name;
95   }
96 
97   return pure_file_name.substr(0, l_paren_index);
98 }
99 
// Tells whether contents served with |contents_mime_type| can be saved as
// complete-HTML. Only the MIME types "text/html" and "application/xhtml+xml"
// qualify; the comparison is exact (case-sensitive).
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
107 
108 // Request handle for SavePackage downloads. Currently doesn't support
109 // pause/resume/cancel, but returns a WebContents.
110 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
111  public:
SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)112   SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
113       : save_package_(save_package) {}
114 
115   // DownloadRequestHandleInterface
GetWebContents() const116   virtual WebContents* GetWebContents() const OVERRIDE {
117     return save_package_.get() ? save_package_->web_contents() : NULL;
118   }
GetDownloadManager() const119   virtual DownloadManager* GetDownloadManager() const OVERRIDE {
120     return NULL;
121   }
PauseRequest() const122   virtual void PauseRequest() const OVERRIDE {}
ResumeRequest() const123   virtual void ResumeRequest() const OVERRIDE {}
CancelRequest() const124   virtual void CancelRequest() const OVERRIDE {}
DebugString() const125   virtual std::string DebugString() const OVERRIDE {
126     return "SavePackage DownloadRequestHandle";
127   }
128 
129  private:
130   base::WeakPtr<SavePackage> save_package_;
131 };
132 
133 }  // namespace
134 
135 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
136 #if defined(OS_WIN)
137     FILE_PATH_LITERAL("htm");
138 #else
139     FILE_PATH_LITERAL("html");
140 #endif
141 
SavePackage(WebContents * web_contents,SavePageType save_type,const base::FilePath & file_full_path,const base::FilePath & directory_full_path)142 SavePackage::SavePackage(WebContents* web_contents,
143                          SavePageType save_type,
144                          const base::FilePath& file_full_path,
145                          const base::FilePath& directory_full_path)
146     : WebContentsObserver(web_contents),
147       file_manager_(NULL),
148       download_manager_(NULL),
149       download_(NULL),
150       page_url_(GetUrlToBeSaved()),
151       saved_main_file_path_(file_full_path),
152       saved_main_directory_path_(directory_full_path),
153       title_(web_contents->GetTitle()),
154       start_tick_(base::TimeTicks::Now()),
155       finished_(false),
156       mhtml_finishing_(false),
157       user_canceled_(false),
158       disk_error_occurred_(false),
159       save_type_(save_type),
160       all_save_items_count_(0),
161       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
162       wait_state_(INITIALIZE),
163       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
164       unique_id_(g_save_package_id++),
165       wrote_to_completed_file_(false),
166       wrote_to_failed_file_(false) {
167   DCHECK(page_url_.is_valid());
168   DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
169          (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
170          (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
171   DCHECK(!saved_main_file_path_.empty() &&
172          saved_main_file_path_.value().length() <= kMaxFilePathLength);
173   DCHECK(!saved_main_directory_path_.empty() &&
174          saved_main_directory_path_.value().length() < kMaxFilePathLength);
175   InternalInit();
176 }
177 
SavePackage(WebContents * web_contents)178 SavePackage::SavePackage(WebContents* web_contents)
179     : WebContentsObserver(web_contents),
180       file_manager_(NULL),
181       download_manager_(NULL),
182       download_(NULL),
183       page_url_(GetUrlToBeSaved()),
184       title_(web_contents->GetTitle()),
185       start_tick_(base::TimeTicks::Now()),
186       finished_(false),
187       mhtml_finishing_(false),
188       user_canceled_(false),
189       disk_error_occurred_(false),
190       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
191       all_save_items_count_(0),
192       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
193       wait_state_(INITIALIZE),
194       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
195       unique_id_(g_save_package_id++),
196       wrote_to_completed_file_(false),
197       wrote_to_failed_file_(false) {
198   DCHECK(page_url_.is_valid());
199   InternalInit();
200 }
201 
202 // This is for testing use. Set |finished_| as true because we don't want
203 // method Cancel to be be called in destructor in test mode.
204 // We also don't call InternalInit().
SavePackage(WebContents * web_contents,const base::FilePath & file_full_path,const base::FilePath & directory_full_path)205 SavePackage::SavePackage(WebContents* web_contents,
206                          const base::FilePath& file_full_path,
207                          const base::FilePath& directory_full_path)
208     : WebContentsObserver(web_contents),
209       file_manager_(NULL),
210       download_manager_(NULL),
211       download_(NULL),
212       saved_main_file_path_(file_full_path),
213       saved_main_directory_path_(directory_full_path),
214       start_tick_(base::TimeTicks::Now()),
215       finished_(true),
216       mhtml_finishing_(false),
217       user_canceled_(false),
218       disk_error_occurred_(false),
219       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
220       all_save_items_count_(0),
221       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
222       wait_state_(INITIALIZE),
223       contents_id_(0),
224       unique_id_(g_save_package_id++),
225       wrote_to_completed_file_(false),
226       wrote_to_failed_file_(false) {
227 }
228 
~SavePackage()229 SavePackage::~SavePackage() {
230   // Stop receiving saving job's updates
231   if (!finished_ && !canceled()) {
232     // Unexpected quit.
233     Cancel(true);
234   }
235 
236   // We should no longer be observing the DownloadItem at this point.
237   CHECK(!download_);
238 
239   DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
240                                    completed_count() +
241                                    in_process_count()));
242   // Free all SaveItems.
243   while (!waiting_item_queue_.empty()) {
244     // We still have some items which are waiting for start to save.
245     SaveItem* save_item = waiting_item_queue_.front();
246     waiting_item_queue_.pop();
247     delete save_item;
248   }
249 
250   STLDeleteValues(&saved_success_items_);
251   STLDeleteValues(&in_progress_items_);
252   STLDeleteValues(&saved_failed_items_);
253 
254   file_manager_ = NULL;
255 }
256 
GetUrlToBeSaved()257 GURL SavePackage::GetUrlToBeSaved() {
258   // Instead of using web_contents_.GetURL here, we use url() (which is the
259   // "real" url of the page) from the NavigationEntry because it reflects its
260   // origin rather than the displayed one (returned by GetURL) which may be
261   // different (like having "view-source:" on the front).
262   NavigationEntry* visible_entry =
263       web_contents()->GetController().GetVisibleEntry();
264   return visible_entry->GetURL();
265 }
266 
Cancel(bool user_action)267 void SavePackage::Cancel(bool user_action) {
268   if (!canceled()) {
269     if (user_action)
270       user_canceled_ = true;
271     else
272       disk_error_occurred_ = true;
273     Stop();
274   }
275   RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
276 }
277 
278 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
279 // cases, we need file_manager_ to be initialized, so we do this first.
InternalInit()280 void SavePackage::InternalInit() {
281   ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
282   if (!rdh) {
283     NOTREACHED();
284     return;
285   }
286 
287   file_manager_ = rdh->save_file_manager();
288   DCHECK(file_manager_);
289 
290   download_manager_ = static_cast<DownloadManagerImpl*>(
291       BrowserContext::GetDownloadManager(
292           web_contents()->GetBrowserContext()));
293   DCHECK(download_manager_);
294 
295   RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
296 }
297 
Init(const SavePackageDownloadCreatedCallback & download_created_callback)298 bool SavePackage::Init(
299     const SavePackageDownloadCreatedCallback& download_created_callback) {
300   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
301   // Set proper running state.
302   if (wait_state_ != INITIALIZE)
303     return false;
304 
305   wait_state_ = START_PROCESS;
306 
307   // Initialize the request context and resource dispatcher.
308   BrowserContext* browser_context = web_contents()->GetBrowserContext();
309   if (!browser_context) {
310     NOTREACHED();
311     return false;
312   }
313 
314   scoped_ptr<DownloadRequestHandleInterface> request_handle(
315       new SavePackageRequestHandle(AsWeakPtr()));
316   // The download manager keeps ownership but adds us as an observer.
317   download_manager_->CreateSavePackageDownloadItem(
318       saved_main_file_path_,
319       page_url_,
320       ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
321        "multipart/related" : "text/html"),
322       request_handle.Pass(),
323       base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
324                  download_created_callback));
325   return true;
326 }
327 
InitWithDownloadItem(const SavePackageDownloadCreatedCallback & download_created_callback,DownloadItemImpl * item)328 void SavePackage::InitWithDownloadItem(
329     const SavePackageDownloadCreatedCallback& download_created_callback,
330     DownloadItemImpl* item) {
331   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
332   DCHECK(item);
333   download_ = item;
334   download_->AddObserver(this);
335   // Confirm above didn't delete the tab out from under us.
336   if (!download_created_callback.is_null())
337     download_created_callback.Run(download_);
338 
339   // Check save type and process the save page job.
340   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
341     // Get directory
342     DCHECK(!saved_main_directory_path_.empty());
343     GetAllSavableResourceLinksForCurrentPage();
344   } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
345     web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
346         &SavePackage::OnMHTMLGenerated, this));
347   } else {
348     DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
349     wait_state_ = NET_FILES;
350     SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
351         SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
352         SaveFileCreateInfo::SAVE_FILE_FROM_NET;
353     SaveItem* save_item = new SaveItem(page_url_,
354                                        Referrer(),
355                                        this,
356                                        save_source);
357     // Add this item to waiting list.
358     waiting_item_queue_.push(save_item);
359     all_save_items_count_ = 1;
360     download_->SetTotalBytes(1);
361 
362     DoSavingProcess();
363   }
364 }
365 
OnMHTMLGenerated(int64 size)366 void SavePackage::OnMHTMLGenerated(int64 size) {
367   if (size <= 0) {
368     Cancel(false);
369     return;
370   }
371   wrote_to_completed_file_ = true;
372 
373   // Hack to avoid touching download_ after user cancel.
374   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
375   // with SavePackage flow.
376   if (download_->GetState() == DownloadItem::IN_PROGRESS) {
377     download_->SetTotalBytes(size);
378     download_->DestinationUpdate(size, 0, std::string());
379     // Must call OnAllDataSaved here in order for
380     // GDataDownloadObserver::ShouldUpload() to return true.
381     // ShouldCompleteDownload() may depend on the gdata uploader to finish.
382     download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
383   }
384 
385   if (!download_manager_->GetDelegate()) {
386     Finish();
387     return;
388   }
389 
390   if (download_manager_->GetDelegate()->ShouldCompleteDownload(
391           download_, base::Bind(&SavePackage::Finish, this))) {
392     Finish();
393   }
394 }
395 
396 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
397 // restricted by NAME_MAX. The maximum allowed path looks like:
398 // '/path/to/save_dir' + '/' + NAME_MAX.
GetMaxPathLengthForDirectory(const base::FilePath & base_dir)399 uint32 SavePackage::GetMaxPathLengthForDirectory(
400     const base::FilePath& base_dir) {
401 #if defined(OS_POSIX)
402   return std::min(kMaxFilePathLength,
403                   static_cast<uint32>(base_dir.value().length()) +
404                   NAME_MAX + 1);
405 #else
406   return kMaxFilePathLength;
407 #endif
408 }
409 
410 // File name is considered being consist of pure file name, dot and file
411 // extension name. File name might has no dot and file extension, or has
412 // multiple dot inside file name. The dot, which separates the pure file
413 // name and file extension name, is last dot in the whole file name.
414 // This function is for making sure the length of specified file path is not
415 // great than the specified maximum length of file path and getting safe pure
416 // file name part if the input pure file name is too long.
417 // The parameter |dir_path| specifies directory part of the specified
418 // file path. The parameter |file_name_ext| specifies file extension
419 // name part of the specified file path (including start dot). The parameter
420 // |max_file_path_len| specifies maximum length of the specified file path.
421 // The parameter |pure_file_name| input pure file name part of the specified
422 // file path. If the length of specified file path is great than
423 // |max_file_path_len|, the |pure_file_name| will output new pure file name
424 // part for making sure the length of specified file path is less than
425 // specified maximum length of file path. Return false if the function can
426 // not get a safe pure file name, otherwise it returns true.
GetSafePureFileName(const base::FilePath & dir_path,const base::FilePath::StringType & file_name_ext,uint32 max_file_path_len,base::FilePath::StringType * pure_file_name)427 bool SavePackage::GetSafePureFileName(
428     const base::FilePath& dir_path,
429     const base::FilePath::StringType& file_name_ext,
430     uint32 max_file_path_len,
431     base::FilePath::StringType* pure_file_name) {
432   DCHECK(!pure_file_name->empty());
433   int available_length = static_cast<int>(max_file_path_len -
434                                           dir_path.value().length() -
435                                           file_name_ext.length());
436   // Need an extra space for the separator.
437   if (!dir_path.EndsWithSeparator())
438     --available_length;
439 
440   // Plenty of room.
441   if (static_cast<int>(pure_file_name->length()) <= available_length)
442     return true;
443 
444   // Limited room. Truncate |pure_file_name| to fit.
445   if (available_length > 0) {
446     *pure_file_name = pure_file_name->substr(0, available_length);
447     return true;
448   }
449 
450   // Not enough room to even use a shortened |pure_file_name|.
451   pure_file_name->clear();
452   return false;
453 }
454 
455 // Generate name for saving resource.
GenerateFileName(const std::string & disposition,const GURL & url,bool need_html_ext,base::FilePath::StringType * generated_name)456 bool SavePackage::GenerateFileName(const std::string& disposition,
457                                    const GURL& url,
458                                    bool need_html_ext,
459                                    base::FilePath::StringType* generated_name) {
460   // TODO(jungshik): Figure out the referrer charset when having one
461   // makes sense and pass it to GenerateFileName.
462   base::FilePath file_path = net::GenerateFileName(url,
463                                                    disposition,
464                                                    std::string(),
465                                                    std::string(),
466                                                    std::string(),
467                                                    kDefaultSaveName);
468 
469   DCHECK(!file_path.empty());
470   base::FilePath::StringType pure_file_name =
471       file_path.RemoveExtension().BaseName().value();
472   base::FilePath::StringType file_name_ext = file_path.Extension();
473 
474   // If it is HTML resource, use ".htm{l,}" as its extension.
475   if (need_html_ext) {
476     file_name_ext = FILE_PATH_LITERAL(".");
477     file_name_ext.append(kDefaultHtmlExtension);
478   }
479 
480   // Need to make sure the suggested file name is not too long.
481   uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
482 
483   // Get safe pure file name.
484   if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
485                            max_path, &pure_file_name))
486     return false;
487 
488   base::FilePath::StringType file_name = pure_file_name + file_name_ext;
489 
490   // Check whether we already have same name in a case insensitive manner.
491   FileNameSet::const_iterator iter = file_name_set_.find(file_name);
492   if (iter == file_name_set_.end()) {
493     file_name_set_.insert(file_name);
494   } else {
495     // Found same name, increase the ordinal number for the file name.
496     pure_file_name =
497         base::FilePath(*iter).RemoveExtension().BaseName().value();
498     base::FilePath::StringType base_file_name =
499         StripOrdinalNumber(pure_file_name);
500 
501     // We need to make sure the length of base file name plus maximum ordinal
502     // number path will be less than or equal to kMaxFilePathLength.
503     if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
504         max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
505       return false;
506 
507     // Prepare the new ordinal number.
508     uint32 ordinal_number;
509     FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
510     if (it == file_name_count_map_.end()) {
511       // First base-name-conflict resolving, use 1 as initial ordinal number.
512       file_name_count_map_[base_file_name] = 1;
513       ordinal_number = 1;
514     } else {
515       // We have met same base-name conflict, use latest ordinal number.
516       ordinal_number = it->second;
517     }
518 
519     if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
520       // Use a random file from temporary file.
521       base::FilePath temp_file;
522       base::CreateTemporaryFile(&temp_file);
523       file_name = temp_file.RemoveExtension().BaseName().value();
524       // Get safe pure file name.
525       if (!GetSafePureFileName(saved_main_directory_path_,
526                                base::FilePath::StringType(),
527                                max_path, &file_name))
528         return false;
529     } else {
530       for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
531         base::FilePath::StringType new_name = base_file_name +
532             base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
533         if (file_name_set_.find(new_name) == file_name_set_.end()) {
534           // Resolved name conflict.
535           file_name = new_name;
536           file_name_count_map_[base_file_name] = ++i;
537           break;
538         }
539       }
540     }
541 
542     file_name_set_.insert(file_name);
543   }
544 
545   DCHECK(!file_name.empty());
546   generated_name->assign(file_name);
547 
548   return true;
549 }
550 
551 // We have received a message from SaveFileManager about a new saving job. We
552 // create a SaveItem and store it in our in_progress list.
StartSave(const SaveFileCreateInfo * info)553 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
554   DCHECK(info && !info->url.is_empty());
555 
556   SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
557   if (it == in_progress_items_.end()) {
558     // If not found, we must have cancel action.
559     DCHECK(canceled());
560     return;
561   }
562   SaveItem* save_item = it->second;
563 
564   DCHECK(!saved_main_file_path_.empty());
565 
566   save_item->SetSaveId(info->save_id);
567   save_item->SetTotalBytes(info->total_bytes);
568 
569   // Determine the proper path for a saving job, by choosing either the default
570   // save directory, or prompting the user.
571   DCHECK(!save_item->has_final_name());
572   if (info->url != page_url_) {
573     base::FilePath::StringType generated_name;
574     // For HTML resource file, make sure it will have .htm as extension name,
575     // otherwise, when you open the saved page in Chrome again, download
576     // file manager will treat it as downloadable resource, and download it
577     // instead of opening it as HTML.
578     bool need_html_ext =
579         info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
580     if (!GenerateFileName(info->content_disposition,
581                           GURL(info->url),
582                           need_html_ext,
583                           &generated_name)) {
584       // We can not generate file name for this SaveItem, so we cancel the
585       // saving page job if the save source is from serialized DOM data.
586       // Otherwise, it means this SaveItem is sub-resource type, we treat it
587       // as an error happened on saving. We can ignore this type error for
588       // sub-resource links which will be resolved as absolute links instead
589       // of local links in final saved contents.
590       if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
591         Cancel(true);
592       else
593         SaveFinished(save_item->save_id(), 0, false);
594       return;
595     }
596 
597     // When saving page as only-HTML, we only have a SaveItem whose url
598     // must be page_url_.
599     DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
600     DCHECK(!saved_main_directory_path_.empty());
601 
602     // Now we get final name retrieved from GenerateFileName, we will use it
603     // rename the SaveItem.
604     base::FilePath final_name =
605         saved_main_directory_path_.Append(generated_name);
606     save_item->Rename(final_name);
607   } else {
608     // It is the main HTML file, use the name chosen by the user.
609     save_item->Rename(saved_main_file_path_);
610   }
611 
612   // If the save source is from file system, inform SaveFileManager to copy
613   // corresponding file to the file path which this SaveItem specifies.
614   if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
615     BrowserThread::PostTask(
616         BrowserThread::FILE, FROM_HERE,
617         base::Bind(&SaveFileManager::SaveLocalFile,
618                    file_manager_,
619                    save_item->url(),
620                    save_item->save_id(),
621                    contents_id()));
622     return;
623   }
624 
625   // Check whether we begin to require serialized HTML data.
626   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
627       wait_state_ == HTML_DATA) {
628     // Inform backend to serialize the all frames' DOM and send serialized
629     // HTML data back.
630     GetSerializedHtmlDataForCurrentPageWithLocalLinks();
631   }
632 }
633 
LookupItemInProcessBySaveId(int32 save_id)634 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
635   if (in_process_count()) {
636     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
637         it != in_progress_items_.end(); ++it) {
638       SaveItem* save_item = it->second;
639       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
640       if (save_item->save_id() == save_id)
641         return save_item;
642     }
643   }
644   return NULL;
645 }
646 
PutInProgressItemToSavedMap(SaveItem * save_item)647 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
648   SaveUrlItemMap::iterator it = in_progress_items_.find(
649       save_item->url().spec());
650   DCHECK(it != in_progress_items_.end());
651   DCHECK(save_item == it->second);
652   in_progress_items_.erase(it);
653 
654   if (save_item->success()) {
655     // Add it to saved_success_items_.
656     DCHECK(saved_success_items_.find(save_item->save_id()) ==
657            saved_success_items_.end());
658     saved_success_items_[save_item->save_id()] = save_item;
659   } else {
660     // Add it to saved_failed_items_.
661     DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
662            saved_failed_items_.end());
663     saved_failed_items_[save_item->url().spec()] = save_item;
664   }
665 }
666 
667 // Called for updating saving state.
UpdateSaveProgress(int32 save_id,int64 size,bool write_success)668 bool SavePackage::UpdateSaveProgress(int32 save_id,
669                                      int64 size,
670                                      bool write_success) {
671   // Because we might have canceled this saving job before,
672   // so we might not find corresponding SaveItem.
673   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
674   if (!save_item)
675     return false;
676 
677   save_item->Update(size);
678 
679   // If we got disk error, cancel whole save page job.
680   if (!write_success) {
681     // Cancel job with reason of disk error.
682     Cancel(false);
683   }
684   return true;
685 }
686 
687 // Stop all page saving jobs that are in progress and instruct the file thread
688 // to delete all saved  files.
Stop()689 void SavePackage::Stop() {
690   // If we haven't moved out of the initial state, there's nothing to cancel and
691   // there won't be valid pointers for file_manager_ or download_.
692   if (wait_state_ == INITIALIZE)
693     return;
694 
695   // When stopping, if it still has some items in in_progress, cancel them.
696   DCHECK(canceled());
697   if (in_process_count()) {
698     SaveUrlItemMap::iterator it = in_progress_items_.begin();
699     for (; it != in_progress_items_.end(); ++it) {
700       SaveItem* save_item = it->second;
701       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
702       save_item->Cancel();
703     }
704     // Remove all in progress item to saved map. For failed items, they will
705     // be put into saved_failed_items_, for successful item, they will be put
706     // into saved_success_items_.
707     while (in_process_count())
708       PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
709   }
710 
711   // This vector contains the save ids of the save files which SaveFileManager
712   // needs to remove from its save_file_map_.
713   SaveIDList save_ids;
714   for (SavedItemMap::iterator it = saved_success_items_.begin();
715       it != saved_success_items_.end(); ++it)
716     save_ids.push_back(it->first);
717   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
718       it != saved_failed_items_.end(); ++it)
719     save_ids.push_back(it->second->save_id());
720 
721   BrowserThread::PostTask(
722       BrowserThread::FILE, FROM_HERE,
723       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
724                  file_manager_,
725                  save_ids));
726 
727   finished_ = true;
728   wait_state_ = FAILED;
729 
730   // Inform the DownloadItem we have canceled whole save page job.
731   if (download_) {
732     download_->Cancel(false);
733     FinalizeDownloadEntry();
734   }
735 }
736 
CheckFinish()737 void SavePackage::CheckFinish() {
738   if (in_process_count() || finished_)
739     return;
740 
741   base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
742                         saved_success_items_.size() > 1) ?
743                         saved_main_directory_path_ : base::FilePath();
744 
745   // This vector contains the final names of all the successfully saved files
746   // along with their save ids. It will be passed to SaveFileManager to do the
747   // renaming job.
748   FinalNameList final_names;
749   for (SavedItemMap::iterator it = saved_success_items_.begin();
750       it != saved_success_items_.end(); ++it)
751     final_names.push_back(std::make_pair(it->first,
752                                          it->second->full_path()));
753 
754   BrowserThread::PostTask(
755       BrowserThread::FILE, FROM_HERE,
756       base::Bind(&SaveFileManager::RenameAllFiles,
757                  file_manager_,
758                  final_names,
759                  dir,
760                  web_contents()->GetRenderProcessHost()->GetID(),
761                  web_contents()->GetRenderViewHost()->GetRoutingID(),
762                  id()));
763 }
764 
765 // Successfully finished all items of this SavePackage.
Finish()766 void SavePackage::Finish() {
767   // User may cancel the job when we're moving files to the final directory.
768   if (canceled())
769     return;
770 
771   wait_state_ = SUCCESSFUL;
772   finished_ = true;
773 
774   // Record finish.
775   RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
776 
777   // Record any errors that occurred.
778   if (wrote_to_completed_file_) {
779     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
780   }
781 
782   if (wrote_to_failed_file_) {
783     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
784   }
785 
786   // This vector contains the save ids of the save files which SaveFileManager
787   // needs to remove from its save_file_map_.
788   SaveIDList save_ids;
789   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
790        it != saved_failed_items_.end(); ++it)
791     save_ids.push_back(it->second->save_id());
792 
793   BrowserThread::PostTask(
794       BrowserThread::FILE, FROM_HERE,
795       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
796                  file_manager_,
797                  save_ids));
798 
799   if (download_) {
800     // Hack to avoid touching download_ after user cancel.
801     // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
802     // with SavePackage flow.
803     if (download_->GetState() == DownloadItem::IN_PROGRESS) {
804       if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
805         download_->DestinationUpdate(
806             all_save_items_count_, CurrentSpeed(), std::string());
807         download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
808       }
809       download_->MarkAsComplete();
810     }
811     FinalizeDownloadEntry();
812   }
813 }
814 
815 // Called for updating end state.
SaveFinished(int32 save_id,int64 size,bool is_success)816 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
817   // Because we might have canceled this saving job before,
818   // so we might not find corresponding SaveItem. Just ignore it.
819   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
820   if (!save_item)
821     return;
822 
823   // Let SaveItem set end state.
824   save_item->Finish(size, is_success);
825   // Remove the associated save id and SavePackage.
826   file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
827 
828   PutInProgressItemToSavedMap(save_item);
829 
830   // Inform the DownloadItem to update UI.
831   // We use the received bytes as number of saved files.
832   // Hack to avoid touching download_ after user cancel.
833   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
834   // with SavePackage flow.
835   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
836     download_->DestinationUpdate(
837         completed_count(), CurrentSpeed(), std::string());
838   }
839 
840   if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
841       save_item->url() == page_url_ && !save_item->received_bytes()) {
842     // If size of main HTML page is 0, treat it as disk error.
843     Cancel(false);
844     return;
845   }
846 
847   if (canceled()) {
848     DCHECK(finished_);
849     return;
850   }
851 
852   // Continue processing the save page job.
853   DoSavingProcess();
854 
855   // Check whether we can successfully finish whole job.
856   CheckFinish();
857 }
858 
859 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
860 // save id -1 when it encounters error. Since in this case, save id will be
861 // -1, so we can only use URL to find which SaveItem is associated with
862 // this error.
863 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
864 // from serializing HTML data, then cancel saving page.
SaveFailed(const GURL & save_url)865 void SavePackage::SaveFailed(const GURL& save_url) {
866   SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
867   if (it == in_progress_items_.end()) {
868     NOTREACHED();  // Should not exist!
869     return;
870   }
871   SaveItem* save_item = it->second;
872 
873   save_item->Finish(0, false);
874 
875   PutInProgressItemToSavedMap(save_item);
876 
877   // Inform the DownloadItem to update UI.
878   // We use the received bytes as number of saved files.
879   // Hack to avoid touching download_ after user cancel.
880   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
881   // with SavePackage flow.
882   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
883     download_->DestinationUpdate(
884         completed_count(), CurrentSpeed(), std::string());
885   }
886 
887   if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
888       (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
889       (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
890     // We got error when saving page. Treat it as disk error.
891     Cancel(true);
892   }
893 
894   if (canceled()) {
895     DCHECK(finished_);
896     return;
897   }
898 
899   // Continue processing the save page job.
900   DoSavingProcess();
901 
902   CheckFinish();
903 }
904 
SaveCanceled(SaveItem * save_item)905 void SavePackage::SaveCanceled(SaveItem* save_item) {
906   // Call the RemoveSaveFile in UI thread.
907   file_manager_->RemoveSaveFile(save_item->save_id(),
908                                 save_item->url(),
909                                 this);
910   if (save_item->save_id() != -1)
911     BrowserThread::PostTask(
912         BrowserThread::FILE, FROM_HERE,
913         base::Bind(&SaveFileManager::CancelSave,
914                    file_manager_,
915                    save_item->save_id()));
916 }
917 
918 // Initiate a saving job of a specific URL. We send the request to
919 // SaveFileManager, which will dispatch it to different approach according to
920 // the save source. Parameter process_all_remaining_items indicates whether
921 // we need to save all remaining items.
SaveNextFile(bool process_all_remaining_items)922 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
923   DCHECK(web_contents());
924   DCHECK(waiting_item_queue_.size());
925 
926   do {
927     // Pop SaveItem from waiting list.
928     SaveItem* save_item = waiting_item_queue_.front();
929     waiting_item_queue_.pop();
930 
931     // Add the item to in_progress_items_.
932     SaveUrlItemMap::iterator it = in_progress_items_.find(
933         save_item->url().spec());
934     DCHECK(it == in_progress_items_.end());
935     in_progress_items_[save_item->url().spec()] = save_item;
936     save_item->Start();
937     file_manager_->SaveURL(save_item->url(),
938                            save_item->referrer(),
939                            web_contents()->GetRenderProcessHost()->GetID(),
940                            routing_id(),
941                            save_item->save_source(),
942                            save_item->full_path(),
943                            web_contents()->
944                                GetBrowserContext()->GetResourceContext(),
945                            this);
946   } while (process_all_remaining_items && waiting_item_queue_.size());
947 }
948 
949 // Calculate the percentage of whole save page job.
PercentComplete()950 int SavePackage::PercentComplete() {
951   if (!all_save_items_count_)
952     return 0;
953   else if (!in_process_count())
954     return 100;
955   else
956     return completed_count() / all_save_items_count_;
957 }
958 
CurrentSpeed() const959 int64 SavePackage::CurrentSpeed() const {
960   base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
961   int64 diff_ms = diff.InMilliseconds();
962   return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
963 }
964 
965 // Continue processing the save page job after one SaveItem has been
966 // finished.
DoSavingProcess()967 void SavePackage::DoSavingProcess() {
968   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
969     // We guarantee that images and JavaScripts must be downloaded first.
970     // So when finishing all those sub-resources, we will know which
971     // sub-resource's link can be replaced with local file path, which
972     // sub-resource's link need to be replaced with absolute URL which
973     // point to its internet address because it got error when saving its data.
974 
975     // Start a new SaveItem job if we still have job in waiting queue.
976     if (waiting_item_queue_.size()) {
977       DCHECK(wait_state_ == NET_FILES);
978       SaveItem* save_item = waiting_item_queue_.front();
979       if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
980         SaveNextFile(false);
981       } else if (!in_process_count()) {
982         // If there is no in-process SaveItem, it means all sub-resources
983         // have been processed. Now we need to start serializing HTML DOM
984         // for the current page to get the generated HTML data.
985         wait_state_ = HTML_DATA;
986         // All non-HTML resources have been finished, start all remaining
987         // HTML files.
988         SaveNextFile(true);
989       }
990     } else if (in_process_count()) {
991       // Continue asking for HTML data.
992       DCHECK(wait_state_ == HTML_DATA);
993     }
994   } else {
995     // Save as HTML only or MHTML.
996     DCHECK(wait_state_ == NET_FILES);
997     DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
998            (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
999     if (waiting_item_queue_.size()) {
1000       DCHECK(all_save_items_count_ == waiting_item_queue_.size());
1001       SaveNextFile(false);
1002     }
1003   }
1004 }
1005 
// Dispatches IPC messages addressed to this SavePackage.
// ViewHostMsg_SendCurrentPageAllSavableResourceLinks is routed to
// OnReceivedSavableResourceLinksForCurrentPage() and
// ViewHostMsg_SendSerializedHtmlData to OnReceivedSerializedHtmlData().
// Returns true when |message| matched one of those handlers and false for
// any other message.
bool SavePackage::OnMessageReceived(const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
    IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
                        OnReceivedSavableResourceLinksForCurrentPage)
    IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
                        OnReceivedSerializedHtmlData)
    // Anything else is not ours; report it as unhandled.
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}
1017 
1018 // After finishing all SaveItems which need to get data from net.
1019 // We collect all URLs which have local storage and send the
1020 // map:(originalURL:currentLocalPath) to render process (backend).
1021 // Then render process will serialize DOM and send data to us.
GetSerializedHtmlDataForCurrentPageWithLocalLinks()1022 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
1023   if (wait_state_ != HTML_DATA)
1024     return;
1025   std::vector<GURL> saved_links;
1026   std::vector<base::FilePath> saved_file_paths;
1027   int successful_started_items_count = 0;
1028 
1029   // Collect all saved items which have local storage.
1030   // First collect the status of all the resource files and check whether they
1031   // have created local files although they have not been completely saved.
1032   // If yes, the file can be saved. Otherwise, there is a disk error, so we
1033   // need to cancel the page saving job.
1034   for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1035        it != in_progress_items_.end(); ++it) {
1036     DCHECK(it->second->save_source() ==
1037            SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1038     if (it->second->has_final_name())
1039       successful_started_items_count++;
1040     saved_links.push_back(it->second->url());
1041     saved_file_paths.push_back(it->second->file_name());
1042   }
1043 
1044   // If not all file of HTML resource have been started, then wait.
1045   if (successful_started_items_count != in_process_count())
1046     return;
1047 
1048   // Collect all saved success items.
1049   for (SavedItemMap::iterator it = saved_success_items_.begin();
1050        it != saved_success_items_.end(); ++it) {
1051     DCHECK(it->second->has_final_name());
1052     saved_links.push_back(it->second->url());
1053     saved_file_paths.push_back(it->second->file_name());
1054   }
1055 
1056   // Get the relative directory name.
1057   base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1058 
1059   Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
1060       routing_id(), saved_links, saved_file_paths, relative_dir_name));
1061 }
1062 
1063 // Process the serialized HTML content data of a specified web page
1064 // retrieved from render process.
OnReceivedSerializedHtmlData(const GURL & frame_url,const std::string & data,int32 status)1065 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
1066                                                const std::string& data,
1067                                                int32 status) {
1068   WebPageSerializerClient::PageSerializationStatus flag =
1069       static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1070   // Check current state.
1071   if (wait_state_ != HTML_DATA)
1072     return;
1073 
1074   int id = contents_id();
1075   // If the all frames are finished saving, we need to close the
1076   // remaining SaveItems.
1077   if (flag == WebPageSerializerClient::AllFramesAreFinished) {
1078     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1079          it != in_progress_items_.end(); ++it) {
1080       VLOG(20) << " " << __FUNCTION__ << "()"
1081                << " save_id = " << it->second->save_id()
1082                << " url = \"" << it->second->url().spec() << "\"";
1083       BrowserThread::PostTask(
1084           BrowserThread::FILE, FROM_HERE,
1085           base::Bind(&SaveFileManager::SaveFinished,
1086                      file_manager_,
1087                      it->second->save_id(),
1088                      it->second->url(),
1089                      id,
1090                      true));
1091     }
1092     return;
1093   }
1094 
1095   SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
1096   if (it == in_progress_items_.end()) {
1097     for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1098       saved_it != saved_success_items_.end(); ++saved_it) {
1099       if (saved_it->second->url() == frame_url) {
1100         wrote_to_completed_file_ = true;
1101         break;
1102       }
1103     }
1104 
1105     it = saved_failed_items_.find(frame_url.spec());
1106     if (it != saved_failed_items_.end())
1107       wrote_to_failed_file_ = true;
1108 
1109     return;
1110   }
1111 
1112   SaveItem* save_item = it->second;
1113   DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1114 
1115   if (!data.empty()) {
1116     // Prepare buffer for saving HTML data.
1117     scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1118     memcpy(new_data->data(), data.data(), data.size());
1119 
1120     // Call write file functionality in file thread.
1121     BrowserThread::PostTask(
1122         BrowserThread::FILE, FROM_HERE,
1123         base::Bind(&SaveFileManager::UpdateSaveProgress,
1124                    file_manager_,
1125                    save_item->save_id(),
1126                    new_data,
1127                    static_cast<int>(data.size())));
1128   }
1129 
1130   // Current frame is completed saving, call finish in file thread.
1131   if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
1132     VLOG(20) << " " << __FUNCTION__ << "()"
1133              << " save_id = " << save_item->save_id()
1134              << " url = \"" << save_item->url().spec() << "\"";
1135     BrowserThread::PostTask(
1136         BrowserThread::FILE, FROM_HERE,
1137         base::Bind(&SaveFileManager::SaveFinished,
1138                    file_manager_,
1139                    save_item->save_id(),
1140                    save_item->url(),
1141                    id,
1142                    true));
1143   }
1144 }
1145 
1146 // Ask for all savable resource links from backend, include main frame and
1147 // sub-frame.
GetAllSavableResourceLinksForCurrentPage()1148 void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
1149   if (wait_state_ != START_PROCESS)
1150     return;
1151 
1152   wait_state_ = RESOURCES_LIST;
1153   Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
1154                                                             page_url_));
1155 }
1156 
1157 // Give backend the lists which contain all resource links that have local
1158 // storage, after which, render process will serialize DOM for generating
1159 // HTML data.
OnReceivedSavableResourceLinksForCurrentPage(const std::vector<GURL> & resources_list,const std::vector<Referrer> & referrers_list,const std::vector<GURL> & frames_list)1160 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
1161     const std::vector<GURL>& resources_list,
1162     const std::vector<Referrer>& referrers_list,
1163     const std::vector<GURL>& frames_list) {
1164   if (wait_state_ != RESOURCES_LIST)
1165     return;
1166 
1167   if (resources_list.size() != referrers_list.size())
1168     return;
1169 
1170   all_save_items_count_ = static_cast<int>(resources_list.size()) +
1171                            static_cast<int>(frames_list.size());
1172 
1173   // We use total bytes as the total number of files we want to save.
1174   // Hack to avoid touching download_ after user cancel.
1175   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1176   // with SavePackage flow.
1177   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1178     download_->SetTotalBytes(all_save_items_count_);
1179 
1180   if (all_save_items_count_) {
1181     // Put all sub-resources to wait list.
1182     for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1183       const GURL& u = resources_list[i];
1184       DCHECK(u.is_valid());
1185       SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
1186           SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
1187           SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1188       SaveItem* save_item = new SaveItem(u, referrers_list[i],
1189                                          this, save_source);
1190       waiting_item_queue_.push(save_item);
1191     }
1192     // Put all HTML resources to wait list.
1193     for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
1194       const GURL& u = frames_list[i];
1195       DCHECK(u.is_valid());
1196       SaveItem* save_item = new SaveItem(
1197           u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1198       waiting_item_queue_.push(save_item);
1199     }
1200     wait_state_ = NET_FILES;
1201     DoSavingProcess();
1202   } else {
1203     // No resource files need to be saved, treat it as user cancel.
1204     Cancel(true);
1205   }
1206 }
1207 
GetSuggestedNameForSaveAs(bool can_save_as_complete,const std::string & contents_mime_type,const std::string & accept_langs)1208 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
1209     bool can_save_as_complete,
1210     const std::string& contents_mime_type,
1211     const std::string& accept_langs) {
1212   base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
1213 
1214   // If the page's title matches its URL, use the URL. Try to use the last path
1215   // component or if there is none, the domain as the file name.
1216   // Normally we want to base the filename on the page title, or if it doesn't
1217   // exist, on the URL. It's not easy to tell if the page has no title, because
1218   // if the page has no title, WebContents::GetTitle() will return the page's
1219   // URL (adjusted for display purposes). Therefore, we convert the "title"
1220   // back to a URL, and if it matches the original page URL, we know the page
1221   // had no title (or had a title equal to its URL, which is fine to treat
1222   // similarly).
1223   if (title_ == net::FormatUrl(page_url_, accept_langs)) {
1224     std::string url_path;
1225     if (!page_url_.SchemeIs(url::kDataScheme)) {
1226       std::vector<std::string> url_parts;
1227       base::SplitString(page_url_.path(), '/', &url_parts);
1228       if (!url_parts.empty()) {
1229         for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
1230           url_path = url_parts[i];
1231           if (!url_path.empty())
1232             break;
1233         }
1234       }
1235       if (url_path.empty())
1236         url_path = page_url_.host();
1237     } else {
1238       url_path = "dataurl";
1239     }
1240     name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
1241   }
1242 
1243   // Ask user for getting final saving name.
1244   name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
1245                                              contents_mime_type);
1246   // Adjust extension for complete types.
1247   if (can_save_as_complete)
1248     name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
1249 
1250   base::FilePath::StringType file_name = name_with_proper_ext.value();
1251   file_util::ReplaceIllegalCharactersInPath(&file_name, ' ');
1252   return base::FilePath(file_name);
1253 }
1254 
// Returns |name| unchanged when its extension maps to a MIME type that can be
// saved as a complete page; otherwise returns |name| with "." plus
// kDefaultHtmlExtension appended.
base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
  // Extension() includes the leading '.', which the MIME lookup does not
  // want.
  base::FilePath::StringType bare_ext = name.Extension();
  if (!bare_ext.empty())
    bare_ext.erase(bare_ext.begin());

  std::string ext_mime_type;
  if (net::GetMimeTypeFromExtension(bare_ext, &ext_mime_type) &&
      CanSaveAsComplete(ext_mime_type)) {
    return name;
  }

  return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
                        kDefaultHtmlExtension);
}
1269 
// Appends the extension suggested for |contents_mime_type| to |name| when
// such a suggestion exists and the current extension of |name| does not map
// to any known MIME type. Otherwise |name| is returned unchanged.
base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
    const std::string& contents_mime_type) {
  // Drop the leading period from the current extension, if there is one.
  base::FilePath::StringType current_ext = name.Extension();
  if (current_ext.length())
    current_ext = current_ext.substr(1);

  const base::FilePath::StringType wanted_ext =
      ExtensionForMimeType(contents_mime_type);
  if (wanted_ext.empty())
    return name;

  std::string current_mime_type;
  if (net::GetMimeTypeFromExtension(current_ext, &current_mime_type))
    return name;

  // Extension is absent or needs to be updated.
  return base::FilePath(name.value() + FILE_PATH_LITERAL(".") + wanted_ext);
}
1286 
ExtensionForMimeType(const std::string & contents_mime_type)1287 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
1288     const std::string& contents_mime_type) {
1289   static const struct {
1290     const base::FilePath::CharType *mime_type;
1291     const base::FilePath::CharType *suggested_extension;
1292   } extensions[] = {
1293     { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
1294     { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
1295     { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
1296     { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
1297     { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
1298   };
1299 #if defined(OS_POSIX)
1300   base::FilePath::StringType mime_type(contents_mime_type);
1301 #elif defined(OS_WIN)
1302   base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
1303 #endif  // OS_WIN
1304   for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) {
1305     if (mime_type == extensions[i].mime_type)
1306       return extensions[i].suggested_extension;
1307   }
1308   return FILE_PATH_LITERAL("");
1309 }
1310 
// Returns the WebContents tracked by the WebContentsObserver base class by
// forwarding to WebContentsObserver::web_contents(). Callers in this file
// (see ContinueGetSaveInfo) treat a NULL result as the WebContents having
// disappeared.
WebContents* SavePackage::web_contents() const {
  return WebContentsObserver::web_contents();
}
1314 
GetSaveInfo()1315 void SavePackage::GetSaveInfo() {
1316   // Can't use web_contents_ in the file thread, so get the data that we need
1317   // before calling to it.
1318   base::FilePath website_save_dir, download_save_dir;
1319   bool skip_dir_check = false;
1320   DCHECK(download_manager_);
1321   if (download_manager_->GetDelegate()) {
1322     download_manager_->GetDelegate()->GetSaveDir(
1323         web_contents()->GetBrowserContext(), &website_save_dir,
1324         &download_save_dir, &skip_dir_check);
1325   }
1326   std::string mime_type = web_contents()->GetContentsMimeType();
1327   std::string accept_languages =
1328       GetContentClient()->browser()->GetAcceptLangs(
1329           web_contents()->GetBrowserContext());
1330 
1331   BrowserThread::PostTask(
1332       BrowserThread::FILE, FROM_HERE,
1333       base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
1334           website_save_dir, download_save_dir, skip_dir_check,
1335           mime_type, accept_languages));
1336 }
1337 
CreateDirectoryOnFileThread(const base::FilePath & website_save_dir,const base::FilePath & download_save_dir,bool skip_dir_check,const std::string & mime_type,const std::string & accept_langs)1338 void SavePackage::CreateDirectoryOnFileThread(
1339     const base::FilePath& website_save_dir,
1340     const base::FilePath& download_save_dir,
1341     bool skip_dir_check,
1342     const std::string& mime_type,
1343     const std::string& accept_langs) {
1344   base::FilePath save_dir;
1345   // If the default html/websites save folder doesn't exist...
1346   // We skip the directory check for gdata directories on ChromeOS.
1347   if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
1348     // If the default download dir doesn't exist, create it.
1349     if (!base::DirectoryExists(download_save_dir)) {
1350       bool res = base::CreateDirectory(download_save_dir);
1351       DCHECK(res);
1352     }
1353     save_dir = download_save_dir;
1354   } else {
1355     // If it does exist, use the default save dir param.
1356     save_dir = website_save_dir;
1357   }
1358 
1359   bool can_save_as_complete = CanSaveAsComplete(mime_type);
1360   base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
1361       can_save_as_complete, mime_type, accept_langs);
1362   base::FilePath::StringType pure_file_name =
1363       suggested_filename.RemoveExtension().BaseName().value();
1364   base::FilePath::StringType file_name_ext = suggested_filename.Extension();
1365 
1366   // Need to make sure the suggested file name is not too long.
1367   uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
1368 
1369   if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
1370     save_dir = save_dir.Append(pure_file_name + file_name_ext);
1371   } else {
1372     // Cannot create a shorter filename. This will cause the save as operation
1373     // to fail unless the user pick a shorter name. Continuing even though it
1374     // will fail because returning means no save as popup for the user, which
1375     // is even more confusing. This case should be rare though.
1376     save_dir = save_dir.Append(suggested_filename);
1377   }
1378 
1379   BrowserThread::PostTask(
1380       BrowserThread::UI, FROM_HERE,
1381       base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
1382                  can_save_as_complete));
1383 }
1384 
ContinueGetSaveInfo(const base::FilePath & suggested_path,bool can_save_as_complete)1385 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
1386                                       bool can_save_as_complete) {
1387 
1388   // The WebContents which owns this SavePackage may have disappeared during
1389   // the UI->FILE->UI thread hop of
1390   // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
1391   if (!web_contents() || !download_manager_->GetDelegate())
1392     return;
1393 
1394   base::FilePath::StringType default_extension;
1395   if (can_save_as_complete)
1396     default_extension = kDefaultHtmlExtension;
1397 
1398   download_manager_->GetDelegate()->ChooseSavePath(
1399       web_contents(),
1400       suggested_path,
1401       default_extension,
1402       can_save_as_complete,
1403       base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
1404 }
1405 
OnPathPicked(const base::FilePath & final_name,SavePageType type,const SavePackageDownloadCreatedCallback & download_created_callback)1406 void SavePackage::OnPathPicked(
1407     const base::FilePath& final_name,
1408     SavePageType type,
1409     const SavePackageDownloadCreatedCallback& download_created_callback) {
1410   DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
1411          (type == SAVE_PAGE_TYPE_AS_MHTML) ||
1412          (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
1413   // Ensure the filename is safe.
1414   saved_main_file_path_ = final_name;
1415   // TODO(asanka): This call may block on IO and shouldn't be made
1416   // from the UI thread.  See http://crbug.com/61827.
1417   net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
1418                             &saved_main_file_path_);
1419 
1420   saved_main_directory_path_ = saved_main_file_path_.DirName();
1421   save_type_ = type;
1422   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
1423     // Make new directory for saving complete file.
1424     saved_main_directory_path_ = saved_main_directory_path_.Append(
1425         saved_main_file_path_.RemoveExtension().BaseName().value() +
1426         FILE_PATH_LITERAL("_files"));
1427   }
1428 
1429   Init(download_created_callback);
1430 }
1431 
// Stops observing the DownloadItem and forgets both the item and the download
// manager. Both pointers are expected to be set when this is called.
void SavePackage::StopObservation() {
  DCHECK(download_);
  DCHECK(download_manager_);

  download_->RemoveObserver(this);
  // Clear both pointers so later code (which checks download_ before use)
  // no longer touches them.
  download_ = NULL;
  download_manager_ = NULL;
}
1440 
// Observer callback for the destruction of |download|. The argument itself is
// not used; the package just drops its references via StopObservation().
void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
  StopObservation();
}
1444 
// Hands the DownloadItem back to the download manager via
// OnSavePackageSuccessfullyFinished() and then stops observing it.
// Both download_ and download_manager_ are expected to be set.
// NOTE(review): this is called from both Finish() and the cancel path in
// Stop(), so despite the manager method's name it also runs for canceled
// packages. Confirm that is intended.
void SavePackage::FinalizeDownloadEntry() {
  DCHECK(download_);
  DCHECK(download_manager_);

  download_manager_->OnSavePackageSuccessfullyFinished(download_);
  // Clears download_ and download_manager_.
  StopObservation();
}
1452 
1453 }  // namespace content
1454