• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/disk_cache/simple/simple_index_file.h"
6 
7 #include <vector>
8 
9 #include "base/files/file_util.h"
10 #include "base/files/memory_mapped_file.h"
11 #include "base/hash.h"
12 #include "base/logging.h"
13 #include "base/pickle.h"
14 #include "base/single_thread_task_runner.h"
15 #include "base/task_runner_util.h"
16 #include "base/threading/thread_restrictions.h"
17 #include "net/disk_cache/simple/simple_backend_version.h"
18 #include "net/disk_cache/simple/simple_entry_format.h"
19 #include "net/disk_cache/simple/simple_histogram_macros.h"
20 #include "net/disk_cache/simple/simple_index.h"
21 #include "net/disk_cache/simple/simple_synchronous_entry.h"
22 #include "net/disk_cache/simple/simple_util.h"
23 #include "third_party/zlib/zlib.h"
24 
25 namespace disk_cache {
26 namespace {
27 
28 const int kEntryFilesHashLength = 16;
29 const int kEntryFilesSuffixLength = 2;
30 
31 const uint64 kMaxEntiresInIndex = 100000000;
32 
CalculatePickleCRC(const Pickle & pickle)33 uint32 CalculatePickleCRC(const Pickle& pickle) {
34   return crc32(crc32(0, Z_NULL, 0),
35                reinterpret_cast<const Bytef*>(pickle.payload()),
36                pickle.payload_size());
37 }
38 
39 // Used in histograms. Please only add new values at the end.
40 enum IndexFileState {
41   INDEX_STATE_CORRUPT = 0,
42   INDEX_STATE_STALE = 1,
43   INDEX_STATE_FRESH = 2,
44   INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
45   INDEX_STATE_MAX = 4,
46 };
47 
UmaRecordIndexFileState(IndexFileState state,net::CacheType cache_type)48 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
49   SIMPLE_CACHE_UMA(ENUMERATION,
50                    "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
51 }
52 
53 // Used in histograms. Please only add new values at the end.
54 enum IndexInitMethod {
55   INITIALIZE_METHOD_RECOVERED = 0,
56   INITIALIZE_METHOD_LOADED = 1,
57   INITIALIZE_METHOD_NEWCACHE = 2,
58   INITIALIZE_METHOD_MAX = 3,
59 };
60 
UmaRecordIndexInitMethod(IndexInitMethod method,net::CacheType cache_type)61 void UmaRecordIndexInitMethod(IndexInitMethod method,
62                               net::CacheType cache_type) {
63   SIMPLE_CACHE_UMA(ENUMERATION,
64                    "IndexInitializeMethod", cache_type,
65                    method, INITIALIZE_METHOD_MAX);
66 }
67 
WritePickleFile(Pickle * pickle,const base::FilePath & file_name)68 bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) {
69   int bytes_written = base::WriteFile(
70       file_name, static_cast<const char*>(pickle->data()), pickle->size());
71   if (bytes_written != implicit_cast<int>(pickle->size())) {
72     base::DeleteFile(file_name, /* recursive = */ false);
73     return false;
74   }
75   return true;
76 }
77 
78 // Called for each cache directory traversal iteration.
ProcessEntryFile(SimpleIndex::EntrySet * entries,const base::FilePath & file_path)79 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
80                       const base::FilePath& file_path) {
81   static const size_t kEntryFilesLength =
82       kEntryFilesHashLength + kEntryFilesSuffixLength;
83   // Converting to std::string is OK since we never use UTF8 wide chars in our
84   // file names.
85   const base::FilePath::StringType base_name = file_path.BaseName().value();
86   const std::string file_name(base_name.begin(), base_name.end());
87   if (file_name.size() != kEntryFilesLength)
88     return;
89   const base::StringPiece hash_string(
90       file_name.begin(), file_name.begin() + kEntryFilesHashLength);
91   uint64 hash_key = 0;
92   if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
93     LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
94                  << " disk: " << file_name;
95     return;
96   }
97 
98   base::File::Info file_info;
99   if (!base::GetFileInfo(file_path, &file_info)) {
100     LOG(ERROR) << "Could not get file info for " << file_path.value();
101     return;
102   }
103   base::Time last_used_time;
104 #if defined(OS_POSIX)
105   // For POSIX systems, a last access time is available. However, it's not
106   // guaranteed to be more accurate than mtime. It is no worse though.
107   last_used_time = file_info.last_accessed;
108 #endif
109   if (last_used_time.is_null())
110     last_used_time = file_info.last_modified;
111 
112   int64 file_size = file_info.size;
113   SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
114   if (it == entries->end()) {
115     SimpleIndex::InsertInEntrySet(
116         hash_key,
117         EntryMetadata(last_used_time, file_size),
118         entries);
119   } else {
120     // Summing up the total size of the entry through all the *_[0-1] files
121     it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
122   }
123 }
124 
125 }  // namespace
126 
SimpleIndexLoadResult()127 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
128                                                  flush_required(false) {
129 }
130 
~SimpleIndexLoadResult()131 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
132 }
133 
Reset()134 void SimpleIndexLoadResult::Reset() {
135   did_load = false;
136   flush_required = false;
137   entries.clear();
138 }
139 
140 // static
141 const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
142 // static
143 const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
144 // static
145 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
146 
IndexMetadata()147 SimpleIndexFile::IndexMetadata::IndexMetadata()
148     : magic_number_(kSimpleIndexMagicNumber),
149       version_(kSimpleVersion),
150       number_of_entries_(0),
151       cache_size_(0) {}
152 
IndexMetadata(uint64 number_of_entries,uint64 cache_size)153 SimpleIndexFile::IndexMetadata::IndexMetadata(
154     uint64 number_of_entries, uint64 cache_size)
155     : magic_number_(kSimpleIndexMagicNumber),
156       version_(kSimpleVersion),
157       number_of_entries_(number_of_entries),
158       cache_size_(cache_size) {}
159 
Serialize(Pickle * pickle) const160 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
161   DCHECK(pickle);
162   pickle->WriteUInt64(magic_number_);
163   pickle->WriteUInt32(version_);
164   pickle->WriteUInt64(number_of_entries_);
165   pickle->WriteUInt64(cache_size_);
166 }
167 
168 // static
SerializeFinalData(base::Time cache_modified,Pickle * pickle)169 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
170                                          Pickle* pickle) {
171   if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
172     return false;
173   SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
174   header_p->crc = CalculatePickleCRC(*pickle);
175   return true;
176 }
177 
Deserialize(PickleIterator * it)178 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
179   DCHECK(it);
180   return it->ReadUInt64(&magic_number_) &&
181       it->ReadUInt32(&version_) &&
182       it->ReadUInt64(&number_of_entries_)&&
183       it->ReadUInt64(&cache_size_);
184 }
185 
SyncWriteToDisk(net::CacheType cache_type,const base::FilePath & cache_directory,const base::FilePath & index_filename,const base::FilePath & temp_index_filename,scoped_ptr<Pickle> pickle,const base::TimeTicks & start_time,bool app_on_background)186 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
187                                       const base::FilePath& cache_directory,
188                                       const base::FilePath& index_filename,
189                                       const base::FilePath& temp_index_filename,
190                                       scoped_ptr<Pickle> pickle,
191                                       const base::TimeTicks& start_time,
192                                       bool app_on_background) {
193   // There is a chance that the index containing all the necessary data about
194   // newly created entries will appear to be stale. This can happen if on-disk
195   // part of a Create operation does not fit into the time budget for the index
196   // flush delay. This simple approach will be reconsidered if it does not allow
197   // for maintaining freshness.
198   base::Time cache_dir_mtime;
199   if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
200     LOG(ERROR) << "Could obtain information about cache age";
201     return;
202   }
203   SerializeFinalData(cache_dir_mtime, pickle.get());
204   if (!WritePickleFile(pickle.get(), temp_index_filename)) {
205     if (!base::CreateDirectory(temp_index_filename.DirName())) {
206       LOG(ERROR) << "Could not create a directory to hold the index file";
207       return;
208     }
209     if (!WritePickleFile(pickle.get(), temp_index_filename)) {
210       LOG(ERROR) << "Failed to write the temporary index file";
211       return;
212     }
213   }
214 
215   // Atomically rename the temporary index file to become the real one.
216   bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL);
217   DCHECK(result);
218 
219   if (app_on_background) {
220     SIMPLE_CACHE_UMA(TIMES,
221                      "IndexWriteToDiskTime.Background", cache_type,
222                      (base::TimeTicks::Now() - start_time));
223   } else {
224     SIMPLE_CACHE_UMA(TIMES,
225                      "IndexWriteToDiskTime.Foreground", cache_type,
226                      (base::TimeTicks::Now() - start_time));
227   }
228 }
229 
CheckIndexMetadata()230 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
231   return number_of_entries_ <= kMaxEntiresInIndex &&
232       magic_number_ == kSimpleIndexMagicNumber &&
233       version_ == kSimpleVersion;
234 }
235 
SimpleIndexFile(const scoped_refptr<base::SingleThreadTaskRunner> & cache_thread,const scoped_refptr<base::TaskRunner> & worker_pool,net::CacheType cache_type,const base::FilePath & cache_directory)236 SimpleIndexFile::SimpleIndexFile(
237     const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread,
238     const scoped_refptr<base::TaskRunner>& worker_pool,
239     net::CacheType cache_type,
240     const base::FilePath& cache_directory)
241     : cache_thread_(cache_thread),
242       worker_pool_(worker_pool),
243       cache_type_(cache_type),
244       cache_directory_(cache_directory),
245       index_file_(cache_directory_.AppendASCII(kIndexDirectory)
246                       .AppendASCII(kIndexFileName)),
247       temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
248                            .AppendASCII(kTempIndexFileName)) {
249 }
250 
~SimpleIndexFile()251 SimpleIndexFile::~SimpleIndexFile() {}
252 
LoadIndexEntries(base::Time cache_last_modified,const base::Closure & callback,SimpleIndexLoadResult * out_result)253 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
254                                        const base::Closure& callback,
255                                        SimpleIndexLoadResult* out_result) {
256   base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
257                                   cache_type_,
258                                   cache_last_modified, cache_directory_,
259                                   index_file_, out_result);
260   worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
261 }
262 
WriteToDisk(const SimpleIndex::EntrySet & entry_set,uint64 cache_size,const base::TimeTicks & start,bool app_on_background)263 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
264                                   uint64 cache_size,
265                                   const base::TimeTicks& start,
266                                   bool app_on_background) {
267   IndexMetadata index_metadata(entry_set.size(), cache_size);
268   scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
269   cache_thread_->PostTask(FROM_HERE,
270                           base::Bind(&SimpleIndexFile::SyncWriteToDisk,
271                                      cache_type_,
272                                      cache_directory_,
273                                      index_file_,
274                                      temp_index_file_,
275                                      base::Passed(&pickle),
276                                      base::TimeTicks::Now(),
277                                      app_on_background));
278 }
279 
280 // static
SyncLoadIndexEntries(net::CacheType cache_type,base::Time cache_last_modified,const base::FilePath & cache_directory,const base::FilePath & index_file_path,SimpleIndexLoadResult * out_result)281 void SimpleIndexFile::SyncLoadIndexEntries(
282     net::CacheType cache_type,
283     base::Time cache_last_modified,
284     const base::FilePath& cache_directory,
285     const base::FilePath& index_file_path,
286     SimpleIndexLoadResult* out_result) {
287   // Load the index and find its age.
288   base::Time last_cache_seen_by_index;
289   SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
290 
291   // Consider the index loaded if it is fresh.
292   const bool index_file_existed = base::PathExists(index_file_path);
293   if (!out_result->did_load) {
294     if (index_file_existed)
295       UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
296   } else {
297     if (cache_last_modified <= last_cache_seen_by_index) {
298       base::Time latest_dir_mtime;
299       simple_util::GetMTime(cache_directory, &latest_dir_mtime);
300       if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
301         UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
302                                 cache_type);
303       } else {
304         UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
305       }
306       UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
307       return;
308     }
309     UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
310   }
311 
312   // Reconstruct the index by scanning the disk for entries.
313   const base::TimeTicks start = base::TimeTicks::Now();
314   SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
315   SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
316                    base::TimeTicks::Now() - start);
317   SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
318                    out_result->entries.size());
319   if (index_file_existed) {
320     UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
321   } else {
322     UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
323     SIMPLE_CACHE_UMA(COUNTS,
324                      "IndexCreatedEntryCount", cache_type,
325                      out_result->entries.size());
326   }
327 }
328 
329 // static
SyncLoadFromDisk(const base::FilePath & index_filename,base::Time * out_last_cache_seen_by_index,SimpleIndexLoadResult * out_result)330 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
331                                        base::Time* out_last_cache_seen_by_index,
332                                        SimpleIndexLoadResult* out_result) {
333   out_result->Reset();
334 
335   base::MemoryMappedFile index_file_map;
336   if (!index_file_map.Initialize(index_filename)) {
337     LOG(WARNING) << "Could not map Simple Index file.";
338     base::DeleteFile(index_filename, false);
339     return;
340   }
341 
342   SimpleIndexFile::Deserialize(
343       reinterpret_cast<const char*>(index_file_map.data()),
344       index_file_map.length(),
345       out_last_cache_seen_by_index,
346       out_result);
347 
348   if (!out_result->did_load)
349     base::DeleteFile(index_filename, false);
350 }
351 
352 // static
Serialize(const SimpleIndexFile::IndexMetadata & index_metadata,const SimpleIndex::EntrySet & entries)353 scoped_ptr<Pickle> SimpleIndexFile::Serialize(
354     const SimpleIndexFile::IndexMetadata& index_metadata,
355     const SimpleIndex::EntrySet& entries) {
356   scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
357 
358   index_metadata.Serialize(pickle.get());
359   for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
360        it != entries.end(); ++it) {
361     pickle->WriteUInt64(it->first);
362     it->second.Serialize(pickle.get());
363   }
364   return pickle.Pass();
365 }
366 
367 // static
Deserialize(const char * data,int data_len,base::Time * out_cache_last_modified,SimpleIndexLoadResult * out_result)368 void SimpleIndexFile::Deserialize(const char* data, int data_len,
369                                   base::Time* out_cache_last_modified,
370                                   SimpleIndexLoadResult* out_result) {
371   DCHECK(data);
372 
373   out_result->Reset();
374   SimpleIndex::EntrySet* entries = &out_result->entries;
375 
376   Pickle pickle(data, data_len);
377   if (!pickle.data()) {
378     LOG(WARNING) << "Corrupt Simple Index File.";
379     return;
380   }
381 
382   PickleIterator pickle_it(pickle);
383   SimpleIndexFile::PickleHeader* header_p =
384       pickle.headerT<SimpleIndexFile::PickleHeader>();
385   const uint32 crc_read = header_p->crc;
386   const uint32 crc_calculated = CalculatePickleCRC(pickle);
387 
388   if (crc_read != crc_calculated) {
389     LOG(WARNING) << "Invalid CRC in Simple Index file.";
390     return;
391   }
392 
393   SimpleIndexFile::IndexMetadata index_metadata;
394   if (!index_metadata.Deserialize(&pickle_it)) {
395     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
396     return;
397   }
398 
399   if (!index_metadata.CheckIndexMetadata()) {
400     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
401     return;
402   }
403 
404 #if !defined(OS_WIN)
405   // TODO(gavinp): Consider using std::unordered_map.
406   entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
407 #endif
408   while (entries->size() < index_metadata.GetNumberOfEntries()) {
409     uint64 hash_key;
410     EntryMetadata entry_metadata;
411     if (!pickle_it.ReadUInt64(&hash_key) ||
412         !entry_metadata.Deserialize(&pickle_it)) {
413       LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
414       entries->clear();
415       return;
416     }
417     SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
418   }
419 
420   int64 cache_last_modified;
421   if (!pickle_it.ReadInt64(&cache_last_modified)) {
422     entries->clear();
423     return;
424   }
425   DCHECK(out_cache_last_modified);
426   *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
427 
428   out_result->did_load = true;
429 }
430 
431 // static
SyncRestoreFromDisk(const base::FilePath & cache_directory,const base::FilePath & index_file_path,SimpleIndexLoadResult * out_result)432 void SimpleIndexFile::SyncRestoreFromDisk(
433     const base::FilePath& cache_directory,
434     const base::FilePath& index_file_path,
435     SimpleIndexLoadResult* out_result) {
436   VLOG(1) << "Simple Cache Index is being restored from disk.";
437   base::DeleteFile(index_file_path, /* recursive = */ false);
438   out_result->Reset();
439   SimpleIndex::EntrySet* entries = &out_result->entries;
440 
441   const bool did_succeed = TraverseCacheDirectory(
442       cache_directory, base::Bind(&ProcessEntryFile, entries));
443   if (!did_succeed) {
444     LOG(ERROR) << "Could not reconstruct index from disk";
445     return;
446   }
447   out_result->did_load = true;
448   // When we restore from disk we write the merged index file to disk right
449   // away, this might save us from having to restore again next time.
450   out_result->flush_required = true;
451 }
452 
453 // static
LegacyIsIndexFileStale(base::Time cache_last_modified,const base::FilePath & index_file_path)454 bool SimpleIndexFile::LegacyIsIndexFileStale(
455     base::Time cache_last_modified,
456     const base::FilePath& index_file_path) {
457   base::Time index_mtime;
458   if (!simple_util::GetMTime(index_file_path, &index_mtime))
459     return true;
460   return index_mtime < cache_last_modified;
461 }
462 
463 }  // namespace disk_cache
464