1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/disk_cache/simple/simple_index_file.h"
6
7 #include <vector>
8
9 #include "base/file_util.h"
10 #include "base/files/memory_mapped_file.h"
11 #include "base/hash.h"
12 #include "base/logging.h"
13 #include "base/pickle.h"
14 #include "base/single_thread_task_runner.h"
15 #include "base/task_runner_util.h"
16 #include "base/threading/thread_restrictions.h"
17 #include "net/disk_cache/simple/simple_backend_version.h"
18 #include "net/disk_cache/simple/simple_entry_format.h"
19 #include "net/disk_cache/simple/simple_histogram_macros.h"
20 #include "net/disk_cache/simple/simple_index.h"
21 #include "net/disk_cache/simple/simple_synchronous_entry.h"
22 #include "net/disk_cache/simple/simple_util.h"
23 #include "third_party/zlib/zlib.h"
24
25 namespace disk_cache {
26 namespace {
27
28 const int kEntryFilesHashLength = 16;
29 const int kEntryFilesSuffixLength = 2;
30
31 const uint64 kMaxEntiresInIndex = 100000000;
32
CalculatePickleCRC(const Pickle & pickle)33 uint32 CalculatePickleCRC(const Pickle& pickle) {
34 return crc32(crc32(0, Z_NULL, 0),
35 reinterpret_cast<const Bytef*>(pickle.payload()),
36 pickle.payload_size());
37 }
38
39 // Used in histograms. Please only add new values at the end.
40 enum IndexFileState {
41 INDEX_STATE_CORRUPT = 0,
42 INDEX_STATE_STALE = 1,
43 INDEX_STATE_FRESH = 2,
44 INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
45 INDEX_STATE_MAX = 4,
46 };
47
UmaRecordIndexFileState(IndexFileState state,net::CacheType cache_type)48 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
49 SIMPLE_CACHE_UMA(ENUMERATION,
50 "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
51 }
52
53 // Used in histograms. Please only add new values at the end.
54 enum IndexInitMethod {
55 INITIALIZE_METHOD_RECOVERED = 0,
56 INITIALIZE_METHOD_LOADED = 1,
57 INITIALIZE_METHOD_NEWCACHE = 2,
58 INITIALIZE_METHOD_MAX = 3,
59 };
60
UmaRecordIndexInitMethod(IndexInitMethod method,net::CacheType cache_type)61 void UmaRecordIndexInitMethod(IndexInitMethod method,
62 net::CacheType cache_type) {
63 SIMPLE_CACHE_UMA(ENUMERATION,
64 "IndexInitializeMethod", cache_type,
65 method, INITIALIZE_METHOD_MAX);
66 }
67
WritePickleFile(Pickle * pickle,const base::FilePath & file_name)68 bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) {
69 int bytes_written = file_util::WriteFile(
70 file_name, static_cast<const char*>(pickle->data()), pickle->size());
71 if (bytes_written != implicit_cast<int>(pickle->size())) {
72 base::DeleteFile(file_name, /* recursive = */ false);
73 return false;
74 }
75 return true;
76 }
77
78 // Called for each cache directory traversal iteration.
ProcessEntryFile(SimpleIndex::EntrySet * entries,const base::FilePath & file_path)79 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
80 const base::FilePath& file_path) {
81 static const size_t kEntryFilesLength =
82 kEntryFilesHashLength + kEntryFilesSuffixLength;
83 // Converting to std::string is OK since we never use UTF8 wide chars in our
84 // file names.
85 const base::FilePath::StringType base_name = file_path.BaseName().value();
86 const std::string file_name(base_name.begin(), base_name.end());
87 if (file_name.size() != kEntryFilesLength)
88 return;
89 const base::StringPiece hash_string(
90 file_name.begin(), file_name.begin() + kEntryFilesHashLength);
91 uint64 hash_key = 0;
92 if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
93 LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
94 << " disk: " << file_name;
95 return;
96 }
97
98 base::PlatformFileInfo file_info;
99 if (!base::GetFileInfo(file_path, &file_info)) {
100 LOG(ERROR) << "Could not get file info for " << file_path.value();
101 return;
102 }
103 base::Time last_used_time;
104 #if defined(OS_POSIX)
105 // For POSIX systems, a last access time is available. However, it's not
106 // guaranteed to be more accurate than mtime. It is no worse though.
107 last_used_time = file_info.last_accessed;
108 #endif
109 if (last_used_time.is_null())
110 last_used_time = file_info.last_modified;
111
112 int64 file_size = file_info.size;
113 SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
114 if (it == entries->end()) {
115 SimpleIndex::InsertInEntrySet(
116 hash_key,
117 EntryMetadata(last_used_time, file_size),
118 entries);
119 } else {
120 // Summing up the total size of the entry through all the *_[0-1] files
121 it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
122 }
123 }
124
125 } // namespace
126
SimpleIndexLoadResult()127 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
128 flush_required(false) {
129 }
130
~SimpleIndexLoadResult()131 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
132 }
133
Reset()134 void SimpleIndexLoadResult::Reset() {
135 did_load = false;
136 flush_required = false;
137 entries.clear();
138 }
139
140 // static
141 const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
142 // static
143 const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
144 // static
145 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
146
IndexMetadata()147 SimpleIndexFile::IndexMetadata::IndexMetadata()
148 : magic_number_(kSimpleIndexMagicNumber),
149 version_(kSimpleVersion),
150 number_of_entries_(0),
151 cache_size_(0) {}
152
IndexMetadata(uint64 number_of_entries,uint64 cache_size)153 SimpleIndexFile::IndexMetadata::IndexMetadata(
154 uint64 number_of_entries, uint64 cache_size)
155 : magic_number_(kSimpleIndexMagicNumber),
156 version_(kSimpleVersion),
157 number_of_entries_(number_of_entries),
158 cache_size_(cache_size) {}
159
Serialize(Pickle * pickle) const160 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
161 DCHECK(pickle);
162 pickle->WriteUInt64(magic_number_);
163 pickle->WriteUInt32(version_);
164 pickle->WriteUInt64(number_of_entries_);
165 pickle->WriteUInt64(cache_size_);
166 }
167
168 // static
SerializeFinalData(base::Time cache_modified,Pickle * pickle)169 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
170 Pickle* pickle) {
171 if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
172 return false;
173 SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
174 header_p->crc = CalculatePickleCRC(*pickle);
175 return true;
176 }
177
Deserialize(PickleIterator * it)178 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
179 DCHECK(it);
180 return it->ReadUInt64(&magic_number_) &&
181 it->ReadUInt32(&version_) &&
182 it->ReadUInt64(&number_of_entries_)&&
183 it->ReadUInt64(&cache_size_);
184 }
185
SyncWriteToDisk(net::CacheType cache_type,const base::FilePath & cache_directory,const base::FilePath & index_filename,const base::FilePath & temp_index_filename,scoped_ptr<Pickle> pickle,const base::TimeTicks & start_time,bool app_on_background)186 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
187 const base::FilePath& cache_directory,
188 const base::FilePath& index_filename,
189 const base::FilePath& temp_index_filename,
190 scoped_ptr<Pickle> pickle,
191 const base::TimeTicks& start_time,
192 bool app_on_background) {
193 // There is a chance that the index containing all the necessary data about
194 // newly created entries will appear to be stale. This can happen if on-disk
195 // part of a Create operation does not fit into the time budget for the index
196 // flush delay. This simple approach will be reconsidered if it does not allow
197 // for maintaining freshness.
198 base::PlatformFileInfo cache_dir_info;
199 base::Time cache_dir_mtime;
200 if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
201 LOG(ERROR) << "Could obtain information about cache age";
202 return;
203 }
204 SerializeFinalData(cache_dir_mtime, pickle.get());
205 if (!WritePickleFile(pickle.get(), temp_index_filename)) {
206 if (!base::CreateDirectory(temp_index_filename.DirName())) {
207 LOG(ERROR) << "Could not create a directory to hold the index file";
208 return;
209 }
210 if (!WritePickleFile(pickle.get(), temp_index_filename)) {
211 LOG(ERROR) << "Failed to write the temporary index file";
212 return;
213 }
214 }
215
216 // Atomically rename the temporary index file to become the real one.
217 bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL);
218 DCHECK(result);
219
220 if (app_on_background) {
221 SIMPLE_CACHE_UMA(TIMES,
222 "IndexWriteToDiskTime.Background", cache_type,
223 (base::TimeTicks::Now() - start_time));
224 } else {
225 SIMPLE_CACHE_UMA(TIMES,
226 "IndexWriteToDiskTime.Foreground", cache_type,
227 (base::TimeTicks::Now() - start_time));
228 }
229 }
230
CheckIndexMetadata()231 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
232 return number_of_entries_ <= kMaxEntiresInIndex &&
233 magic_number_ == kSimpleIndexMagicNumber &&
234 version_ == kSimpleVersion;
235 }
236
SimpleIndexFile(base::SingleThreadTaskRunner * cache_thread,base::TaskRunner * worker_pool,net::CacheType cache_type,const base::FilePath & cache_directory)237 SimpleIndexFile::SimpleIndexFile(
238 base::SingleThreadTaskRunner* cache_thread,
239 base::TaskRunner* worker_pool,
240 net::CacheType cache_type,
241 const base::FilePath& cache_directory)
242 : cache_thread_(cache_thread),
243 worker_pool_(worker_pool),
244 cache_type_(cache_type),
245 cache_directory_(cache_directory),
246 index_file_(cache_directory_.AppendASCII(kIndexDirectory)
247 .AppendASCII(kIndexFileName)),
248 temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
249 .AppendASCII(kTempIndexFileName)) {
250 }
251
~SimpleIndexFile()252 SimpleIndexFile::~SimpleIndexFile() {}
253
LoadIndexEntries(base::Time cache_last_modified,const base::Closure & callback,SimpleIndexLoadResult * out_result)254 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
255 const base::Closure& callback,
256 SimpleIndexLoadResult* out_result) {
257 base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
258 cache_type_,
259 cache_last_modified, cache_directory_,
260 index_file_, out_result);
261 worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
262 }
263
WriteToDisk(const SimpleIndex::EntrySet & entry_set,uint64 cache_size,const base::TimeTicks & start,bool app_on_background)264 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
265 uint64 cache_size,
266 const base::TimeTicks& start,
267 bool app_on_background) {
268 IndexMetadata index_metadata(entry_set.size(), cache_size);
269 scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
270 cache_thread_->PostTask(FROM_HERE, base::Bind(
271 &SimpleIndexFile::SyncWriteToDisk,
272 cache_type_,
273 cache_directory_,
274 index_file_,
275 temp_index_file_,
276 base::Passed(&pickle),
277 base::TimeTicks::Now(),
278 app_on_background));
279 }
280
281 // static
SyncLoadIndexEntries(net::CacheType cache_type,base::Time cache_last_modified,const base::FilePath & cache_directory,const base::FilePath & index_file_path,SimpleIndexLoadResult * out_result)282 void SimpleIndexFile::SyncLoadIndexEntries(
283 net::CacheType cache_type,
284 base::Time cache_last_modified,
285 const base::FilePath& cache_directory,
286 const base::FilePath& index_file_path,
287 SimpleIndexLoadResult* out_result) {
288 // Load the index and find its age.
289 base::Time last_cache_seen_by_index;
290 SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
291
292 // Consider the index loaded if it is fresh.
293 const bool index_file_existed = base::PathExists(index_file_path);
294 if (!out_result->did_load) {
295 if (index_file_existed)
296 UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
297 } else {
298 if (cache_last_modified <= last_cache_seen_by_index) {
299 base::Time latest_dir_mtime;
300 simple_util::GetMTime(cache_directory, &latest_dir_mtime);
301 if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
302 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
303 cache_type);
304 } else {
305 UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
306 }
307 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
308 return;
309 }
310 UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
311 }
312
313 // Reconstruct the index by scanning the disk for entries.
314 const base::TimeTicks start = base::TimeTicks::Now();
315 SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
316 SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
317 base::TimeTicks::Now() - start);
318 SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
319 out_result->entries.size());
320 if (index_file_existed) {
321 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
322 } else {
323 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
324 SIMPLE_CACHE_UMA(COUNTS,
325 "IndexCreatedEntryCount", cache_type,
326 out_result->entries.size());
327 }
328 }
329
330 // static
SyncLoadFromDisk(const base::FilePath & index_filename,base::Time * out_last_cache_seen_by_index,SimpleIndexLoadResult * out_result)331 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
332 base::Time* out_last_cache_seen_by_index,
333 SimpleIndexLoadResult* out_result) {
334 out_result->Reset();
335
336 base::MemoryMappedFile index_file_map;
337 if (!index_file_map.Initialize(index_filename)) {
338 LOG(WARNING) << "Could not map Simple Index file.";
339 base::DeleteFile(index_filename, false);
340 return;
341 }
342
343 SimpleIndexFile::Deserialize(
344 reinterpret_cast<const char*>(index_file_map.data()),
345 index_file_map.length(),
346 out_last_cache_seen_by_index,
347 out_result);
348
349 if (!out_result->did_load)
350 base::DeleteFile(index_filename, false);
351 }
352
353 // static
Serialize(const SimpleIndexFile::IndexMetadata & index_metadata,const SimpleIndex::EntrySet & entries)354 scoped_ptr<Pickle> SimpleIndexFile::Serialize(
355 const SimpleIndexFile::IndexMetadata& index_metadata,
356 const SimpleIndex::EntrySet& entries) {
357 scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
358
359 index_metadata.Serialize(pickle.get());
360 for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
361 it != entries.end(); ++it) {
362 pickle->WriteUInt64(it->first);
363 it->second.Serialize(pickle.get());
364 }
365 return pickle.Pass();
366 }
367
368 // static
Deserialize(const char * data,int data_len,base::Time * out_cache_last_modified,SimpleIndexLoadResult * out_result)369 void SimpleIndexFile::Deserialize(const char* data, int data_len,
370 base::Time* out_cache_last_modified,
371 SimpleIndexLoadResult* out_result) {
372 DCHECK(data);
373
374 out_result->Reset();
375 SimpleIndex::EntrySet* entries = &out_result->entries;
376
377 Pickle pickle(data, data_len);
378 if (!pickle.data()) {
379 LOG(WARNING) << "Corrupt Simple Index File.";
380 return;
381 }
382
383 PickleIterator pickle_it(pickle);
384 SimpleIndexFile::PickleHeader* header_p =
385 pickle.headerT<SimpleIndexFile::PickleHeader>();
386 const uint32 crc_read = header_p->crc;
387 const uint32 crc_calculated = CalculatePickleCRC(pickle);
388
389 if (crc_read != crc_calculated) {
390 LOG(WARNING) << "Invalid CRC in Simple Index file.";
391 return;
392 }
393
394 SimpleIndexFile::IndexMetadata index_metadata;
395 if (!index_metadata.Deserialize(&pickle_it)) {
396 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
397 return;
398 }
399
400 if (!index_metadata.CheckIndexMetadata()) {
401 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
402 return;
403 }
404
405 #if !defined(OS_WIN)
406 // TODO(gavinp): Consider using std::unordered_map.
407 entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
408 #endif
409 while (entries->size() < index_metadata.GetNumberOfEntries()) {
410 uint64 hash_key;
411 EntryMetadata entry_metadata;
412 if (!pickle_it.ReadUInt64(&hash_key) ||
413 !entry_metadata.Deserialize(&pickle_it)) {
414 LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
415 entries->clear();
416 return;
417 }
418 SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
419 }
420
421 int64 cache_last_modified;
422 if (!pickle_it.ReadInt64(&cache_last_modified)) {
423 entries->clear();
424 return;
425 }
426 DCHECK(out_cache_last_modified);
427 *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
428
429 out_result->did_load = true;
430 }
431
432 // static
SyncRestoreFromDisk(const base::FilePath & cache_directory,const base::FilePath & index_file_path,SimpleIndexLoadResult * out_result)433 void SimpleIndexFile::SyncRestoreFromDisk(
434 const base::FilePath& cache_directory,
435 const base::FilePath& index_file_path,
436 SimpleIndexLoadResult* out_result) {
437 LOG(INFO) << "Simple Cache Index is being restored from disk.";
438 base::DeleteFile(index_file_path, /* recursive = */ false);
439 out_result->Reset();
440 SimpleIndex::EntrySet* entries = &out_result->entries;
441
442 const bool did_succeed = TraverseCacheDirectory(
443 cache_directory, base::Bind(&ProcessEntryFile, entries));
444 if (!did_succeed) {
445 LOG(ERROR) << "Could not reconstruct index from disk";
446 return;
447 }
448 out_result->did_load = true;
449 // When we restore from disk we write the merged index file to disk right
450 // away, this might save us from having to restore again next time.
451 out_result->flush_required = true;
452 }
453
454 // static
LegacyIsIndexFileStale(base::Time cache_last_modified,const base::FilePath & index_file_path)455 bool SimpleIndexFile::LegacyIsIndexFileStale(
456 base::Time cache_last_modified,
457 const base::FilePath& index_file_path) {
458 base::Time index_mtime;
459 if (!simple_util::GetMTime(index_file_path, &index_mtime))
460 return true;
461 return index_mtime < cache_last_modified;
462 }
463
464 } // namespace disk_cache
465