• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
6 
7 #include "base/callback.h"
8 #include "base/metrics/histogram.h"
9 #include "base/md5.h"
10 
11 namespace {
12 
// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
// that byte-order changes force corruption.
const int32 kFileMagic = 0x600D71FE;
// On-disk format version; readers reject any other value.
const int32 kFileVersion = 7;  // SQLite storage was 6...
17 
// Header at the front of the main database file.
struct FileHeader {
  int32 magic, version;  // Must equal kFileMagic / kFileVersion.
  // Element counts for the arrays that follow the header, in file
  // order: chunk ids, then prefixes, then full hashes.
  uint32 add_chunk_count, sub_chunk_count;
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};
25 
// Header for each chunk in the chunk-accumulation file.
struct ChunkHeader {
  // Element counts for the four arrays written after this header by
  // FinishChunk().
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};
31 
32 // Rewind the file.  Using fseek(2) because rewind(3) errors are
33 // weird.
FileRewind(FILE * fp)34 bool FileRewind(FILE* fp) {
35   int rv = fseek(fp, 0, SEEK_SET);
36   DCHECK_EQ(rv, 0);
37   return rv == 0;
38 }
39 
40 // Move file read pointer forward by |bytes| relative to current position.
FileSkip(size_t bytes,FILE * fp)41 bool FileSkip(size_t bytes, FILE* fp) {
42   // Although fseek takes negative values, for this case, we only want
43   // to skip forward.
44   DCHECK(static_cast<long>(bytes) >= 0);
45   if (static_cast<long>(bytes) < 0)
46     return false;
47   int rv = fseek(fp, static_cast<long>(bytes), SEEK_CUR);
48   DCHECK_EQ(rv, 0);
49   return rv == 0;
50 }
51 
52 // Read an array of |nmemb| items from |fp| into |ptr|, and fold the
53 // input data into the checksum in |context|, if non-NULL.  Return
54 // true on success.
55 template <class T>
ReadArray(T * ptr,size_t nmemb,FILE * fp,MD5Context * context)56 bool ReadArray(T* ptr, size_t nmemb, FILE* fp, MD5Context* context) {
57   const size_t ret = fread(ptr, sizeof(T), nmemb, fp);
58   if (ret != nmemb)
59     return false;
60 
61   if (context)
62     MD5Update(context, ptr, sizeof(T) * nmemb);
63   return true;
64 }
65 
66 // Write an array of |nmemb| items from |ptr| to |fp|, and fold the
67 // output data into the checksum in |context|, if non-NULL.  Return
68 // true on success.
69 template <class T>
WriteArray(const T * ptr,size_t nmemb,FILE * fp,MD5Context * context)70 bool WriteArray(const T* ptr, size_t nmemb, FILE* fp, MD5Context* context) {
71   const size_t ret = fwrite(ptr, sizeof(T), nmemb, fp);
72   if (ret != nmemb)
73     return false;
74 
75   if (context)
76     MD5Update(context, ptr, sizeof(T) * nmemb);
77 
78   return true;
79 }
80 
81 // Expand |values| to fit |count| new items, read those items from
82 // |fp| and fold them into the checksum in |context|.  Returns true on
83 // success.
84 template <class T>
ReadToVector(std::vector<T> * values,size_t count,FILE * fp,MD5Context * context)85 bool ReadToVector(std::vector<T>* values, size_t count,
86                   FILE* fp, MD5Context* context) {
87   // Pointers into an empty vector may not be valid.
88   if (!count)
89     return true;
90 
91   // Grab the size for purposes of finding where to read to.  The
92   // resize could invalidate any iterator captured here.
93   const size_t original_size = values->size();
94   values->resize(original_size + count);
95 
96   // Sayeth Herb Sutter: Vectors are guaranteed to be contiguous.  So
97   // get a pointer to where to read the data to.
98   T* ptr = &((*values)[original_size]);
99   if (!ReadArray(ptr, count, fp, context)) {
100     values->resize(original_size);
101     return false;
102   }
103 
104   return true;
105 }
106 
107 // Write all of |values| to |fp|, and fold the data into the checksum
108 // in |context|, if non-NULL.  Returns true on succsess.
109 template <class T>
WriteVector(const std::vector<T> & values,FILE * fp,MD5Context * context)110 bool WriteVector(const std::vector<T>& values, FILE* fp, MD5Context* context) {
111   // Pointers into empty vectors may not be valid.
112   if (values.empty())
113     return true;
114 
115   // Sayeth Herb Sutter: Vectors are guaranteed to be contiguous.  So
116   // get a pointer to where to write from.
117   const T* ptr = &(values[0]);
118   return WriteArray(ptr, values.size(), fp, context);
119 }
120 
121 // Read an array of |count| integers and add them to |values|.
122 // Returns true on success.
ReadToChunkSet(std::set<int32> * values,size_t count,FILE * fp,MD5Context * context)123 bool ReadToChunkSet(std::set<int32>* values, size_t count,
124                     FILE* fp, MD5Context* context) {
125   if (!count)
126     return true;
127 
128   std::vector<int32> flat_values;
129   if (!ReadToVector(&flat_values, count, fp, context))
130     return false;
131 
132   values->insert(flat_values.begin(), flat_values.end());
133   return true;
134 }
135 
136 // Write the contents of |values| as an array of integers.  Returns
137 // true on success.
WriteChunkSet(const std::set<int32> & values,FILE * fp,MD5Context * context)138 bool WriteChunkSet(const std::set<int32>& values,
139                    FILE* fp, MD5Context* context) {
140   if (values.empty())
141     return true;
142 
143   const std::vector<int32> flat_values(values.begin(), values.end());
144   return WriteVector(flat_values, fp, context);
145 }
146 
147 // Delete the chunks in |deleted| from |chunks|.
DeleteChunksFromSet(const base::hash_set<int32> & deleted,std::set<int32> * chunks)148 void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
149                          std::set<int32>* chunks) {
150   for (std::set<int32>::iterator iter = chunks->begin();
151        iter != chunks->end();) {
152     std::set<int32>::iterator prev = iter++;
153     if (deleted.count(*prev) > 0)
154       chunks->erase(prev);
155   }
156 }
157 
158 // Sanity-check the header against the file's size to make sure our
159 // vectors aren't gigantic.  This doubles as a cheap way to detect
160 // corruption without having to checksum the entire file.
FileHeaderSanityCheck(const FilePath & filename,const FileHeader & header)161 bool FileHeaderSanityCheck(const FilePath& filename,
162                            const FileHeader& header) {
163   int64 size = 0;
164   if (!file_util::GetFileSize(filename, &size))
165     return false;
166 
167   int64 expected_size = sizeof(FileHeader);
168   expected_size += header.add_chunk_count * sizeof(int32);
169   expected_size += header.sub_chunk_count * sizeof(int32);
170   expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
171   expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
172   expected_size += header.add_hash_count * sizeof(SBAddFullHash);
173   expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
174   expected_size += sizeof(MD5Digest);
175   if (size != expected_size)
176     return false;
177 
178   return true;
179 }
180 
181 // This a helper function that reads header to |header|. Returns true if the
182 // magic number is correct and santiy check passes.
ReadAndVerifyHeader(const FilePath & filename,FILE * fp,FileHeader * header,MD5Context * context)183 bool ReadAndVerifyHeader(const FilePath& filename,
184                          FILE* fp,
185                          FileHeader* header,
186                          MD5Context* context) {
187   if (!ReadArray(header, 1, fp, context))
188     return false;
189   if (header->magic != kFileMagic || header->version != kFileVersion)
190     return false;
191   if (!FileHeaderSanityCheck(filename, *header))
192     return false;
193   return true;
194 }
195 
196 }  // namespace
197 
// static
// Log a store-format event to the SB2.FormatEvent histogram for
// diagnostics.
void SafeBrowsingStoreFile::RecordFormatEvent(FormatEventType event_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.FormatEvent", event_type, FORMAT_EVENT_MAX);
}
202 
203 // static
CheckForOriginalAndDelete(const FilePath & current_filename)204 void SafeBrowsingStoreFile::CheckForOriginalAndDelete(
205     const FilePath& current_filename) {
206   const FilePath original_filename(
207       current_filename.DirName().AppendASCII("Safe Browsing"));
208   if (file_util::PathExists(original_filename)) {
209     int64 size = 0;
210     if (file_util::GetFileSize(original_filename, &size)) {
211       UMA_HISTOGRAM_COUNTS("SB2.OldDatabaseKilobytes",
212                            static_cast<int>(size / 1024));
213     }
214 
215     if (file_util::Delete(original_filename, false)) {
216       RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL);
217     } else {
218       RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL_FAILED);
219     }
220 
221     // Just best-effort on the journal file, don't want to get lost in
222     // the weeds.
223     const FilePath journal_filename(
224         current_filename.DirName().AppendASCII("Safe Browsing-journal"));
225     file_util::Delete(journal_filename, false);
226   }
227 }
228 
// Start idle: no files open, no chunks written, no corruption seen.
SafeBrowsingStoreFile::SafeBrowsingStoreFile()
    : chunks_written_(0),
      file_(NULL),
      empty_(false),
      corruption_seen_(false) {
}
235 
// Release any open file handles and buffered update state.
SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
  Close();
}
239 
Delete()240 bool SafeBrowsingStoreFile::Delete() {
241   // The database should not be open at this point.  But, just in
242   // case, close everything before deleting.
243   if (!Close()) {
244     NOTREACHED();
245     return false;
246   }
247 
248   if (!file_util::Delete(filename_, false) &&
249       file_util::PathExists(filename_)) {
250     NOTREACHED();
251     return false;
252   }
253 
254   const FilePath new_filename = TemporaryFileForFilename(filename_);
255   if (!file_util::Delete(new_filename, false) &&
256       file_util::PathExists(new_filename)) {
257     NOTREACHED();
258     return false;
259   }
260 
261   // With SQLite support gone, one way to get to this code is if the
262   // existing file is a SQLite file.  Make sure the journal file is
263   // also removed.
264   const FilePath journal_filename(
265       filename_.value() + FILE_PATH_LITERAL("-journal"));
266   if (file_util::PathExists(journal_filename))
267     file_util::Delete(journal_filename, false);
268 
269   return true;
270 }
271 
// Remember the backing |filename| and take ownership of
// |corruption_callback|, which is run when corruption is detected
// (see OnCorruptDatabase()).
void SafeBrowsingStoreFile::Init(const FilePath& filename,
                                 Callback0::Type* corruption_callback) {
  filename_ = filename;
  corruption_callback_.reset(corruption_callback);
}
277 
// Start accumulating data for a new chunk by resetting the per-chunk
// buffers.
bool SafeBrowsingStoreFile::BeginChunk() {
  return ClearChunkBuffers();
}
281 
// Buffer an add-prefix for the chunk in progress; FinishChunk()
// flushes it to the temporary file.
bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
  add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));
  return true;
}
286 
GetAddPrefixes(std::vector<SBAddPrefix> * add_prefixes)287 bool SafeBrowsingStoreFile::GetAddPrefixes(
288    std::vector<SBAddPrefix>* add_prefixes) {
289   add_prefixes->clear();
290 
291   file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
292   if (file.get() == NULL) return false;
293 
294   FileHeader header;
295   if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL))
296     return OnCorruptDatabase();
297 
298   size_t add_prefix_offset = header.add_chunk_count * sizeof(int32) +
299       header.sub_chunk_count * sizeof(int32);
300   if (!FileSkip(add_prefix_offset, file.get()))
301     return false;
302 
303   if (!ReadToVector(add_prefixes, header.add_prefix_count, file.get(), NULL))
304     return false;
305 
306   return true;
307 }
308 
GetAddFullHashes(std::vector<SBAddFullHash> * add_full_hashes)309 bool SafeBrowsingStoreFile::GetAddFullHashes(
310     std::vector<SBAddFullHash>* add_full_hashes) {
311   add_full_hashes->clear();
312 
313   file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
314   if (file.get() == NULL) return false;
315 
316   FileHeader header;
317   if (!ReadAndVerifyHeader(filename_, file.get(), &header, NULL))
318     return OnCorruptDatabase();
319 
320   size_t offset =
321       header.add_chunk_count * sizeof(int32) +
322       header.sub_chunk_count * sizeof(int32) +
323       header.add_prefix_count * sizeof(SBAddPrefix) +
324       header.sub_prefix_count * sizeof(SBSubPrefix);
325   if (!FileSkip(offset, file.get()))
326     return false;
327 
328   return ReadToVector(add_full_hashes,
329                       header.add_hash_count,
330                       file.get(),
331                       NULL);
332 }
333 
// Buffer an add full-hash for the chunk in progress; FinishChunk()
// flushes it to the temporary file.
bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id,
                                         base::Time receive_time,
                                         const SBFullHash& full_hash) {
  add_hashes_.push_back(SBAddFullHash(chunk_id, receive_time, full_hash));
  return true;
}
340 
// Buffer a sub-prefix for the chunk in progress; FinishChunk()
// flushes it to the temporary file.
bool SafeBrowsingStoreFile::WriteSubPrefix(int32 chunk_id,
                                           int32 add_chunk_id,
                                           SBPrefix prefix) {
  sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));
  return true;
}
347 
// Buffer a sub full-hash for the chunk in progress; FinishChunk()
// flushes it to the temporary file.
bool SafeBrowsingStoreFile::WriteSubHash(int32 chunk_id, int32 add_chunk_id,
                                         const SBFullHash& full_hash) {
  sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash));
  return true;
}
353 
OnCorruptDatabase()354 bool SafeBrowsingStoreFile::OnCorruptDatabase() {
355   if (!corruption_seen_)
356     RecordFormatEvent(FORMAT_EVENT_FILE_CORRUPT);
357   corruption_seen_ = true;
358 
359   if (corruption_callback_.get())
360     corruption_callback_->Run();
361 
362   // Return false as a convenience to callers.
363   return false;
364 }
365 
// Drop all in-memory update state and release both file handles.
// Always succeeds.
bool SafeBrowsingStoreFile::Close() {
  ClearUpdateBuffers();

  // Make sure the files are closed.
  file_.reset();
  new_file_.reset();
  return true;
}
374 
// Open the existing store (if any) for reading, validate its header,
// load the chunks-seen sets, and create the temporary file that will
// accumulate chunk data during the update.  Returns false if the
// update cannot proceed; corruption is reported via
// OnCorruptDatabase().
bool SafeBrowsingStoreFile::BeginUpdate() {
  DCHECK(!file_.get() && !new_file_.get());

  // Structures should all be clear unless something bad happened.
  DCHECK(add_chunks_cache_.empty());
  DCHECK(sub_chunks_cache_.empty());
  DCHECK(add_del_cache_.empty());
  DCHECK(sub_del_cache_.empty());
  DCHECK(add_prefixes_.empty());
  DCHECK(sub_prefixes_.empty());
  DCHECK(add_hashes_.empty());
  DCHECK(sub_hashes_.empty());
  DCHECK_EQ(chunks_written_, 0);

  // Since the following code will already hit the profile looking for
  // database files, this is a reasonable to time delete any old
  // files.
  CheckForOriginalAndDelete(filename_);

  corruption_seen_ = false;

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  file_util::ScopedFILE new_file(file_util::OpenFile(new_filename, "wb+"));
  if (new_file.get() == NULL)
    return false;

  file_util::ScopedFILE file(file_util::OpenFile(filename_, "rb"));
  empty_ = (file.get() == NULL);
  if (empty_) {
    // If the file exists but cannot be opened, try to delete it (not
    // deleting directly, the bloom filter needs to be deleted, too).
    if (file_util::PathExists(filename_))
      return OnCorruptDatabase();

    new_file_.swap(new_file);
    return true;
  }

  FileHeader header;
  if (!ReadArray(&header, 1, file.get(), NULL))
      return OnCorruptDatabase();

  if (header.magic != kFileMagic || header.version != kFileVersion) {
    // Distinguish a leftover SQLite-format store from random garbage
    // in the histograms.  This reads the first bytes of |header| as a
    // C string; FileHeader (32 bytes) is large enough to hold
    // "SQLite format 3" plus its terminator.
    if (!strcmp(reinterpret_cast<char*>(&header.magic), "SQLite format 3")) {
      RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE);
    } else {
      RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN);
    }

    // Close the file so that it can be deleted.
    file.reset();

    return OnCorruptDatabase();
  }

  // TODO(shess): Under POSIX it is possible that this could size a
  // file different from the file which was opened.
  if (!FileHeaderSanityCheck(filename_, header))
    return OnCorruptDatabase();

  // Pull in the chunks-seen data for purposes of implementing
  // |GetAddChunks()| and |GetSubChunks()|.  This data is sent up to
  // the server at the beginning of an update.
  if (!ReadToChunkSet(&add_chunks_cache_, header.add_chunk_count,
                      file.get(), NULL) ||
      !ReadToChunkSet(&sub_chunks_cache_, header.sub_chunk_count,
                      file.get(), NULL))
    return OnCorruptDatabase();

  file_.swap(file);
  new_file_.swap(new_file);
  return true;
}
448 
FinishChunk()449 bool SafeBrowsingStoreFile::FinishChunk() {
450   if (!add_prefixes_.size() && !sub_prefixes_.size() &&
451       !add_hashes_.size() && !sub_hashes_.size())
452     return true;
453 
454   ChunkHeader header;
455   header.add_prefix_count = add_prefixes_.size();
456   header.sub_prefix_count = sub_prefixes_.size();
457   header.add_hash_count = add_hashes_.size();
458   header.sub_hash_count = sub_hashes_.size();
459   if (!WriteArray(&header, 1, new_file_.get(), NULL))
460     return false;
461 
462   if (!WriteVector(add_prefixes_, new_file_.get(), NULL) ||
463       !WriteVector(sub_prefixes_, new_file_.get(), NULL) ||
464       !WriteVector(add_hashes_, new_file_.get(), NULL) ||
465       !WriteVector(sub_hashes_, new_file_.get(), NULL))
466     return false;
467 
468   ++chunks_written_;
469 
470   // Clear everything to save memory.
471   return ClearChunkBuffers();
472 }
473 
// Core of the update: verify and read the old store (checksummed),
// append the chunks accumulated by FinishChunk() and |pending_adds|,
// apply sub/delete processing, then rewrite the temporary file with a
// fresh header and checksum and rename it over |filename_|.  On
// success the merged add prefixes / full hashes are swapped into the
// result vectors.  Returns false on any failure; corruption is
// reported via OnCorruptDatabase().
bool SafeBrowsingStoreFile::DoUpdate(
    const std::vector<SBAddFullHash>& pending_adds,
    const std::set<SBPrefix>& prefix_misses,
    std::vector<SBAddPrefix>* add_prefixes_result,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  DCHECK(file_.get() || empty_);
  DCHECK(new_file_.get());
  CHECK(add_prefixes_result);
  CHECK(add_full_hashes_result);

  // Working copies of the store's contents, merged in memory.
  std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBSubPrefix> sub_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  std::vector<SBSubFullHash> sub_full_hashes;

  // Read original data into the vectors.
  if (!empty_) {
    DCHECK(file_.get());

    if (!FileRewind(file_.get()))
      return OnCorruptDatabase();

    MD5Context context;
    MD5Init(&context);

    // Read the file header and make sure it looks right.
    FileHeader header;
    if (!ReadAndVerifyHeader(filename_, file_.get(), &header, &context))
      return OnCorruptDatabase();

    // Re-read the chunks-seen data to get to the later data in the
    // file and calculate the checksum.  No new elements should be
    // added to the sets.
    if (!ReadToChunkSet(&add_chunks_cache_, header.add_chunk_count,
                        file_.get(), &context) ||
        !ReadToChunkSet(&sub_chunks_cache_, header.sub_chunk_count,
                        file_.get(), &context))
      return OnCorruptDatabase();

    if (!ReadToVector(&add_prefixes, header.add_prefix_count,
                      file_.get(), &context) ||
        !ReadToVector(&sub_prefixes, header.sub_prefix_count,
                      file_.get(), &context) ||
        !ReadToVector(&add_full_hashes, header.add_hash_count,
                      file_.get(), &context) ||
        !ReadToVector(&sub_full_hashes, header.sub_hash_count,
                      file_.get(), &context))
      return OnCorruptDatabase();

    // Calculate the digest to this point.
    MD5Digest calculated_digest;
    MD5Final(&calculated_digest, &context);

    // Read the stored checksum and verify it.
    MD5Digest file_digest;
    if (!ReadArray(&file_digest, 1, file_.get(), NULL))
      return OnCorruptDatabase();

    if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
      return OnCorruptDatabase();

    // Close the file so we can later rename over it.
    file_.reset();
  }
  DCHECK(!file_.get());

  // Rewind the temporary storage.
  if (!FileRewind(new_file_.get()))
    return false;

  // Get chunk file's size for validating counts.
  int64 size = 0;
  if (!file_util::GetFileSize(TemporaryFileForFilename(filename_), &size))
    return OnCorruptDatabase();

  // Track update size to answer questions at http://crbug.com/72216 .
  // Log small updates as 1k so that the 0 (underflow) bucket can be
  // used for "empty" in SafeBrowsingDatabase.
  UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",
                       std::max(static_cast<int>(size / 1024), 1));

  // Append the accumulated chunks onto the vectors read from |file_|.
  for (int i = 0; i < chunks_written_; ++i) {
    ChunkHeader header;

    // Remember where this chunk starts so its claimed size can be
    // checked against the remaining file.
    int64 ofs = ftell(new_file_.get());
    if (ofs == -1)
      return false;

    if (!ReadArray(&header, 1, new_file_.get(), NULL))
      return false;

    // As a safety measure, make sure that the header describes a sane
    // chunk, given the remaining file size.
    int64 expected_size = ofs + sizeof(ChunkHeader);
    expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
    expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
    expected_size += header.add_hash_count * sizeof(SBAddFullHash);
    expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
    if (expected_size > size)
      return false;

    // TODO(shess): If the vectors were kept sorted, then this code
    // could use std::inplace_merge() to merge everything together in
    // sorted order.  That might still be slower than just sorting at
    // the end if there were a large number of chunks.  In that case
    // some sort of recursive binary merge might be in order (merge
    // chunks pairwise, merge those chunks pairwise, and so on, then
    // merge the result with the main list).
    if (!ReadToVector(&add_prefixes, header.add_prefix_count,
                      new_file_.get(), NULL) ||
        !ReadToVector(&sub_prefixes, header.sub_prefix_count,
                      new_file_.get(), NULL) ||
        !ReadToVector(&add_full_hashes, header.add_hash_count,
                      new_file_.get(), NULL) ||
        !ReadToVector(&sub_full_hashes, header.sub_hash_count,
                      new_file_.get(), NULL))
      return false;
  }

  // Append items from |pending_adds|.
  add_full_hashes.insert(add_full_hashes.end(),
                         pending_adds.begin(), pending_adds.end());

  // Check how often a prefix was checked which wasn't in the
  // database.
  SBCheckPrefixMisses(add_prefixes, prefix_misses);

  // Knock the subs from the adds and process deleted chunks.
  SBProcessSubs(&add_prefixes, &sub_prefixes,
                &add_full_hashes, &sub_full_hashes,
                add_del_cache_, sub_del_cache_);

  // We no longer need to track deleted chunks.
  DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
  DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);

  // Write the new data to new_file_.
  if (!FileRewind(new_file_.get()))
    return false;

  MD5Context context;
  MD5Init(&context);

  // Write a file header.
  FileHeader header;
  header.magic = kFileMagic;
  header.version = kFileVersion;
  header.add_chunk_count = add_chunks_cache_.size();
  header.sub_chunk_count = sub_chunks_cache_.size();
  header.add_prefix_count = add_prefixes.size();
  header.sub_prefix_count = sub_prefixes.size();
  header.add_hash_count = add_full_hashes.size();
  header.sub_hash_count = sub_full_hashes.size();
  if (!WriteArray(&header, 1, new_file_.get(), &context))
    return false;

  // Write all the chunk data.
  if (!WriteChunkSet(add_chunks_cache_, new_file_.get(), &context) ||
      !WriteChunkSet(sub_chunks_cache_, new_file_.get(), &context) ||
      !WriteVector(add_prefixes, new_file_.get(), &context) ||
      !WriteVector(sub_prefixes, new_file_.get(), &context) ||
      !WriteVector(add_full_hashes, new_file_.get(), &context) ||
      !WriteVector(sub_full_hashes, new_file_.get(), &context))
    return false;

  // Write the checksum at the end.
  MD5Digest digest;
  MD5Final(&digest, &context);
  if (!WriteArray(&digest, 1, new_file_.get(), NULL))
    return false;

  // Trim any excess left over from the temporary chunk data.
  if (!file_util::TruncateFile(new_file_.get()))
    return false;

  // Close the file handle and swizzle the file into place.
  new_file_.reset();
  if (!file_util::Delete(filename_, false) &&
      file_util::PathExists(filename_))
    return false;

  const FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!file_util::Move(new_filename, filename_))
    return false;

  // Record counts before swapping to caller.
  UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefixes.size());
  UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefixes.size());

  // Pass the resulting data off to the caller.
  add_prefixes_result->swap(add_prefixes);
  add_full_hashes_result->swap(add_full_hashes);

  return true;
}
670 
FinishUpdate(const std::vector<SBAddFullHash> & pending_adds,const std::set<SBPrefix> & prefix_misses,std::vector<SBAddPrefix> * add_prefixes_result,std::vector<SBAddFullHash> * add_full_hashes_result)671 bool SafeBrowsingStoreFile::FinishUpdate(
672     const std::vector<SBAddFullHash>& pending_adds,
673     const std::set<SBPrefix>& prefix_misses,
674     std::vector<SBAddPrefix>* add_prefixes_result,
675     std::vector<SBAddFullHash>* add_full_hashes_result) {
676   DCHECK(add_prefixes_result);
677   DCHECK(add_full_hashes_result);
678 
679   bool ret = DoUpdate(pending_adds, prefix_misses,
680                       add_prefixes_result, add_full_hashes_result);
681 
682   if (!ret) {
683     CancelUpdate();
684     return false;
685   }
686 
687   DCHECK(!new_file_.get());
688   DCHECK(!file_.get());
689 
690   return Close();
691 }
692 
// Abandon an in-progress update, dropping buffered state and closing
// any open files.
bool SafeBrowsingStoreFile::CancelUpdate() {
  return Close();
}
696 
// Track |chunk_id| in the in-memory add-chunk set; it is written out
// with the next update.
void SafeBrowsingStoreFile::SetAddChunk(int32 chunk_id) {
  add_chunks_cache_.insert(chunk_id);
}
700 
// Returns true if |chunk_id| is in the in-memory add-chunk set.
bool SafeBrowsingStoreFile::CheckAddChunk(int32 chunk_id) {
  return add_chunks_cache_.count(chunk_id) > 0;
}
704 
// Copy the add-chunk ids into |out| (cleared first), in sorted order
// since the backing container is a std::set.
void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32>* out) {
  out->clear();
  out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());
}
709 
// Track |chunk_id| in the in-memory sub-chunk set; it is written out
// with the next update.
void SafeBrowsingStoreFile::SetSubChunk(int32 chunk_id) {
  sub_chunks_cache_.insert(chunk_id);
}
713 
// Returns true if |chunk_id| is in the in-memory sub-chunk set.
bool SafeBrowsingStoreFile::CheckSubChunk(int32 chunk_id) {
  return sub_chunks_cache_.count(chunk_id) > 0;
}
717 
// Copy the sub-chunk ids into |out| (cleared first), in sorted order
// since the backing container is a std::set.
void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32>* out) {
  out->clear();
  out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());
}
722 
// Mark add-chunk |chunk_id| for deletion; the deletion is applied
// during DoUpdate().
void SafeBrowsingStoreFile::DeleteAddChunk(int32 chunk_id) {
  add_del_cache_.insert(chunk_id);
}
726 
// Mark sub-chunk |chunk_id| for deletion; the deletion is applied
// during DoUpdate().
void SafeBrowsingStoreFile::DeleteSubChunk(int32 chunk_id) {
  sub_del_cache_.insert(chunk_id);
}
730