• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 
7 #include <algorithm>
8 #include <iterator>
9 
10 #include "base/file_util.h"
11 #include "base/metrics/histogram.h"
12 #include "base/metrics/stats_counters.h"
13 #include "base/time.h"
14 #include "base/message_loop.h"
15 #include "base/process_util.h"
16 #include "crypto/sha2.h"
17 #include "chrome/browser/safe_browsing/bloom_filter.h"
18 #include "chrome/browser/safe_browsing/prefix_set.h"
19 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
20 #include "content/browser/browser_thread.h"
21 #include "googleurl/src/gurl.h"
22 
23 namespace {
24 
25 // Filename suffix for the bloom filter.
26 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
27 // Filename suffix for download store.
28 const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
29 // Filename suffix for client-side phishing detection whitelist store.
30 const FilePath::CharType kCsdWhitelistDBFile[] =
31     FILE_PATH_LITERAL(" Csd Whitelist");
32 // Filename suffix for browse store.
33 // TODO(lzheng): change to a better name when we change the file format.
34 const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
35 
36 // The maximum staleness for a cached entry.
37 const int kMaxStalenessMinutes = 45;
38 
39 // Maximum number of entries we allow in the client-side phishing detection
40 // whitelist.  If the whitelist on disk contains more entries then
41 // ContainsCsdWhitelistedUrl will always return true.
42 const size_t kMaxCsdWhitelistSize = 5000;
43 
44 // If the hash of this exact expression is on the csd whitelist then
45 // ContainsCsdWhitelistedUrl will always return true.
46 const char kCsdKillSwitchUrl[] =
47     "sb-ssl.google.com/safebrowsing/csd/killswitch";
48 
49 // To save space, the incoming |chunk_id| and |list_id| are combined
50 // into an |encoded_chunk_id| for storage by shifting the |list_id|
51 // into the low-order bits.  These functions decode that information.
52 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
53 // there should be better ways to save chunk_id and list_id after we use
54 // SafeBrowsingStoreFile.
// Returns the low-order bit of |encoded_chunk_id|, which is where
// EncodeChunkId() stores the (mod 2) list id.
int GetListIdBit(const int encoded_chunk_id) {
  const int kListIdMask = 1;
  return encoded_chunk_id & kListIdMask;
}
// Drops the list-id bit from |encoded_chunk_id| and returns the
// remaining bits as the chunk id.
int DecodeChunkId(int encoded_chunk_id) {
  const int chunk_id = encoded_chunk_id >> 1;
  return chunk_id;
}
EncodeChunkId(const int chunk,const int list_id)61 int EncodeChunkId(const int chunk, const int list_id) {
62   DCHECK_NE(list_id, safe_browsing_util::INVALID);
63   return chunk << 1 | list_id % 2;
64 }
65 
66 // Generate the set of full hashes to check for |url|.  If
67 // |include_whitelist_hashes| is true we will generate additional path-prefixes
68 // to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
69 // whitelist it should also match /foo/bar which is not the case for all the
70 // other lists.
71 // TODO(shess): This function is almost the same as
72 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
73 // does an early exit on match.  Since match should be the infrequent
74 // case (phishing or malware found), consider combining this function
75 // with that one.
BrowseFullHashesToCheck(const GURL & url,bool include_whitelist_hashes,std::vector<SBFullHash> * full_hashes)76 void BrowseFullHashesToCheck(const GURL& url,
77                              bool include_whitelist_hashes,
78                              std::vector<SBFullHash>* full_hashes) {
79   std::vector<std::string> hosts;
80   if (url.HostIsIPAddress()) {
81     hosts.push_back(url.host());
82   } else {
83     safe_browsing_util::GenerateHostsToCheck(url, &hosts);
84   }
85 
86   std::vector<std::string> paths;
87   safe_browsing_util::GeneratePathsToCheck(url, &paths);
88 
89   for (size_t i = 0; i < hosts.size(); ++i) {
90     for (size_t j = 0; j < paths.size(); ++j) {
91       const std::string& path = paths[j];
92       SBFullHash full_hash;
93       crypto::SHA256HashString(hosts[i] + path, &full_hash,
94                                sizeof(full_hash));
95       full_hashes->push_back(full_hash);
96 
97       // We may have /foo as path-prefix in the whitelist which should
98       // also match with /foo/bar and /foo?bar.  Hence, for every path
99       // that ends in '/' we also add the path without the slash.
100       if (include_whitelist_hashes &&
101           path.size() > 1 &&
102           path[path.size() - 1] == '/') {
103         crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
104                                  &full_hash, sizeof(full_hash));
105         full_hashes->push_back(full_hash);
106       }
107     }
108   }
109 }
110 
111 // Get the prefixes matching the download |urls|.
GetDownloadUrlPrefixes(const std::vector<GURL> & urls,std::vector<SBPrefix> * prefixes)112 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
113                             std::vector<SBPrefix>* prefixes) {
114   std::vector<SBFullHash> full_hashes;
115   for (size_t i = 0; i < urls.size(); ++i)
116     BrowseFullHashesToCheck(urls[i], false, &full_hashes);
117 
118   for (size_t i = 0; i < full_hashes.size(); ++i)
119     prefixes->push_back(full_hashes[i].prefix);
120 }
121 
122 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
123 // add them to |full_hits| if not expired.  "Not expired" is when
124 // either |last_update| was recent enough, or the item has been
125 // received recently enough.  Expired items are not deleted because a
126 // future update may make them acceptable again.
127 //
128 // For efficiency reasons the code walks |prefix_hits| and
129 // |full_hashes| in parallel, so they must be sorted by prefix.
GetCachedFullHashesForBrowse(const std::vector<SBPrefix> & prefix_hits,const std::vector<SBAddFullHash> & full_hashes,std::vector<SBFullHashResult> * full_hits,base::Time last_update)130 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
131                                   const std::vector<SBAddFullHash>& full_hashes,
132                                   std::vector<SBFullHashResult>* full_hits,
133                                   base::Time last_update) {
134   const base::Time expire_time =
135       base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
136 
137   std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
138   std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
139 
140   while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
141     if (*piter < hiter->full_hash.prefix) {
142       ++piter;
143     } else if (hiter->full_hash.prefix < *piter) {
144       ++hiter;
145     } else {
146       if (expire_time < last_update ||
147           expire_time.ToTimeT() < hiter->received) {
148         SBFullHashResult result;
149         const int list_bit = GetListIdBit(hiter->chunk_id);
150         DCHECK(list_bit == safe_browsing_util::MALWARE ||
151                list_bit == safe_browsing_util::PHISH);
152         if (!safe_browsing_util::GetListName(list_bit, &result.list_name))
153           continue;
154         result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
155         result.hash = hiter->full_hash;
156         full_hits->push_back(result);
157       }
158 
159       // Only increment |hiter|, |piter| might have multiple hits.
160       ++hiter;
161     }
162   }
163 }
164 
165 // This function generates a chunk range string for |chunks|. It
166 // outputs one chunk range string per list and writes it to the
167 // |list_ranges| vector.  We expect |list_ranges| to already be of the
168 // right size.  E.g., if |chunks| contains chunks with two different
169 // list ids then |list_ranges| must contain two elements.
GetChunkRanges(const std::vector<int> & chunks,std::vector<std::string> * list_ranges)170 void GetChunkRanges(const std::vector<int>& chunks,
171                     std::vector<std::string>* list_ranges) {
172   DCHECK_GT(list_ranges->size(), 0U);
173   DCHECK_LE(list_ranges->size(), 2U);
174   std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
175   for (std::vector<int>::const_iterator iter = chunks.begin();
176        iter != chunks.end(); ++iter) {
177     int mod_list_id = GetListIdBit(*iter);
178     DCHECK_GE(mod_list_id, 0);
179     DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
180     decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
181   }
182   for (size_t i = 0; i < decoded_chunks.size(); ++i) {
183     ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
184   }
185 }
186 
187 // Helper function to create chunk range lists for Browse related
188 // lists.
UpdateChunkRanges(SafeBrowsingStore * store,const std::vector<std::string> & listnames,std::vector<SBListChunkRanges> * lists)189 void UpdateChunkRanges(SafeBrowsingStore* store,
190                        const std::vector<std::string>& listnames,
191                        std::vector<SBListChunkRanges>* lists) {
192   DCHECK_GT(listnames.size(), 0U);
193   DCHECK_LE(listnames.size(), 2U);
194   std::vector<int> add_chunks;
195   std::vector<int> sub_chunks;
196   store->GetAddChunks(&add_chunks);
197   store->GetSubChunks(&sub_chunks);
198 
199   std::vector<std::string> adds(listnames.size());
200   std::vector<std::string> subs(listnames.size());
201   GetChunkRanges(add_chunks, &adds);
202   GetChunkRanges(sub_chunks, &subs);
203 
204   for (size_t i = 0; i < listnames.size(); ++i) {
205     const std::string& listname = listnames[i];
206     DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
207               static_cast<int>(i % 2));
208     DCHECK_NE(safe_browsing_util::GetListId(listname),
209               safe_browsing_util::INVALID);
210     lists->push_back(SBListChunkRanges(listname));
211     lists->back().adds.swap(adds[i]);
212     lists->back().subs.swap(subs[i]);
213   }
214 }
215 
216 // Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
217 // safe_browsing_store.h orders on both chunk-id and prefix.
SBAddFullHashPrefixLess(const SBAddFullHash & a,const SBAddFullHash & b)218 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
219   return a.full_hash.prefix < b.full_hash.prefix;
220 }
221 
222 // As compared to the bloom filter, PrefixSet should have these
223 // properties:
224 // - Any bloom filter miss should be a prefix set miss.
225 // - Any prefix set hit should be a bloom filter hit.
226 // - Bloom filter false positives are prefix set misses.
227 // The following is to log actual performance to verify this.
// Buckets for the "SB2.PrefixSetEvent" histogram written by
// RecordPrefixSetInfo().
enum PrefixSetEvent {
  // ContainsBrowseUrl(): prefix found in both bloom filter and prefix set.
  PREFIX_SET_EVENT_HIT,
  // ContainsBrowseUrl(): prefix found in the bloom filter.
  PREFIX_SET_EVENT_BLOOM_HIT,
  // ContainsBrowseUrl(): bloom filter miss, prefix set hit, and the
  // prefix is present in the list returned by GetPrefixes().
  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
  // ContainsBrowseUrl(): bloom filter miss, prefix set hit, but the
  // prefix is absent from the list returned by GetPrefixes().
  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
  // PrefixSetFromAddPrefixes(): GetPrefixes() did not reproduce the input.
  PREFIX_SET_GETPREFIXES_BROKEN,
  // PrefixSetFromAddPrefixes(): ... and the sizes differed.
  PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
  // PrefixSetFromAddPrefixes(): first such failure seen by this process.
  PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
  // PrefixSetFromAddPrefixes(): sizeof(int) != sizeof(int32).
  PREFIX_SET_SBPREFIX_WAS_BROKEN,
  // PrefixSetFromAddPrefixes(): restored prefixes were out of order.
  PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
  // PrefixSetFromAddPrefixes(): restored prefixes contained duplicates.
  PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
  // PrefixSetFromAddPrefixes(): IsDeltaAt() was true at an unsorted item.
  PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
  // PrefixSetFromAddPrefixes(): IsDeltaAt() was false at an unsorted item.
  PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
  // PrefixSetFromAddPrefixes(): CheckChecksum() failed.
  PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,

  // Memory space for histograms is determined by the max.  ALWAYS ADD
  // NEW VALUES BEFORE THIS ONE.
  PREFIX_SET_EVENT_MAX
};
247 
// Logs |event_type| to the "SB2.PrefixSetEvent" enumeration histogram,
// whose boundary value is PREFIX_SET_EVENT_MAX.
void RecordPrefixSetInfo(PrefixSetEvent event_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
                            PREFIX_SET_EVENT_MAX);
}
252 
253 // Generate a |PrefixSet| instance from the contents of
254 // |add_prefixes|.  Additionally performs various checks to make sure
255 // that the resulting prefix set is valid, so that the
256 // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
257 // ContainsBrowseUrl() can be trustworthy.
PrefixSetFromAddPrefixes(const std::vector<SBAddPrefix> & add_prefixes)258 safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
259     const std::vector<SBAddPrefix>& add_prefixes) {
260   // TODO(shess): If |add_prefixes| were sorted by the prefix, it
261   // could be passed directly to |PrefixSet()|, removing the need for
262   // |prefixes|.  For now, |prefixes| is useful while debugging
263   // things.
264   std::vector<SBPrefix> prefixes;
265   for (size_t i = 0; i < add_prefixes.size(); ++i) {
266     prefixes.push_back(add_prefixes[i].prefix);
267   }
268 
269   std::sort(prefixes.begin(), prefixes.end());
270   prefixes.erase(std::unique(prefixes.begin(), prefixes.end()),
271                  prefixes.end());
272 
273   scoped_ptr<safe_browsing::PrefixSet>
274       prefix_set(new safe_browsing::PrefixSet(prefixes));
275 
276   std::vector<SBPrefix> restored;
277   prefix_set->GetPrefixes(&restored);
278 
279   // Expect them to be equal.
280   if (restored.size() == prefixes.size() &&
281       std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
282     return prefix_set.release();
283 
284   // Log BROKEN for continuity with previous release, and SIZE to
285   // distinguish which test failed.
286   NOTREACHED();
287   RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
288   if (restored.size() != prefixes.size())
289     RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);
290 
291   // Try to distinguish between updates from one broken user and a
292   // distributed problem.
293   static bool logged_broken = false;
294   if (!logged_broken) {
295     RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
296     logged_broken = true;
297   }
298 
299   // This seems so very very unlikely.  But if it ever were true, then
300   // it could explain why GetPrefixes() seemed broken.
301   if (sizeof(int) != sizeof(int32))
302     RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);
303 
304   // Check if memory was corrupted during construction.
305   if (!prefix_set->CheckChecksum())
306     RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);
307 
308   // Check whether |restored| is unsorted, or has duplication.
309   if (restored.size()) {
310     size_t unsorted_count = 0;
311     bool duplicates = false;
312     SBPrefix prev = restored[0];
313     for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
314       if (prev > restored[i]) {
315         unsorted_count++;
316         UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
317                              prev - restored[i]);
318 
319         // When unsorted, how big is the set, and how far are we into
320         // it.  If the set is very small or large, that might inform
321         // pursuit of a degenerate case.  If the percentage is close
322         // to 0%, 100%, or 50%, then there might be an interesting
323         // degenerate case to explore.
324         UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
325         UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
326                                  i * 100 / restored.size());
327 
328         if (prefix_set->IsDeltaAt(i)) {
329           RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);
330 
331           // Histograms require memory on the order of the number of
332           // buckets, making high-precision logging expensive.  For
333           // now aim for a sense of the range of the problem.
334           UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
335                                       prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
336         } else {
337           RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
338         }
339       }
340       if (prev == restored[i])
341         duplicates = true;
342     }
343 
344     // Record findings.
345     if (unsorted_count) {
346       RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
347       UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
348     }
349     if (duplicates)
350       RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);
351 
352     // Fix the problems noted.  If |restored| was unsorted, then
353     // |duplicates| may give a false negative.
354     if (unsorted_count)
355       std::sort(restored.begin(), restored.end());
356     if (unsorted_count || duplicates)
357       restored.erase(std::unique(restored.begin(), restored.end()),
358                      restored.end());
359   }
360 
361   // NOTE(shess): The following could be done using a single
362   // uber-loop, but it's complicated by needing multiple parallel
363   // iterators.  Didn't seem worthwhile for something that will only
364   // live for a short period and only fires for one in a million
365   // updates.
366 
367   // Find elements in |restored| which are not in |prefixes|.
368   std::vector<SBPrefix> difference;
369   std::set_difference(restored.begin(), restored.end(),
370                       prefixes.begin(), prefixes.end(),
371                       std::back_inserter(difference));
372   if (difference.size())
373     UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());
374 
375   // Find elements in |prefixes| which are not in |restored|.
376   difference.clear();
377   std::set_difference(prefixes.begin(), prefixes.end(),
378                       restored.begin(), restored.end(),
379                       std::back_inserter(difference));
380   if (difference.size())
381     UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
382                              difference.size());
383 
384   return prefix_set.release();
385 }
386 
387 }  // namespace
388 
389 // The default SafeBrowsingDatabaseFactory.
390 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
391  public:
CreateSafeBrowsingDatabase(bool enable_download_protection,bool enable_client_side_whitelist)392   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
393       bool enable_download_protection,
394       bool enable_client_side_whitelist) {
395     return new SafeBrowsingDatabaseNew(
396         new SafeBrowsingStoreFile,
397         enable_download_protection ? new SafeBrowsingStoreFile : NULL,
398         enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL);
399   }
400 
SafeBrowsingDatabaseFactoryImpl()401   SafeBrowsingDatabaseFactoryImpl() { }
402 
403  private:
404   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
405 };
406 
407 // static
408 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
409 
410 // Factory method, non-thread safe. Caller has to make sure this is called
411 // on SafeBrowsing Thread.
412 // TODO(shess): There's no need for a factory any longer.  Convert
413 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
414 // callers just construct things directly.
Create(bool enable_download_protection,bool enable_client_side_whitelist)415 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
416     bool enable_download_protection,
417     bool enable_client_side_whitelist) {
418   if (!factory_)
419     factory_ = new SafeBrowsingDatabaseFactoryImpl();
420   return factory_->CreateSafeBrowsingDatabase(enable_download_protection,
421                                               enable_client_side_whitelist);
422 }
423 
// Out-of-line destructor; the body is intentionally empty.
SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}
426 
427 // static
// Returns |db_base_filename| with the browse-store suffix appended.
FilePath SafeBrowsingDatabase::BrowseDBFilename(
    const FilePath& db_base_filename) {
  FilePath::StringType browse_name(db_base_filename.value());
  browse_name += kBrowseDBFile;
  return FilePath(browse_name);
}
432 
433 // static
// Returns |db_base_filename| with the download-store suffix appended.
FilePath SafeBrowsingDatabase::DownloadDBFilename(
    const FilePath& db_base_filename) {
  FilePath::StringType download_name(db_base_filename.value());
  download_name += kDownloadDBFile;
  return FilePath(download_name);
}
438 
439 // static
// Returns |db_filename| with the bloom-filter suffix appended.
FilePath SafeBrowsingDatabase::BloomFilterForFilename(
    const FilePath& db_filename) {
  FilePath::StringType filter_name(db_filename.value());
  filter_name += kBloomFilterFile;
  return FilePath(filter_name);
}
444 
445 // static
// Returns |db_filename| with the csd-whitelist-store suffix appended.
FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
    const FilePath& db_filename) {
  FilePath::StringType whitelist_name(db_filename.value());
  whitelist_name += kCsdWhitelistDBFile;
  return FilePath(whitelist_name);
}
450 
GetStore(const int list_id)451 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
452   DVLOG(3) << "Get store for list: " << list_id;
453   if (list_id == safe_browsing_util::PHISH ||
454       list_id == safe_browsing_util::MALWARE) {
455     return browse_store_.get();
456   } else if (list_id == safe_browsing_util::BINURL ||
457              list_id == safe_browsing_util::BINHASH) {
458     return download_store_.get();
459   } else if (list_id == safe_browsing_util::CSDWHITELIST) {
460     return csd_whitelist_store_.get();
461   }
462   return NULL;
463 }
464 
465 // static
// Logs |failure_type| to the "SB2.DatabaseFailure" enumeration
// histogram, whose boundary value is FAILURE_DATABASE_MAX.
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
                            FAILURE_DATABASE_MAX);
}
470 
SafeBrowsingDatabaseNew()471 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
472     : creation_loop_(MessageLoop::current()),
473       browse_store_(new SafeBrowsingStoreFile),
474       download_store_(NULL),
475       csd_whitelist_store_(NULL),
476       ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) {
477   DCHECK(browse_store_.get());
478   DCHECK(!download_store_.get());
479   DCHECK(!csd_whitelist_store_.get());
480 }
481 
// Constructs a database around caller-supplied stores.  |browse_store|
// must be non-NULL (DCHECKed); |download_store| and
// |csd_whitelist_store| may be NULL (see the factory above, which
// passes NULL when the corresponding feature is disabled).  The stores
// are held in members accessed via .get() elsewhere in this file.
// Records the current message loop as |creation_loop_|.
SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
    SafeBrowsingStore* browse_store,
    SafeBrowsingStore* download_store,
    SafeBrowsingStore* csd_whitelist_store)
    : creation_loop_(MessageLoop::current()),
      browse_store_(browse_store),
      download_store_(download_store),
      csd_whitelist_store_(csd_whitelist_store),
      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
      corruption_detected_(false) {
  DCHECK(browse_store_.get());
}
494 
// The database must be destroyed on the same message loop that
// constructed it.
SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
}
498 
Init(const FilePath & filename_base)499 void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
500   DCHECK_EQ(creation_loop_, MessageLoop::current());
501   // Ensure we haven't been run before.
502   DCHECK(browse_filename_.empty());
503   DCHECK(download_filename_.empty());
504   DCHECK(csd_whitelist_filename_.empty());
505 
506   browse_filename_ = BrowseDBFilename(filename_base);
507   bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);
508 
509   browse_store_->Init(
510       browse_filename_,
511       NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
512   DVLOG(1) << "Init browse store: " << browse_filename_.value();
513 
514   {
515     // NOTE: There is no need to grab the lock in this function, since
516     // until it returns, there are no pointers to this class on other
517     // threads.  Then again, that means there is no possibility of
518     // contention on the lock...
519     base::AutoLock locked(lookup_lock_);
520     full_browse_hashes_.clear();
521     pending_browse_hashes_.clear();
522     LoadBloomFilter();
523   }
524 
525   if (download_store_.get()) {
526     download_filename_ = DownloadDBFilename(filename_base);
527     download_store_->Init(
528         download_filename_,
529         NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
530     DVLOG(1) << "Init download store: " << download_filename_.value();
531   }
532 
533   if (csd_whitelist_store_.get()) {
534     csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
535     csd_whitelist_store_->Init(
536         csd_whitelist_filename_,
537         NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
538     DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
539     std::vector<SBAddFullHash> full_hashes;
540     if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
541       LoadCsdWhitelist(full_hashes);
542     } else {
543       CsdWhitelistAllUrls();
544     }
545   } else {
546     CsdWhitelistAllUrls();  // Just to be safe.
547   }
548 }
549 
ResetDatabase()550 bool SafeBrowsingDatabaseNew::ResetDatabase() {
551   DCHECK_EQ(creation_loop_, MessageLoop::current());
552 
553   // Delete files on disk.
554   // TODO(shess): Hard to see where one might want to delete without a
555   // reset.  Perhaps inline |Delete()|?
556   if (!Delete())
557     return false;
558 
559   // Reset objects in memory.
560   {
561     base::AutoLock locked(lookup_lock_);
562     full_browse_hashes_.clear();
563     pending_browse_hashes_.clear();
564     prefix_miss_cache_.clear();
565     // TODO(shess): This could probably be |bloom_filter_.reset()|.
566     browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
567                                            BloomFilter::kBloomFilterSizeRatio);
568     // TODO(shess): It is simpler for the code to assume that presence
569     // of a bloom filter always implies presence of a prefix set.
570     prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
571   }
572   // Wants to acquire the lock itself.
573   CsdWhitelistAllUrls();
574 
575   return true;
576 }
577 
578 // TODO(lzheng): Remove matching_list, it is not used anywhere.
ContainsBrowseUrl(const GURL & url,std::string * matching_list,std::vector<SBPrefix> * prefix_hits,std::vector<SBFullHashResult> * full_hits,base::Time last_update)579 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
580     const GURL& url,
581     std::string* matching_list,
582     std::vector<SBPrefix>* prefix_hits,
583     std::vector<SBFullHashResult>* full_hits,
584     base::Time last_update) {
585   // Clear the results first.
586   matching_list->clear();
587   prefix_hits->clear();
588   full_hits->clear();
589 
590   std::vector<SBFullHash> full_hashes;
591   BrowseFullHashesToCheck(url, false, &full_hashes);
592   if (full_hashes.empty())
593     return false;
594 
595   // This function is called on the I/O thread, prevent changes to
596   // bloom filter and caches.
597   base::AutoLock locked(lookup_lock_);
598 
599   if (!browse_bloom_filter_.get())
600     return false;
601   DCHECK(prefix_set_.get());
602 
603   // Used to double-check in case of a hit mis-match.
604   std::vector<SBPrefix> restored;
605 
606   size_t miss_count = 0;
607   for (size_t i = 0; i < full_hashes.size(); ++i) {
608     bool found = prefix_set_->Exists(full_hashes[i].prefix);
609 
610     if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
611       RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
612       if (found)
613         RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
614       prefix_hits->push_back(full_hashes[i].prefix);
615       if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
616         ++miss_count;
617     } else {
618       // Bloom filter misses should never be in prefix set.  Re-create
619       // the original prefixes and manually search for it, to check if
620       // there's a bug with how |Exists()| is implemented.
621       // |UpdateBrowseStore()| previously verified that
622       // |GetPrefixes()| returns the same prefixes as were passed to
623       // the constructor.
624       DCHECK(!found);
625       if (found) {
626         if (restored.empty())
627           prefix_set_->GetPrefixes(&restored);
628 
629         // If the item is not in the re-created list, then there is an
630         // error in |PrefixSet::Exists()|.  If the item is in the
631         // re-created list, then the bloom filter was wrong.
632         if (std::binary_search(restored.begin(), restored.end(),
633                                full_hashes[i].prefix)) {
634           RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
635         } else {
636           RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
637         }
638       }
639     }
640   }
641 
642   // If all the prefixes are cached as 'misses', don't issue a GetHash.
643   if (miss_count == prefix_hits->size())
644     return false;
645 
646   // Find the matching full-hash results.  |full_browse_hashes_| are from the
647   // database, |pending_browse_hashes_| are from GetHash requests between
648   // updates.
649   std::sort(prefix_hits->begin(), prefix_hits->end());
650 
651   GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
652                                full_hits, last_update);
653   GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
654                                full_hits, last_update);
655   return true;
656 }
657 
MatchDownloadAddPrefixes(int list_bit,const std::vector<SBPrefix> & prefixes,std::vector<SBPrefix> * prefix_hits)658 bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes(
659     int list_bit,
660     const std::vector<SBPrefix>& prefixes,
661     std::vector<SBPrefix>* prefix_hits) {
662   prefix_hits->clear();
663 
664   std::vector<SBAddPrefix> add_prefixes;
665   download_store_->GetAddPrefixes(&add_prefixes);
666   for (size_t i = 0; i < add_prefixes.size(); ++i) {
667     for (size_t j = 0; j < prefixes.size(); ++j) {
668       const SBPrefix& prefix = prefixes[j];
669       if (prefix == add_prefixes[i].prefix &&
670           GetListIdBit(add_prefixes[i].chunk_id) == list_bit) {
671         prefix_hits->push_back(prefix);
672       }
673     }
674   }
675   return !prefix_hits->empty();
676 }
677 
ContainsDownloadUrl(const std::vector<GURL> & urls,std::vector<SBPrefix> * prefix_hits)678 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
679     const std::vector<GURL>& urls,
680     std::vector<SBPrefix>* prefix_hits) {
681   DCHECK_EQ(creation_loop_, MessageLoop::current());
682 
683   // Ignore this check when download checking is not enabled.
684   if (!download_store_.get())
685     return false;
686 
687   std::vector<SBPrefix> prefixes;
688   GetDownloadUrlPrefixes(urls, &prefixes);
689   return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2,
690                                   prefixes,
691                                   prefix_hits);
692 }
693 
ContainsDownloadHashPrefix(const SBPrefix & prefix)694 bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
695     const SBPrefix& prefix) {
696   DCHECK_EQ(creation_loop_, MessageLoop::current());
697 
698   // Ignore this check when download store is not available.
699   if (!download_store_.get())
700     return false;
701 
702   std::vector<SBPrefix> prefixes(1, prefix);
703   std::vector<SBPrefix> prefix_hits;
704   return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2,
705                                   prefixes,
706                                   &prefix_hits);
707 }
708 
ContainsCsdWhitelistedUrl(const GURL & url)709 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
710   // This method is theoretically thread-safe but we expect all calls to
711   // originate from the IO thread.
712   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
713   base::AutoLock l(lookup_lock_);
714   if (csd_whitelist_all_urls_)
715     return true;
716 
717   std::vector<SBFullHash> full_hashes;
718   BrowseFullHashesToCheck(url, true, &full_hashes);
719   for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin();
720        it != full_hashes.end(); ++it) {
721     if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it))
722       return true;
723   }
724   return false;
725 }
726 
727 // Helper to insert entries for all of the prefixes or full hashes in
728 // |entry| into the store.
InsertAdd(int chunk_id,SBPrefix host,const SBEntry * entry,int list_id)729 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
730                                         const SBEntry* entry, int list_id) {
731   DCHECK_EQ(creation_loop_, MessageLoop::current());
732 
733   SafeBrowsingStore* store = GetStore(list_id);
734   if (!store) return;
735 
736   STATS_COUNTER("SB.HostInsert", 1);
737   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
738   const int count = entry->prefix_count();
739 
740   DCHECK(!entry->IsSub());
741   if (!count) {
742     // No prefixes, use host instead.
743     STATS_COUNTER("SB.PrefixAdd", 1);
744     store->WriteAddPrefix(encoded_chunk_id, host);
745   } else if (entry->IsPrefix()) {
746     // Prefixes only.
747     for (int i = 0; i < count; i++) {
748       const SBPrefix prefix = entry->PrefixAt(i);
749       STATS_COUNTER("SB.PrefixAdd", 1);
750       store->WriteAddPrefix(encoded_chunk_id, prefix);
751     }
752   } else {
753     // Prefixes and hashes.
754     const base::Time receive_time = base::Time::Now();
755     for (int i = 0; i < count; ++i) {
756       const SBFullHash full_hash = entry->FullHashAt(i);
757       const SBPrefix prefix = full_hash.prefix;
758 
759       STATS_COUNTER("SB.PrefixAdd", 1);
760       store->WriteAddPrefix(encoded_chunk_id, prefix);
761 
762       STATS_COUNTER("SB.PrefixAddFull", 1);
763       store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
764     }
765   }
766 }
767 
768 // Helper to iterate over all the entries in the hosts in |chunks| and
769 // add them to the store.
InsertAddChunks(const int list_id,const SBChunkList & chunks)770 void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id,
771                                               const SBChunkList& chunks) {
772   DCHECK_EQ(creation_loop_, MessageLoop::current());
773 
774   SafeBrowsingStore* store = GetStore(list_id);
775   if (!store) return;
776 
777   for (SBChunkList::const_iterator citer = chunks.begin();
778        citer != chunks.end(); ++citer) {
779     const int chunk_id = citer->chunk_number;
780 
781     // The server can give us a chunk that we already have because
782     // it's part of a range.  Don't add it again.
783     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
784     if (store->CheckAddChunk(encoded_chunk_id))
785       continue;
786 
787     store->SetAddChunk(encoded_chunk_id);
788     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
789          hiter != citer->hosts.end(); ++hiter) {
790       // NOTE: Could pass |encoded_chunk_id|, but then inserting add
791       // chunks would look different from inserting sub chunks.
792       InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
793     }
794   }
795 }
796 
797 // Helper to insert entries for all of the prefixes or full hashes in
798 // |entry| into the store.
InsertSub(int chunk_id,SBPrefix host,const SBEntry * entry,int list_id)799 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
800                                         const SBEntry* entry, int list_id) {
801   DCHECK_EQ(creation_loop_, MessageLoop::current());
802 
803   SafeBrowsingStore* store = GetStore(list_id);
804   if (!store) return;
805 
806   STATS_COUNTER("SB.HostDelete", 1);
807   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
808   const int count = entry->prefix_count();
809 
810   DCHECK(entry->IsSub());
811   if (!count) {
812     // No prefixes, use host instead.
813     STATS_COUNTER("SB.PrefixSub", 1);
814     const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
815     store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
816   } else if (entry->IsPrefix()) {
817     // Prefixes only.
818     for (int i = 0; i < count; i++) {
819       const SBPrefix prefix = entry->PrefixAt(i);
820       const int add_chunk_id =
821           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
822 
823       STATS_COUNTER("SB.PrefixSub", 1);
824       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
825     }
826   } else {
827     // Prefixes and hashes.
828     for (int i = 0; i < count; ++i) {
829       const SBFullHash full_hash = entry->FullHashAt(i);
830       const int add_chunk_id =
831           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
832 
833       STATS_COUNTER("SB.PrefixSub", 1);
834       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
835 
836       STATS_COUNTER("SB.PrefixSubFull", 1);
837       store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
838     }
839   }
840 }
841 
842 // Helper to iterate over all the entries in the hosts in |chunks| and
843 // add them to the store.
InsertSubChunks(int list_id,const SBChunkList & chunks)844 void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id,
845                                               const SBChunkList& chunks) {
846   DCHECK_EQ(creation_loop_, MessageLoop::current());
847 
848   SafeBrowsingStore* store = GetStore(list_id);
849   if (!store) return;
850 
851   for (SBChunkList::const_iterator citer = chunks.begin();
852        citer != chunks.end(); ++citer) {
853     const int chunk_id = citer->chunk_number;
854 
855     // The server can give us a chunk that we already have because
856     // it's part of a range.  Don't add it again.
857     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
858     if (store->CheckSubChunk(encoded_chunk_id))
859       continue;
860 
861     store->SetSubChunk(encoded_chunk_id);
862     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
863          hiter != citer->hosts.end(); ++hiter) {
864       InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
865     }
866   }
867 }
868 
InsertChunks(const std::string & list_name,const SBChunkList & chunks)869 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
870                                            const SBChunkList& chunks) {
871   DCHECK_EQ(creation_loop_, MessageLoop::current());
872 
873   if (corruption_detected_ || chunks.empty())
874     return;
875 
876   const base::Time insert_start = base::Time::Now();
877 
878   const int list_id = safe_browsing_util::GetListId(list_name);
879   DVLOG(2) << list_name << ": " << list_id;
880 
881   SafeBrowsingStore* store = GetStore(list_id);
882   if (!store) return;
883 
884   change_detected_ = true;
885 
886   store->BeginChunk();
887   if (chunks.front().is_add) {
888     InsertAddChunks(list_id, chunks);
889   } else {
890     InsertSubChunks(list_id, chunks);
891   }
892   store->FinishChunk();
893 
894   UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
895 }
896 
DeleteChunks(const std::vector<SBChunkDelete> & chunk_deletes)897 void SafeBrowsingDatabaseNew::DeleteChunks(
898     const std::vector<SBChunkDelete>& chunk_deletes) {
899   DCHECK_EQ(creation_loop_, MessageLoop::current());
900 
901   if (corruption_detected_ || chunk_deletes.empty())
902     return;
903 
904   const std::string& list_name = chunk_deletes.front().list_name;
905   const int list_id = safe_browsing_util::GetListId(list_name);
906 
907   SafeBrowsingStore* store = GetStore(list_id);
908   if (!store) return;
909 
910   change_detected_ = true;
911 
912   for (size_t i = 0; i < chunk_deletes.size(); ++i) {
913     std::vector<int> chunk_numbers;
914     RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
915     for (size_t j = 0; j < chunk_numbers.size(); ++j) {
916       const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
917       if (chunk_deletes[i].is_sub_del)
918         store->DeleteSubChunk(encoded_chunk_id);
919       else
920         store->DeleteAddChunk(encoded_chunk_id);
921     }
922   }
923 }
924 
CacheHashResults(const std::vector<SBPrefix> & prefixes,const std::vector<SBFullHashResult> & full_hits)925 void SafeBrowsingDatabaseNew::CacheHashResults(
926     const std::vector<SBPrefix>& prefixes,
927     const std::vector<SBFullHashResult>& full_hits) {
928   // This is called on the I/O thread, lock against updates.
929   base::AutoLock locked(lookup_lock_);
930 
931   if (full_hits.empty()) {
932     prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
933     return;
934   }
935 
936   // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
937   // Refactor to make them identical.
938   const base::Time now = base::Time::Now();
939   const size_t orig_size = pending_browse_hashes_.size();
940   for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
941        iter != full_hits.end(); ++iter) {
942     const int list_id = safe_browsing_util::GetListId(iter->list_name);
943     if (list_id == safe_browsing_util::MALWARE ||
944         list_id == safe_browsing_util::PHISH) {
945       int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
946       SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
947       pending_browse_hashes_.push_back(add_full_hash);
948     }
949   }
950 
951   // Sort new entries then merge with the previously-sorted entries.
952   std::vector<SBAddFullHash>::iterator
953       orig_end = pending_browse_hashes_.begin() + orig_size;
954   std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
955   std::inplace_merge(pending_browse_hashes_.begin(),
956                      orig_end, pending_browse_hashes_.end(),
957                      SBAddFullHashPrefixLess);
958 }
959 
UpdateStarted(std::vector<SBListChunkRanges> * lists)960 bool SafeBrowsingDatabaseNew::UpdateStarted(
961     std::vector<SBListChunkRanges>* lists) {
962   DCHECK_EQ(creation_loop_, MessageLoop::current());
963   DCHECK(lists);
964 
965   // If |BeginUpdate()| fails, reset the database.
966   if (!browse_store_->BeginUpdate()) {
967     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
968     HandleCorruptDatabase();
969     return false;
970   }
971 
972   if (download_store_.get() && !download_store_->BeginUpdate()) {
973     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
974     HandleCorruptDatabase();
975     return false;
976   }
977 
978   if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
979     RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN);
980     HandleCorruptDatabase();
981     return false;
982   }
983 
984   std::vector<std::string> browse_listnames;
985   browse_listnames.push_back(safe_browsing_util::kMalwareList);
986   browse_listnames.push_back(safe_browsing_util::kPhishingList);
987   UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
988 
989   if (download_store_.get()) {
990     std::vector<std::string> download_listnames;
991     download_listnames.push_back(safe_browsing_util::kBinUrlList);
992     download_listnames.push_back(safe_browsing_util::kBinHashList);
993     UpdateChunkRanges(download_store_.get(), download_listnames, lists);
994   }
995 
996   if (csd_whitelist_store_.get()) {
997     std::vector<std::string> csd_whitelist_listnames;
998     csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
999     UpdateChunkRanges(csd_whitelist_store_.get(),
1000                       csd_whitelist_listnames, lists);
1001   }
1002 
1003   corruption_detected_ = false;
1004   change_detected_ = false;
1005   return true;
1006 }
1007 
UpdateFinished(bool update_succeeded)1008 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
1009   DCHECK_EQ(creation_loop_, MessageLoop::current());
1010   if (corruption_detected_)
1011     return;
1012 
1013   // Unroll the transaction if there was a protocol error or if the
1014   // transaction was empty.  This will leave the bloom filter, the
1015   // pending hashes, and the prefix miss cache in place.
1016   if (!update_succeeded || !change_detected_) {
1017     // Track empty updates to answer questions at http://crbug.com/72216 .
1018     if (update_succeeded && !change_detected_)
1019       UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
1020     browse_store_->CancelUpdate();
1021     if (download_store_.get())
1022       download_store_->CancelUpdate();
1023     if (csd_whitelist_store_.get())
1024       csd_whitelist_store_->CancelUpdate();
1025     return;
1026   }
1027 
1028   // for download
1029   UpdateDownloadStore();
1030   // for browsing
1031   UpdateBrowseStore();
1032   // for csd whitelist
1033   UpdateCsdWhitelistStore();
1034 }
1035 
UpdateCsdWhitelistStore()1036 void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() {
1037   if (!csd_whitelist_store_.get())
1038     return;
1039 
1040   // For the csd whitelist, we don't cache and save full hashes since all
1041   // hashes are already full.
1042   std::vector<SBAddFullHash> empty_add_hashes;
1043 
1044   // Not needed for the csd whitelist.
1045   std::set<SBPrefix> empty_miss_cache;
1046 
1047   // Note: prefixes will not be empty.  The current data store implementation
1048   // stores all full-length hashes as both full and prefix hashes.
1049   std::vector<SBAddPrefix> prefixes;
1050   std::vector<SBAddFullHash> full_hashes;
1051   if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes,
1052                                           empty_miss_cache,
1053                                           &prefixes,
1054                                           &full_hashes)) {
1055     RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH);
1056     CsdWhitelistAllUrls();
1057     return;
1058   }
1059   LoadCsdWhitelist(full_hashes);
1060 }
1061 
UpdateDownloadStore()1062 void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
1063   if (!download_store_.get())
1064     return;
1065 
1066   // For download, we don't cache and save full hashes.
1067   std::vector<SBAddFullHash> empty_add_hashes;
1068 
1069   // For download, backend lookup happens only if a prefix is in add list.
1070   // No need to pass in miss cache when call FinishUpdate to caculate
1071   // bloomfilter false positives.
1072   std::set<SBPrefix> empty_miss_cache;
1073 
1074   // These results are not used after this call. Simply ignore the
1075   // returned value after FinishUpdate(...).
1076   std::vector<SBAddPrefix> add_prefixes_result;
1077   std::vector<SBAddFullHash> add_full_hashes_result;
1078 
1079   if (!download_store_->FinishUpdate(empty_add_hashes,
1080                                      empty_miss_cache,
1081                                      &add_prefixes_result,
1082                                      &add_full_hashes_result))
1083     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1084   return;
1085 }
1086 
UpdateBrowseStore()1087 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1088   // Copy out the pending add hashes.  Copy rather than swapping in
1089   // case |ContainsBrowseURL()| is called before the new filter is complete.
1090   std::vector<SBAddFullHash> pending_add_hashes;
1091   {
1092     base::AutoLock locked(lookup_lock_);
1093     pending_add_hashes.insert(pending_add_hashes.end(),
1094                               pending_browse_hashes_.begin(),
1095                               pending_browse_hashes_.end());
1096   }
1097 
1098   // Measure the amount of IO during the bloom filter build.
1099   base::IoCounters io_before, io_after;
1100   base::ProcessHandle handle = base::Process::Current().handle();
1101   scoped_ptr<base::ProcessMetrics> metric(
1102 #if !defined(OS_MACOSX)
1103       base::ProcessMetrics::CreateProcessMetrics(handle)
1104 #else
1105       // Getting stats only for the current process is enough, so NULL is fine.
1106       base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1107 #endif
1108   );
1109 
1110   // IoCounters are currently not supported on Mac, and may not be
1111   // available for Linux, so we check the result and only show IO
1112   // stats if they are available.
1113   const bool got_counters = metric->GetIOCounters(&io_before);
1114 
1115   const base::Time before = base::Time::Now();
1116 
1117   std::vector<SBAddPrefix> add_prefixes;
1118   std::vector<SBAddFullHash> add_full_hashes;
1119   if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
1120                                    &add_prefixes, &add_full_hashes)) {
1121     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1122     return;
1123   }
1124 
1125   // Create and populate |filter| from |add_prefixes|.
1126   // TODO(shess): The bloom filter doesn't need to be a
1127   // scoped_refptr<> for this code.  Refactor that away.
1128   const int filter_size =
1129       BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
1130   scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
1131   for (size_t i = 0; i < add_prefixes.size(); ++i) {
1132     filter->Insert(add_prefixes[i].prefix);
1133   }
1134 
1135   scoped_ptr<safe_browsing::PrefixSet>
1136       prefix_set(PrefixSetFromAddPrefixes(add_prefixes));
1137 
1138   // This needs to be in sorted order by prefix for efficient access.
1139   std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1140             SBAddFullHashPrefixLess);
1141 
1142   // Swap in the newly built filter and cache.
1143   {
1144     base::AutoLock locked(lookup_lock_);
1145     full_browse_hashes_.swap(add_full_hashes);
1146 
1147     // TODO(shess): If |CacheHashResults()| is posted between the
1148     // earlier lock and this clear, those pending hashes will be lost.
1149     // It could be fixed by only removing hashes which were collected
1150     // at the earlier point.  I believe that is fail-safe as-is (the
1151     // hash will be fetched again).
1152     pending_browse_hashes_.clear();
1153     prefix_miss_cache_.clear();
1154     browse_bloom_filter_.swap(filter);
1155     prefix_set_.swap(prefix_set);
1156   }
1157 
1158   const base::TimeDelta bloom_gen = base::Time::Now() - before;
1159 
1160   // Persist the bloom filter to disk.  Since only this thread changes
1161   // |browse_bloom_filter_|, there is no need to lock.
1162   WriteBloomFilter();
1163 
1164   // Gather statistics.
1165   if (got_counters && metric->GetIOCounters(&io_after)) {
1166     UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1167                          static_cast<int>(io_after.ReadTransferCount -
1168                                           io_before.ReadTransferCount) / 1024);
1169     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1170                          static_cast<int>(io_after.WriteTransferCount -
1171                                           io_before.WriteTransferCount) / 1024);
1172     UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1173                          static_cast<int>(io_after.ReadOperationCount -
1174                                           io_before.ReadOperationCount));
1175     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1176                          static_cast<int>(io_after.WriteOperationCount -
1177                                           io_before.WriteOperationCount));
1178   }
1179   DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
1180            << bloom_gen.InMilliseconds() << " ms total.  prefix count: "
1181            << add_prefixes.size();
1182   UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
1183   UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
1184                        browse_bloom_filter_->size() / 1024);
1185   int64 size_64;
1186   if (file_util::GetFileSize(browse_filename_, &size_64))
1187     UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1188                          static_cast<int>(size_64 / 1024));
1189   if (file_util::GetFileSize(download_filename_, &size_64))
1190     UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1191                          static_cast<int>(size_64 / 1024));
1192 }
1193 
HandleCorruptDatabase()1194 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1195   // Reset the database after the current task has unwound (but only
1196   // reset once within the scope of a given task).
1197   if (reset_factory_.empty()) {
1198     RecordFailure(FAILURE_DATABASE_CORRUPT);
1199     MessageLoop::current()->PostTask(FROM_HERE,
1200         reset_factory_.NewRunnableMethod(
1201             &SafeBrowsingDatabaseNew::OnHandleCorruptDatabase));
1202   }
1203 }
1204 
OnHandleCorruptDatabase()1205 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1206   RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1207   corruption_detected_ = true;  // Stop updating the database.
1208   ResetDatabase();
1209   DCHECK(false) << "SafeBrowsing database was corrupt and reset";
1210 }
1211 
1212 // TODO(shess): I'm not clear why this code doesn't have any
1213 // real error-handling.
LoadBloomFilter()1214 void SafeBrowsingDatabaseNew::LoadBloomFilter() {
1215   DCHECK_EQ(creation_loop_, MessageLoop::current());
1216   DCHECK(!bloom_filter_filename_.empty());
1217 
1218   // If we're missing either of the database or filter files, we wait until the
1219   // next update to generate a new filter.
1220   // TODO(paulg): Investigate how often the filter file is missing and how
1221   // expensive it would be to regenerate it.
1222   int64 size_64;
1223   if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
1224     return;
1225 
1226   if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
1227       size_64 == 0) {
1228     RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
1229     return;
1230   }
1231 
1232   const base::TimeTicks before = base::TimeTicks::Now();
1233   browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
1234   DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
1235            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1236 
1237   if (!browse_bloom_filter_.get())
1238     RecordFailure(FAILURE_DATABASE_FILTER_READ);
1239 
1240   // Manually re-generate the prefix set from the main database.
1241   // TODO(shess): Write/read for prefix set.
1242   std::vector<SBAddPrefix> add_prefixes;
1243   browse_store_->GetAddPrefixes(&add_prefixes);
1244   prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes));
1245 }
1246 
Delete()1247 bool SafeBrowsingDatabaseNew::Delete() {
1248   DCHECK_EQ(creation_loop_, MessageLoop::current());
1249 
1250   const bool r1 = browse_store_->Delete();
1251   if (!r1)
1252     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1253 
1254   const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1255   if (!r2)
1256     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1257 
1258   const bool r3 = csd_whitelist_store_.get() ?
1259       csd_whitelist_store_->Delete() : true;
1260   if (!r3)
1261     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1262 
1263   const bool r4 = file_util::Delete(bloom_filter_filename_, false);
1264   if (!r4)
1265     RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1266   return r1 && r2 && r3 && r4;
1267 }
1268 
WriteBloomFilter()1269 void SafeBrowsingDatabaseNew::WriteBloomFilter() {
1270   DCHECK_EQ(creation_loop_, MessageLoop::current());
1271 
1272   if (!browse_bloom_filter_.get())
1273     return;
1274 
1275   const base::TimeTicks before = base::TimeTicks::Now();
1276   const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
1277   DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
1278            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1279 
1280   if (!write_ok)
1281     RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
1282 }
1283 
CsdWhitelistAllUrls()1284 void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() {
1285   base::AutoLock locked(lookup_lock_);
1286   csd_whitelist_all_urls_ = true;
1287   csd_whitelist_.clear();
1288 }
1289 
LoadCsdWhitelist(const std::vector<SBAddFullHash> & full_hashes)1290 void SafeBrowsingDatabaseNew::LoadCsdWhitelist(
1291     const std::vector<SBAddFullHash>& full_hashes) {
1292   DCHECK_EQ(creation_loop_, MessageLoop::current());
1293   if (full_hashes.size() > kMaxCsdWhitelistSize) {
1294     CsdWhitelistAllUrls();
1295     return;
1296   }
1297 
1298   std::vector<SBFullHash> new_csd_whitelist;
1299   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
1300        it != full_hashes.end(); ++it) {
1301     new_csd_whitelist.push_back(it->full_hash);
1302   }
1303   std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end());
1304 
1305   SBFullHash kill_switch;
1306   crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch,
1307                            sizeof(kill_switch));
1308   if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(),
1309                          kill_switch)) {
1310     // The kill switch is whitelisted hence we whitelist all URLs.
1311     CsdWhitelistAllUrls();
1312   } else {
1313     base::AutoLock locked(lookup_lock_);
1314     csd_whitelist_all_urls_ = false;
1315     csd_whitelist_.swap(new_csd_whitelist);
1316   }
1317 }
1318