• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Utilities for the SafeBrowsing code.
6 
7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9 #pragma once
10 
11 #include <cstring>
12 #include <deque>
13 #include <string>
14 #include <vector>
15 
16 #include "base/basictypes.h"
17 #include "chrome/browser/safe_browsing/chunk_range.h"
18 
19 class GURL;
20 
21 class SBEntry;
22 
23 // A truncated hash's type.
24 typedef int32 SBPrefix;
25 
// Bundles a chunk URL together with the MAC of the contents of that URL.
struct ChunkUrl {
  // The chunk URL.
  std::string url;
  // MAC of the contents found at |url|.
  std::string mac;
  // NOTE(review): presumably the name of the list the chunk belongs to --
  // confirm against the code that fills this in.
  std::string list_name;
};
32 
33 // A full hash.
34 union SBFullHash {
35   char full_hash[32];
36   SBPrefix prefix;
37 };
38 
39 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) {
40   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0;
41 }
42 
43 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) {
44   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0;
45 }
46 
47 // Container for information about a specific host in an add/sub chunk.
48 struct SBChunkHost {
49   SBPrefix host;
50   SBEntry* entry;
51 };
52 
53 // Container for an add/sub chunk.
54 struct SBChunk {
55   SBChunk();
56   ~SBChunk();
57 
58   int chunk_number;
59   int list_id;
60   bool is_add;
61   std::deque<SBChunkHost> hosts;
62 };
63 
64 // Container for a set of chunks.  Interim wrapper to replace use of
65 // |std::deque<SBChunk>| with something having safer memory semantics.
66 // management.
67 // TODO(shess): |SBEntry| is currently a very roundabout way to hold
68 // things pending storage.  It could be replaced with the structures
69 // used in SafeBrowsingStore, then lots of bridging code could
70 // dissappear.
71 class SBChunkList {
72  public:
73   SBChunkList();
74   ~SBChunkList();
75 
76   // Implement that subset of the |std::deque<>| interface which
77   // callers expect.
empty()78   bool empty() const { return chunks_.empty(); }
size()79   size_t size() { return chunks_.size(); }
80 
push_back(const SBChunk & chunk)81   void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
back()82   SBChunk& back() { return chunks_.back(); }
front()83   SBChunk& front() { return chunks_.front(); }
front()84   const SBChunk& front() const { return chunks_.front(); }
85 
86   typedef std::vector<SBChunk>::const_iterator const_iterator;
begin()87   const_iterator begin() const { return chunks_.begin(); }
end()88   const_iterator end() const { return chunks_.end(); }
89 
90   typedef std::vector<SBChunk>::iterator iterator;
begin()91   iterator begin() { return chunks_.begin(); }
end()92   iterator end() { return chunks_.end(); }
93 
94   SBChunk& operator[](size_t n) { return chunks_[n]; }
95   const SBChunk& operator[](size_t n) const { return chunks_[n]; }
96 
97   // Calls |SBEvent::Destroy()| before clearing |chunks_|.
98   void clear();
99 
100  private:
101   std::vector<SBChunk> chunks_;
102 
103   DISALLOW_COPY_AND_ASSIGN(SBChunkList);
104 };
105 
106 // Used when we get a gethash response.
107 struct SBFullHashResult {
108   SBFullHash hash;
109   std::string list_name;
110   int add_chunk_id;
111 };
112 
// Describes one list in the database: its name plus the add and sub chunk
// ranges, each kept in string form.
struct SBListChunkRanges {
  // NOTE(review): |n| is presumably stored as |name| -- the constructor body
  // is not visible in this header.
  explicit SBListChunkRanges(const std::string& n);

  // The list name.
  std::string name;
  // The ranges for add chunks.
  std::string adds;
  // The ranges for sub chunks.
  std::string subs;
};
121 
122 // Container for deleting chunks from the database.
123 struct SBChunkDelete {
124   SBChunkDelete();
125   ~SBChunkDelete();
126 
127   std::string list_name;
128   bool is_sub_del;
129   std::vector<ChunkRange> chunk_del;
130 };
131 
132 
133 // SBEntry ---------------------------------------------------------------------
134 
135 // Holds information about the prefixes for a hostkey.  prefixes can either be
136 // 4 bytes (truncated hash) or 32 bytes (full hash).
137 // For adds:
138 //   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
139 // For subs:
140 //   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
141 //       [add chunk][prefix][add chunk][prefix]
142 class SBEntry {
143  public:
144   enum Type {
145     ADD_PREFIX,     // 4 byte add entry.
146     SUB_PREFIX,     // 4 byte sub entry.
147     ADD_FULL_HASH,  // 32 byte add entry.
148     SUB_FULL_HASH,  // 32 byte sub entry.
149   };
150 
151   // Creates a SBEntry with the necessary size for the given number of prefixes.
152   // Caller ownes the object and needs to free it by calling Destroy.
153   static SBEntry* Create(Type type, int prefix_count);
154 
155   // Frees the entry's memory.
156   void Destroy();
157 
set_list_id(int list_id)158   void set_list_id(int list_id) { data_.list_id = list_id; }
list_id()159   int list_id() const { return data_.list_id; }
set_chunk_id(int chunk_id)160   void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
chunk_id()161   int chunk_id() const { return data_.chunk_id; }
prefix_count()162   int prefix_count() const { return data_.prefix_count; }
163 
164   // Returns true if this is a prefix as opposed to a full hash.
IsPrefix()165   bool IsPrefix() const {
166     return type() == ADD_PREFIX || type() == SUB_PREFIX;
167   }
168 
169   // Returns true if this is an add entry.
IsAdd()170   bool IsAdd() const {
171     return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
172   }
173 
174   // Returns true if this is a sub entry.
IsSub()175   bool IsSub() const {
176     return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
177   }
178 
179   // Helper to return the size of the prefixes.
HashLen()180   int HashLen() const {
181     return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
182   }
183 
184   // For add entries, returns the add chunk id.  For sub entries, returns the
185   // add_chunk id for the prefix at the given index.
186   int ChunkIdAtPrefix(int index) const;
187 
188   // Used for sub chunks to set the chunk id at a given index.
189   void SetChunkIdAtPrefix(int index, int chunk_id);
190 
191   // Return the prefix/full hash at the given index.  Caller is expected to
192   // call the right function based on the hash length.
193   const SBPrefix& PrefixAt(int index) const;
194   const SBFullHash& FullHashAt(int index) const;
195 
196   // Return the prefix/full hash at the given index.  Caller is expected to
197   // call the right function based on the hash length.
198   void SetPrefixAt(int index, const SBPrefix& prefix);
199   void SetFullHashAt(int index, const SBFullHash& full_hash);
200 
201  private:
202   // Container for a sub prefix.
203   struct SBSubPrefix {
204     int add_chunk;
205     SBPrefix prefix;
206   };
207 
208   // Container for a sub full hash.
209   struct SBSubFullHash {
210     int add_chunk;
211     SBFullHash prefix;
212   };
213 
214   // Keep the fixed data together in one struct so that we can get its size
215   // easily.  If any of this is modified, the database will have to be cleared.
216   struct Data {
217     int list_id;
218     // For adds, this is the add chunk number.
219     // For subs: if prefix_count is 0 then this is the add chunk that this sub
220     //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
221     //     or sub_full_hashes is used for each corresponding prefix.
222     int chunk_id;
223     Type type;
224     int prefix_count;
225   };
226 
227   SBEntry();
228   ~SBEntry();
229 
230   // Helper to return the size of each prefix entry (i.e. for subs this
231   // includes an add chunk id).
232   static int PrefixSize(Type type);
233 
234   // Helper to return how much memory a given Entry would require.
235   static int Size(Type type, int prefix_count);
236 
237   // Returns how many bytes this entry is.
238   int Size() const;
239 
type()240   Type type() const { return data_.type; }
241 
set_prefix_count(int count)242   void set_prefix_count(int count) { data_.prefix_count = count; }
set_type(Type type)243   void set_type(Type type) { data_.type = type; }
244 
245   // The prefixes union must follow the fixed data so that they're contiguous
246   // in memory.
247   Data data_;
248   union {
249     SBPrefix add_prefixes_[1];
250     SBSubPrefix sub_prefixes_[1];
251     SBFullHash add_full_hashes_[1];
252     SBSubFullHash sub_full_hashes_[1];
253   };
254 };
255 
256 
257 // Utility functions -----------------------------------------------------------
258 
259 namespace safe_browsing_util {
260 
261 // SafeBrowsing list names.
262 extern const char kMalwareList[];
263 extern const char kPhishingList[];
264 // Binary Download list names.
265 extern const char kBinUrlList[];
266 extern const char kBinHashList[];
267 // SafeBrowsing client-side detection whitelist list name.
268 extern const char kCsdWhiteList[];
269 
270 enum ListType {
271   INVALID = -1,
272   MALWARE = 0,
273   PHISH = 1,
274   BINURL = 2,
275   BINHASH = 3,
276   CSDWHITELIST = 4,
277 };
278 
279 // Maps a list name to ListType.
280 int GetListId(const std::string& name);
281 // Maps a ListId to list name. Return false if fails.
282 bool GetListName(int list_id, std::string* list);
283 
284 
285 // Canonicalizes url as per Google Safe Browsing Specification.
286 // See section 6.1 in
287 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
288 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
289                      std::string* canonicalized_path,
290                      std::string* canonicalized_query);
291 
292 // Given a URL, returns all the hosts we need to check.  They are returned
293 // in order of size (i.e. b.c is first, then a.b.c).
294 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
295 
296 // Given a URL, returns all the paths we need to check.
297 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
298 
299 int GetHashIndex(const SBFullHash& hash,
300                  const std::vector<SBFullHashResult>& full_hashes);
301 
302 // Given a URL, compare all the possible host + path full hashes to the set of
303 // provided full hashes.  Returns the index of the match if one is found, or -1
304 // otherwise.
305 int GetUrlHashIndex(const GURL& url,
306                     const std::vector<SBFullHashResult>& full_hashes);
307 
308 bool IsPhishingList(const std::string& list_name);
309 bool IsMalwareList(const std::string& list_name);
310 bool IsBadbinurlList(const std::string& list_name);
311 bool IsBadbinhashList(const std::string& list_name);
312 
313 // Returns 'true' if 'mac' can be verified using 'key' and 'data'.
314 bool VerifyMAC(const std::string& key,
315                const std::string& mac,
316                const char* data,
317                int data_length);
318 
319 GURL GeneratePhishingReportUrl(const std::string& report_page,
320                                const std::string& url_to_report);
321 
322 void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out);
323 std::string SBFullHashToString(const SBFullHash& hash_out);
324 }  // namespace safe_browsing_util
325 
326 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
327