1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Utilities for the SafeBrowsing code. 6 7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 #pragma once 10 11 #include <cstring> 12 #include <deque> 13 #include <string> 14 #include <vector> 15 16 #include "base/basictypes.h" 17 #include "chrome/browser/safe_browsing/chunk_range.h" 18 19 class GURL; 20 21 class SBEntry; 22 23 // A truncated hash's type. 24 typedef int32 SBPrefix; 25 26 // Container for holding a chunk URL and the MAC of the contents of the URL. 27 struct ChunkUrl { 28 std::string url; 29 std::string mac; 30 std::string list_name; 31 }; 32 33 // A full hash. 34 union SBFullHash { 35 char full_hash[32]; 36 SBPrefix prefix; 37 }; 38 39 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) { 40 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0; 41 } 42 43 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) { 44 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0; 45 } 46 47 // Container for information about a specific host in an add/sub chunk. 48 struct SBChunkHost { 49 SBPrefix host; 50 SBEntry* entry; 51 }; 52 53 // Container for an add/sub chunk. 54 struct SBChunk { 55 SBChunk(); 56 ~SBChunk(); 57 58 int chunk_number; 59 int list_id; 60 bool is_add; 61 std::deque<SBChunkHost> hosts; 62 }; 63 64 // Container for a set of chunks. Interim wrapper to replace use of 65 // |std::deque<SBChunk>| with something having safer memory semantics. 66 // management. 67 // TODO(shess): |SBEntry| is currently a very roundabout way to hold 68 // things pending storage. It could be replaced with the structures 69 // used in SafeBrowsingStore, then lots of bridging code could 70 // dissappear. 71 class SBChunkList { 72 public: 73 SBChunkList(); 74 ~SBChunkList(); 75 76 // Implement that subset of the |std::deque<>| interface which 77 // callers expect. empty()78 bool empty() const { return chunks_.empty(); } size()79 size_t size() { return chunks_.size(); } 80 push_back(const SBChunk & chunk)81 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } back()82 SBChunk& back() { return chunks_.back(); } front()83 SBChunk& front() { return chunks_.front(); } front()84 const SBChunk& front() const { return chunks_.front(); } 85 86 typedef std::vector<SBChunk>::const_iterator const_iterator; begin()87 const_iterator begin() const { return chunks_.begin(); } end()88 const_iterator end() const { return chunks_.end(); } 89 90 typedef std::vector<SBChunk>::iterator iterator; begin()91 iterator begin() { return chunks_.begin(); } end()92 iterator end() { return chunks_.end(); } 93 94 SBChunk& operator[](size_t n) { return chunks_[n]; } 95 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 96 97 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 98 void clear(); 99 100 private: 101 std::vector<SBChunk> chunks_; 102 103 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 104 }; 105 106 // Used when we get a gethash response. 107 struct SBFullHashResult { 108 SBFullHash hash; 109 std::string list_name; 110 int add_chunk_id; 111 }; 112 113 // Contains information about a list in the database. 114 struct SBListChunkRanges { 115 explicit SBListChunkRanges(const std::string& n); 116 117 std::string name; // The list name. 118 std::string adds; // The ranges for add chunks. 119 std::string subs; // The ranges for sub chunks. 120 }; 121 122 // Container for deleting chunks from the database. 123 struct SBChunkDelete { 124 SBChunkDelete(); 125 ~SBChunkDelete(); 126 127 std::string list_name; 128 bool is_sub_del; 129 std::vector<ChunkRange> chunk_del; 130 }; 131 132 133 // SBEntry --------------------------------------------------------------------- 134 135 // Holds information about the prefixes for a hostkey. prefixes can either be 136 // 4 bytes (truncated hash) or 32 bytes (full hash). 137 // For adds: 138 // [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 139 // For subs: 140 // [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 141 // [add chunk][prefix][add chunk][prefix] 142 class SBEntry { 143 public: 144 enum Type { 145 ADD_PREFIX, // 4 byte add entry. 146 SUB_PREFIX, // 4 byte sub entry. 147 ADD_FULL_HASH, // 32 byte add entry. 148 SUB_FULL_HASH, // 32 byte sub entry. 149 }; 150 151 // Creates a SBEntry with the necessary size for the given number of prefixes. 152 // Caller ownes the object and needs to free it by calling Destroy. 153 static SBEntry* Create(Type type, int prefix_count); 154 155 // Frees the entry's memory. 156 void Destroy(); 157 set_list_id(int list_id)158 void set_list_id(int list_id) { data_.list_id = list_id; } list_id()159 int list_id() const { return data_.list_id; } set_chunk_id(int chunk_id)160 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } chunk_id()161 int chunk_id() const { return data_.chunk_id; } prefix_count()162 int prefix_count() const { return data_.prefix_count; } 163 164 // Returns true if this is a prefix as opposed to a full hash. IsPrefix()165 bool IsPrefix() const { 166 return type() == ADD_PREFIX || type() == SUB_PREFIX; 167 } 168 169 // Returns true if this is an add entry. IsAdd()170 bool IsAdd() const { 171 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 172 } 173 174 // Returns true if this is a sub entry. IsSub()175 bool IsSub() const { 176 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 177 } 178 179 // Helper to return the size of the prefixes. HashLen()180 int HashLen() const { 181 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 182 } 183 184 // For add entries, returns the add chunk id. For sub entries, returns the 185 // add_chunk id for the prefix at the given index. 186 int ChunkIdAtPrefix(int index) const; 187 188 // Used for sub chunks to set the chunk id at a given index. 189 void SetChunkIdAtPrefix(int index, int chunk_id); 190 191 // Return the prefix/full hash at the given index. Caller is expected to 192 // call the right function based on the hash length. 193 const SBPrefix& PrefixAt(int index) const; 194 const SBFullHash& FullHashAt(int index) const; 195 196 // Return the prefix/full hash at the given index. Caller is expected to 197 // call the right function based on the hash length. 198 void SetPrefixAt(int index, const SBPrefix& prefix); 199 void SetFullHashAt(int index, const SBFullHash& full_hash); 200 201 private: 202 // Container for a sub prefix. 203 struct SBSubPrefix { 204 int add_chunk; 205 SBPrefix prefix; 206 }; 207 208 // Container for a sub full hash. 209 struct SBSubFullHash { 210 int add_chunk; 211 SBFullHash prefix; 212 }; 213 214 // Keep the fixed data together in one struct so that we can get its size 215 // easily. If any of this is modified, the database will have to be cleared. 216 struct Data { 217 int list_id; 218 // For adds, this is the add chunk number. 219 // For subs: if prefix_count is 0 then this is the add chunk that this sub 220 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 221 // or sub_full_hashes is used for each corresponding prefix. 222 int chunk_id; 223 Type type; 224 int prefix_count; 225 }; 226 227 SBEntry(); 228 ~SBEntry(); 229 230 // Helper to return the size of each prefix entry (i.e. for subs this 231 // includes an add chunk id). 232 static int PrefixSize(Type type); 233 234 // Helper to return how much memory a given Entry would require. 235 static int Size(Type type, int prefix_count); 236 237 // Returns how many bytes this entry is. 238 int Size() const; 239 type()240 Type type() const { return data_.type; } 241 set_prefix_count(int count)242 void set_prefix_count(int count) { data_.prefix_count = count; } set_type(Type type)243 void set_type(Type type) { data_.type = type; } 244 245 // The prefixes union must follow the fixed data so that they're contiguous 246 // in memory. 247 Data data_; 248 union { 249 SBPrefix add_prefixes_[1]; 250 SBSubPrefix sub_prefixes_[1]; 251 SBFullHash add_full_hashes_[1]; 252 SBSubFullHash sub_full_hashes_[1]; 253 }; 254 }; 255 256 257 // Utility functions ----------------------------------------------------------- 258 259 namespace safe_browsing_util { 260 261 // SafeBrowsing list names. 262 extern const char kMalwareList[]; 263 extern const char kPhishingList[]; 264 // Binary Download list names. 265 extern const char kBinUrlList[]; 266 extern const char kBinHashList[]; 267 // SafeBrowsing client-side detection whitelist list name. 268 extern const char kCsdWhiteList[]; 269 270 enum ListType { 271 INVALID = -1, 272 MALWARE = 0, 273 PHISH = 1, 274 BINURL = 2, 275 BINHASH = 3, 276 CSDWHITELIST = 4, 277 }; 278 279 // Maps a list name to ListType. 280 int GetListId(const std::string& name); 281 // Maps a ListId to list name. Return false if fails. 282 bool GetListName(int list_id, std::string* list); 283 284 285 // Canonicalizes url as per Google Safe Browsing Specification. 286 // See section 6.1 in 287 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 288 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 289 std::string* canonicalized_path, 290 std::string* canonicalized_query); 291 292 // Given a URL, returns all the hosts we need to check. They are returned 293 // in order of size (i.e. b.c is first, then a.b.c). 294 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 295 296 // Given a URL, returns all the paths we need to check. 297 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 298 299 int GetHashIndex(const SBFullHash& hash, 300 const std::vector<SBFullHashResult>& full_hashes); 301 302 // Given a URL, compare all the possible host + path full hashes to the set of 303 // provided full hashes. Returns the index of the match if one is found, or -1 304 // otherwise. 305 int GetUrlHashIndex(const GURL& url, 306 const std::vector<SBFullHashResult>& full_hashes); 307 308 bool IsPhishingList(const std::string& list_name); 309 bool IsMalwareList(const std::string& list_name); 310 bool IsBadbinurlList(const std::string& list_name); 311 bool IsBadbinhashList(const std::string& list_name); 312 313 // Returns 'true' if 'mac' can be verified using 'key' and 'data'. 314 bool VerifyMAC(const std::string& key, 315 const std::string& mac, 316 const char* data, 317 int data_length); 318 319 GURL GeneratePhishingReportUrl(const std::string& report_page, 320 const std::string& url_to_report); 321 322 void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out); 323 std::string SBFullHashToString(const SBFullHash& hash_out); 324 } // namespace safe_browsing_util 325 326 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 327