• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Utilities for the SafeBrowsing code.
6 
7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9 
10 #include <cstring>
11 #include <set>
12 #include <string>
13 #include <vector>
14 
15 #include "base/basictypes.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/strings/string_piece.h"
18 #include "base/time/time.h"
19 #include "chrome/browser/safe_browsing/chunk_range.h"
20 
// Forward declaration of the ChunkData type that SBChunkData holds through a
// scoped_ptr; only pointers to it are used in this header, so the full
// definition is not needed here.
namespace safe_browsing {
class ChunkData;
}  // namespace safe_browsing
24 
25 class GURL;
26 
// A truncated hash's type.  The SBFullHash union overlays a value of this
// type on the start of its full-hash bytes.
typedef uint32 SBPrefix;
29 
// Container for holding a chunk URL and the list it belongs to.
struct ChunkUrl {
  std::string url;        // The chunk URL.
  std::string list_name;  // Name of the list the chunk belongs to.
};
35 
// A full hash.  |full_hash| holds all 32 bytes of the hash.  Because this is
// a union, |prefix| shares storage with the start of |full_hash|, so reading
// it yields the first sizeof(SBPrefix) bytes interpreted as an SBPrefix.
union SBFullHash {
  char full_hash[32];
  SBPrefix prefix;
};
41 
SBFullHashEqual(const SBFullHash & a,const SBFullHash & b)42 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
43   return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
44 }
45 
SBFullHashLess(const SBFullHash & a,const SBFullHash & b)46 inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
47   return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
48 }
49 
// Generate full hash for the given string.
// NOTE(review): the hashing algorithm is not visible in this header; the
// 32-byte SBFullHash size suggests SHA-256 -- confirm in the implementation.
SBFullHash SBFullHashForString(const base::StringPiece& str);
52 
// Data for an individual chunk sent from the server.  Wraps a
// safe_browsing::ChunkData held in |chunk_data_| and exposes read-only
// accessors for its contents.  Instances are not copyable.
class SBChunkData {
 public:
  SBChunkData();
  ~SBChunkData();

  // Create with manufactured data, for testing only.
  // TODO(shess): Right now the test code calling this is in an anonymous
  // namespace.  Figure out how to shift this into private:.
  // NOTE(review): |chunk_data| is presumably adopted by the |chunk_data_|
  // scoped_ptr (i.e. ownership transfers) -- confirm in the implementation.
  explicit SBChunkData(safe_browsing::ChunkData* chunk_data);

  // Read serialized ChunkData from the |length| bytes at |data|, returning
  // true if the parse succeeded.
  bool ParseFrom(const unsigned char* data, size_t length);

  // Access the chunk data.  |AddChunkNumberAt()| can only be called if
  // |IsSub()| returns true.  |Prefix*()| and |FullHash*()| can only be called
  // if the corresponding |Is*()| returned true.
  int ChunkNumber() const;
  bool IsAdd() const;
  bool IsSub() const;
  int AddChunkNumberAt(size_t i) const;
  bool IsPrefix() const;
  size_t PrefixCount() const;
  SBPrefix PrefixAt(size_t i) const;
  bool IsFullHash() const;
  size_t FullHashCount() const;
  SBFullHash FullHashAt(size_t i) const;

 private:
  // Protocol buffer sent from server.
  scoped_ptr<safe_browsing::ChunkData> chunk_data_;

  DISALLOW_COPY_AND_ASSIGN(SBChunkData);
};
87 
// Used when we get a gethash response.  One entry describes a single full
// hash together with the list it was reported for.
struct SBFullHashResult {
  // The full hash itself.
  SBFullHash hash;
  // Identifier of the list this hash belongs to.
  // TODO(shess): Refactor to allow ListType here.
  int list_id;
  // Additional data accompanying the hash.
  // NOTE(review): format and meaning are not visible in this header.
  std::string metadata;
};
95 
// Caches individual response from GETHASH request: the full-hash results
// plus an expiry time for them.
struct SBCachedFullHashResult {
  SBCachedFullHashResult();
  // NOTE(review): presumably initializes |expire_after| from
  // |in_expire_after| -- confirm in the implementation.
  explicit SBCachedFullHashResult(const base::Time& in_expire_after);
  ~SBCachedFullHashResult();

  // Time associated with expiry of this cached response.
  // NOTE(review): presumably the entry is stale once this time has passed --
  // verify against the code that consults the cache.
  base::Time expire_after;
  // The full-hash results held by this cache entry.
  std::vector<SBFullHashResult> full_hashes;
};
105 
// Contains information about a list in the database: its name and the add
// and sub chunk ranges, each kept as a string.
struct SBListChunkRanges {
  // NOTE(review): |n| is presumably stored as |name| -- confirm in the
  // implementation.
  explicit SBListChunkRanges(const std::string& n);

  std::string name;  // The list name.
  std::string adds;  // The ranges for add chunks.
  std::string subs;  // The ranges for sub chunks.
};
114 
// Container for deleting chunks from the database.
struct SBChunkDelete {
  SBChunkDelete();
  ~SBChunkDelete();

  // Name of the list the deletion applies to.
  std::string list_name;
  // NOTE(review): presumably true when the ranges refer to sub chunks and
  // false when they refer to add chunks -- confirm against callers.
  bool is_sub_del;
  // Chunk ranges to delete.
  std::vector<ChunkRange> chunk_del;
};
124 
// Different types of threats that SafeBrowsing protects against.
// The enumerators have no explicit values, so they are numbered from 0 in
// declaration order; inserting or reordering entries changes those values.
enum SBThreatType {
  // No threat at all.
  SB_THREAT_TYPE_SAFE,

  // The URL is being used for phishing.
  SB_THREAT_TYPE_URL_PHISHING,

  // The URL hosts malware.
  SB_THREAT_TYPE_URL_MALWARE,

  // The URL hosts harmful programs.
  SB_THREAT_TYPE_URL_HARMFUL,

  // The download URL is malware.
  SB_THREAT_TYPE_BINARY_MALWARE_URL,

  // URL detected by the client-side phishing model.  Note that unlike the
  // above values, this does not correspond to a downloaded list.
  SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,

  // The Chrome extension or app (given by its ID) is malware.
  SB_THREAT_TYPE_EXTENSION,

  // URL detected by the client-side malware IP list.  This IP list is part
  // of the client-side detection model.
  SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
};
153 
154 // Utility functions -----------------------------------------------------------
155 
// Declarations of the SafeBrowsing list names, the ListType identifiers and
// the URL/hash helper functions.  Only declarations appear here; the
// definitions are in the implementation file.
namespace safe_browsing_util {

// SafeBrowsing list names.
extern const char kMalwareList[];
extern const char kPhishingList[];
// Binary Download list name.
extern const char kBinUrlList[];
// SafeBrowsing client-side detection whitelist list name.
extern const char kCsdWhiteList[];
// SafeBrowsing download whitelist list name.
extern const char kDownloadWhiteList[];
// SafeBrowsing extension list name.
extern const char kExtensionBlacklist[];
// SafeBrowsing side-effect free whitelist name.
extern const char kSideEffectFreeWhitelist[];
// SafeBrowsing csd malware IP blacklist name.
extern const char kIPBlacklist[];

// This array must contain all Safe Browsing lists.  Its declared size (8)
// equals the number of list names declared above, so adding a list name
// means updating this size as well.
extern const char* kAllLists[8];

// Numeric identifiers for the lists.  Values are assigned explicitly and the
// odd values after CSDWHITELIST are intentionally left unused (see the
// comments on the individual entries).
enum ListType {
  INVALID = -1,
  MALWARE = 0,
  PHISH = 1,
  BINURL = 2,
  // Obsolete BINHASH = 3,
  CSDWHITELIST = 4,
  // SafeBrowsing lists are stored in pairs.  Keep ListType 5
  // available for a potential second list that we would store in the
  // csd-whitelist store file.
  DOWNLOADWHITELIST = 6,
  // See above comment. Leave 7 available.
  EXTENSIONBLACKLIST = 8,
  // See above comment. Leave 9 available.
  SIDEEFFECTFREEWHITELIST = 10,
  // See above comment. Leave 11 available.
  IPBLACKLIST = 12,
  // See above comment.  Leave 13 available.
};

// Maps a list name to ListType.
// NOTE(review): presumably returns INVALID for an unrecognized name --
// confirm in the implementation.
ListType GetListId(const base::StringPiece& name);

// Maps a ListType to its list name, written to |list|.  Returns false on
// failure.
bool GetListName(ListType list_id, std::string* list);

// Canonicalizes url as per Google Safe Browsing Specification.
// See section 6.1 in
// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
// The canonicalized hostname, path and query are written to the three
// output parameters.
void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
                     std::string* canonicalized_path,
                     std::string* canonicalized_query);

// Given a URL, returns all the hosts we need to check.  They are returned
// in order of size (e.g. b.c is first, then a.b.c).
void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);

// Given a URL, returns all the paths we need to check.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);

// Given a URL, returns all the patterns we need to check.
void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);

// Returns a GURL built from |report_page|, |url_to_report| and
// |is_client_side_detection|.
// NOTE(review): presumably the URL of the phishing-report page for
// |url_to_report|; the exact format is not visible in this header -- confirm
// in the implementation.
GURL GeneratePhishingReportUrl(const std::string& report_page,
                               const std::string& url_to_report,
                               bool is_client_side_detection);

// Conversions between SBFullHash and std::string.  Despite its name,
// |hash_out| is the input of SBFullHashToString().
// NOTE(review): presumably these copy the raw 32 hash bytes to and from the
// string -- confirm in the implementation, including how a |hash_in| of the
// wrong length is handled.
SBFullHash StringToSBFullHash(const std::string& hash_in);
std::string SBFullHashToString(const SBFullHash& hash_out);

}  // namespace safe_browsing_util
228 
229 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
230