• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Utilities for the SafeBrowsing code.
6 
7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
9 
10 #include <cstring>
11 #include <set>
12 #include <string>
13 #include <vector>
14 
15 #include "base/basictypes.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/strings/string_piece.h"
18 #include "chrome/browser/safe_browsing/chunk_range.h"
19 
// Forward declaration of the ChunkData message wrapped by SBChunkData.  Only
// pointers to it appear in this header, so the full definition is not
// required here.
namespace safe_browsing {
class ChunkData;
}  // namespace safe_browsing
23 
24 class GURL;
25 
26 // A truncated hash's type.  SBFullHash overlays one of these on the start of
// its 32-byte buffer, so a prefix is the leading sizeof(SBPrefix) bytes of a
// full hash.
// NOTE(review): uint32 comes from base/basictypes.h and is presumably a
// 32-bit unsigned integer -- confirm.
27 typedef uint32 SBPrefix;
28 
29 // Container for holding a chunk URL and the name of the list it belongs to.
30 struct ChunkUrl {
31   std::string url;        // The chunk URL.
32   std::string list_name;  // Name of the list |url| belongs to.
33 };
34 
35 // A full hash.  The union lets the same storage be read either as the
// complete 32-byte hash (|full_hash|) or, through |prefix|, as its leading
// sizeof(SBPrefix) bytes.
36 union SBFullHash {
37   char full_hash[32];  // All 32 bytes of the hash.
38   SBPrefix prefix;     // Shares storage with the start of |full_hash|.
39 };
40 
SBFullHashEqual(const SBFullHash & a,const SBFullHash & b)41 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
42   return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
43 }
44 
SBFullHashLess(const SBFullHash & a,const SBFullHash & b)45 inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
46   return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
47 }
48 
49 // Generates the full hash for the given string and returns it by value.
// NOTE(review): the hash function is not visible in this header; the 32-byte
// SBFullHash suggests a 256-bit digest -- confirm against the definition.
50 SBFullHash SBFullHashForString(const base::StringPiece& str);
51 
52 // Data for an individual chunk sent from the server.  Holds a
// safe_browsing::ChunkData and exposes const accessors over it.  Instances
// cannot be copied or assigned (see DISALLOW_COPY_AND_ASSIGN below).
53 class SBChunkData {
54  public:
55   SBChunkData();
56   ~SBChunkData();
57 
58   // Create with manufactured data, for testing only.
59   // TODO(shess): Right now the test code calling this is in an anonymous
60   // namespace.  Figure out how to shift this into private:.
  // NOTE(review): presumably takes ownership of |chunk_data|, since the
  // member below is a scoped_ptr -- confirm in the .cc.
61   explicit SBChunkData(safe_browsing::ChunkData* chunk_data);
62 
63   // Read serialized ChunkData from the |length| bytes at |data|, returning
  // true if the parse succeeded.
64   bool ParseFrom(const unsigned char* data, size_t length);
65 
66   // Access the chunk data.  |AddChunkNumberAt()| can only be called if
67   // |IsSub()| returns true.  |Prefix*()| and |FullHash*()| can only be called
68   // if the corresponding |Is*()| returned true.
69   int ChunkNumber() const;
70   bool IsAdd() const;
71   bool IsSub() const;
72   int AddChunkNumberAt(size_t i) const;
73   bool IsPrefix() const;
74   size_t PrefixCount() const;
75   SBPrefix PrefixAt(size_t i) const;
76   bool IsFullHash() const;
77   size_t FullHashCount() const;
78   SBFullHash FullHashAt(size_t i) const;
79 
80  private:
81   // Protocol buffer sent from server.
82   scoped_ptr<safe_browsing::ChunkData> chunk_data_;
83 
84   DISALLOW_COPY_AND_ASSIGN(SBChunkData);
85 };
86 
87 // Used when we get a gethash response.  Pairs a full hash with a list id.
88 struct SBFullHashResult {
89   SBFullHash hash;
90   // TODO(shess): Refactor to allow ListType here.
  // NOTE(review): presumably holds a safe_browsing_util::ListType value
  // stored as a plain int -- confirm against the code that fills it in.
91   int list_id;
92 };
93 
94 // Contains information about a list in the database: its name plus the add
// and sub chunk ranges, each kept as a string.
// NOTE(review): the textual format of the range strings is not shown in this
// header -- see chunk_range.h / the .cc.
95 struct SBListChunkRanges {
  // NOTE(review): |n| presumably initializes |name| -- confirm in the .cc.
96   explicit SBListChunkRanges(const std::string& n);
97 
98   std::string name;  // The list name.
99   std::string adds;  // The ranges for add chunks.
100   std::string subs;  // The ranges for sub chunks.
101 };
102 
103 // Container for deleting chunks from the database.
104 struct SBChunkDelete {
105   SBChunkDelete();
106   ~SBChunkDelete();
107 
  // NOTE(review): presumably the name of the list whose chunks are deleted.
108   std::string list_name;
  // NOTE(review): presumably true when the ranges refer to sub chunks rather
  // than add chunks -- confirm against the callers.
109   bool is_sub_del;
  // The chunk ranges covered by this deletion.
110   std::vector<ChunkRange> chunk_del;
111 };
112 
113 // Different types of threats that SafeBrowsing protects against.
// Enumerators have no explicit initializers, so they take consecutive values
// starting at 0 in declaration order.
114 enum SBThreatType {
115   // No threat at all.
116   SB_THREAT_TYPE_SAFE,
117 
118   // The URL is being used for phishing.
119   SB_THREAT_TYPE_URL_PHISHING,
120 
121   // The URL hosts malware.
122   SB_THREAT_TYPE_URL_MALWARE,
123 
124   // The download URL is malware.
125   SB_THREAT_TYPE_BINARY_MALWARE_URL,
126 
127   // URL detected by the client-side phishing model.  Note that unlike the
128   // above values, this does not correspond to a downloaded list.
129   SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
130 
131   // The Chrome extension or app (given by its ID) is malware.
132   SB_THREAT_TYPE_EXTENSION,
133 
134   // URL detected by the client-side malware IP list.  This IP list is part
135   // of the client-side detection model.
136   SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
137 };
138 
139 // Utility functions -----------------------------------------------------------
140 
141 namespace safe_browsing_util {
142 
143 // SafeBrowsing list names.  All of these are declared here and defined
// elsewhere.
144 extern const char kMalwareList[];
145 extern const char kPhishingList[];
146 // Binary Download list name.
147 extern const char kBinUrlList[];
148 // SafeBrowsing client-side detection whitelist list name.
149 extern const char kCsdWhiteList[];
150 // SafeBrowsing download whitelist list name.
151 extern const char kDownloadWhiteList[];
152 // SafeBrowsing extension list name.
153 extern const char kExtensionBlacklist[];
154 // SafeBrowsing side-effect free whitelist name.
155 extern const char kSideEffectFreeWhitelist[];
156 // SafeBrowsing csd malware IP blacklist name.
157 extern const char kIPBlacklist[];
158 
159 // This array must contain all Safe Browsing lists.  Its declared size (8)
// matches the eight list names above; keep the two in sync when adding or
// removing a list.
160 extern const char* kAllLists[8];
161 
// Numeric identifiers for the lists.  Values are explicit and sparse: from
// CSDWHITELIST onward the odd value following each list is deliberately left
// unused (see the comments between the enumerators).
162 enum ListType {
163   INVALID = -1,
164   MALWARE = 0,
165   PHISH = 1,
166   BINURL = 2,
167   // Obsolete BINHASH = 3,
168   CSDWHITELIST = 4,
169   // SafeBrowsing lists are stored in pairs.  Keep ListType 5
170   // available for a potential second list that we would store in the
171   // csd-whitelist store file.
172   DOWNLOADWHITELIST = 6,
173   // See above comment. Leave 7 available.
174   EXTENSIONBLACKLIST = 8,
175   // See above comment. Leave 9 available.
176   SIDEEFFECTFREEWHITELIST = 10,
177   // See above comment. Leave 11 available.
178   IPBLACKLIST = 12,
179   // See above comment.  Leave 13 available.
180 };
181 
182 // Maps a list name to ListType.
// NOTE(review): presumably returns INVALID for an unrecognized name --
// confirm in the .cc.
183 ListType GetListId(const base::StringPiece& name);
184 
185 // Maps a ListType to its list name, written to |list|.  Returns false on
// failure.
186 bool GetListName(ListType list_id, std::string* list);
187 
188 // Canonicalizes url as per Google Safe Browsing Specification.
189 // See section 6.1 in
190 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
// The hostname, path and query parts are returned through the three output
// pointers.
// NOTE(review): whether any of the output pointers may be NULL is not
// visible here -- confirm in the .cc.
191 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
192                      std::string* canonicalized_path,
193                      std::string* canonicalized_query);
194 
195 // Given a URL, returns all the hosts we need to check.  They are returned
196 // in order of size (i.e. b.c is first, then a.b.c).
197 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
198 
199 // Given a URL, returns all the paths we need to check.
200 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
201 
202 // Given a URL, returns all the patterns we need to check.
203 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
204 
// NOTE(review): undocumented in the original header.  Presumably builds the
// URL of |report_page| used to report |url_to_report| as phishing, with
// |is_client_side_detection| recording how the URL was flagged -- confirm in
// the .cc.
205 GURL GeneratePhishingReportUrl(const std::string& report_page,
206                                const std::string& url_to_report,
207                                bool is_client_side_detection);
208 
// Conversions between SBFullHash and std::string.  Despite its name,
// |hash_out| is an input (it is taken by const reference).
// NOTE(review): the string encoding (raw bytes vs. text) is not visible in
// this header -- confirm in the .cc.
209 SBFullHash StringToSBFullHash(const std::string& hash_in);
210 std::string SBFullHashToString(const SBFullHash& hash_out);
211 
212 }  // namespace safe_browsing_util
213 
214 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
215