• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
6 
7 #include <stdlib.h>
8 
9 #include "base/format_macros.h"
10 #include "base/logging.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/sys_byteorder.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/safe_browsing/protocol_parser.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
17 
namespace {

// Scans the first |input_len| bytes of |input| for a '\n'. When one is found,
// every byte before it is copied into |line| and true is returned. When none
// is found in range (or |input| is NULL), returns false and leaves |line|
// untouched.
//
// |input| is binary data: it is not NULL terminated and may contain valid
// NULL bytes in the payload. The copy therefore goes through
//   std::string::assign(const charT* s, size_type n)
// which takes an explicit length rather than calling strlen, so nothing is
// truncated at an embedded NULL.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input)
    return false;

  for (int i = 0; i < input_len; ++i) {
    if (input[i] != '\n')
      continue;
    line->assign(input, static_cast<size_t>(i));
    return true;
  }
  return false;
}

}  // namespace
37 
38 //------------------------------------------------------------------------------
39 // SafeBrowsingProtocolParser implementation
40 
SafeBrowsingProtocolParser()41 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
42 }
43 
ParseGetHash(const char * chunk_data,int chunk_len,std::vector<SBFullHashResult> * full_hashes)44 bool SafeBrowsingProtocolParser::ParseGetHash(
45     const char* chunk_data,
46     int chunk_len,
47     std::vector<SBFullHashResult>* full_hashes) {
48   full_hashes->clear();
49   int length = chunk_len;
50   const char* data = chunk_data;
51 
52   int offset;
53   std::string line;
54   while (length > 0) {
55     if (!GetLine(data, length, &line))
56       return false;
57 
58     offset = static_cast<int>(line.size()) + 1;
59     data += offset;
60     length -= offset;
61 
62     std::vector<std::string> cmd_parts;
63     base::SplitString(line, ':', &cmd_parts);
64     if (cmd_parts.size() != 3)
65       return false;
66 
67     SBFullHashResult full_hash;
68     full_hash.list_name = cmd_parts[0];
69     full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
70     int full_hash_len = atoi(cmd_parts[2].c_str());
71 
72     // Ignore hash results from lists we don't recognize.
73     if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
74       data += full_hash_len;
75       length -= full_hash_len;
76       continue;
77     }
78 
79     while (full_hash_len > 0) {
80       DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash));
81       memcpy(&full_hash.hash, data, sizeof(SBFullHash));
82       full_hashes->push_back(full_hash);
83       data += sizeof(SBFullHash);
84       length -= sizeof(SBFullHash);
85       full_hash_len -= sizeof(SBFullHash);
86     }
87   }
88 
89   return length == 0;
90 }
91 
FormatGetHash(const std::vector<SBPrefix> & prefixes,std::string * request)92 void SafeBrowsingProtocolParser::FormatGetHash(
93    const std::vector<SBPrefix>& prefixes, std::string* request) {
94   DCHECK(request);
95 
96   // Format the request for GetHash.
97   request->append(base::StringPrintf("%" PRIuS ":%" PRIuS "\n",
98                                      sizeof(SBPrefix),
99                                      sizeof(SBPrefix) * prefixes.size()));
100   for (size_t i = 0; i < prefixes.size(); ++i) {
101     request->append(reinterpret_cast<const char*>(&prefixes[i]),
102                     sizeof(SBPrefix));
103   }
104 }
105 
ParseUpdate(const char * chunk_data,int chunk_len,int * next_update_sec,bool * reset,std::vector<SBChunkDelete> * deletes,std::vector<ChunkUrl> * chunk_urls)106 bool SafeBrowsingProtocolParser::ParseUpdate(
107     const char* chunk_data,
108     int chunk_len,
109     int* next_update_sec,
110     bool* reset,
111     std::vector<SBChunkDelete>* deletes,
112     std::vector<ChunkUrl>* chunk_urls) {
113   DCHECK(next_update_sec);
114   DCHECK(deletes);
115   DCHECK(chunk_urls);
116 
117   int length = chunk_len;
118   const char* data = chunk_data;
119 
120   // Populated below.
121   std::string list_name;
122 
123   while (length > 0) {
124     std::string cmd_line;
125     if (!GetLine(data, length, &cmd_line))
126       return false;  // Error: bad list format!
127 
128     std::vector<std::string> cmd_parts;
129     base::SplitString(cmd_line, ':', &cmd_parts);
130     if (cmd_parts.empty())
131       return false;
132     const std::string& command = cmd_parts[0];
133     if (cmd_parts.size() != 2 && command[0] != 'u')
134       return false;
135 
136     const int consumed = static_cast<int>(cmd_line.size()) + 1;
137     data += consumed;
138     length -= consumed;
139     if (length < 0)
140       return false;  // Parsing error.
141 
142     // Differentiate on the first character of the command (which is usually
143     // only one character, with the exception of the 'ad' and 'sd' commands).
144     switch (command[0]) {
145       case 'a':
146       case 's': {
147         // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
148         // have also parsed the list name before getting here, or the add-del
149         // or sub-del will have no context.
150         if (command.size() != 2 || command[1] != 'd' || list_name.empty())
151           return false;
152         SBChunkDelete chunk_delete;
153         chunk_delete.is_sub_del = command[0] == 's';
154         StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
155         chunk_delete.list_name = list_name;
156         deletes->push_back(chunk_delete);
157         break;
158       }
159 
160       case 'i':
161         // The line providing the name of the list (i.e. 'goog-phish-shavar').
162         list_name = cmd_parts[1];
163         break;
164 
165       case 'n':
166         // The line providing the next earliest time (in seconds) to re-query.
167         *next_update_sec = atoi(cmd_parts[1].c_str());
168         break;
169 
170       case 'u': {
171         ChunkUrl chunk_url;
172         chunk_url.url = cmd_line.substr(2);  // Skip the initial "u:".
173         chunk_url.list_name = list_name;
174         chunk_urls->push_back(chunk_url);
175         break;
176       }
177 
178       case 'r':
179         if (cmd_parts[1] != "pleasereset")
180           return false;
181         *reset = true;
182         break;
183 
184       default:
185         // According to the spec, we ignore commands we don't understand.
186         break;
187     }
188   }
189 
190   return true;
191 }
192 
ParseChunk(const std::string & list_name,const char * data,int length,SBChunkList * chunks)193 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
194                                             const char* data,
195                                             int length,
196                                             SBChunkList* chunks) {
197   int remaining = length;
198   const char* chunk_data = data;
199 
200   while (remaining > 0) {
201     std::string cmd_line;
202     if (!GetLine(chunk_data, remaining, &cmd_line))
203       return false;  // Error: bad chunk format!
204 
205     const int line_len = static_cast<int>(cmd_line.length()) + 1;
206     chunk_data += line_len;
207     remaining -= line_len;
208     std::vector<std::string> cmd_parts;
209     base::SplitString(cmd_line, ':', &cmd_parts);
210     if (cmd_parts.size() != 4) {
211       return false;
212     }
213 
214     // Process the chunk data.
215     const int chunk_number = atoi(cmd_parts[1].c_str());
216     const int hash_len = atoi(cmd_parts[2].c_str());
217     if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
218       VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
219       return false;
220     }
221 
222     const int chunk_len = atoi(cmd_parts[3].c_str());
223 
224     if (remaining < chunk_len)
225       return false;  // parse error.
226 
227     chunks->push_back(SBChunk());
228     chunks->back().chunk_number = chunk_number;
229 
230     if (cmd_parts[0] == "a") {
231       chunks->back().is_add = true;
232       if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
233                          &chunks->back().hosts))
234         return false;  // Parse error.
235     } else if (cmd_parts[0] == "s") {
236       chunks->back().is_add = false;
237       if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
238                          &chunks->back().hosts))
239         return false;  // Parse error.
240     } else {
241       NOTREACHED();
242       return false;
243     }
244 
245     chunk_data += chunk_len;
246     remaining -= chunk_len;
247     DCHECK_LE(0, remaining);
248   }
249 
250   DCHECK(remaining == 0);
251 
252   return true;
253 }
254 
ParseAddChunk(const std::string & list_name,const char * data,int data_len,int hash_len,std::deque<SBChunkHost> * hosts)255 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
256                                                const char* data,
257                                                int data_len,
258                                                int hash_len,
259                                                std::deque<SBChunkHost>* hosts) {
260   const char* chunk_data = data;
261   int remaining = data_len;
262   int prefix_count;
263   SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
264       SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
265 
266   if (list_name == safe_browsing_util::kBinHashList ||
267       list_name == safe_browsing_util::kDownloadWhiteList ||
268       list_name == safe_browsing_util::kExtensionBlacklist ||
269       list_name == safe_browsing_util::kIPBlacklist) {
270     // These lists only contain prefixes, no HOSTKEY and COUNT.
271     DCHECK_EQ(0, remaining % hash_len);
272     prefix_count = remaining / hash_len;
273     SBChunkHost chunk_host;
274     chunk_host.host = 0;
275     chunk_host.entry = SBEntry::Create(type, prefix_count);
276     hosts->push_back(chunk_host);
277     if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
278                       prefix_count)) {
279       DVLOG(2) << "Unable to read chunk data for list: " << list_name;
280       return false;
281     }
282     DCHECK_GE(remaining, 0);
283   } else {
284     SBPrefix host;
285     const int min_size = sizeof(SBPrefix) + 1;
286     while (remaining >= min_size) {
287       if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
288                                   &host, &prefix_count)) {
289         return false;
290       }
291       DCHECK_GE(remaining, 0);
292       SBChunkHost chunk_host;
293       chunk_host.host = host;
294       chunk_host.entry = SBEntry::Create(type, prefix_count);
295       hosts->push_back(chunk_host);
296       if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
297                         prefix_count))
298         return false;
299       DCHECK_GE(remaining, 0);
300     }
301   }
302   return remaining == 0;
303 }
304 
ParseSubChunk(const std::string & list_name,const char * data,int data_len,int hash_len,std::deque<SBChunkHost> * hosts)305 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
306                                                const char* data,
307                                                int data_len,
308                                                int hash_len,
309                                                std::deque<SBChunkHost>* hosts) {
310   int remaining = data_len;
311   const char* chunk_data = data;
312   int prefix_count;
313   SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
314       SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
315 
316   if (list_name == safe_browsing_util::kBinHashList ||
317       list_name == safe_browsing_util::kDownloadWhiteList ||
318       list_name == safe_browsing_util::kExtensionBlacklist ||
319       list_name == safe_browsing_util::kIPBlacklist) {
320     SBChunkHost chunk_host;
321     // Set host to 0 and it won't be used for kBinHashList.
322     chunk_host.host = 0;
323     // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
324     // and COUNT. |add_chunk_number| is int32.
325     prefix_count = remaining / (sizeof(int32) + hash_len);
326     chunk_host.entry = SBEntry::Create(type, prefix_count);
327     if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
328       return false;
329     DCHECK_GE(remaining, 0);
330     hosts->push_back(chunk_host);
331   } else {
332     SBPrefix host;
333     const int min_size = 2 * sizeof(SBPrefix) + 1;
334     while (remaining >= min_size) {
335       if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
336                                   &host, &prefix_count)) {
337         return false;
338       }
339       DCHECK_GE(remaining, 0);
340       SBChunkHost chunk_host;
341       chunk_host.host = host;
342       chunk_host.entry = SBEntry::Create(type, prefix_count);
343       hosts->push_back(chunk_host);
344       if (prefix_count == 0) {
345         // There is only an add chunk number (no prefixes).
346         int chunk_id;
347         if (!ReadChunkId(&chunk_data, &remaining, &chunk_id))
348           return false;
349         DCHECK_GE(remaining, 0);
350         chunk_host.entry->set_chunk_id(chunk_id);
351         continue;
352       }
353       if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
354                         prefix_count))
355         return false;
356       DCHECK_GE(remaining, 0);
357     }
358   }
359   return remaining == 0;
360 }
361 
ReadHostAndPrefixCount(const char ** data,int * remaining,SBPrefix * host,int * count)362 bool SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
363     const char** data, int* remaining, SBPrefix* host, int* count) {
364   if (static_cast<size_t>(*remaining) < sizeof(SBPrefix) + 1)
365     return false;
366   // Next 4 bytes are the host prefix.
367   memcpy(host, *data, sizeof(SBPrefix));
368   *data += sizeof(SBPrefix);
369   *remaining -= sizeof(SBPrefix);
370 
371   // Next 1 byte is the prefix count (could be zero, but never negative).
372   *count = static_cast<unsigned char>(**data);
373   *data += 1;
374   *remaining -= 1;
375   DCHECK_GE(*remaining, 0);
376   return true;
377 }
378 
ReadChunkId(const char ** data,int * remaining,int * chunk_id)379 bool SafeBrowsingProtocolParser::ReadChunkId(
380     const char** data, int* remaining, int* chunk_id) {
381   // Protocol says four bytes, not sizeof(int).  Make sure those
382   // values are the same.
383   DCHECK_EQ(sizeof(*chunk_id), 4u);
384   if (static_cast<size_t>(*remaining) < sizeof(*chunk_id))
385     return false;
386   memcpy(chunk_id, *data, sizeof(*chunk_id));
387   *data += sizeof(*chunk_id);
388   *remaining -= sizeof(*chunk_id);
389   *chunk_id = base::HostToNet32(*chunk_id);
390   DCHECK_GE(*remaining, 0);
391   return true;
392 }
393 
ReadPrefixes(const char ** data,int * remaining,SBEntry * entry,int count)394 bool SafeBrowsingProtocolParser::ReadPrefixes(
395     const char** data, int* remaining, SBEntry* entry, int count) {
396   int hash_len = entry->HashLen();
397   for (int i = 0; i < count; ++i) {
398     if (entry->IsSub()) {
399       int chunk_id;
400       if (!ReadChunkId(data, remaining, &chunk_id))
401         return false;
402       DCHECK_GE(*remaining, 0);
403       entry->SetChunkIdAtPrefix(i, chunk_id);
404     }
405 
406     if (*remaining < hash_len)
407       return false;
408     if (entry->IsPrefix()) {
409       SBPrefix prefix;
410       DCHECK_EQ(hash_len, (int)sizeof(prefix));
411       memcpy(&prefix, *data, sizeof(prefix));
412       entry->SetPrefixAt(i, prefix);
413     } else {
414       SBFullHash hash;
415       DCHECK_EQ(hash_len, (int)sizeof(hash));
416       memcpy(&hash, *data, sizeof(hash));
417       entry->SetFullHashAt(i, hash);
418     }
419     *data += hash_len;
420     *remaining -= hash_len;
421     DCHECK_GE(*remaining, 0);
422   }
423 
424   return true;
425 }
426