1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
6
7 #include <stdlib.h>
8
9 #include "chrome/browser/safe_browsing/protocol_parser.h"
10 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
11
12 #include "build/build_config.h"
13
14 #if defined(OS_WIN)
15 #include <Winsock2.h>
16 #elif defined(OS_POSIX)
17 #include <arpa/inet.h>
18 #endif
19
20 #include "base/format_macros.h"
21 #include "base/logging.h"
22 #include "base/string_split.h"
23 #include "base/string_util.h"
24
namespace {

// Extracts the first line of a line-oriented protocol buffer.
//
// Scans at most |input_len| bytes of |input| for a '\n'. When one is found,
// the bytes that precede it (the newline itself is excluded) are copied into
// |line| and true is returned. When no newline is found, |line| is left
// untouched and false is returned.
//
// The copy uses std::string::assign(const charT* s, size_type n), which takes
// an explicit byte count instead of calling strlen. That matters because
// |input| is binary data: it is not NULL terminated and may legitimately
// contain NULL bytes, which a strlen based copy would truncate.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input)
    return false;

  for (int i = 0; i < input_len; ++i) {
    if (input[i] != '\n')
      continue;
    line->assign(input, i);
    return true;
  }
  return false;
}

}  // namespace
44
45 //------------------------------------------------------------------------------
// SafeBrowsingProtocolParser implementation
47
SafeBrowsingProtocolParser()48 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
49 }
50
ParseGetHash(const char * chunk_data,int chunk_len,const std::string & key,bool * re_key,std::vector<SBFullHashResult> * full_hashes)51 bool SafeBrowsingProtocolParser::ParseGetHash(
52 const char* chunk_data,
53 int chunk_len,
54 const std::string& key,
55 bool* re_key,
56 std::vector<SBFullHashResult>* full_hashes) {
57 full_hashes->clear();
58 int length = chunk_len;
59 const char* data = chunk_data;
60
61 int offset;
62 std::string line;
63 if (!key.empty()) {
64 if (!GetLine(data, length, &line))
65 return false; // Error! Bad GetHash result.
66
67 if (line == "e:pleaserekey") {
68 *re_key = true;
69 return true;
70 }
71
72 offset = static_cast<int>(line.size()) + 1;
73 data += offset;
74 length -= offset;
75
76 if (!safe_browsing_util::VerifyMAC(key, line, data, length))
77 return false;
78 }
79
80 while (length > 0) {
81 if (!GetLine(data, length, &line))
82 return false;
83
84 offset = static_cast<int>(line.size()) + 1;
85 data += offset;
86 length -= offset;
87
88 std::vector<std::string> cmd_parts;
89 base::SplitString(line, ':', &cmd_parts);
90 if (cmd_parts.size() != 3)
91 return false;
92
93 SBFullHashResult full_hash;
94 full_hash.list_name = cmd_parts[0];
95 full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
96 int full_hash_len = atoi(cmd_parts[2].c_str());
97
98 // Ignore hash results from lists we don't recognize.
99 if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
100 data += full_hash_len;
101 length -= full_hash_len;
102 continue;
103 }
104
105 while (full_hash_len > 0) {
106 DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash));
107 memcpy(&full_hash.hash, data, sizeof(SBFullHash));
108 full_hashes->push_back(full_hash);
109 data += sizeof(SBFullHash);
110 length -= sizeof(SBFullHash);
111 full_hash_len -= sizeof(SBFullHash);
112 }
113 }
114
115 return length == 0;
116 }
117
FormatGetHash(const std::vector<SBPrefix> & prefixes,std::string * request)118 void SafeBrowsingProtocolParser::FormatGetHash(
119 const std::vector<SBPrefix>& prefixes, std::string* request) {
120 DCHECK(request);
121
122 // Format the request for GetHash.
123 request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n",
124 sizeof(SBPrefix),
125 sizeof(SBPrefix) * prefixes.size()));
126 for (size_t i = 0; i < prefixes.size(); ++i) {
127 request->append(reinterpret_cast<const char*>(&prefixes[i]),
128 sizeof(SBPrefix));
129 }
130 }
131
ParseUpdate(const char * chunk_data,int chunk_len,const std::string & key,int * next_update_sec,bool * re_key,bool * reset,std::vector<SBChunkDelete> * deletes,std::vector<ChunkUrl> * chunk_urls)132 bool SafeBrowsingProtocolParser::ParseUpdate(
133 const char* chunk_data,
134 int chunk_len,
135 const std::string& key,
136 int* next_update_sec,
137 bool* re_key,
138 bool* reset,
139 std::vector<SBChunkDelete>* deletes,
140 std::vector<ChunkUrl>* chunk_urls) {
141 DCHECK(next_update_sec);
142 DCHECK(deletes);
143 DCHECK(chunk_urls);
144
145 int length = chunk_len;
146 const char* data = chunk_data;
147
148 // Populated below.
149 std::string list_name;
150
151 while (length > 0) {
152 std::string cmd_line;
153 if (!GetLine(data, length, &cmd_line))
154 return false; // Error: bad list format!
155
156 std::vector<std::string> cmd_parts;
157 base::SplitString(cmd_line, ':', &cmd_parts);
158 if (cmd_parts.empty())
159 return false;
160 const std::string& command = cmd_parts[0];
161 if (cmd_parts.size() != 2 && command[0] != 'u')
162 return false;
163
164 const int consumed = static_cast<int>(cmd_line.size()) + 1;
165 data += consumed;
166 length -= consumed;
167 if (length < 0)
168 return false; // Parsing error.
169
170 // Differentiate on the first character of the command (which is usually
171 // only one character, with the exception of the 'ad' and 'sd' commands).
172 switch (command[0]) {
173 case 'a':
174 case 's': {
175 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
176 // have also parsed the list name before getting here, or the add-del
177 // or sub-del will have no context.
178 if (command.size() != 2 || command[1] != 'd' || list_name.empty())
179 return false;
180 SBChunkDelete chunk_delete;
181 chunk_delete.is_sub_del = command[0] == 's';
182 StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
183 chunk_delete.list_name = list_name;
184 deletes->push_back(chunk_delete);
185 break;
186 }
187
188 case 'e':
189 if (cmd_parts[1] != "pleaserekey")
190 return false;
191 *re_key = true;
192 break;
193
194 case 'i':
195 // The line providing the name of the list (i.e. 'goog-phish-shavar').
196 list_name = cmd_parts[1];
197 break;
198
199 case 'm':
200 // Verify that the MAC of the remainer of this chunk is what we expect.
201 if (!key.empty() &&
202 !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length))
203 return false;
204 break;
205
206 case 'n':
207 // The line providing the next earliest time (in seconds) to re-query.
208 *next_update_sec = atoi(cmd_parts[1].c_str());
209 break;
210
211 case 'u': {
212 // The redirect command is of the form: u:<url>,<mac> where <url> can
213 // contain multiple colons, commas or any valid URL characters. We scan
214 // backwards in the string looking for the first ',' we encounter and
215 // assume that everything before that is the URL and everything after
216 // is the MAC (if the MAC was requested).
217 std::string mac;
218 std::string redirect_url(cmd_line, 2); // Skip the initial "u:".
219 if (!key.empty()) {
220 std::string::size_type mac_pos = redirect_url.rfind(',');
221 if (mac_pos == std::string::npos)
222 return false;
223 mac = redirect_url.substr(mac_pos + 1);
224 redirect_url = redirect_url.substr(0, mac_pos);
225 }
226
227 ChunkUrl chunk_url;
228 chunk_url.url = redirect_url;
229 chunk_url.list_name = list_name;
230 if (!key.empty())
231 chunk_url.mac = mac;
232 chunk_urls->push_back(chunk_url);
233 break;
234 }
235
236 case 'r':
237 if (cmd_parts[1] != "pleasereset")
238 return false;
239 *reset = true;
240 break;
241
242 default:
243 // According to the spec, we ignore commands we don't understand.
244 break;
245 }
246 }
247
248 return true;
249 }
250
ParseChunk(const std::string & list_name,const char * data,int length,const std::string & key,const std::string & mac,bool * re_key,SBChunkList * chunks)251 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
252 const char* data,
253 int length,
254 const std::string& key,
255 const std::string& mac,
256 bool* re_key,
257 SBChunkList* chunks) {
258 int remaining = length;
259 const char* chunk_data = data;
260
261 if (!key.empty() &&
262 !safe_browsing_util::VerifyMAC(key, mac, data, length)) {
263 return false;
264 }
265
266 while (remaining > 0) {
267 std::string cmd_line;
268 if (!GetLine(chunk_data, length, &cmd_line))
269 return false; // Error: bad chunk format!
270
271 const int line_len = static_cast<int>(cmd_line.length()) + 1;
272 chunk_data += line_len;
273 remaining -= line_len;
274 std::vector<std::string> cmd_parts;
275 base::SplitString(cmd_line, ':', &cmd_parts);
276
277 // Handle a possible re-key command.
278 if (cmd_parts.size() != 4) {
279 if (cmd_parts.size() == 2 &&
280 cmd_parts[0] == "e" &&
281 cmd_parts[1] == "pleaserekey") {
282 *re_key = true;
283 continue;
284 }
285 return false;
286 }
287
288 // Process the chunk data.
289 const int chunk_number = atoi(cmd_parts[1].c_str());
290 const int hash_len = atoi(cmd_parts[2].c_str());
291 if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
292 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
293 return false;
294 }
295
296 const int chunk_len = atoi(cmd_parts[3].c_str());
297
298 if (remaining < chunk_len)
299 return false; // parse error.
300
301 chunks->push_back(SBChunk());
302 chunks->back().chunk_number = chunk_number;
303
304 if (cmd_parts[0] == "a") {
305 chunks->back().is_add = true;
306 if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
307 &chunks->back().hosts))
308 return false; // Parse error.
309 } else if (cmd_parts[0] == "s") {
310 chunks->back().is_add = false;
311 if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
312 &chunks->back().hosts))
313 return false; // Parse error.
314 } else {
315 NOTREACHED();
316 return false;
317 }
318
319 chunk_data += chunk_len;
320 remaining -= chunk_len;
321 DCHECK_LE(0, remaining);
322 }
323
324 DCHECK(remaining == 0);
325
326 return true;
327 }
328
ParseAddChunk(const std::string & list_name,const char * data,int data_len,int hash_len,std::deque<SBChunkHost> * hosts)329 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
330 const char* data,
331 int data_len,
332 int hash_len,
333 std::deque<SBChunkHost>* hosts) {
334 const char* chunk_data = data;
335 int remaining = data_len;
336 int prefix_count;
337 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
338 SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
339
340 if (list_name == safe_browsing_util::kBinHashList) {
341 // kBinHashList only contains prefixes, no HOSTKEY and COUNT.
342 DCHECK_EQ(0, remaining % hash_len);
343 prefix_count = remaining / hash_len;
344 SBChunkHost chunk_host;
345 chunk_host.host = 0;
346 chunk_host.entry = SBEntry::Create(type, prefix_count);
347 hosts->push_back(chunk_host);
348 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
349 return false;
350 } else {
351 SBPrefix host;
352 const int min_size = sizeof(SBPrefix) + 1;
353 while (remaining >= min_size) {
354 ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
355 SBChunkHost chunk_host;
356 chunk_host.host = host;
357 chunk_host.entry = SBEntry::Create(type, prefix_count);
358 hosts->push_back(chunk_host);
359 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
360 prefix_count))
361 return false;
362 }
363 }
364 return remaining == 0;
365 }
366
ParseSubChunk(const std::string & list_name,const char * data,int data_len,int hash_len,std::deque<SBChunkHost> * hosts)367 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
368 const char* data,
369 int data_len,
370 int hash_len,
371 std::deque<SBChunkHost>* hosts) {
372 int remaining = data_len;
373 const char* chunk_data = data;
374 int prefix_count;
375 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
376 SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
377
378 if (list_name == safe_browsing_util::kBinHashList) {
379 SBChunkHost chunk_host;
380 // Set host to 0 and it won't be used for kBinHashList.
381 chunk_host.host = 0;
382 // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
383 // and COUNT. |add_chunk_number| is int32.
384 prefix_count = remaining / (sizeof(int32) + hash_len);
385 chunk_host.entry = SBEntry::Create(type, prefix_count);
386 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
387 return false;
388 hosts->push_back(chunk_host);
389 } else {
390 SBPrefix host;
391 const int min_size = 2 * sizeof(SBPrefix) + 1;
392 while (remaining >= min_size) {
393 ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
394 SBChunkHost chunk_host;
395 chunk_host.host = host;
396 chunk_host.entry = SBEntry::Create(type, prefix_count);
397 hosts->push_back(chunk_host);
398 if (prefix_count == 0) {
399 // There is only an add chunk number (no prefixes).
400 chunk_host.entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining));
401 continue;
402 }
403 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
404 prefix_count))
405 return false;
406 }
407 }
408 return remaining == 0;
409 }
410
ReadHostAndPrefixCount(const char ** data,int * remaining,SBPrefix * host,int * count)411 void SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
412 const char** data, int* remaining, SBPrefix* host, int* count) {
413 // Next 4 bytes are the host prefix.
414 memcpy(host, *data, sizeof(SBPrefix));
415 *data += sizeof(SBPrefix);
416 *remaining -= sizeof(SBPrefix);
417
418 // Next 1 byte is the prefix count (could be zero, but never negative).
419 *count = static_cast<unsigned char>(**data);
420 *data += 1;
421 *remaining -= 1;
422 }
423
ReadChunkId(const char ** data,int * remaining)424 int SafeBrowsingProtocolParser::ReadChunkId(
425 const char** data, int* remaining) {
426 int chunk_number;
427 memcpy(&chunk_number, *data, sizeof(chunk_number));
428 *data += sizeof(chunk_number);
429 *remaining -= sizeof(chunk_number);
430 return htonl(chunk_number);
431 }
432
ReadPrefixes(const char ** data,int * remaining,SBEntry * entry,int count)433 bool SafeBrowsingProtocolParser::ReadPrefixes(
434 const char** data, int* remaining, SBEntry* entry, int count) {
435 int hash_len = entry->HashLen();
436 for (int i = 0; i < count; ++i) {
437 if (entry->IsSub()) {
438 entry->SetChunkIdAtPrefix(i, ReadChunkId(data, remaining));
439 if (*remaining <= 0)
440 return false;
441 }
442
443 if (entry->IsPrefix()) {
444 entry->SetPrefixAt(i, *reinterpret_cast<const SBPrefix*>(*data));
445 } else {
446 entry->SetFullHashAt(i, *reinterpret_cast<const SBFullHash*>(*data));
447 }
448 *data += hash_len;
449 *remaining -= hash_len;
450 if (*remaining < 0)
451 return false;
452 }
453
454 return true;
455 }
456
ParseNewKey(const char * chunk_data,int chunk_length,std::string * client_key,std::string * wrapped_key)457 bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data,
458 int chunk_length,
459 std::string* client_key,
460 std::string* wrapped_key) {
461 DCHECK(client_key && wrapped_key);
462 client_key->clear();
463 wrapped_key->clear();
464
465 const char* data = chunk_data;
466 int remaining = chunk_length;
467
468 while (remaining > 0) {
469 std::string line;
470 if (!GetLine(data, remaining, &line))
471 return false;
472
473 std::vector<std::string> cmd_parts;
474 base::SplitString(line, ':', &cmd_parts);
475 if (cmd_parts.size() != 3)
476 return false;
477
478 if (static_cast<int>(cmd_parts[2].size()) != atoi(cmd_parts[1].c_str()))
479 return false;
480
481 if (cmd_parts[0] == "clientkey") {
482 client_key->assign(cmd_parts[2]);
483 } else if (cmd_parts[0] == "wrappedkey") {
484 wrapped_key->assign(cmd_parts[2]);
485 } else {
486 return false;
487 }
488
489 data += line.size() + 1;
490 remaining -= static_cast<int>(line.size()) + 1;
491 }
492
493 if (client_key->empty() || wrapped_key->empty())
494 return false;
495
496 return true;
497 }
498