1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
6
7 // TODOv3(shess): Review these changes carefully.
8
9 #include <stdlib.h>
10
11 #include "base/format_macros.h"
12 #include "base/logging.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/sys_byteorder.h"
17 #include "base/time/time.h"
18 #include "build/build_config.h"
19 #include "chrome/browser/safe_browsing/protocol_parser.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
21
22 namespace {
23
24 // Helper class for scanning a buffer.
25 class BufferReader {
26 public:
BufferReader(const char * data,size_t length)27 BufferReader(const char* data, size_t length)
28 : data_(data),
29 length_(length) {
30 }
31
32 // Return info about remaining buffer data.
length() const33 size_t length() const {
34 return length_;
35 }
data() const36 const char* data() const {
37 return data_;
38 }
empty() const39 bool empty() const {
40 return length_ == 0;
41 }
42
43 // Remove |l| characters from the buffer.
Advance(size_t l)44 void Advance(size_t l) {
45 DCHECK_LE(l, length());
46 data_ += l;
47 length_ -= l;
48 }
49
50 // Get a reference to data in the buffer.
51 // TODO(shess): I'm not sure I like this. Fill out a StringPiece instead?
RefData(const void ** pptr,size_t l)52 bool RefData(const void** pptr, size_t l) {
53 if (length() < l) {
54 Advance(length()); // poison
55 return false;
56 }
57
58 *pptr = data();
59 Advance(l);
60 return true;
61 }
62
63 // Copy data out of the buffer.
GetData(void * ptr,size_t l)64 bool GetData(void* ptr, size_t l) {
65 const void* buf_ptr;
66 if (!RefData(&buf_ptr, l))
67 return false;
68
69 memcpy(ptr, buf_ptr, l);
70 return true;
71 }
72
73 // Read a 32-bit integer in network byte order into a local uint32.
GetNet32(uint32 * i)74 bool GetNet32(uint32* i) {
75 if (!GetData(i, sizeof(*i)))
76 return false;
77
78 *i = base::NetToHost32(*i);
79 return true;
80 }
81
82 // Returns false if there is no data, otherwise fills |*line| with a reference
83 // to the next line of data in the buffer.
GetLine(base::StringPiece * line)84 bool GetLine(base::StringPiece* line) {
85 if (!length_)
86 return false;
87
88 // Find the end of the line, or the end of the input.
89 size_t eol = 0;
90 while (eol < length_ && data_[eol] != '\n') {
91 ++eol;
92 }
93 line->set(data_, eol);
94 Advance(eol);
95
96 // Skip the newline if present.
97 if (length_ && data_[0] == '\n')
98 Advance(1);
99
100 return true;
101 }
102
103 // Read out |c| colon-separated pieces from the next line. The resulting
104 // pieces point into the original data buffer.
GetPieces(size_t c,std::vector<base::StringPiece> * pieces)105 bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) {
106 base::StringPiece line;
107 if (!GetLine(&line))
108 return false;
109
110 // Find the parts separated by ':'.
111 while (pieces->size() + 1 < c) {
112 size_t colon_ofs = line.find(':');
113 if (colon_ofs == base::StringPiece::npos) {
114 Advance(length_);
115 return false;
116 }
117
118 pieces->push_back(line.substr(0, colon_ofs));
119 line.remove_prefix(colon_ofs + 1);
120 }
121
122 // The last piece runs to the end of the line.
123 pieces->push_back(line);
124 return true;
125 }
126
127 private:
128 const char* data_;
129 size_t length_;
130
131 DISALLOW_COPY_AND_ASSIGN(BufferReader);
132 };
133
ParseGetHashMetadata(size_t hash_count,BufferReader * reader,std::vector<SBFullHashResult> * full_hashes)134 bool ParseGetHashMetadata(size_t hash_count,
135 BufferReader* reader,
136 std::vector<SBFullHashResult>* full_hashes) {
137 for (size_t i = 0; i < hash_count; ++i) {
138 base::StringPiece line;
139 if (!reader->GetLine(&line))
140 return false;
141
142 size_t meta_data_len;
143 if (!base::StringToSizeT(line, &meta_data_len))
144 return false;
145
146 const void* meta_data;
147 if (!reader->RefData(&meta_data, meta_data_len))
148 return false;
149
150 if (full_hashes) {
151 (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign(
152 reinterpret_cast<const char*>(meta_data), meta_data_len);
153 }
154 }
155 return true;
156 }
157
158 } // namespace
159
160 namespace safe_browsing {
161
162 // BODY = CACHELIFETIME LF HASHENTRY* EOF
163 // CACHELIFETIME = DIGIT+
164 // HASHENTRY = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
165 // HASHDATA (METADATALEN LF METADATA)*
166 // HASHSIZE = DIGIT+ # Length of each full hash
167 // NUMRESPONSES = DIGIT+ # Number of full hashes in HASHDATA
168 // HASHDATA = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
169 // METADATALEN = DIGIT+
170 // METADATA = <METADATALEN number of unsigned bytes>
ParseGetHash(const char * chunk_data,size_t chunk_len,base::TimeDelta * cache_lifetime,std::vector<SBFullHashResult> * full_hashes)171 bool ParseGetHash(const char* chunk_data,
172 size_t chunk_len,
173 base::TimeDelta* cache_lifetime,
174 std::vector<SBFullHashResult>* full_hashes) {
175 full_hashes->clear();
176 BufferReader reader(chunk_data, chunk_len);
177
178 // Parse out cache lifetime.
179 {
180 base::StringPiece line;
181 if (!reader.GetLine(&line))
182 return false;
183
184 int64_t cache_lifetime_seconds;
185 if (!base::StringToInt64(line, &cache_lifetime_seconds))
186 return false;
187
188 // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
189 // either. Maybe there should be a threshold involved.
190 if (cache_lifetime_seconds < 0)
191 return false;
192
193 *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds);
194 }
195
196 while (!reader.empty()) {
197 std::vector<base::StringPiece> cmd_parts;
198 if (!reader.GetPieces(3, &cmd_parts))
199 return false;
200
201 SBFullHashResult full_hash;
202 full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
203
204 size_t hash_len;
205 if (!base::StringToSizeT(cmd_parts[1], &hash_len))
206 return false;
207
208 // TODO(shess): Is this possible? If not, why the length present?
209 if (hash_len != sizeof(SBFullHash))
210 return false;
211
212 // Metadata is indicated by an optional ":m" at the end of the line.
213 bool has_metadata = false;
214 base::StringPiece hash_count_string = cmd_parts[2];
215 size_t optional_colon = hash_count_string.find(':', 0);
216 if (optional_colon != base::StringPiece::npos) {
217 if (hash_count_string.substr(optional_colon) != ":m")
218 return false;
219 has_metadata = true;
220 hash_count_string.remove_suffix(2);
221 }
222
223 size_t hash_count;
224 if (!base::StringToSizeT(hash_count_string, &hash_count))
225 return false;
226
227 if (hash_len * hash_count > reader.length())
228 return false;
229
230 // Ignore hash results from lists we don't recognize.
231 if (full_hash.list_id < 0) {
232 reader.Advance(hash_len * hash_count);
233 if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL))
234 return false;
235 continue;
236 }
237
238 for (size_t i = 0; i < hash_count; ++i) {
239 if (!reader.GetData(&full_hash.hash, hash_len))
240 return false;
241 full_hashes->push_back(full_hash);
242 }
243
244 if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes))
245 return false;
246 }
247
248 return reader.empty();
249 }
250
251 // BODY = HEADER LF PREFIXES EOF
252 // HEADER = PREFIXSIZE ":" LENGTH
253 // PREFIXSIZE = DIGIT+ # Size of each prefix in bytes
254 // LENGTH = DIGIT+ # Size of PREFIXES in bytes
FormatGetHash(const std::vector<SBPrefix> & prefixes)255 std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) {
256 std::string request;
257 request.append(base::Uint64ToString(sizeof(SBPrefix)));
258 request.append(":");
259 request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size()));
260 request.append("\n");
261
262 // SBPrefix values are read without concern for byte order, so write back the
263 // same way.
264 for (size_t i = 0; i < prefixes.size(); ++i) {
265 request.append(reinterpret_cast<const char*>(&prefixes[i]),
266 sizeof(SBPrefix));
267 }
268
269 return request;
270 }
271
ParseUpdate(const char * chunk_data,size_t chunk_len,size_t * next_update_sec,bool * reset,std::vector<SBChunkDelete> * deletes,std::vector<ChunkUrl> * chunk_urls)272 bool ParseUpdate(const char* chunk_data,
273 size_t chunk_len,
274 size_t* next_update_sec,
275 bool* reset,
276 std::vector<SBChunkDelete>* deletes,
277 std::vector<ChunkUrl>* chunk_urls) {
278 DCHECK(next_update_sec);
279 DCHECK(deletes);
280 DCHECK(chunk_urls);
281
282 BufferReader reader(chunk_data, chunk_len);
283
284 // Populated below.
285 std::string list_name;
286
287 while (!reader.empty()) {
288 std::vector<base::StringPiece> pieces;
289 if (!reader.GetPieces(2, &pieces))
290 return false;
291
292 base::StringPiece& command = pieces[0];
293
294 // Differentiate on the first character of the command (which is usually
295 // only one character, with the exception of the 'ad' and 'sd' commands).
296 switch (command[0]) {
297 case 'a':
298 case 's': {
299 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
300 // have also parsed the list name before getting here, or the add-del
301 // or sub-del will have no context.
302 if (list_name.empty() || (command != "ad" && command != "sd"))
303 return false;
304 SBChunkDelete chunk_delete;
305 chunk_delete.is_sub_del = command[0] == 's';
306 StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del);
307 chunk_delete.list_name = list_name;
308 deletes->push_back(chunk_delete);
309 break;
310 }
311
312 case 'i':
313 // The line providing the name of the list (i.e. 'goog-phish-shavar').
314 list_name = pieces[1].as_string();
315 break;
316
317 case 'n':
318 // The line providing the next earliest time (in seconds) to re-query.
319 if (!base::StringToSizeT(pieces[1], next_update_sec))
320 return false;
321 break;
322
323 case 'u': {
324 ChunkUrl chunk_url;
325 chunk_url.url = pieces[1].as_string(); // Skip the initial "u:".
326 chunk_url.list_name = list_name;
327 chunk_urls->push_back(chunk_url);
328 break;
329 }
330
331 case 'r':
332 if (pieces[1] != "pleasereset")
333 return false;
334 *reset = true;
335 break;
336
337 default:
338 // According to the spec, we ignore commands we don't understand.
339 // TODO(shess): Does this apply to r:unknown or n:not-integer?
340 break;
341 }
342 }
343
344 return true;
345 }
346
347 // BODY = (UINT32 CHUNKDATA)+
348 // UINT32 = Unsigned 32-bit integer in network byte order
349 // CHUNKDATA = Encoded ChunkData protocol message
ParseChunk(const char * data,size_t length,ScopedVector<SBChunkData> * chunks)350 bool ParseChunk(const char* data,
351 size_t length,
352 ScopedVector<SBChunkData>* chunks) {
353 BufferReader reader(data, length);
354
355 while (!reader.empty()) {
356 uint32 l = 0;
357 if (!reader.GetNet32(&l) || l == 0 || l > reader.length())
358 return false;
359
360 const void* p = NULL;
361 if (!reader.RefData(&p, l))
362 return false;
363
364 scoped_ptr<SBChunkData> chunk(new SBChunkData());
365 if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l))
366 return false;
367
368 chunks->push_back(chunk.release());
369 }
370
371 DCHECK(reader.empty());
372 return true;
373 }
374
375 // LIST = LISTNAME ";" LISTINFO (":" LISTINFO)*
376 // LISTINFO = CHUNKTYPE ":" CHUNKLIST
377 // CHUNKTYPE = "a" | "s"
378 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
379 // NUMBER = DIGIT+
380 // RANGE = NUMBER "-" NUMBER
FormatList(const SBListChunkRanges & list)381 std::string FormatList(const SBListChunkRanges& list) {
382 std::string formatted_results = list.name;
383 formatted_results.append(";");
384
385 if (!list.adds.empty())
386 formatted_results.append("a:").append(list.adds);
387 if (!list.adds.empty() && !list.subs.empty())
388 formatted_results.append(":");
389 if (!list.subs.empty())
390 formatted_results.append("s:").append(list.subs);
391 formatted_results.append("\n");
392
393 return formatted_results;
394 }
395
396 } // namespace safe_browsing
397