• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
6 
7 // TODOv3(shess): Review these changes carefully.
8 
9 #include <stdlib.h>
10 
11 #include "base/format_macros.h"
12 #include "base/logging.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/sys_byteorder.h"
17 #include "base/time/time.h"
18 #include "build/build_config.h"
19 #include "chrome/browser/safe_browsing/protocol_parser.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
21 
22 namespace {
23 
24 // Helper class for scanning a buffer.
25 class BufferReader {
26  public:
BufferReader(const char * data,size_t length)27   BufferReader(const char* data, size_t length)
28       : data_(data),
29         length_(length) {
30   }
31 
32   // Return info about remaining buffer data.
length() const33   size_t length() const {
34     return length_;
35   }
data() const36   const char* data() const {
37     return data_;
38   }
empty() const39   bool empty() const {
40     return length_ == 0;
41   }
42 
43   // Remove |l| characters from the buffer.
Advance(size_t l)44   void Advance(size_t l) {
45     DCHECK_LE(l, length());
46     data_ += l;
47     length_ -= l;
48   }
49 
50   // Get a reference to data in the buffer.
51   // TODO(shess): I'm not sure I like this.  Fill out a StringPiece instead?
RefData(const void ** pptr,size_t l)52   bool RefData(const void** pptr, size_t l) {
53     if (length() < l) {
54       Advance(length());  // poison
55       return false;
56     }
57 
58     *pptr = data();
59     Advance(l);
60     return true;
61   }
62 
63   // Copy data out of the buffer.
GetData(void * ptr,size_t l)64   bool GetData(void* ptr, size_t l) {
65     const void* buf_ptr;
66     if (!RefData(&buf_ptr, l))
67       return false;
68 
69     memcpy(ptr, buf_ptr, l);
70     return true;
71   }
72 
73   // Read a 32-bit integer in network byte order into a local uint32.
GetNet32(uint32 * i)74   bool GetNet32(uint32* i) {
75     if (!GetData(i, sizeof(*i)))
76       return false;
77 
78     *i = base::NetToHost32(*i);
79     return true;
80   }
81 
82   // Returns false if there is no data, otherwise fills |*line| with a reference
83   // to the next line of data in the buffer.
GetLine(base::StringPiece * line)84   bool GetLine(base::StringPiece* line) {
85     if (!length_)
86       return false;
87 
88     // Find the end of the line, or the end of the input.
89     size_t eol = 0;
90     while (eol < length_ && data_[eol] != '\n') {
91       ++eol;
92     }
93     line->set(data_, eol);
94     Advance(eol);
95 
96     // Skip the newline if present.
97     if (length_ && data_[0] == '\n')
98       Advance(1);
99 
100     return true;
101   }
102 
103   // Read out |c| colon-separated pieces from the next line.  The resulting
104   // pieces point into the original data buffer.
GetPieces(size_t c,std::vector<base::StringPiece> * pieces)105   bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) {
106     base::StringPiece line;
107     if (!GetLine(&line))
108       return false;
109 
110     // Find the parts separated by ':'.
111     while (pieces->size() + 1 < c) {
112       size_t colon_ofs = line.find(':');
113       if (colon_ofs == base::StringPiece::npos) {
114         Advance(length_);
115         return false;
116       }
117 
118       pieces->push_back(line.substr(0, colon_ofs));
119       line.remove_prefix(colon_ofs + 1);
120     }
121 
122     // The last piece runs to the end of the line.
123     pieces->push_back(line);
124     return true;
125   }
126 
127  private:
128   const char* data_;
129   size_t length_;
130 
131   DISALLOW_COPY_AND_ASSIGN(BufferReader);
132 };
133 
ParseGetHashMetadata(size_t hash_count,BufferReader * reader,std::vector<SBFullHashResult> * full_hashes)134 bool ParseGetHashMetadata(size_t hash_count,
135                           BufferReader* reader,
136                           std::vector<SBFullHashResult>* full_hashes) {
137   for (size_t i = 0; i < hash_count; ++i) {
138     base::StringPiece line;
139     if (!reader->GetLine(&line))
140       return false;
141 
142     size_t meta_data_len;
143     if (!base::StringToSizeT(line, &meta_data_len))
144       return false;
145 
146     const void* meta_data;
147     if (!reader->RefData(&meta_data, meta_data_len))
148       return false;
149 
150     if (full_hashes) {
151       (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign(
152           reinterpret_cast<const char*>(meta_data), meta_data_len);
153     }
154   }
155   return true;
156 }
157 
158 }  // namespace
159 
160 namespace safe_browsing {
161 
162 // BODY          = CACHELIFETIME LF HASHENTRY* EOF
163 // CACHELIFETIME = DIGIT+
164 // HASHENTRY     = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
165 //                 HASHDATA (METADATALEN LF METADATA)*
166 // HASHSIZE      = DIGIT+                  # Length of each full hash
167 // NUMRESPONSES  = DIGIT+                  # Number of full hashes in HASHDATA
168 // HASHDATA      = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
169 // METADATALEN   = DIGIT+
170 // METADATA      = <METADATALEN number of unsigned bytes>
ParseGetHash(const char * chunk_data,size_t chunk_len,base::TimeDelta * cache_lifetime,std::vector<SBFullHashResult> * full_hashes)171 bool ParseGetHash(const char* chunk_data,
172                   size_t chunk_len,
173                   base::TimeDelta* cache_lifetime,
174                   std::vector<SBFullHashResult>* full_hashes) {
175   full_hashes->clear();
176   BufferReader reader(chunk_data, chunk_len);
177 
178   // Parse out cache lifetime.
179   {
180     base::StringPiece line;
181     if (!reader.GetLine(&line))
182       return false;
183 
184     int64_t cache_lifetime_seconds;
185     if (!base::StringToInt64(line, &cache_lifetime_seconds))
186       return false;
187 
188     // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
189     // either.  Maybe there should be a threshold involved.
190     if (cache_lifetime_seconds < 0)
191       return false;
192 
193     *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds);
194   }
195 
196   while (!reader.empty()) {
197     std::vector<base::StringPiece> cmd_parts;
198     if (!reader.GetPieces(3, &cmd_parts))
199       return false;
200 
201     SBFullHashResult full_hash;
202     full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
203 
204     size_t hash_len;
205     if (!base::StringToSizeT(cmd_parts[1], &hash_len))
206       return false;
207 
208     // TODO(shess): Is this possible?  If not, why the length present?
209     if (hash_len != sizeof(SBFullHash))
210       return false;
211 
212     // Metadata is indicated by an optional ":m" at the end of the line.
213     bool has_metadata = false;
214     base::StringPiece hash_count_string = cmd_parts[2];
215     size_t optional_colon = hash_count_string.find(':', 0);
216     if (optional_colon != base::StringPiece::npos) {
217       if (hash_count_string.substr(optional_colon) != ":m")
218         return false;
219       has_metadata = true;
220       hash_count_string.remove_suffix(2);
221     }
222 
223     size_t hash_count;
224     if (!base::StringToSizeT(hash_count_string, &hash_count))
225       return false;
226 
227     if (hash_len * hash_count > reader.length())
228       return false;
229 
230     // Ignore hash results from lists we don't recognize.
231     if (full_hash.list_id < 0) {
232       reader.Advance(hash_len * hash_count);
233       if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL))
234         return false;
235       continue;
236     }
237 
238     for (size_t i = 0; i < hash_count; ++i) {
239       if (!reader.GetData(&full_hash.hash, hash_len))
240         return false;
241       full_hashes->push_back(full_hash);
242     }
243 
244     if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes))
245       return false;
246   }
247 
248   return reader.empty();
249 }
250 
251 // BODY       = HEADER LF PREFIXES EOF
252 // HEADER     = PREFIXSIZE ":" LENGTH
253 // PREFIXSIZE = DIGIT+         # Size of each prefix in bytes
254 // LENGTH     = DIGIT+         # Size of PREFIXES in bytes
FormatGetHash(const std::vector<SBPrefix> & prefixes)255 std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) {
256   std::string request;
257   request.append(base::Uint64ToString(sizeof(SBPrefix)));
258   request.append(":");
259   request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size()));
260   request.append("\n");
261 
262   // SBPrefix values are read without concern for byte order, so write back the
263   // same way.
264   for (size_t i = 0; i < prefixes.size(); ++i) {
265     request.append(reinterpret_cast<const char*>(&prefixes[i]),
266                    sizeof(SBPrefix));
267   }
268 
269   return request;
270 }
271 
ParseUpdate(const char * chunk_data,size_t chunk_len,size_t * next_update_sec,bool * reset,std::vector<SBChunkDelete> * deletes,std::vector<ChunkUrl> * chunk_urls)272 bool ParseUpdate(const char* chunk_data,
273                  size_t chunk_len,
274                  size_t* next_update_sec,
275                  bool* reset,
276                  std::vector<SBChunkDelete>* deletes,
277                  std::vector<ChunkUrl>* chunk_urls) {
278   DCHECK(next_update_sec);
279   DCHECK(deletes);
280   DCHECK(chunk_urls);
281 
282   BufferReader reader(chunk_data, chunk_len);
283 
284   // Populated below.
285   std::string list_name;
286 
287   while (!reader.empty()) {
288     std::vector<base::StringPiece> pieces;
289     if (!reader.GetPieces(2, &pieces))
290       return false;
291 
292     base::StringPiece& command = pieces[0];
293 
294     // Differentiate on the first character of the command (which is usually
295     // only one character, with the exception of the 'ad' and 'sd' commands).
296     switch (command[0]) {
297       case 'a':
298       case 's': {
299         // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
300         // have also parsed the list name before getting here, or the add-del
301         // or sub-del will have no context.
302         if (list_name.empty() || (command != "ad" && command != "sd"))
303           return false;
304         SBChunkDelete chunk_delete;
305         chunk_delete.is_sub_del = command[0] == 's';
306         StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del);
307         chunk_delete.list_name = list_name;
308         deletes->push_back(chunk_delete);
309         break;
310       }
311 
312       case 'i':
313         // The line providing the name of the list (i.e. 'goog-phish-shavar').
314         list_name = pieces[1].as_string();
315         break;
316 
317       case 'n':
318         // The line providing the next earliest time (in seconds) to re-query.
319         if (!base::StringToSizeT(pieces[1], next_update_sec))
320           return false;
321         break;
322 
323       case 'u': {
324         ChunkUrl chunk_url;
325         chunk_url.url = pieces[1].as_string();  // Skip the initial "u:".
326         chunk_url.list_name = list_name;
327         chunk_urls->push_back(chunk_url);
328         break;
329       }
330 
331       case 'r':
332         if (pieces[1] != "pleasereset")
333           return false;
334         *reset = true;
335         break;
336 
337       default:
338         // According to the spec, we ignore commands we don't understand.
339         // TODO(shess): Does this apply to r:unknown or n:not-integer?
340         break;
341     }
342   }
343 
344   return true;
345 }
346 
347 // BODY      = (UINT32 CHUNKDATA)+
348 // UINT32    = Unsigned 32-bit integer in network byte order
349 // CHUNKDATA = Encoded ChunkData protocol message
ParseChunk(const char * data,size_t length,ScopedVector<SBChunkData> * chunks)350 bool ParseChunk(const char* data,
351                 size_t length,
352                 ScopedVector<SBChunkData>* chunks) {
353   BufferReader reader(data, length);
354 
355   while (!reader.empty()) {
356     uint32 l = 0;
357     if (!reader.GetNet32(&l) || l == 0 || l > reader.length())
358       return false;
359 
360     const void* p = NULL;
361     if (!reader.RefData(&p, l))
362       return false;
363 
364     scoped_ptr<SBChunkData> chunk(new SBChunkData());
365     if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l))
366       return false;
367 
368     chunks->push_back(chunk.release());
369   }
370 
371   DCHECK(reader.empty());
372   return true;
373 }
374 
375 // LIST      = LISTNAME ";" LISTINFO (":" LISTINFO)*
376 // LISTINFO  = CHUNKTYPE ":" CHUNKLIST
377 // CHUNKTYPE = "a" | "s"
378 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
379 // NUMBER    = DIGIT+
380 // RANGE     = NUMBER "-" NUMBER
FormatList(const SBListChunkRanges & list)381 std::string FormatList(const SBListChunkRanges& list) {
382   std::string formatted_results = list.name;
383   formatted_results.append(";");
384 
385   if (!list.adds.empty())
386     formatted_results.append("a:").append(list.adds);
387   if (!list.adds.empty() && !list.subs.empty())
388     formatted_results.append(":");
389   if (!list.subs.empty())
390     formatted_results.append("s:").append(list.subs);
391   formatted_results.append("\n");
392 
393   return formatted_results;
394 }
395 
396 }  // namespace safe_browsing
397