• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
6 
7 // TODOv3(shess): Review these changes carefully.
8 
9 #include <stdlib.h>
10 
11 #include "base/format_macros.h"
12 #include "base/logging.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/sys_byteorder.h"
17 #include "base/time/time.h"
18 #include "build/build_config.h"
19 #include "chrome/browser/safe_browsing/protocol_parser.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
21 
22 namespace {
23 
24 // Helper class for scanning a buffer.
25 class BufferReader {
26  public:
BufferReader(const char * data,size_t length)27   BufferReader(const char* data, size_t length)
28       : data_(data),
29         length_(length) {
30   }
31 
32   // Return info about remaining buffer data.
length() const33   size_t length() const {
34     return length_;
35   }
data() const36   const char* data() const {
37     return data_;
38   }
empty() const39   bool empty() const {
40     return length_ == 0;
41   }
42 
43   // Remove |l| characters from the buffer.
Advance(size_t l)44   void Advance(size_t l) {
45     DCHECK_LE(l, length());
46     data_ += l;
47     length_ -= l;
48   }
49 
50   // Get a reference to data in the buffer.
51   // TODO(shess): I'm not sure I like this.  Fill out a StringPiece instead?
RefData(const void ** pptr,size_t l)52   bool RefData(const void** pptr, size_t l) {
53     if (length() < l) {
54       Advance(length());  // poison
55       return false;
56     }
57 
58     *pptr = data();
59     Advance(l);
60     return true;
61   }
62 
63   // Copy data out of the buffer.
GetData(void * ptr,size_t l)64   bool GetData(void* ptr, size_t l) {
65     const void* buf_ptr;
66     if (!RefData(&buf_ptr, l))
67       return false;
68 
69     memcpy(ptr, buf_ptr, l);
70     return true;
71   }
72 
73   // Read a 32-bit integer in network byte order into a local uint32.
GetNet32(uint32 * i)74   bool GetNet32(uint32* i) {
75     if (!GetData(i, sizeof(*i)))
76       return false;
77 
78     *i = base::NetToHost32(*i);
79     return true;
80   }
81 
82   // Returns false if there is no data, otherwise fills |*line| with a reference
83   // to the next line of data in the buffer.
GetLine(base::StringPiece * line)84   bool GetLine(base::StringPiece* line) {
85     if (!length_)
86       return false;
87 
88     // Find the end of the line, or the end of the input.
89     size_t eol = 0;
90     while (eol < length_ && data_[eol] != '\n') {
91       ++eol;
92     }
93     line->set(data_, eol);
94     Advance(eol);
95 
96     // Skip the newline if present.
97     if (length_ && data_[0] == '\n')
98       Advance(1);
99 
100     return true;
101   }
102 
103   // Read out |c| colon-separated pieces from the next line.  The resulting
104   // pieces point into the original data buffer.
GetPieces(size_t c,std::vector<base::StringPiece> * pieces)105   bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) {
106     base::StringPiece line;
107     if (!GetLine(&line))
108       return false;
109 
110     // Find the parts separated by ':'.
111     while (pieces->size() + 1 < c) {
112       size_t colon_ofs = line.find(':');
113       if (colon_ofs == base::StringPiece::npos) {
114         Advance(length_);
115         return false;
116       }
117 
118       pieces->push_back(line.substr(0, colon_ofs));
119       line.remove_prefix(colon_ofs + 1);
120     }
121 
122     // The last piece runs to the end of the line.
123     pieces->push_back(line);
124     return true;
125   }
126 
127  private:
128   const char* data_;
129   size_t length_;
130 
131   DISALLOW_COPY_AND_ASSIGN(BufferReader);
132 };
133 
134 }  // namespace
135 
136 namespace safe_browsing {
137 
138 // BODY          = CACHELIFETIME LF HASHENTRY* EOF
139 // CACHELIFETIME = DIGIT+
140 // HASHENTRY     = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
141 //                 HASHDATA (METADATALEN LF METADATA)*
142 // HASHSIZE      = DIGIT+                  # Length of each full hash
143 // NUMRESPONSES  = DIGIT+                  # Number of full hashes in HASHDATA
144 // HASHDATA      = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
145 // METADATALEN   = DIGIT+
146 // METADATA      = <METADATALEN number of unsigned bytes>
ParseGetHash(const char * chunk_data,size_t chunk_len,base::TimeDelta * cache_lifetime,std::vector<SBFullHashResult> * full_hashes)147 bool ParseGetHash(const char* chunk_data,
148                   size_t chunk_len,
149                   base::TimeDelta* cache_lifetime,
150                   std::vector<SBFullHashResult>* full_hashes) {
151   full_hashes->clear();
152   BufferReader reader(chunk_data, chunk_len);
153 
154   // Parse out cache lifetime.
155   {
156     base::StringPiece line;
157     if (!reader.GetLine(&line))
158       return false;
159 
160     int64_t cache_lifetime_seconds;
161     if (!base::StringToInt64(line, &cache_lifetime_seconds))
162       return false;
163 
164     // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
165     // either.  Maybe there should be a threshold involved.
166     if (cache_lifetime_seconds < 0)
167       return false;
168 
169     *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds);
170   }
171 
172   while (!reader.empty()) {
173     std::vector<base::StringPiece> cmd_parts;
174     if (!reader.GetPieces(3, &cmd_parts))
175       return false;
176 
177     SBFullHashResult full_hash;
178     full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
179 
180     size_t hash_len;
181     if (!base::StringToSizeT(cmd_parts[1], &hash_len))
182       return false;
183 
184     // TODO(shess): Is this possible?  If not, why the length present?
185     if (hash_len != sizeof(SBFullHash))
186       return false;
187 
188     // Metadata is indicated by an optional ":m" at the end of the line.
189     bool has_metadata = false;
190     base::StringPiece hash_count_string = cmd_parts[2];
191     size_t optional_colon = hash_count_string.find(':', 0);
192     if (optional_colon != base::StringPiece::npos) {
193       if (hash_count_string.substr(optional_colon) != ":m")
194         return false;
195       has_metadata = true;
196       hash_count_string.remove_suffix(2);
197     }
198 
199     size_t hash_count;
200     if (!base::StringToSizeT(hash_count_string, &hash_count))
201       return false;
202 
203     if (hash_len * hash_count > reader.length())
204       return false;
205 
206     // Ignore hash results from lists we don't recognize.
207     if (full_hash.list_id < 0) {
208       reader.Advance(hash_len * hash_count);
209       continue;
210     }
211 
212     for (size_t i = 0; i < hash_count; ++i) {
213       if (!reader.GetData(&full_hash.hash, hash_len))
214         return false;
215       full_hashes->push_back(full_hash);
216     }
217 
218     // Discard the metadata for now.
219     if (has_metadata) {
220       for (size_t i = 0; i < hash_count; ++i) {
221         base::StringPiece line;
222         if (!reader.GetLine(&line))
223           return false;
224 
225         size_t meta_data_len;
226         if (!base::StringToSizeT(line, &meta_data_len))
227           return false;
228 
229         const void* meta_data;
230         if (!reader.RefData(&meta_data, meta_data_len))
231           return false;
232       }
233     }
234   }
235 
236   return reader.empty();
237 }
238 
239 // BODY       = HEADER LF PREFIXES EOF
240 // HEADER     = PREFIXSIZE ":" LENGTH
241 // PREFIXSIZE = DIGIT+         # Size of each prefix in bytes
242 // LENGTH     = DIGIT+         # Size of PREFIXES in bytes
FormatGetHash(const std::vector<SBPrefix> & prefixes)243 std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) {
244   std::string request;
245   request.append(base::Uint64ToString(sizeof(SBPrefix)));
246   request.append(":");
247   request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size()));
248   request.append("\n");
249 
250   // SBPrefix values are read without concern for byte order, so write back the
251   // same way.
252   for (size_t i = 0; i < prefixes.size(); ++i) {
253     request.append(reinterpret_cast<const char*>(&prefixes[i]),
254                    sizeof(SBPrefix));
255   }
256 
257   return request;
258 }
259 
ParseUpdate(const char * chunk_data,size_t chunk_len,size_t * next_update_sec,bool * reset,std::vector<SBChunkDelete> * deletes,std::vector<ChunkUrl> * chunk_urls)260 bool ParseUpdate(const char* chunk_data,
261                  size_t chunk_len,
262                  size_t* next_update_sec,
263                  bool* reset,
264                  std::vector<SBChunkDelete>* deletes,
265                  std::vector<ChunkUrl>* chunk_urls) {
266   DCHECK(next_update_sec);
267   DCHECK(deletes);
268   DCHECK(chunk_urls);
269 
270   BufferReader reader(chunk_data, chunk_len);
271 
272   // Populated below.
273   std::string list_name;
274 
275   while (!reader.empty()) {
276     std::vector<base::StringPiece> pieces;
277     if (!reader.GetPieces(2, &pieces))
278       return false;
279 
280     base::StringPiece& command = pieces[0];
281 
282     // Differentiate on the first character of the command (which is usually
283     // only one character, with the exception of the 'ad' and 'sd' commands).
284     switch (command[0]) {
285       case 'a':
286       case 's': {
287         // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
288         // have also parsed the list name before getting here, or the add-del
289         // or sub-del will have no context.
290         if (list_name.empty() || (command != "ad" && command != "sd"))
291           return false;
292         SBChunkDelete chunk_delete;
293         chunk_delete.is_sub_del = command[0] == 's';
294         StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del);
295         chunk_delete.list_name = list_name;
296         deletes->push_back(chunk_delete);
297         break;
298       }
299 
300       case 'i':
301         // The line providing the name of the list (i.e. 'goog-phish-shavar').
302         list_name = pieces[1].as_string();
303         break;
304 
305       case 'n':
306         // The line providing the next earliest time (in seconds) to re-query.
307         if (!base::StringToSizeT(pieces[1], next_update_sec))
308           return false;
309         break;
310 
311       case 'u': {
312         ChunkUrl chunk_url;
313         chunk_url.url = pieces[1].as_string();  // Skip the initial "u:".
314         chunk_url.list_name = list_name;
315         chunk_urls->push_back(chunk_url);
316         break;
317       }
318 
319       case 'r':
320         if (pieces[1] != "pleasereset")
321           return false;
322         *reset = true;
323         break;
324 
325       default:
326         // According to the spec, we ignore commands we don't understand.
327         // TODO(shess): Does this apply to r:unknown or n:not-integer?
328         break;
329     }
330   }
331 
332   return true;
333 }
334 
335 // BODY      = (UINT32 CHUNKDATA)+
336 // UINT32    = Unsigned 32-bit integer in network byte order
337 // CHUNKDATA = Encoded ChunkData protocol message
ParseChunk(const char * data,size_t length,ScopedVector<SBChunkData> * chunks)338 bool ParseChunk(const char* data,
339                 size_t length,
340                 ScopedVector<SBChunkData>* chunks) {
341   BufferReader reader(data, length);
342 
343   while (!reader.empty()) {
344     uint32 l = 0;
345     if (!reader.GetNet32(&l) || l == 0 || l > reader.length())
346       return false;
347 
348     const void* p = NULL;
349     if (!reader.RefData(&p, l))
350       return false;
351 
352     scoped_ptr<SBChunkData> chunk(new SBChunkData());
353     if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l))
354       return false;
355 
356     chunks->push_back(chunk.release());
357   }
358 
359   DCHECK(reader.empty());
360   return true;
361 }
362 
363 // LIST      = LISTNAME ";" LISTINFO (":" LISTINFO)*
364 // LISTINFO  = CHUNKTYPE ":" CHUNKLIST
365 // CHUNKTYPE = "a" | "s"
366 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
367 // NUMBER    = DIGIT+
368 // RANGE     = NUMBER "-" NUMBER
FormatList(const SBListChunkRanges & list)369 std::string FormatList(const SBListChunkRanges& list) {
370   std::string formatted_results = list.name;
371   formatted_results.append(";");
372 
373   if (!list.adds.empty())
374     formatted_results.append("a:").append(list.adds);
375   if (!list.adds.empty() && !list.subs.empty())
376     formatted_results.append(":");
377   if (!list.subs.empty())
378     formatted_results.append("s:").append(list.subs);
379   formatted_results.append("\n");
380 
381   return formatted_results;
382 }
383 
384 }  // namespace safe_browsing
385