• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Portions of this code based on Mozilla:
6 //   (netwerk/cookie/src/nsCookieService.cpp)
7 /* ***** BEGIN LICENSE BLOCK *****
8  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
9  *
10  * The contents of this file are subject to the Mozilla Public License Version
11  * 1.1 (the "License"); you may not use this file except in compliance with
12  * the License. You may obtain a copy of the License at
13  * http://www.mozilla.org/MPL/
14  *
15  * Software distributed under the License is distributed on an "AS IS" basis,
16  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
17  * for the specific language governing rights and limitations under the
18  * License.
19  *
20  * The Original Code is mozilla.org code.
21  *
22  * The Initial Developer of the Original Code is
23  * Netscape Communications Corporation.
24  * Portions created by the Initial Developer are Copyright (C) 2003
25  * the Initial Developer. All Rights Reserved.
26  *
27  * Contributor(s):
28  *   Daniel Witte (dwitte@stanford.edu)
29  *   Michiel van Leeuwen (mvl@exedo.nl)
30  *
31  * Alternatively, the contents of this file may be used under the terms of
32  * either the GNU General Public License Version 2 or later (the "GPL"), or
33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34  * in which case the provisions of the GPL or the LGPL are applicable instead
35  * of those above. If you wish to allow use of your version of this file only
36  * under the terms of either the GPL or the LGPL, and not to allow others to
37  * use your version of this file under the terms of the MPL, indicate your
38  * decision by deleting the provisions above and replace them with the notice
39  * and other provisions required by the GPL or the LGPL. If you do not delete
40  * the provisions above, a recipient may use your version of this file under
41  * the terms of any one of the MPL, the GPL or the LGPL.
42  *
43  * ***** END LICENSE BLOCK ***** */
44 
45 #include "net/base/cookie_monster.h"
46 
47 #include <algorithm>
48 
49 #include "base/basictypes.h"
50 #include "base/format_macros.h"
51 #include "base/logging.h"
52 #include "base/memory/scoped_ptr.h"
53 #include "base/message_loop.h"
54 #include "base/metrics/histogram.h"
55 #include "base/string_tokenizer.h"
56 #include "base/string_util.h"
57 #include "base/stringprintf.h"
58 #include "googleurl/src/gurl.h"
59 #include "googleurl/src/url_canon.h"
60 #include "net/base/net_util.h"
61 #include "net/base/registry_controlled_domain.h"
62 
63 using base::Time;
64 using base::TimeDelta;
65 using base::TimeTicks;
66 
// Number of minutes in ten 365-day years (leap days are not counted).
static const int kMinutesInTenYears = 10 * 365 * 24 * 60;
68 
69 namespace net {
70 
// Out-of-class definitions of the CookieMonster static limit constants.
// See comments at declaration of these variables in cookie_monster.h
// for details.
const size_t CookieMonster::kDomainMaxCookies           = 180;
const size_t CookieMonster::kDomainPurgeCookies         = 30;
const size_t CookieMonster::kMaxCookies                 = 3300;
const size_t CookieMonster::kPurgeCookies               = 300;
const int CookieMonster::kSafeFromGlobalPurgeDays       = 30;
78 
79 namespace {
80 
// Default minimum delay, in seconds, after updating a cookie's
// LastAccessDate before we will update it again.  The CookieMonster
// constructor that is not given an explicit threshold uses this value.
const int kDefaultAccessUpdateThresholdSeconds = 60;
84 
85 // Comparator to sort cookies from highest creation date to lowest
86 // creation date.
87 struct OrderByCreationTimeDesc {
operator ()net::__anonfc4401bf0111::OrderByCreationTimeDesc88   bool operator()(const CookieMonster::CookieMap::iterator& a,
89                   const CookieMonster::CookieMap::iterator& b) const {
90     return a->second->CreationDate() > b->second->CreationDate();
91   }
92 };
93 
// Constants for use in VLOG.  Each one is the verbosity level passed to
// VLOG() for one category of log message.
const int kVlogPerCookieMonster = 1;
const int kVlogPeriodic = 3;
const int kVlogGarbageCollection = 5;
const int kVlogSetCookies = 7;
const int kVlogGetCookies = 9;
100 
101 // Mozilla sorts on the path length (longest first), and then it
102 // sorts by creation time (oldest first).
103 // The RFC says the sort order for the domain attribute is undefined.
CookieSorter(CookieMonster::CanonicalCookie * cc1,CookieMonster::CanonicalCookie * cc2)104 bool CookieSorter(CookieMonster::CanonicalCookie* cc1,
105                   CookieMonster::CanonicalCookie* cc2) {
106   if (cc1->Path().length() == cc2->Path().length())
107     return cc1->CreationDate() < cc2->CreationDate();
108   return cc1->Path().length() > cc2->Path().length();
109 }
110 
LRUCookieSorter(const CookieMonster::CookieMap::iterator & it1,const CookieMonster::CookieMap::iterator & it2)111 bool LRUCookieSorter(const CookieMonster::CookieMap::iterator& it1,
112                      const CookieMonster::CookieMap::iterator& it2) {
113   // Cookies accessed less recently should be deleted first.
114   if (it1->second->LastAccessDate() != it2->second->LastAccessDate())
115     return it1->second->LastAccessDate() < it2->second->LastAccessDate();
116 
117   // In rare cases we might have two cookies with identical last access times.
118   // To preserve the stability of the sort, in these cases prefer to delete
119   // older cookies over newer ones.  CreationDate() is guaranteed to be unique.
120   return it1->second->CreationDate() < it2->second->CreationDate();
121 }
122 
// Our strategy to find duplicates is:
// (1) Build a map from (cookiename, cookiepath) to
//     {list of cookies with this signature, sorted by creation time}.
// (2) For each list with more than 1 entry, keep the cookie having the
//     most recent creation time, and delete the others.
//
// Two cookies are considered equivalent if they have the same domain,
// name, and path.
//
// CookieSignature bundles those three strings and orders them
// lexicographically (name, then domain, then path) so that it can be used
// as a map key.
struct CookieSignature {
 public:
  CookieSignature(const std::string& name, const std::string& domain,
                  const std::string& path)
      : name(name),
        domain(domain),
        path(path) {}

  // To be a key for a map this class needs to be assignable, copyable,
  // and have an operator<.  The default assignment operator
  // and copy constructor are exactly what we want.

  // Strict ordering: the first field that differs decides the result.
  bool operator<(const CookieSignature& cs) const {
    if (name != cs.name)
      return name < cs.name;

    if (domain != cs.domain)
      return domain < cs.domain;

    return path < cs.path;
  }

  std::string name;
  std::string domain;
  std::string path;
};
160 
161 // Returns the effective TLD+1 for a given host. This only makes sense for http
162 // and https schemes. For other schemes, the host will be returned unchanged
163 // (minus any leading period).
GetEffectiveDomain(const std::string & scheme,const std::string & host)164 std::string GetEffectiveDomain(const std::string& scheme,
165                                const std::string& host) {
166   if (scheme == "http" || scheme == "https")
167     return RegistryControlledDomainService::GetDomainAndRegistry(host);
168 
169   if (!CookieMonster::DomainIsHostOnly(host))
170     return host.substr(1);
171   return host;
172 }
173 
174 // Determine the actual cookie domain based on the domain string passed
175 // (if any) and the URL from which the cookie came.
176 // On success returns true, and sets cookie_domain to either a
177 //   -host cookie domain (ex: "google.com")
178 //   -domain cookie domain (ex: ".google.com")
GetCookieDomainWithString(const GURL & url,const std::string & domain_string,std::string * result)179 bool GetCookieDomainWithString(const GURL& url,
180                                const std::string& domain_string,
181                                std::string* result) {
182   const std::string url_host(url.host());
183 
184   // If no domain was specified in the domain string, default to a host cookie.
185   // We match IE/Firefox in allowing a domain=IPADDR if it matches the url
186   // ip address hostname exactly.  It should be treated as a host cookie.
187   if (domain_string.empty() ||
188       (url.HostIsIPAddress() && url_host == domain_string)) {
189     *result = url_host;
190     DCHECK(CookieMonster::DomainIsHostOnly(*result));
191     return true;
192   }
193 
194   // Get the normalized domain specified in cookie line.
195   // Note: The RFC says we can reject a cookie if the domain
196   // attribute does not start with a dot. IE/FF/Safari however, allow a cookie
197   // of the form domain=my.domain.com, treating it the same as
198   // domain=.my.domain.com -- for compatibility we do the same here.  Firefox
199   // also treats domain=.....my.domain.com like domain=.my.domain.com, but
200   // neither IE nor Safari do this, and we don't either.
201   url_canon::CanonHostInfo ignored;
202   std::string cookie_domain(CanonicalizeHost(domain_string, &ignored));
203   if (cookie_domain.empty())
204     return false;
205   if (cookie_domain[0] != '.')
206     cookie_domain = "." + cookie_domain;
207 
208   // Ensure |url| and |cookie_domain| have the same domain+registry.
209   const std::string url_scheme(url.scheme());
210   const std::string url_domain_and_registry(
211       GetEffectiveDomain(url_scheme, url_host));
212   if (url_domain_and_registry.empty())
213     return false;  // IP addresses/intranet hosts can't set domain cookies.
214   const std::string cookie_domain_and_registry(
215       GetEffectiveDomain(url_scheme, cookie_domain));
216   if (url_domain_and_registry != cookie_domain_and_registry)
217     return false;  // Can't set a cookie on a different domain + registry.
218 
219   // Ensure |url_host| is |cookie_domain| or one of its subdomains.  Given that
220   // we know the domain+registry are the same from the above checks, this is
221   // basically a simple string suffix check.
222   if ((url_host.length() < cookie_domain.length()) ?
223       (cookie_domain != ("." + url_host)) :
224       url_host.compare(url_host.length() - cookie_domain.length(),
225                        cookie_domain.length(), cookie_domain))
226     return false;
227 
228   *result = cookie_domain;
229   return true;
230 }
231 
232 // Determine the cookie domain to use for setting the specified cookie.
GetCookieDomain(const GURL & url,const CookieMonster::ParsedCookie & pc,std::string * result)233 bool GetCookieDomain(const GURL& url,
234                      const CookieMonster::ParsedCookie& pc,
235                      std::string* result) {
236   std::string domain_string;
237   if (pc.HasDomain())
238     domain_string = pc.Domain();
239   return GetCookieDomainWithString(url, domain_string, result);
240 }
241 
CanonPathWithString(const GURL & url,const std::string & path_string)242 std::string CanonPathWithString(const GURL& url,
243                                 const std::string& path_string) {
244   // The RFC says the path should be a prefix of the current URL path.
245   // However, Mozilla allows you to set any path for compatibility with
246   // broken websites.  We unfortunately will mimic this behavior.  We try
247   // to be generous and accept cookies with an invalid path attribute, and
248   // default the path to something reasonable.
249 
250   // The path was supplied in the cookie, we'll take it.
251   if (!path_string.empty() && path_string[0] == '/')
252     return path_string;
253 
254   // The path was not supplied in the cookie or invalid, we will default
255   // to the current URL path.
256   // """Defaults to the path of the request URL that generated the
257   //    Set-Cookie response, up to, but not including, the
258   //    right-most /."""
259   // How would this work for a cookie on /?  We will include it then.
260   const std::string& url_path = url.path();
261 
262   size_t idx = url_path.find_last_of('/');
263 
264   // The cookie path was invalid or a single '/'.
265   if (idx == 0 || idx == std::string::npos)
266     return std::string("/");
267 
268   // Return up to the rightmost '/'.
269   return url_path.substr(0, idx);
270 }
271 
CanonPath(const GURL & url,const CookieMonster::ParsedCookie & pc)272 std::string CanonPath(const GURL& url,
273                       const CookieMonster::ParsedCookie& pc) {
274   std::string path_string;
275   if (pc.HasPath())
276     path_string = pc.Path();
277   return CanonPathWithString(url, path_string);
278 }
279 
// Computes a cookie's expiration time from its parsed attributes.
//   - If Max-Age is present and parses as an unsigned integer, returns
//     |current| plus that many seconds.
//   - Otherwise, if Expires is present, returns whatever
//     CookieMonster::ParseCookieTime() makes of it (a null Time when the
//     string cannot be parsed).
//   - Otherwise returns a null Time.
Time CanonExpirationInternal(const CookieMonster::ParsedCookie& pc,
                             const Time& current) {
  // First, try the Max-Age attribute.
  uint64 max_age = 0;
  // The scanf call is selected by the preprocessor: sscanf_s under MSVC,
  // plain sscanf elsewhere.  The leading space in the format skips any
  // leading whitespace in the attribute value.
  // NOTE(review): |max_age| is unsigned and is handed to
  // TimeDelta::FromSeconds(); very large Max-Age values may wrap if that
  // function takes a signed argument -- confirm.
  if (pc.HasMaxAge() &&
#ifdef COMPILER_MSVC
      sscanf_s(
#else
      sscanf(
#endif
             pc.MaxAge().c_str(), " %" PRIu64, &max_age) == 1) {
    return current + TimeDelta::FromSeconds(max_age);
  }

  // Try the Expires attribute.
  if (pc.HasExpires())
    return CookieMonster::ParseCookieTime(pc.Expires());

  // Invalid or no expiration: return a null Time.
  // NOTE(review): this comment used to say "persistent cookie", but
  // CanonExpiration() returns the same null Time when it forces a session
  // cookie, so a null Time presumably means "session cookie" -- confirm.
  return Time();
}
301 
CanonExpiration(const CookieMonster::ParsedCookie & pc,const Time & current,const CookieOptions & options)302 Time CanonExpiration(const CookieMonster::ParsedCookie& pc,
303                      const Time& current,
304                      const CookieOptions& options) {
305   Time expiration_time = CanonExpirationInternal(pc, current);
306 
307   if (options.force_session()) {
308     // Only override the expiry  adte if it's in the future. If the expiry date
309     // is before the creation date, the cookie is supposed to be deleted.
310     if (expiration_time.is_null() || expiration_time > current)
311       return Time();
312   }
313 
314   return expiration_time;
315 }
316 
317 // Helper for GarbageCollection.  If |cookie_its->size() > num_max|, remove the
318 // |num_max - num_purge| most recently accessed cookies from cookie_its.
319 // (In other words, leave the entries that are candidates for
320 // eviction in cookie_its.)  The cookies returned will be in order sorted by
321 // access time, least recently accessed first.  The access time of the least
322 // recently accessed entry not returned will be placed in
323 // |*lra_removed| if that pointer is set.  FindLeastRecentlyAccessed
324 // returns false if no manipulation is done (because the list size is less
325 // than num_max), true otherwise.
FindLeastRecentlyAccessed(size_t num_max,size_t num_purge,Time * lra_removed,std::vector<CookieMonster::CookieMap::iterator> * cookie_its)326 bool FindLeastRecentlyAccessed(
327     size_t num_max,
328     size_t num_purge,
329     Time* lra_removed,
330     std::vector<CookieMonster::CookieMap::iterator>* cookie_its) {
331   DCHECK_LE(num_purge, num_max);
332   if (cookie_its->size() > num_max) {
333     VLOG(kVlogGarbageCollection)
334         << "FindLeastRecentlyAccessed() Deep Garbage Collect.";
335     num_purge += cookie_its->size() - num_max;
336     DCHECK_GT(cookie_its->size(), num_purge);
337 
338     // Add 1 so that we can get the last time left in the store.
339     std::partial_sort(cookie_its->begin(), cookie_its->begin() + num_purge + 1,
340                       cookie_its->end(), LRUCookieSorter);
341     *lra_removed =
342         (*(cookie_its->begin() + num_purge))->second->LastAccessDate();
343     cookie_its->erase(cookie_its->begin() + num_purge, cookie_its->end());
344     return true;
345   }
346   return false;
347 }
348 
// Mapping between DeletionCause and Delegate::ChangeCause; the mapping also
// provides a boolean that specifies whether or not an OnCookieChanged
// notification ought to be generated.
//
// Each entry is labelled with the DeletionCause it stands for.
// NOTE(review): the table is presumably indexed by the DeletionCause value,
// so the entries must stay in the same order as that enum -- the enum and the
// lookup are not in this part of the file; confirm before reordering.
typedef struct ChangeCausePair_struct {
  // Cause reported for this kind of deletion.
  CookieMonster::Delegate::ChangeCause cause;
  // Whether an OnCookieChanged notification ought to be generated.
  bool notify;
} ChangeCausePair;
ChangeCausePair ChangeCauseMapping[] = {
  // DELETE_COOKIE_EXPLICIT
  { CookieMonster::Delegate::CHANGE_COOKIE_EXPLICIT, true },
  // DELETE_COOKIE_OVERWRITE
  { CookieMonster::Delegate::CHANGE_COOKIE_OVERWRITE, true },
  // DELETE_COOKIE_EXPIRED
  { CookieMonster::Delegate::CHANGE_COOKIE_EXPIRED, true },
  // DELETE_COOKIE_EVICTED
  { CookieMonster::Delegate::CHANGE_COOKIE_EVICTED, true },
  // DELETE_COOKIE_DUPLICATE_IN_BACKING_STORE
  { CookieMonster::Delegate::CHANGE_COOKIE_EXPLICIT, false },
  // DELETE_COOKIE_DONT_RECORD
  { CookieMonster::Delegate::CHANGE_COOKIE_EXPLICIT, false },
  // DELETE_COOKIE_EVICTED_DOMAIN
  { CookieMonster::Delegate::CHANGE_COOKIE_EVICTED, true },
  // DELETE_COOKIE_EVICTED_GLOBAL
  { CookieMonster::Delegate::CHANGE_COOKIE_EVICTED, true },
  // DELETE_COOKIE_EVICTED_DOMAIN_PRE_SAFE
  { CookieMonster::Delegate::CHANGE_COOKIE_EVICTED, true },
  // DELETE_COOKIE_EVICTED_DOMAIN_POST_SAFE
  { CookieMonster::Delegate::CHANGE_COOKIE_EVICTED, true },
  // DELETE_COOKIE_EXPIRED_OVERWRITE
  { CookieMonster::Delegate::CHANGE_COOKIE_EXPIRED_OVERWRITE, true },
  // DELETE_COOKIE_LAST_ENTRY
  { CookieMonster::Delegate::CHANGE_COOKIE_EXPLICIT, false }
};
382 
383 }  // namespace
384 
// static
// Starts out false; EnableFileScheme() sets it to true.
bool CookieMonster::enable_file_scheme_ = false;
387 
// Constructs a CookieMonster backed by |store| and reporting to |delegate|.
// The last-access update threshold is set to
// kDefaultAccessUpdateThresholdSeconds.  The object starts out uninitialized
// (initialized_ is false) and with keep_expired_cookies_ off.
CookieMonster::CookieMonster(PersistentCookieStore* store, Delegate* delegate)
    : initialized_(false),
      expiry_and_key_scheme_(expiry_and_key_default_),
      store_(store),
      last_access_threshold_(
          TimeDelta::FromSeconds(kDefaultAccessUpdateThresholdSeconds)),
      delegate_(delegate),
      last_statistic_record_time_(Time::Now()),
      keep_expired_cookies_(false) {
  InitializeHistograms();
  SetDefaultCookieableSchemes();
}
400 
CookieMonster(PersistentCookieStore * store,Delegate * delegate,int last_access_threshold_milliseconds)401 CookieMonster::CookieMonster(PersistentCookieStore* store,
402                              Delegate* delegate,
403                              int last_access_threshold_milliseconds)
404     : initialized_(false),
405       expiry_and_key_scheme_(expiry_and_key_default_),
406       store_(store),
407       last_access_threshold_(base::TimeDelta::FromMilliseconds(
408           last_access_threshold_milliseconds)),
409       delegate_(delegate),
410       last_statistic_record_time_(base::Time::Now()),
411       keep_expired_cookies_(false) {
412   InitializeHistograms();
413   SetDefaultCookieableSchemes();
414 }
415 
// Parse a cookie expiration time.  We try to be lenient, but we need to
// assume some order to distinguish the fields.  The basic rules:
//  - The month name must be present and prefix the first 3 letters of the
//    full month name (jan for January, jun for June).
//  - If the year is <= 2 digits, it must occur after the day of month.
//  - The time must be of the format hh:mm:ss.
// An average cookie expiration will look something like this:
//   Sat, 15-Apr-17 21:01:22 GMT
//
// Returns the parsed time (built with Time::FromUTCExploded), or a null Time
// if any of day-of-month, month, time or year is missing or out of range.
Time CookieMonster::ParseCookieTime(const std::string& time_string) {
  static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
                                   "jul", "aug", "sep", "oct", "nov", "dec" };
  static const int kMonthsLen = arraysize(kMonths);
  // We want to be pretty liberal, and support most non-ascii and non-digit
  // characters as a delimiter.  We can't treat : as a delimiter, because it
  // is the delimiter for hh:mm:ss, and we want to keep this field together.
  // We make sure to include - and +, since they could prefix numbers.
  // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
  // will be preserved, and we will get them here.  So we make sure to include
  // quote characters, and also \ for anything that was internally escaped.
  static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";

  Time::Exploded exploded = {0};

  StringTokenizer tokenizer(time_string, kDelimiters);

  // Each field is accepted at most once; later candidates are ignored.
  bool found_day_of_month = false;
  bool found_month = false;
  bool found_time = false;
  bool found_year = false;

  while (tokenizer.GetNext()) {
    const std::string token = tokenizer.token();
    DCHECK(!token.empty());
    // Classification looks only at the first character of the token.
    bool numerical = IsAsciiDigit(token[0]);

    // String field
    if (!numerical) {
      if (!found_month) {
        for (int i = 0; i < kMonthsLen; ++i) {
          // Match prefix, so we could match January, etc
          if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) {
            exploded.month = i + 1;
            found_month = true;
            break;
          }
        }
      } else {
        // If we've gotten here, it means we've already found and parsed our
        // month, and we have another string, which we would expect to be the
        // the time zone name.  According to the RFC and my experiments with
        // how sites format their expirations, we don't have much of a reason
        // to support timezones.  We don't want to ever barf on user input,
        // but this DCHECK should pass for well-formed data.
        // DCHECK(token == "GMT");
      }
    // Numeric field w/ a colon
    } else if (token.find(':') != std::string::npos) {
      // The scanf call is selected by the preprocessor: sscanf_s under MSVC,
      // plain sscanf elsewhere.  All three of hh, mm and ss must convert for
      // the time to count as found.
      // NOTE(review): %u expects unsigned int*; confirm the types of the
      // Time::Exploded fields passed here.
      if (!found_time &&
#ifdef COMPILER_MSVC
          sscanf_s(
#else
          sscanf(
#endif
                 token.c_str(), "%2u:%2u:%2u", &exploded.hour,
                 &exploded.minute, &exploded.second) == 3) {
        found_time = true;
      } else {
        // We should only ever encounter one time-like thing.  If we're here,
        // it means we've found a second, which shouldn't happen.  We keep
        // the first.  This check should be ok for well-formed input:
        // NOTREACHED();
      }
    // Numeric field
    } else {
      // Overflow with atoi() is unspecified, so we enforce a max length.
      // The first numeric token of at most 2 characters is taken as the day
      // of month; the next one of at most 5 characters is taken as the year.
      if (!found_day_of_month && token.length() <= 2) {
        exploded.day_of_month = atoi(token.c_str());
        found_day_of_month = true;
      } else if (!found_year && token.length() <= 5) {
        exploded.year = atoi(token.c_str());
        found_year = true;
      } else {
        // If we're here, it means we've either found an extra numeric field,
        // or a numeric field which was too long.  For well-formed input, the
        // following check would be reasonable:
        // NOTREACHED();
      }
    }
  }

  if (!found_day_of_month || !found_month || !found_time || !found_year) {
    // We didn't find all of the fields we need.  For well-formed input, the
    // following check would be reasonable:
    // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
    return Time();
  }

  // Normalize the year to expand abbreviated years to the full year:
  // 69..99 become 1969..1999 and 0..68 become 2000..2068.
  if (exploded.year >= 69 && exploded.year <= 99)
    exploded.year += 1900;
  if (exploded.year >= 0 && exploded.year <= 68)
    exploded.year += 2000;

  // If our values are within their correct ranges, we got our time.
  // Note that the day of month is only checked against 1..31, not against
  // the length of the specific month.
  if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
      exploded.month >= 1 && exploded.month <= 12 &&
      exploded.year >= 1601 && exploded.year <= 30827 &&
      exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
    return Time::FromUTCExploded(exploded);
  }

  // One of our values was out of expected range.  For well-formed input,
  // the following check would be reasonable:
  // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;

  return Time();
}
533 
DomainIsHostOnly(const std::string & domain_string)534 bool CookieMonster::DomainIsHostOnly(const std::string& domain_string) {
535   return (domain_string.empty() || domain_string[0] != '.');
536 }
537 
SetCookieWithDetails(const GURL & url,const std::string & name,const std::string & value,const std::string & domain,const std::string & path,const base::Time & expiration_time,bool secure,bool http_only)538 bool CookieMonster::SetCookieWithDetails(
539     const GURL& url, const std::string& name, const std::string& value,
540     const std::string& domain, const std::string& path,
541     const base::Time& expiration_time, bool secure, bool http_only) {
542   base::AutoLock autolock(lock_);
543 
544   if (!HasCookieableScheme(url))
545     return false;
546 
547   InitIfNecessary();
548 
549   Time creation_time = CurrentTime();
550   last_time_seen_ = creation_time;
551 
552   scoped_ptr<CanonicalCookie> cc;
553   cc.reset(CanonicalCookie::Create(
554       url, name, value, domain, path,
555       creation_time, expiration_time,
556       secure, http_only));
557 
558   if (!cc.get())
559     return false;
560 
561   CookieOptions options;
562   options.set_include_httponly();
563   return SetCanonicalCookie(&cc, creation_time, options);
564 }
565 
566 
GetAllCookies()567 CookieList CookieMonster::GetAllCookies() {
568   base::AutoLock autolock(lock_);
569   InitIfNecessary();
570 
571   // This function is being called to scrape the cookie list for management UI
572   // or similar.  We shouldn't show expired cookies in this list since it will
573   // just be confusing to users, and this function is called rarely enough (and
574   // is already slow enough) that it's OK to take the time to garbage collect
575   // the expired cookies now.
576   //
577   // Note that this does not prune cookies to be below our limits (if we've
578   // exceeded them) the way that calling GarbageCollect() would.
579   GarbageCollectExpired(Time::Now(),
580                         CookieMapItPair(cookies_.begin(), cookies_.end()),
581                         NULL);
582 
583   // Copy the CanonicalCookie pointers from the map so that we can use the same
584   // sorter as elsewhere, then copy the result out.
585   std::vector<CanonicalCookie*> cookie_ptrs;
586   cookie_ptrs.reserve(cookies_.size());
587   for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it)
588     cookie_ptrs.push_back(it->second);
589   std::sort(cookie_ptrs.begin(), cookie_ptrs.end(), CookieSorter);
590 
591   CookieList cookie_list;
592   cookie_list.reserve(cookie_ptrs.size());
593   for (std::vector<CanonicalCookie*>::const_iterator it = cookie_ptrs.begin();
594        it != cookie_ptrs.end(); ++it)
595     cookie_list.push_back(**it);
596 
597   return cookie_list;
598 }
599 
GetAllCookiesForURLWithOptions(const GURL & url,const CookieOptions & options)600 CookieList CookieMonster::GetAllCookiesForURLWithOptions(
601     const GURL& url,
602     const CookieOptions& options) {
603   base::AutoLock autolock(lock_);
604   InitIfNecessary();
605 
606   std::vector<CanonicalCookie*> cookie_ptrs;
607   FindCookiesForHostAndDomain(url, options, false, &cookie_ptrs);
608   std::sort(cookie_ptrs.begin(), cookie_ptrs.end(), CookieSorter);
609 
610   CookieList cookies;
611   for (std::vector<CanonicalCookie*>::const_iterator it = cookie_ptrs.begin();
612        it != cookie_ptrs.end(); it++)
613     cookies.push_back(**it);
614 
615   return cookies;
616 }
617 
GetAllCookiesForURL(const GURL & url)618 CookieList CookieMonster::GetAllCookiesForURL(const GURL& url) {
619   CookieOptions options;
620   options.set_include_httponly();
621 
622   return GetAllCookiesForURLWithOptions(url, options);
623 }
624 
DeleteAll(bool sync_to_store)625 int CookieMonster::DeleteAll(bool sync_to_store) {
626   base::AutoLock autolock(lock_);
627   if (sync_to_store)
628     InitIfNecessary();
629 
630   int num_deleted = 0;
631   for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
632     CookieMap::iterator curit = it;
633     ++it;
634     InternalDeleteCookie(curit, sync_to_store,
635                          sync_to_store ? DELETE_COOKIE_EXPLICIT :
636                              DELETE_COOKIE_DONT_RECORD /* Destruction. */);
637     ++num_deleted;
638   }
639 
640   return num_deleted;
641 }
642 
DeleteAllCreatedBetween(const Time & delete_begin,const Time & delete_end,bool sync_to_store)643 int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin,
644                                            const Time& delete_end,
645                                            bool sync_to_store) {
646   base::AutoLock autolock(lock_);
647   InitIfNecessary();
648 
649   int num_deleted = 0;
650   for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
651     CookieMap::iterator curit = it;
652     CanonicalCookie* cc = curit->second;
653     ++it;
654 
655     if (cc->CreationDate() >= delete_begin &&
656         (delete_end.is_null() || cc->CreationDate() < delete_end)) {
657       InternalDeleteCookie(curit, sync_to_store, DELETE_COOKIE_EXPLICIT);
658       ++num_deleted;
659     }
660   }
661 
662   return num_deleted;
663 }
664 
DeleteAllCreatedAfter(const Time & delete_begin,bool sync_to_store)665 int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin,
666                                          bool sync_to_store) {
667   return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store);
668 }
669 
DeleteAllForHost(const GURL & url)670 int CookieMonster::DeleteAllForHost(const GURL& url) {
671   base::AutoLock autolock(lock_);
672   InitIfNecessary();
673 
674   if (!HasCookieableScheme(url))
675     return 0;
676 
677   const std::string scheme(url.scheme());
678   const std::string host(url.host());
679 
680   // We store host cookies in the store by their canonical host name;
681   // domain cookies are stored with a leading ".".  So this is a pretty
682   // simple lookup and per-cookie delete.
683   int num_deleted = 0;
684   for (CookieMapItPair its = cookies_.equal_range(GetKey(host));
685        its.first != its.second;) {
686     CookieMap::iterator curit = its.first;
687     ++its.first;
688 
689     const CanonicalCookie* const cc = curit->second;
690 
691     // Delete only on a match as a host cookie.
692     if (cc->IsHostCookie() && cc->IsDomainMatch(scheme, host)) {
693       num_deleted++;
694 
695       InternalDeleteCookie(curit, true, DELETE_COOKIE_EXPLICIT);
696     }
697   }
698   return num_deleted;
699 }
700 
DeleteCanonicalCookie(const CanonicalCookie & cookie)701 bool CookieMonster::DeleteCanonicalCookie(const CanonicalCookie& cookie) {
702   base::AutoLock autolock(lock_);
703   InitIfNecessary();
704 
705   for (CookieMapItPair its = cookies_.equal_range(GetKey(cookie.Domain()));
706        its.first != its.second; ++its.first) {
707     // The creation date acts as our unique index...
708     if (its.first->second->CreationDate() == cookie.CreationDate()) {
709       InternalDeleteCookie(its.first, true, DELETE_COOKIE_EXPLICIT);
710       return true;
711     }
712   }
713   return false;
714 }
715 
SetCookieableSchemes(const char * schemes[],size_t num_schemes)716 void CookieMonster::SetCookieableSchemes(
717     const char* schemes[], size_t num_schemes) {
718   base::AutoLock autolock(lock_);
719 
720   // Cookieable Schemes must be set before first use of function.
721   DCHECK(!initialized_);
722 
723   cookieable_schemes_.clear();
724   cookieable_schemes_.insert(cookieable_schemes_.end(),
725                              schemes, schemes + num_schemes);
726 }
727 
SetExpiryAndKeyScheme(ExpiryAndKeyScheme key_scheme)728 void CookieMonster::SetExpiryAndKeyScheme(ExpiryAndKeyScheme key_scheme) {
729   DCHECK(!initialized_);
730   expiry_and_key_scheme_ = key_scheme;
731 }
732 
SetKeepExpiredCookies()733 void CookieMonster::SetKeepExpiredCookies() {
734   keep_expired_cookies_ = true;
735 }
736 
SetClearPersistentStoreOnExit(bool clear_local_store)737 void CookieMonster::SetClearPersistentStoreOnExit(bool clear_local_store) {
738   if (store_)
739     store_->SetClearLocalStateOnExit(clear_local_store);
740 }
741 
742 // static
EnableFileScheme()743 void CookieMonster::EnableFileScheme() {
744   enable_file_scheme_ = true;
745 }
746 
FlushStore(Task * completion_task)747 void CookieMonster::FlushStore(Task* completion_task) {
748   base::AutoLock autolock(lock_);
749   if (initialized_ && store_)
750     store_->Flush(completion_task);
751   else if (completion_task)
752     MessageLoop::current()->PostTask(FROM_HERE, completion_task);
753 }
754 
SetCookieWithOptions(const GURL & url,const std::string & cookie_line,const CookieOptions & options)755 bool CookieMonster::SetCookieWithOptions(const GURL& url,
756                                          const std::string& cookie_line,
757                                          const CookieOptions& options) {
758   base::AutoLock autolock(lock_);
759 
760   if (!HasCookieableScheme(url)) {
761     return false;
762   }
763 
764   InitIfNecessary();
765 
766   return SetCookieWithCreationTimeAndOptions(url, cookie_line, Time(), options);
767 }
768 
GetCookiesWithOptions(const GURL & url,const CookieOptions & options)769 std::string CookieMonster::GetCookiesWithOptions(const GURL& url,
770                                                  const CookieOptions& options) {
771   base::AutoLock autolock(lock_);
772   InitIfNecessary();
773 
774   if (!HasCookieableScheme(url)) {
775     return std::string();
776   }
777 
778   TimeTicks start_time(TimeTicks::Now());
779 
780   // Get the cookies for this host and its domain(s).
781   std::vector<CanonicalCookie*> cookies;
782   FindCookiesForHostAndDomain(url, options, true, &cookies);
783   std::sort(cookies.begin(), cookies.end(), CookieSorter);
784 
785   std::string cookie_line;
786   for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin();
787        it != cookies.end(); ++it) {
788     if (it != cookies.begin())
789       cookie_line += "; ";
790     // In Mozilla if you set a cookie like AAAA, it will have an empty token
791     // and a value of AAAA.  When it sends the cookie back, it will send AAAA,
792     // so we need to avoid sending =AAAA for a blank token value.
793     if (!(*it)->Name().empty())
794       cookie_line += (*it)->Name() + "=";
795     cookie_line += (*it)->Value();
796   }
797 
798   histogram_time_get_->AddTime(TimeTicks::Now() - start_time);
799 
800   VLOG(kVlogGetCookies) << "GetCookies() result: " << cookie_line;
801 
802   return cookie_line;
803 }
804 
DeleteCookie(const GURL & url,const std::string & cookie_name)805 void CookieMonster::DeleteCookie(const GURL& url,
806                                  const std::string& cookie_name) {
807   base::AutoLock autolock(lock_);
808   InitIfNecessary();
809 
810   if (!HasCookieableScheme(url))
811     return;
812 
813   CookieOptions options;
814   options.set_include_httponly();
815   // Get the cookies for this host and its domain(s).
816   std::vector<CanonicalCookie*> cookies;
817   FindCookiesForHostAndDomain(url, options, true, &cookies);
818   std::set<CanonicalCookie*> matching_cookies;
819 
820   for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin();
821        it != cookies.end(); ++it) {
822     if ((*it)->Name() != cookie_name)
823       continue;
824     if (url.path().find((*it)->Path()))
825       continue;
826     matching_cookies.insert(*it);
827   }
828 
829   for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) {
830     CookieMap::iterator curit = it;
831     ++it;
832     if (matching_cookies.find(curit->second) != matching_cookies.end()) {
833       InternalDeleteCookie(curit, true, DELETE_COOKIE_EXPLICIT);
834     }
835   }
836 }
837 
GetCookieMonster()838 CookieMonster* CookieMonster::GetCookieMonster() {
839   return this;
840 }
841 
~CookieMonster()842 CookieMonster::~CookieMonster() {
843   DeleteAll(false);
844 }
845 
SetCookieWithCreationTime(const GURL & url,const std::string & cookie_line,const base::Time & creation_time)846 bool CookieMonster::SetCookieWithCreationTime(const GURL& url,
847                                               const std::string& cookie_line,
848                                               const base::Time& creation_time) {
849   base::AutoLock autolock(lock_);
850 
851   if (!HasCookieableScheme(url)) {
852     return false;
853   }
854 
855   InitIfNecessary();
856   return SetCookieWithCreationTimeAndOptions(url, cookie_line, creation_time,
857                                              CookieOptions());
858 }
859 
InitStore()860 void CookieMonster::InitStore() {
861   DCHECK(store_) << "Store must exist to initialize";
862 
863   TimeTicks beginning_time(TimeTicks::Now());
864 
865   // Initialize the store and sync in any saved persistent cookies.  We don't
866   // care if it's expired, insert it so it can be garbage collected, removed,
867   // and sync'd.
868   std::vector<CanonicalCookie*> cookies;
869   // Reserve space for the maximum amount of cookies a database should have.
870   // This prevents multiple vector growth / copies as we append cookies.
871   cookies.reserve(kMaxCookies);
872   store_->Load(&cookies);
873 
874   // Avoid ever letting cookies with duplicate creation times into the store;
875   // that way we don't have to worry about what sections of code are safe
876   // to call while it's in that state.
877   std::set<int64> creation_times;
878 
879   // Presumably later than any access time in the store.
880   Time earliest_access_time;
881 
882   for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin();
883        it != cookies.end(); ++it) {
884     int64 cookie_creation_time = (*it)->CreationDate().ToInternalValue();
885 
886     if (creation_times.insert(cookie_creation_time).second) {
887       InternalInsertCookie(GetKey((*it)->Domain()), *it, false);
888       const Time cookie_access_time((*it)->LastAccessDate());
889       if (earliest_access_time.is_null() ||
890           cookie_access_time < earliest_access_time)
891         earliest_access_time = cookie_access_time;
892     } else {
893       LOG(ERROR) << base::StringPrintf("Found cookies with duplicate creation "
894                                        "times in backing store: "
895                                        "{name='%s', domain='%s', path='%s'}",
896                                        (*it)->Name().c_str(),
897                                        (*it)->Domain().c_str(),
898                                        (*it)->Path().c_str());
899       // We've been given ownership of the cookie and are throwing it
900       // away; reclaim the space.
901       delete (*it);
902     }
903   }
904   earliest_access_time_= earliest_access_time;
905 
906   // After importing cookies from the PersistentCookieStore, verify that
907   // none of our other constraints are violated.
908   //
909   // In particular, the backing store might have given us duplicate cookies.
910   EnsureCookiesMapIsValid();
911 
912   histogram_time_load_->AddTime(TimeTicks::Now() - beginning_time);
913 }
914 
EnsureCookiesMapIsValid()915 void CookieMonster::EnsureCookiesMapIsValid() {
916   lock_.AssertAcquired();
917 
918   int num_duplicates_trimmed = 0;
919 
920   // Iterate through all the of the cookies, grouped by host.
921   CookieMap::iterator prev_range_end = cookies_.begin();
922   while (prev_range_end != cookies_.end()) {
923     CookieMap::iterator cur_range_begin = prev_range_end;
924     const std::string key = cur_range_begin->first;  // Keep a copy.
925     CookieMap::iterator cur_range_end = cookies_.upper_bound(key);
926     prev_range_end = cur_range_end;
927 
928     // Ensure no equivalent cookies for this host.
929     num_duplicates_trimmed +=
930         TrimDuplicateCookiesForKey(key, cur_range_begin, cur_range_end);
931   }
932 
933   // Record how many duplicates were found in the database.
934   // See InitializeHistograms() for details.
935   histogram_cookie_deletion_cause_->Add(num_duplicates_trimmed);
936 }
937 
TrimDuplicateCookiesForKey(const std::string & key,CookieMap::iterator begin,CookieMap::iterator end)938 int CookieMonster::TrimDuplicateCookiesForKey(
939     const std::string& key,
940     CookieMap::iterator begin,
941     CookieMap::iterator end) {
942   lock_.AssertAcquired();
943 
944   // Set of cookies ordered by creation time.
945   typedef std::set<CookieMap::iterator, OrderByCreationTimeDesc> CookieSet;
946 
947   // Helper map we populate to find the duplicates.
948   typedef std::map<CookieSignature, CookieSet> EquivalenceMap;
949   EquivalenceMap equivalent_cookies;
950 
951   // The number of duplicate cookies that have been found.
952   int num_duplicates = 0;
953 
954   // Iterate through all of the cookies in our range, and insert them into
955   // the equivalence map.
956   for (CookieMap::iterator it = begin; it != end; ++it) {
957     DCHECK_EQ(key, it->first);
958     CanonicalCookie* cookie = it->second;
959 
960     CookieSignature signature(cookie->Name(), cookie->Domain(),
961                               cookie->Path());
962     CookieSet& set = equivalent_cookies[signature];
963 
964     // We found a duplicate!
965     if (!set.empty())
966       num_duplicates++;
967 
968     // We save the iterator into |cookies_| rather than the actual cookie
969     // pointer, since we may need to delete it later.
970     bool insert_success = set.insert(it).second;
971     DCHECK(insert_success) <<
972         "Duplicate creation times found in duplicate cookie name scan.";
973   }
974 
975   // If there were no duplicates, we are done!
976   if (num_duplicates == 0)
977     return 0;
978 
979   // Make sure we find everything below that we did above.
980   int num_duplicates_found = 0;
981 
982   // Otherwise, delete all the duplicate cookies, both from our in-memory store
983   // and from the backing store.
984   for (EquivalenceMap::iterator it = equivalent_cookies.begin();
985        it != equivalent_cookies.end();
986        ++it) {
987     const CookieSignature& signature = it->first;
988     CookieSet& dupes = it->second;
989 
990     if (dupes.size() <= 1)
991       continue;  // This cookiename/path has no duplicates.
992     num_duplicates_found += dupes.size() - 1;
993 
994     // Since |dups| is sorted by creation time (descending), the first cookie
995     // is the most recent one, so we will keep it. The rest are duplicates.
996     dupes.erase(dupes.begin());
997 
998     LOG(ERROR) << base::StringPrintf(
999         "Found %d duplicate cookies for host='%s', "
1000         "with {name='%s', domain='%s', path='%s'}",
1001         static_cast<int>(dupes.size()),
1002         key.c_str(),
1003         signature.name.c_str(),
1004         signature.domain.c_str(),
1005         signature.path.c_str());
1006 
1007     // Remove all the cookies identified by |dupes|. It is valid to delete our
1008     // list of iterators one at a time, since |cookies_| is a multimap (they
1009     // don't invalidate existing iterators following deletion).
1010     for (CookieSet::iterator dupes_it = dupes.begin();
1011          dupes_it != dupes.end();
1012          ++dupes_it) {
1013       InternalDeleteCookie(*dupes_it, true /*sync_to_store*/,
1014                            DELETE_COOKIE_DUPLICATE_IN_BACKING_STORE);
1015     }
1016   }
1017   DCHECK_EQ(num_duplicates, num_duplicates_found);
1018 
1019   return num_duplicates;
1020 }
1021 
1022 // Note: file must be the last scheme.
1023 const char* CookieMonster::kDefaultCookieableSchemes[] =
1024     { "http", "https", "file" };
1025 const int CookieMonster::kDefaultCookieableSchemesCount =
1026     arraysize(CookieMonster::kDefaultCookieableSchemes);
1027 
SetDefaultCookieableSchemes()1028 void CookieMonster::SetDefaultCookieableSchemes() {
1029   int num_schemes = enable_file_scheme_ ?
1030       kDefaultCookieableSchemesCount : kDefaultCookieableSchemesCount - 1;
1031   SetCookieableSchemes(kDefaultCookieableSchemes, num_schemes);
1032 }
1033 
1034 
FindCookiesForHostAndDomain(const GURL & url,const CookieOptions & options,bool update_access_time,std::vector<CanonicalCookie * > * cookies)1035 void CookieMonster::FindCookiesForHostAndDomain(
1036     const GURL& url,
1037     const CookieOptions& options,
1038     bool update_access_time,
1039     std::vector<CanonicalCookie*>* cookies) {
1040   lock_.AssertAcquired();
1041 
1042   const Time current_time(CurrentTime());
1043 
1044   // Probe to save statistics relatively frequently.  We do it here rather
1045   // than in the set path as many websites won't set cookies, and we
1046   // want to collect statistics whenever the browser's being used.
1047   RecordPeriodicStats(current_time);
1048 
1049   if (expiry_and_key_scheme_ == EKS_DISCARD_RECENT_AND_PURGE_DOMAIN) {
1050     // Can just dispatch to FindCookiesForKey
1051     const std::string key(GetKey(url.host()));
1052     FindCookiesForKey(key, url, options, current_time,
1053                       update_access_time, cookies);
1054   } else {
1055     // Need to probe for all domains that might have relevant
1056     // cookies for us.
1057 
1058     // Query for the full host, For example: 'a.c.blah.com'.
1059     std::string key(GetKey(url.host()));
1060     FindCookiesForKey(key, url, options, current_time, update_access_time,
1061                       cookies);
1062 
1063     // See if we can search for domain cookies, i.e. if the host has a TLD + 1.
1064     const std::string domain(GetEffectiveDomain(url.scheme(), key));
1065     if (domain.empty())
1066       return;
1067     DCHECK_LE(domain.length(), key.length());
1068     DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(),
1069                              domain));
1070 
1071     // Walk through the string and query at the dot points (GURL should have
1072     // canonicalized the dots, so this should be safe).  Stop once we reach the
1073     // domain + registry; we can't write cookies past this point, and with some
1074     // registrars other domains can, in which case we don't want to read their
1075     // cookies.
1076     for (key = "." + key; key.length() > domain.length(); ) {
1077       FindCookiesForKey(key, url, options, current_time, update_access_time,
1078                         cookies);
1079       const size_t next_dot = key.find('.', 1);  // Skip over leading dot.
1080       key.erase(0, next_dot);
1081     }
1082   }
1083 }
1084 
FindCookiesForKey(const std::string & key,const GURL & url,const CookieOptions & options,const Time & current,bool update_access_time,std::vector<CanonicalCookie * > * cookies)1085 void CookieMonster::FindCookiesForKey(
1086     const std::string& key,
1087     const GURL& url,
1088     const CookieOptions& options,
1089     const Time& current,
1090     bool update_access_time,
1091     std::vector<CanonicalCookie*>* cookies) {
1092   lock_.AssertAcquired();
1093 
1094   const std::string scheme(url.scheme());
1095   const std::string host(url.host());
1096   bool secure = url.SchemeIsSecure();
1097 
1098   for (CookieMapItPair its = cookies_.equal_range(key);
1099        its.first != its.second; ) {
1100     CookieMap::iterator curit = its.first;
1101     CanonicalCookie* cc = curit->second;
1102     ++its.first;
1103 
1104     // If the cookie is expired, delete it.
1105     if (cc->IsExpired(current) && !keep_expired_cookies_) {
1106       InternalDeleteCookie(curit, true, DELETE_COOKIE_EXPIRED);
1107       continue;
1108     }
1109 
1110     // Filter out HttpOnly cookies, per options.
1111     if (options.exclude_httponly() && cc->IsHttpOnly())
1112       continue;
1113 
1114     // Filter out secure cookies unless we're https.
1115     if (!secure && cc->IsSecure())
1116       continue;
1117 
1118     // Filter out cookies that don't apply to this domain.
1119     if (expiry_and_key_scheme_ == EKS_KEEP_RECENT_AND_PURGE_ETLDP1
1120         && !cc->IsDomainMatch(scheme, host))
1121       continue;
1122 
1123     if (!cc->IsOnPath(url.path()))
1124       continue;
1125 
1126     // Add this cookie to the set of matching cookies.  Update the access
1127     // time if we've been requested to do so.
1128     if (update_access_time) {
1129       InternalUpdateCookieAccessTime(cc, current);
1130     }
1131     cookies->push_back(cc);
1132   }
1133 }
1134 
DeleteAnyEquivalentCookie(const std::string & key,const CanonicalCookie & ecc,bool skip_httponly,bool already_expired)1135 bool CookieMonster::DeleteAnyEquivalentCookie(const std::string& key,
1136                                               const CanonicalCookie& ecc,
1137                                               bool skip_httponly,
1138                                               bool already_expired) {
1139   lock_.AssertAcquired();
1140 
1141   bool found_equivalent_cookie = false;
1142   bool skipped_httponly = false;
1143   for (CookieMapItPair its = cookies_.equal_range(key);
1144        its.first != its.second; ) {
1145     CookieMap::iterator curit = its.first;
1146     CanonicalCookie* cc = curit->second;
1147     ++its.first;
1148 
1149     if (ecc.IsEquivalent(*cc)) {
1150       // We should never have more than one equivalent cookie, since they should
1151       // overwrite each other.
1152       CHECK(!found_equivalent_cookie) <<
1153           "Duplicate equivalent cookies found, cookie store is corrupted.";
1154       if (skip_httponly && cc->IsHttpOnly()) {
1155         skipped_httponly = true;
1156       } else {
1157         InternalDeleteCookie(curit, true, already_expired ?
1158             DELETE_COOKIE_EXPIRED_OVERWRITE : DELETE_COOKIE_OVERWRITE);
1159       }
1160       found_equivalent_cookie = true;
1161     }
1162   }
1163   return skipped_httponly;
1164 }
1165 
InternalInsertCookie(const std::string & key,CanonicalCookie * cc,bool sync_to_store)1166 void CookieMonster::InternalInsertCookie(const std::string& key,
1167                                          CanonicalCookie* cc,
1168                                          bool sync_to_store) {
1169   lock_.AssertAcquired();
1170 
1171   if (cc->IsPersistent() && store_ && sync_to_store)
1172     store_->AddCookie(*cc);
1173   cookies_.insert(CookieMap::value_type(key, cc));
1174   if (delegate_.get()) {
1175     delegate_->OnCookieChanged(
1176         *cc, false, CookieMonster::Delegate::CHANGE_COOKIE_EXPLICIT);
1177   }
1178 }
1179 
SetCookieWithCreationTimeAndOptions(const GURL & url,const std::string & cookie_line,const Time & creation_time_or_null,const CookieOptions & options)1180 bool CookieMonster::SetCookieWithCreationTimeAndOptions(
1181     const GURL& url,
1182     const std::string& cookie_line,
1183     const Time& creation_time_or_null,
1184     const CookieOptions& options) {
1185   lock_.AssertAcquired();
1186 
1187   VLOG(kVlogSetCookies) << "SetCookie() line: " << cookie_line;
1188 
1189   Time creation_time = creation_time_or_null;
1190   if (creation_time.is_null()) {
1191     creation_time = CurrentTime();
1192     last_time_seen_ = creation_time;
1193   }
1194 
1195   // Parse the cookie.
1196   ParsedCookie pc(cookie_line);
1197 
1198   if (!pc.IsValid()) {
1199     VLOG(kVlogSetCookies) << "WARNING: Couldn't parse cookie";
1200     return false;
1201   }
1202 
1203   if (options.exclude_httponly() && pc.IsHttpOnly()) {
1204     VLOG(kVlogSetCookies) << "SetCookie() not setting httponly cookie";
1205     return false;
1206   }
1207 
1208   std::string cookie_domain;
1209   if (!GetCookieDomain(url, pc, &cookie_domain)) {
1210     return false;
1211   }
1212 
1213   std::string cookie_path = CanonPath(url, pc);
1214 
1215   scoped_ptr<CanonicalCookie> cc;
1216   Time cookie_expires = CanonExpiration(pc, creation_time, options);
1217 
1218   cc.reset(new CanonicalCookie(url, pc.Name(), pc.Value(), cookie_domain,
1219                                cookie_path, creation_time, cookie_expires,
1220                                creation_time, pc.IsSecure(), pc.IsHttpOnly(),
1221                                !cookie_expires.is_null()));
1222 
1223   if (!cc.get()) {
1224     VLOG(kVlogSetCookies) << "WARNING: Failed to allocate CanonicalCookie";
1225     return false;
1226   }
1227   return SetCanonicalCookie(&cc, creation_time, options);
1228 }
1229 
SetCanonicalCookie(scoped_ptr<CanonicalCookie> * cc,const Time & creation_time,const CookieOptions & options)1230 bool CookieMonster::SetCanonicalCookie(scoped_ptr<CanonicalCookie>* cc,
1231                                        const Time& creation_time,
1232                                        const CookieOptions& options) {
1233   const std::string key(GetKey((*cc)->Domain()));
1234   bool already_expired = (*cc)->IsExpired(creation_time);
1235   if (DeleteAnyEquivalentCookie(key, **cc, options.exclude_httponly(),
1236                                 already_expired)) {
1237     VLOG(kVlogSetCookies) << "SetCookie() not clobbering httponly cookie";
1238     return false;
1239   }
1240 
1241   VLOG(kVlogSetCookies) << "SetCookie() key: " << key << " cc: "
1242                         << (*cc)->DebugString();
1243 
1244   // Realize that we might be setting an expired cookie, and the only point
1245   // was to delete the cookie which we've already done.
1246   if (!already_expired || keep_expired_cookies_) {
1247     // See InitializeHistograms() for details.
1248     if ((*cc)->DoesExpire()) {
1249       histogram_expiration_duration_minutes_->Add(
1250           ((*cc)->ExpiryDate() - creation_time).InMinutes());
1251     }
1252 
1253     InternalInsertCookie(key, cc->release(), true);
1254   }
1255 
1256   // We assume that hopefully setting a cookie will be less common than
1257   // querying a cookie.  Since setting a cookie can put us over our limits,
1258   // make sure that we garbage collect...  We can also make the assumption that
1259   // if a cookie was set, in the common case it will be used soon after,
1260   // and we will purge the expired cookies in GetCookies().
1261   GarbageCollect(creation_time, key);
1262 
1263   return true;
1264 }
1265 
InternalUpdateCookieAccessTime(CanonicalCookie * cc,const Time & current)1266 void CookieMonster::InternalUpdateCookieAccessTime(CanonicalCookie* cc,
1267                                                    const Time& current) {
1268   lock_.AssertAcquired();
1269 
1270   // Based off the Mozilla code.  When a cookie has been accessed recently,
1271   // don't bother updating its access time again.  This reduces the number of
1272   // updates we do during pageload, which in turn reduces the chance our storage
1273   // backend will hit its batch thresholds and be forced to update.
1274   if ((current - cc->LastAccessDate()) < last_access_threshold_)
1275     return;
1276 
1277   // See InitializeHistograms() for details.
1278   histogram_between_access_interval_minutes_->Add(
1279       (current - cc->LastAccessDate()).InMinutes());
1280 
1281   cc->SetLastAccessDate(current);
1282   if (cc->IsPersistent() && store_)
1283     store_->UpdateCookieAccessTime(*cc);
1284 }
1285 
InternalDeleteCookie(CookieMap::iterator it,bool sync_to_store,DeletionCause deletion_cause)1286 void CookieMonster::InternalDeleteCookie(CookieMap::iterator it,
1287                                          bool sync_to_store,
1288                                          DeletionCause deletion_cause) {
1289   lock_.AssertAcquired();
1290 
1291   // Ideally, this would be asserted up where we define ChangeCauseMapping,
1292   // but DeletionCause's visibility (or lack thereof) forces us to make
1293   // this check here.
1294   COMPILE_ASSERT(arraysize(ChangeCauseMapping) == DELETE_COOKIE_LAST_ENTRY + 1,
1295                  ChangeCauseMapping_size_not_eq_DeletionCause_enum_size);
1296 
1297   // See InitializeHistograms() for details.
1298   if (deletion_cause != DELETE_COOKIE_DONT_RECORD)
1299     histogram_cookie_deletion_cause_->Add(deletion_cause);
1300 
1301   CanonicalCookie* cc = it->second;
1302   VLOG(kVlogSetCookies) << "InternalDeleteCookie() cc: " << cc->DebugString();
1303 
1304   if (cc->IsPersistent() && store_ && sync_to_store)
1305     store_->DeleteCookie(*cc);
1306   if (delegate_.get()) {
1307     ChangeCausePair mapping = ChangeCauseMapping[deletion_cause];
1308 
1309     if (mapping.notify)
1310       delegate_->OnCookieChanged(*cc, true, mapping.cause);
1311   }
1312   cookies_.erase(it);
1313   delete cc;
1314 }
1315 
1316 // Domain expiry behavior is unchanged by key/expiry scheme (the
1317 // meaning of the key is different, but that's not visible to this
1318 // routine).  Global garbage collection is dependent on key/expiry
1319 // scheme in that recently touched cookies are not saved if
1320 // expiry_and_key_scheme_ == EKS_DISCARD_RECENT_AND_PURGE_DOMAIN.
GarbageCollect(const Time & current,const std::string & key)1321 int CookieMonster::GarbageCollect(const Time& current,
1322                                   const std::string& key) {
1323   lock_.AssertAcquired();
1324 
1325   int num_deleted = 0;
1326 
1327   // Collect garbage for this key.
1328   if (cookies_.count(key) > kDomainMaxCookies) {
1329     VLOG(kVlogGarbageCollection) << "GarbageCollect() key: " << key;
1330 
1331     std::vector<CookieMap::iterator> cookie_its;
1332     num_deleted += GarbageCollectExpired(
1333         current, cookies_.equal_range(key), &cookie_its);
1334     base::Time oldest_removed;
1335     if (FindLeastRecentlyAccessed(kDomainMaxCookies, kDomainPurgeCookies,
1336                                   &oldest_removed, &cookie_its)) {
1337       // Delete in two passes so we can figure out what we're nuking
1338       // that would be kept at the global level.
1339       int num_subject_to_global_purge =
1340           GarbageCollectDeleteList(
1341               current,
1342               Time::Now() - TimeDelta::FromDays(kSafeFromGlobalPurgeDays),
1343               DELETE_COOKIE_EVICTED_DOMAIN_PRE_SAFE,
1344               cookie_its);
1345       num_deleted += num_subject_to_global_purge;
1346       // Correct because FindLeastRecentlyAccessed returns a sorted list.
1347       cookie_its.erase(cookie_its.begin(),
1348                        cookie_its.begin() + num_subject_to_global_purge);
1349       num_deleted +=
1350           GarbageCollectDeleteList(
1351               current,
1352               Time(),
1353               DELETE_COOKIE_EVICTED_DOMAIN_POST_SAFE,
1354               cookie_its);
1355     }
1356   }
1357 
1358   // Collect garbage for everything.  With firefox style we want to
1359   // preserve cookies touched in kSafeFromGlobalPurgeDays, otherwise
1360   // not.
1361   if (cookies_.size() > kMaxCookies &&
1362       (expiry_and_key_scheme_ == EKS_DISCARD_RECENT_AND_PURGE_DOMAIN ||
1363        earliest_access_time_ <
1364        Time::Now() - TimeDelta::FromDays(kSafeFromGlobalPurgeDays))) {
1365     VLOG(kVlogGarbageCollection) << "GarbageCollect() everything";
1366     std::vector<CookieMap::iterator> cookie_its;
1367     base::Time oldest_left;
1368     num_deleted += GarbageCollectExpired(
1369         current, CookieMapItPair(cookies_.begin(), cookies_.end()),
1370         &cookie_its);
1371     if (FindLeastRecentlyAccessed(kMaxCookies, kPurgeCookies,
1372                                   &oldest_left, &cookie_its)) {
1373       Time oldest_safe_cookie(
1374           expiry_and_key_scheme_ == EKS_KEEP_RECENT_AND_PURGE_ETLDP1 ?
1375               (Time::Now() - TimeDelta::FromDays(kSafeFromGlobalPurgeDays)) :
1376               Time());                  // Null time == ignore access time.
1377       int num_evicted = GarbageCollectDeleteList(
1378           current,
1379           oldest_safe_cookie,
1380           DELETE_COOKIE_EVICTED_GLOBAL,
1381           cookie_its);
1382 
1383       // If no cookies were preserved by the time limit, the global last
1384       // access is set to the value returned from FindLeastRecentlyAccessed.
1385       // If the time limit preserved some cookies, we use the last access of
1386       // the oldest preserved cookie.
1387       if (num_evicted == static_cast<int>(cookie_its.size())) {
1388         earliest_access_time_ = oldest_left;
1389       } else {
1390         earliest_access_time_ =
1391             (*(cookie_its.begin() + num_evicted))->second->LastAccessDate();
1392       }
1393       num_deleted += num_evicted;
1394     }
1395   }
1396 
1397   return num_deleted;
1398 }
1399 
GarbageCollectExpired(const Time & current,const CookieMapItPair & itpair,std::vector<CookieMap::iterator> * cookie_its)1400 int CookieMonster::GarbageCollectExpired(
1401     const Time& current,
1402     const CookieMapItPair& itpair,
1403     std::vector<CookieMap::iterator>* cookie_its) {
1404   if (keep_expired_cookies_)
1405     return 0;
1406 
1407   lock_.AssertAcquired();
1408 
1409   int num_deleted = 0;
1410   for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) {
1411     CookieMap::iterator curit = it;
1412     ++it;
1413 
1414     if (curit->second->IsExpired(current)) {
1415       InternalDeleteCookie(curit, true, DELETE_COOKIE_EXPIRED);
1416       ++num_deleted;
1417     } else if (cookie_its) {
1418       cookie_its->push_back(curit);
1419     }
1420   }
1421 
1422   return num_deleted;
1423 }
1424 
GarbageCollectDeleteList(const Time & current,const Time & keep_accessed_after,DeletionCause cause,std::vector<CookieMap::iterator> & cookie_its)1425 int CookieMonster::GarbageCollectDeleteList(
1426     const Time& current,
1427     const Time& keep_accessed_after,
1428     DeletionCause cause,
1429     std::vector<CookieMap::iterator>& cookie_its) {
1430   int num_deleted = 0;
1431   for (std::vector<CookieMap::iterator>::iterator it = cookie_its.begin();
1432        it != cookie_its.end(); it++) {
1433     if (keep_accessed_after.is_null() ||
1434         (*it)->second->LastAccessDate() < keep_accessed_after) {
1435       histogram_evicted_last_access_minutes_->Add(
1436           (current - (*it)->second->LastAccessDate()).InMinutes());
1437       InternalDeleteCookie((*it), true, cause);
1438       num_deleted++;
1439     }
1440   }
1441   return num_deleted;
1442 }
1443 
1444 // A wrapper around RegistryControlledDomainService::GetDomainAndRegistry
1445 // to make clear we're creating a key for our local map.  Here and
1446 // in FindCookiesForHostAndDomain() are the only two places where
1447 // we need to conditionalize based on key type.
1448 //
1449 // Note that this key algorithm explicitly ignores the scheme.  This is
1450 // because when we're entering cookies into the map from the backing store,
1451 // we in general won't have the scheme at that point.
1452 // In practical terms, this means that file cookies will be stored
1453 // in the map either by an empty string or by UNC name (and will be
1454 // limited by kMaxCookiesPerHost), and extension cookies will be stored
1455 // based on the single extension id, as the extension id won't have the
1456 // form of a DNS host and hence GetKey() will return it unchanged.
1457 //
1458 // Arguably the right thing to do here is to make the key
1459 // algorithm dependent on the scheme, and make sure that the scheme is
1460 // available everywhere the key must be obtained (specfically at backing
1461 // store load time).  This would require either changing the backing store
1462 // database schema to include the scheme (far more trouble than it's worth), or
1463 // separating out file cookies into their own CookieMonster instance and
1464 // thus restricting each scheme to a single cookie monster (which might
1465 // be worth it, but is still too much trouble to solve what is currently a
1466 // non-problem).
GetKey(const std::string & domain) const1467 std::string CookieMonster::GetKey(const std::string& domain) const {
1468   if (expiry_and_key_scheme_ == EKS_DISCARD_RECENT_AND_PURGE_DOMAIN)
1469     return domain;
1470 
1471   std::string effective_domain(
1472       RegistryControlledDomainService::GetDomainAndRegistry(domain));
1473   if (effective_domain.empty())
1474     effective_domain = domain;
1475 
1476   if (!effective_domain.empty() && effective_domain[0] == '.')
1477     return effective_domain.substr(1);
1478   return effective_domain;
1479 }
1480 
HasCookieableScheme(const GURL & url)1481 bool CookieMonster::HasCookieableScheme(const GURL& url) {
1482   lock_.AssertAcquired();
1483 
1484   // Make sure the request is on a cookie-able url scheme.
1485   for (size_t i = 0; i < cookieable_schemes_.size(); ++i) {
1486     // We matched a scheme.
1487     if (url.SchemeIs(cookieable_schemes_[i].c_str())) {
1488       // We've matched a supported scheme.
1489       return true;
1490     }
1491   }
1492 
1493   // The scheme didn't match any in our whitelist.
1494   VLOG(kVlogPerCookieMonster) << "WARNING: Unsupported cookie scheme: "
1495                               << url.scheme();
1496   return false;
1497 }
1498 
1499 // Test to see if stats should be recorded, and record them if so.
1500 // The goal here is to get sampling for the average browser-hour of
1501 // activity.  We won't take samples when the web isn't being surfed,
1502 // and when the web is being surfed, we'll take samples about every
1503 // kRecordStatisticsIntervalSeconds.
1504 // last_statistic_record_time_ is initialized to Now() rather than null
1505 // in the constructor so that we won't take statistics right after
1506 // startup, to avoid bias from browsers that are started but not used.
RecordPeriodicStats(const base::Time & current_time)1507 void CookieMonster::RecordPeriodicStats(const base::Time& current_time) {
1508   const base::TimeDelta kRecordStatisticsIntervalTime(
1509       base::TimeDelta::FromSeconds(kRecordStatisticsIntervalSeconds));
1510 
1511   // If we've taken statistics recently, return.
1512   if (current_time - last_statistic_record_time_ <=
1513       kRecordStatisticsIntervalTime) {
1514     return;
1515   }
1516 
1517   // See InitializeHistograms() for details.
1518   histogram_count_->Add(cookies_.size());
1519 
1520   // More detailed statistics on cookie counts at different granularities.
1521   TimeTicks beginning_of_time(TimeTicks::Now());
1522 
1523   for (CookieMap::const_iterator it_key = cookies_.begin();
1524        it_key != cookies_.end(); ) {
1525     const std::string& key(it_key->first);
1526 
1527     int key_count = 0;
1528     typedef std::map<std::string, unsigned int> DomainMap;
1529     DomainMap domain_map;
1530     CookieMapItPair its_cookies = cookies_.equal_range(key);
1531     while (its_cookies.first != its_cookies.second) {
1532       key_count++;
1533       const std::string& cookie_domain(its_cookies.first->second->Domain());
1534       domain_map[cookie_domain]++;
1535 
1536       its_cookies.first++;
1537     }
1538     histogram_etldp1_count_->Add(key_count);
1539     histogram_domain_per_etldp1_count_->Add(domain_map.size());
1540     for (DomainMap::const_iterator domain_map_it = domain_map.begin();
1541          domain_map_it != domain_map.end(); domain_map_it++)
1542       histogram_domain_count_->Add(domain_map_it->second);
1543 
1544     it_key = its_cookies.second;
1545   }
1546 
1547   VLOG(kVlogPeriodic)
1548       << "Time for recording cookie stats (us): "
1549       << (TimeTicks::Now() - beginning_of_time).InMicroseconds();
1550 
1551   last_statistic_record_time_ = current_time;
1552 }
1553 
1554 // Initialize all histogram counter variables used in this class.
1555 //
1556 // Normal histogram usage involves using the macros defined in
1557 // histogram.h, which automatically takes care of declaring these
1558 // variables (as statics), initializing them, and accumulating into
1559 // them, all from a single entry point.  Unfortunately, that solution
1560 // doesn't work for the CookieMonster, as it's vulnerable to races between
1561 // separate threads executing the same functions and hence initializing the
1562 // same static variables.  There isn't a race danger in the histogram
1563 // accumulation calls; they are written to be resilient to simultaneous
1564 // calls from multiple threads.
1565 //
1566 // The solution taken here is to have per-CookieMonster instance
1567 // variables that are constructed during CookieMonster construction.
1568 // Note that these variables refer to the same underlying histogram,
1569 // so we still race (but safely) with other CookieMonster instances
1570 // for accumulation.
1571 //
1572 // To do this we've expanded out the individual histogram macros calls,
1573 // with declarations of the variables in the class decl, initialization here
1574 // (done from the class constructor) and direct calls to the accumulation
1575 // methods where needed.  The specific histogram macro calls on which the
1576 // initialization is based are included in comments below.
InitializeHistograms()1577 void CookieMonster::InitializeHistograms() {
1578   // From UMA_HISTOGRAM_CUSTOM_COUNTS
1579   histogram_expiration_duration_minutes_ = base::Histogram::FactoryGet(
1580       "Cookie.ExpirationDurationMinutes",
1581       1, kMinutesInTenYears, 50,
1582       base::Histogram::kUmaTargetedHistogramFlag);
1583   histogram_between_access_interval_minutes_ = base::Histogram::FactoryGet(
1584       "Cookie.BetweenAccessIntervalMinutes",
1585       1, kMinutesInTenYears, 50,
1586       base::Histogram::kUmaTargetedHistogramFlag);
1587   histogram_evicted_last_access_minutes_ = base::Histogram::FactoryGet(
1588       "Cookie.EvictedLastAccessMinutes",
1589       1, kMinutesInTenYears, 50,
1590       base::Histogram::kUmaTargetedHistogramFlag);
1591   histogram_count_ = base::Histogram::FactoryGet(
1592       "Cookie.Count", 1, 4000, 50,
1593       base::Histogram::kUmaTargetedHistogramFlag);
1594   histogram_domain_count_ = base::Histogram::FactoryGet(
1595       "Cookie.DomainCount", 1, 4000, 50,
1596       base::Histogram::kUmaTargetedHistogramFlag);
1597   histogram_etldp1_count_ = base::Histogram::FactoryGet(
1598       "Cookie.Etldp1Count", 1, 4000, 50,
1599       base::Histogram::kUmaTargetedHistogramFlag);
1600   histogram_domain_per_etldp1_count_ = base::Histogram::FactoryGet(
1601       "Cookie.DomainPerEtldp1Count", 1, 4000, 50,
1602       base::Histogram::kUmaTargetedHistogramFlag);
1603 
1604   // From UMA_HISTOGRAM_COUNTS_10000 & UMA_HISTOGRAM_CUSTOM_COUNTS
1605   histogram_number_duplicate_db_cookies_ = base::Histogram::FactoryGet(
1606       "Net.NumDuplicateCookiesInDb", 1, 10000, 50,
1607       base::Histogram::kUmaTargetedHistogramFlag);
1608 
1609   // From UMA_HISTOGRAM_ENUMERATION
1610   histogram_cookie_deletion_cause_ = base::LinearHistogram::FactoryGet(
1611       "Cookie.DeletionCause", 1,
1612       DELETE_COOKIE_LAST_ENTRY - 1, DELETE_COOKIE_LAST_ENTRY,
1613       base::Histogram::kUmaTargetedHistogramFlag);
1614 
1615   // From UMA_HISTOGRAM_{CUSTOM_,}TIMES
1616   histogram_time_get_ = base::Histogram::FactoryTimeGet("Cookie.TimeGet",
1617       base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromMinutes(1),
1618       50, base::Histogram::kUmaTargetedHistogramFlag);
1619   histogram_time_load_ = base::Histogram::FactoryTimeGet("Cookie.TimeLoad",
1620       base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromMinutes(1),
1621       50, base::Histogram::kUmaTargetedHistogramFlag);
1622 }
1623 
1624 
1625 // The system resolution is not high enough, so we can have multiple
1626 // set cookies that result in the same system time.  When this happens, we
1627 // increment by one Time unit.  Let's hope computers don't get too fast.
CurrentTime()1628 Time CookieMonster::CurrentTime() {
1629   return std::max(Time::Now(),
1630       Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1));
1631 }
1632 
ParsedCookie(const std::string & cookie_line)1633 CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line)
1634     : is_valid_(false),
1635       path_index_(0),
1636       domain_index_(0),
1637       expires_index_(0),
1638       maxage_index_(0),
1639       secure_index_(0),
1640       httponly_index_(0) {
1641 
1642   if (cookie_line.size() > kMaxCookieSize) {
1643     VLOG(1) << "Not parsing cookie, too large: " << cookie_line.size();
1644     return;
1645   }
1646 
1647   ParseTokenValuePairs(cookie_line);
1648   if (!pairs_.empty()) {
1649     is_valid_ = true;
1650     SetupAttributes();
1651   }
1652 }
1653 
~ParsedCookie()1654 CookieMonster::ParsedCookie::~ParsedCookie() {
1655 }
1656 
// Returns true if |c| occurs in |chars|, a NUL-terminated set of characters.
// A NUL |c| is never reported as a member: strchr() treats the terminating
// NUL as part of the string, so it has to be excluded explicitly here.
// TODO maybe make this take an iterator, could check for end also?
static inline bool CharIsA(const char c, const char* chars) {
  return c != '\0' && strchr(chars, c) != NULL;
}
1662 // Seek the iterator to the first occurrence of a character in |chars|.
1663 // Returns true if it hit the end, false otherwise.
SeekTo(std::string::const_iterator * it,const std::string::const_iterator & end,const char * chars)1664 static inline bool SeekTo(std::string::const_iterator* it,
1665                           const std::string::const_iterator& end,
1666                           const char* chars) {
1667   for (; *it != end && !CharIsA(**it, chars); ++(*it)) {}
1668   return *it == end;
1669 }
1670 // Seek the iterator to the first occurrence of a character not in |chars|.
1671 // Returns true if it hit the end, false otherwise.
SeekPast(std::string::const_iterator * it,const std::string::const_iterator & end,const char * chars)1672 static inline bool SeekPast(std::string::const_iterator* it,
1673                             const std::string::const_iterator& end,
1674                             const char* chars) {
1675   for (; *it != end && CharIsA(**it, chars); ++(*it)) {}
1676   return *it == end;
1677 }
SeekBackPast(std::string::const_iterator * it,const std::string::const_iterator & end,const char * chars)1678 static inline bool SeekBackPast(std::string::const_iterator* it,
1679                                 const std::string::const_iterator& end,
1680                                 const char* chars) {
1681   for (; *it != end && CharIsA(**it, chars); --(*it)) {}
1682   return *it == end;
1683 }
1684 
1685 const char CookieMonster::ParsedCookie::kTerminator[] = "\n\r\0";
1686 const int CookieMonster::ParsedCookie::kTerminatorLen =
1687     sizeof(kTerminator) - 1;
1688 const char CookieMonster::ParsedCookie::kWhitespace[] = " \t";
1689 const char CookieMonster::ParsedCookie::kValueSeparator[] = ";";
1690 const char CookieMonster::ParsedCookie::kTokenSeparator[] = ";=";
1691 
1692 // Create a cookie-line for the cookie.  For debugging only!
1693 // If we want to use this for something more than debugging, we
1694 // should rewrite it better...
DebugString() const1695 std::string CookieMonster::ParsedCookie::DebugString() const {
1696   std::string out;
1697   for (PairList::const_iterator it = pairs_.begin();
1698        it != pairs_.end(); ++it) {
1699     out.append(it->first);
1700     out.append("=");
1701     out.append(it->second);
1702     out.append("; ");
1703   }
1704   return out;
1705 }
1706 
FindFirstTerminator(const std::string & s)1707 std::string::const_iterator CookieMonster::ParsedCookie::FindFirstTerminator(
1708     const std::string& s) {
1709   std::string::const_iterator end = s.end();
1710   size_t term_pos =
1711       s.find_first_of(std::string(kTerminator, kTerminatorLen));
1712   if (term_pos != std::string::npos) {
1713     // We found a character we should treat as an end of string.
1714     end = s.begin() + term_pos;
1715   }
1716   return end;
1717 }
1718 
ParseToken(std::string::const_iterator * it,const std::string::const_iterator & end,std::string::const_iterator * token_start,std::string::const_iterator * token_end)1719 bool CookieMonster::ParsedCookie::ParseToken(
1720     std::string::const_iterator* it,
1721     const std::string::const_iterator& end,
1722     std::string::const_iterator* token_start,
1723     std::string::const_iterator* token_end) {
1724   DCHECK(it && token_start && token_end);
1725   std::string::const_iterator token_real_end;
1726 
1727   // Seek past any whitespace before the "token" (the name).
1728   // token_start should point at the first character in the token
1729   if (SeekPast(it, end, kWhitespace))
1730     return false;  // No token, whitespace or empty.
1731   *token_start = *it;
1732 
1733   // Seek over the token, to the token separator.
1734   // token_real_end should point at the token separator, i.e. '='.
1735   // If it == end after the seek, we probably have a token-value.
1736   SeekTo(it, end, kTokenSeparator);
1737   token_real_end = *it;
1738 
1739   // Ignore any whitespace between the token and the token separator.
1740   // token_end should point after the last interesting token character,
1741   // pointing at either whitespace, or at '=' (and equal to token_real_end).
1742   if (*it != *token_start) {  // We could have an empty token name.
1743     --(*it);  // Go back before the token separator.
1744     // Skip over any whitespace to the first non-whitespace character.
1745     SeekBackPast(it, *token_start, kWhitespace);
1746     // Point after it.
1747     ++(*it);
1748   }
1749   *token_end = *it;
1750 
1751   // Seek us back to the end of the token.
1752   *it = token_real_end;
1753   return true;
1754 }
1755 
ParseValue(std::string::const_iterator * it,const std::string::const_iterator & end,std::string::const_iterator * value_start,std::string::const_iterator * value_end)1756 void CookieMonster::ParsedCookie::ParseValue(
1757     std::string::const_iterator* it,
1758     const std::string::const_iterator& end,
1759     std::string::const_iterator* value_start,
1760     std::string::const_iterator* value_end) {
1761   DCHECK(it && value_start && value_end);
1762 
1763   // Seek past any whitespace that might in-between the token and value.
1764   SeekPast(it, end, kWhitespace);
1765   // value_start should point at the first character of the value.
1766   *value_start = *it;
1767 
1768 #ifdef ANDROID
1769   // It is unclear exactly how quoted string values should be handled.
1770   // Major browsers do different things, for example, Firefox and Safari support
1771   // semicolons embedded in a quoted value, while IE does not.  Looking at
1772   // the specs, RFC 2109 and 2965 allow for a quoted-string as the value.
1773   // However, these specs were apparently written after browsers had
1774   // implemented cookies, and they seem very distant from the reality of
1775   // what is actually implemented and used on the web.  The original spec
1776   // from Netscape is possibly what is closest to the cookies used today.
1777   // This spec didn't have explicit support for double quoted strings, and
1778   // states that ; is not allowed as part of a value.
1779 
1780   // The spec disallows control characters and separators in the unquoted value,
1781   // but we allow all of these, other than the value separator ';'.
1782 #ifdef ALLOW_QUOTED_COOKIE_VALUES
1783   // When a value is quoted, the spec states that any text (other than a double
1784   // quote character, but including an escaped double quote character) is
1785   // treated as an opaque part of the value. The spec allows for exactly one
1786   // such quoted string for the value, but we allow multiple such strings,
1787   // possibly with intermediate separators. This matches our policy of allowing
1788   // separators in the unquoted value. This matches the behaviour of Safari, but
1789   // not that of FF.
1790   bool isInQuotedString = false;
1791   for (; *it != end && (isInQuotedString || !CharIsA(**it, kValueSeparator)); ++(*it)) {
1792     // Inside a quoted string, a backslash esacpes the next character, which can
1793     // be any character and should be ignored.
1794     if (isInQuotedString && **it == '\\') {
1795       ++(*it);
1796       // If this backslash is the last character, we just take what we have.
1797       if (*it == end)
1798         break;
1799       continue;
1800     }
1801 
1802     if (**it == '\"')
1803       isInQuotedString = !isInQuotedString;
1804   }
1805 #else
1806   // Just look for ';' to terminate ('=' allowed).
1807   // We can hit the end, maybe they didn't terminate.
1808   SeekTo(it, end, kValueSeparator);
1809 #endif
1810 #endif // ANDROID
1811 
1812   // Will be pointed at the ; seperator or the end.
1813   *value_end = *it;
1814 
1815   // Ignore any unwanted whitespace after the value.
1816   if (*value_end != *value_start) {  // Could have an empty value
1817     --(*value_end);
1818     SeekBackPast(value_end, *value_start, kWhitespace);
1819     ++(*value_end);
1820   }
1821 }
1822 
ParseTokenString(const std::string & token)1823 std::string CookieMonster::ParsedCookie::ParseTokenString(
1824     const std::string& token) {
1825   std::string::const_iterator it = token.begin();
1826   std::string::const_iterator end = FindFirstTerminator(token);
1827 
1828   std::string::const_iterator token_start, token_end;
1829   if (ParseToken(&it, end, &token_start, &token_end))
1830     return std::string(token_start, token_end);
1831   return std::string();
1832 }
1833 
ParseValueString(const std::string & value)1834 std::string CookieMonster::ParsedCookie::ParseValueString(
1835     const std::string& value) {
1836   std::string::const_iterator it = value.begin();
1837   std::string::const_iterator end = FindFirstTerminator(value);
1838 
1839   std::string::const_iterator value_start, value_end;
1840   ParseValue(&it, end, &value_start, &value_end);
1841   return std::string(value_start, value_end);
1842 }
1843 
1844 // Parse all token/value pairs and populate pairs_.
ParseTokenValuePairs(const std::string & cookie_line)1845 void CookieMonster::ParsedCookie::ParseTokenValuePairs(
1846     const std::string& cookie_line) {
1847   pairs_.clear();
1848 
1849   // Ok, here we go.  We should be expecting to be starting somewhere
1850   // before the cookie line, not including any header name...
1851   std::string::const_iterator start = cookie_line.begin();
1852   std::string::const_iterator it = start;
1853 
1854   // TODO Make sure we're stripping \r\n in the network code.  Then we
1855   // can log any unexpected terminators.
1856   std::string::const_iterator end = FindFirstTerminator(cookie_line);
1857 
1858   for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
1859     TokenValuePair pair;
1860 
1861     std::string::const_iterator token_start, token_end;
1862     if (!ParseToken(&it, end, &token_start, &token_end))
1863       break;
1864 
1865     if (it == end || *it != '=') {
1866       // We have a token-value, we didn't have any token name.
1867       if (pair_num == 0) {
1868         // For the first time around, we want to treat single values
1869         // as a value with an empty name. (Mozilla bug 169091).
1870         // IE seems to also have this behavior, ex "AAA", and "AAA=10" will
1871         // set 2 different cookies, and setting "BBB" will then replace "AAA".
1872         pair.first = "";
1873         // Rewind to the beginning of what we thought was the token name,
1874         // and let it get parsed as a value.
1875         it = token_start;
1876       } else {
1877         // Any not-first attribute we want to treat a value as a
1878         // name with an empty value...  This is so something like
1879         // "secure;" will get parsed as a Token name, and not a value.
1880         pair.first = std::string(token_start, token_end);
1881       }
1882     } else {
1883       // We have a TOKEN=VALUE.
1884       pair.first = std::string(token_start, token_end);
1885       ++it;  // Skip past the '='.
1886     }
1887 
1888     // OK, now try to parse a value.
1889     std::string::const_iterator value_start, value_end;
1890     ParseValue(&it, end, &value_start, &value_end);
1891     // OK, we're finished with a Token/Value.
1892     pair.second = std::string(value_start, value_end);
1893 
1894     // From RFC2109: "Attributes (names) (attr) are case-insensitive."
1895     if (pair_num != 0)
1896       StringToLowerASCII(&pair.first);
1897     pairs_.push_back(pair);
1898 
1899     // We've processed a token/value pair, we're either at the end of
1900     // the string or a ValueSeparator like ';', which we want to skip.
1901     if (it != end)
1902       ++it;
1903   }
1904 }
1905 
SetupAttributes()1906 void CookieMonster::ParsedCookie::SetupAttributes() {
1907   static const char kPathTokenName[]      = "path";
1908   static const char kDomainTokenName[]    = "domain";
1909   static const char kExpiresTokenName[]   = "expires";
1910   static const char kMaxAgeTokenName[]    = "max-age";
1911   static const char kSecureTokenName[]    = "secure";
1912   static const char kHttpOnlyTokenName[]  = "httponly";
1913 
1914   // We skip over the first token/value, the user supplied one.
1915   for (size_t i = 1; i < pairs_.size(); ++i) {
1916     if (pairs_[i].first == kPathTokenName) {
1917       path_index_ = i;
1918     } else if (pairs_[i].first == kDomainTokenName) {
1919       domain_index_ = i;
1920     } else if (pairs_[i].first == kExpiresTokenName) {
1921       expires_index_ = i;
1922     } else if (pairs_[i].first == kMaxAgeTokenName) {
1923       maxage_index_ = i;
1924     } else if (pairs_[i].first == kSecureTokenName) {
1925       secure_index_ = i;
1926     } else if (pairs_[i].first == kHttpOnlyTokenName) {
1927       httponly_index_ = i;
1928     } else {
1929       /* some attribute we don't know or don't care about. */
1930     }
1931   }
1932 }
1933 
CanonicalCookie()1934 CookieMonster::CanonicalCookie::CanonicalCookie()
1935     : secure_(false),
1936       httponly_(false),
1937       has_expires_(false) {
1938 }
1939 
CanonicalCookie(const GURL & url,const std::string & name,const std::string & value,const std::string & domain,const std::string & path,const base::Time & creation,const base::Time & expiration,const base::Time & last_access,bool secure,bool httponly,bool has_expires)1940 CookieMonster::CanonicalCookie::CanonicalCookie(const GURL& url,
1941                                                 const std::string& name,
1942                                                 const std::string& value,
1943                                                 const std::string& domain,
1944                                                 const std::string& path,
1945                                                 const base::Time& creation,
1946                                                 const base::Time& expiration,
1947                                                 const base::Time& last_access,
1948                                                 bool secure,
1949                                                 bool httponly,
1950                                                 bool has_expires)
1951     : source_(GetCookieSourceFromURL(url)),
1952       name_(name),
1953       value_(value),
1954       domain_(domain),
1955       path_(path),
1956       creation_date_(creation),
1957       expiry_date_(expiration),
1958       last_access_date_(last_access),
1959       secure_(secure),
1960       httponly_(httponly),
1961       has_expires_(has_expires) {
1962 }
1963 
CanonicalCookie(const GURL & url,const ParsedCookie & pc)1964 CookieMonster::CanonicalCookie::CanonicalCookie(const GURL& url,
1965                                                 const ParsedCookie& pc)
1966     : source_(GetCookieSourceFromURL(url)),
1967       name_(pc.Name()),
1968       value_(pc.Value()),
1969       path_(CanonPath(url, pc)),
1970       creation_date_(Time::Now()),
1971       last_access_date_(Time()),
1972       secure_(pc.IsSecure()),
1973       httponly_(pc.IsHttpOnly()),
1974       has_expires_(pc.HasExpires()) {
1975   if (has_expires_)
1976     expiry_date_ = CanonExpiration(pc, creation_date_, CookieOptions());
1977 
1978   // Do the best we can with the domain.
1979   std::string cookie_domain;
1980   std::string domain_string;
1981   if (pc.HasDomain()) {
1982     domain_string = pc.Domain();
1983   }
1984   bool result
1985       = GetCookieDomainWithString(url, domain_string,
1986                                   &cookie_domain);
1987   // Caller is responsible for passing in good arguments.
1988   DCHECK(result);
1989   domain_ = cookie_domain;
1990 }
1991 
~CanonicalCookie()1992 CookieMonster::CanonicalCookie::~CanonicalCookie() {
1993 }
1994 
GetCookieSourceFromURL(const GURL & url)1995 std::string CookieMonster::CanonicalCookie::GetCookieSourceFromURL(
1996     const GURL& url) {
1997   if (url.SchemeIsFile())
1998     return url.spec();
1999 
2000   url_canon::Replacements<char> replacements;
2001   replacements.ClearPort();
2002   if (url.SchemeIsSecure())
2003     replacements.SetScheme("http", url_parse::Component(0, 4));
2004 
2005   return url.GetOrigin().ReplaceComponents(replacements).spec();
2006 }
2007 
Create(const GURL & url,const std::string & name,const std::string & value,const std::string & domain,const std::string & path,const base::Time & creation,const base::Time & expiration,bool secure,bool http_only)2008 CookieMonster::CanonicalCookie* CookieMonster::CanonicalCookie::Create(
2009       const GURL& url,
2010       const std::string& name,
2011       const std::string& value,
2012       const std::string& domain,
2013       const std::string& path,
2014       const base::Time& creation,
2015       const base::Time& expiration,
2016       bool secure,
2017       bool http_only) {
2018   // Expect valid attribute tokens and values, as defined by the ParsedCookie
2019   // logic, otherwise don't create the cookie.
2020   std::string parsed_name = ParsedCookie::ParseTokenString(name);
2021   if (parsed_name != name)
2022     return NULL;
2023   std::string parsed_value = ParsedCookie::ParseValueString(value);
2024   if (parsed_value != value)
2025     return NULL;
2026 
2027   std::string parsed_domain = ParsedCookie::ParseValueString(domain);
2028   if (parsed_domain != domain)
2029     return NULL;
2030   std::string cookie_domain;
2031   if (!GetCookieDomainWithString(url, parsed_domain, &cookie_domain))
2032     return NULL;
2033 
2034   std::string parsed_path = ParsedCookie::ParseValueString(path);
2035   if (parsed_path != path)
2036     return NULL;
2037 
2038   std::string cookie_path = CanonPathWithString(url, parsed_path);
2039   // Expect that the path was either not specified (empty), or is valid.
2040   if (!parsed_path.empty() && cookie_path != parsed_path)
2041     return NULL;
2042   // Canonicalize path again to make sure it escapes characters as needed.
2043   url_parse::Component path_component(0, cookie_path.length());
2044   url_canon::RawCanonOutputT<char> canon_path;
2045   url_parse::Component canon_path_component;
2046   url_canon::CanonicalizePath(cookie_path.data(), path_component,
2047                               &canon_path, &canon_path_component);
2048   cookie_path = std::string(canon_path.data() + canon_path_component.begin,
2049                             canon_path_component.len);
2050 
2051   return new CanonicalCookie(url, parsed_name, parsed_value, cookie_domain,
2052                              cookie_path, creation, expiration, creation,
2053                              secure, http_only, !expiration.is_null());
2054 }
2055 
IsOnPath(const std::string & url_path) const2056 bool CookieMonster::CanonicalCookie::IsOnPath(
2057     const std::string& url_path) const {
2058 
2059   // A zero length would be unsafe for our trailing '/' checks, and
2060   // would also make no sense for our prefix match.  The code that
2061   // creates a CanonicalCookie should make sure the path is never zero length,
2062   // but we double check anyway.
2063   if (path_.empty())
2064     return false;
2065 
2066   // The Mozilla code broke it into 3 cases, if it's strings lengths
2067   // are less than, equal, or greater.  I think this is simpler:
2068 
2069   // Make sure the cookie path is a prefix of the url path.  If the
2070   // url path is shorter than the cookie path, then the cookie path
2071   // can't be a prefix.
2072   if (url_path.find(path_) != 0)
2073     return false;
2074 
2075   // Now we know that url_path is >= cookie_path, and that cookie_path
2076   // is a prefix of url_path.  If they are the are the same length then
2077   // they are identical, otherwise we need an additional check:
2078 
2079   // In order to avoid in correctly matching a cookie path of /blah
2080   // with a request path of '/blahblah/', we need to make sure that either
2081   // the cookie path ends in a trailing '/', or that we prefix up to a '/'
2082   // in the url path.  Since we know that the url path length is greater
2083   // than the cookie path length, it's safe to index one byte past.
2084   if (path_.length() != url_path.length() &&
2085       path_[path_.length() - 1] != '/' &&
2086       url_path[path_.length()] != '/')
2087     return false;
2088 
2089   return true;
2090 }
2091 
IsDomainMatch(const std::string & scheme,const std::string & host) const2092 bool CookieMonster::CanonicalCookie::IsDomainMatch(
2093     const std::string& scheme,
2094     const std::string& host) const {
2095   // Can domain match in two ways; as a domain cookie (where the cookie
2096   // domain begins with ".") or as a host cookie (where it doesn't).
2097 
2098   // Some consumers of the CookieMonster expect to set cookies on
2099   // URLs like http://.strange.url.  To retrieve cookies in this instance,
2100   // we allow matching as a host cookie even when the domain_ starts with
2101   // a period.
2102   if (host == domain_)
2103     return true;
2104 
2105   // Domain cookie must have an initial ".".  To match, it must be
2106   // equal to url's host with initial period removed, or a suffix of
2107   // it.
2108 
2109   // Arguably this should only apply to "http" or "https" cookies, but
2110   // extension cookie tests currently use the funtionality, and if we
2111   // ever decide to implement that it should be done by preventing
2112   // such cookies from being set.
2113   if (domain_.empty() || domain_[0] != '.')
2114     return false;
2115 
2116   // The host with a "." prefixed.
2117   if (domain_.compare(1, std::string::npos, host) == 0)
2118     return true;
2119 
2120   // A pure suffix of the host (ok since we know the domain already
2121   // starts with a ".")
2122   return (host.length() > domain_.length() &&
2123           host.compare(host.length() - domain_.length(),
2124                        domain_.length(), domain_) == 0);
2125 }
2126 
DebugString() const2127 std::string CookieMonster::CanonicalCookie::DebugString() const {
2128   return base::StringPrintf(
2129       "name: %s value: %s domain: %s path: %s creation: %"
2130       PRId64,
2131       name_.c_str(), value_.c_str(),
2132       domain_.c_str(), path_.c_str(),
2133       static_cast<int64>(creation_date_.ToTimeT()));
2134 }
2135 
2136 }  // namespace
2137