• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // The rules for header parsing were borrowed from Firefox:
6 // http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp
7 // The rules for parsing content-types were also borrowed from Firefox:
8 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
9 
10 #include "net/http/http_response_headers.h"
11 
12 #include <algorithm>
13 
14 #include "base/logging.h"
15 #include "base/pickle.h"
16 #include "base/string_util.h"
17 #include "base/time.h"
18 #include "net/base/escape.h"
19 #include "net/http/http_util.h"
20 
21 using base::Time;
22 using base::TimeDelta;
23 
24 namespace net {
25 
26 //-----------------------------------------------------------------------------
27 
28 namespace {
29 
// RFC 2616 hop-by-hop response headers.  Caches must not store these.
const char* const kHopByHopResponseHeaders[] = {
  "connection", "proxy-connection", "keep-alive",
  "trailer", "transfer-encoding", "upgrade",
};
40 
// Authentication challenge response headers.  Caches must not store these.
const char* const kChallengeResponseHeaders[] = {
  "www-authenticate", "proxy-authenticate",
};
47 
// Cookie-setting response headers.  These must not be stored by caches or
// disclosed otherwise.
const char* const kCookieResponseHeaders[] = {
  "set-cookie", "set-cookie2",
};
54 
// Headers of a 304/206 response that are NOT copied over the cached response
// headers.  The list is based on Mozilla's nsHttpResponseHead.cpp.
const char* const kNonUpdatedHeaders[] = {
  "connection", "proxy-connection", "keep-alive",
  "www-authenticate", "proxy-authenticate",
  "trailer", "transfer-encoding", "upgrade",

  // These should never change:
  "content-location", "content-md5", "etag",

  // Assume cache-control: no-transform.
  "content-encoding", "content-range", "content-type",

  // Some broken Microsoft servers send 'content-length: 0' with 304s.
  "content-length",
};
77 
ShouldUpdateHeader(const std::string::const_iterator & name_begin,const std::string::const_iterator & name_end)78 bool ShouldUpdateHeader(const std::string::const_iterator& name_begin,
79                         const std::string::const_iterator& name_end) {
80   for (size_t i = 0; i < arraysize(kNonUpdatedHeaders); ++i) {
81     if (LowerCaseEqualsASCII(name_begin, name_end, kNonUpdatedHeaders[i]))
82       return false;
83   }
84   return true;
85 }
86 
87 }  // namespace
88 
89 //-----------------------------------------------------------------------------
90 
HttpResponseHeaders(const std::string & raw_input)91 HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input)
92     : response_code_(-1) {
93   Parse(raw_input);
94 }
95 
HttpResponseHeaders(const Pickle & pickle,void ** iter)96 HttpResponseHeaders::HttpResponseHeaders(const Pickle& pickle, void** iter)
97     : response_code_(-1) {
98   std::string raw_input;
99   if (pickle.ReadString(iter, &raw_input))
100     Parse(raw_input);
101 }
102 
Persist(Pickle * pickle,PersistOptions options)103 void HttpResponseHeaders::Persist(Pickle* pickle, PersistOptions options) {
104   if (options == PERSIST_RAW) {
105     pickle->WriteString(raw_headers_);
106     return;  // Done.
107   }
108 
109   HeaderSet filter_headers;
110 
111   // Construct set of headers to filter out based on options.
112   if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE)
113     AddNonCacheableHeaders(&filter_headers);
114 
115   if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES)
116     AddCookieHeaders(&filter_headers);
117 
118   if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES)
119     AddChallengeHeaders(&filter_headers);
120 
121   if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP)
122     AddHopByHopHeaders(&filter_headers);
123 
124   if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES)
125     AddHopContentRangeHeaders(&filter_headers);
126 
127   std::string blob;
128   blob.reserve(raw_headers_.size());
129 
130   // This copies the status line w/ terminator null.
131   // Note raw_headers_ has embedded nulls instead of \n,
132   // so this just copies the first header line.
133   blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1);
134 
135   for (size_t i = 0; i < parsed_.size(); ++i) {
136     DCHECK(!parsed_[i].is_continuation());
137 
138     // Locate the start of the next header.
139     size_t k = i;
140     while (++k < parsed_.size() && parsed_[k].is_continuation());
141     --k;
142 
143     std::string header_name(parsed_[i].name_begin, parsed_[i].name_end);
144     StringToLowerASCII(&header_name);
145 
146     if (filter_headers.find(header_name) == filter_headers.end()) {
147       // Make sure there is a null after the value.
148       blob.append(parsed_[i].name_begin, parsed_[k].value_end);
149       blob.push_back('\0');
150     }
151 
152     i = k;
153   }
154   blob.push_back('\0');
155 
156   pickle->WriteString(blob);
157 }
158 
Update(const HttpResponseHeaders & new_headers)159 void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) {
160   DCHECK(new_headers.response_code() == 304 ||
161          new_headers.response_code() == 206);
162 
163   // Copy up to the null byte.  This just copies the status line.
164   std::string new_raw_headers(raw_headers_.c_str());
165   new_raw_headers.push_back('\0');
166 
167   HeaderSet updated_headers;
168 
169   // NOTE: we write the new headers then the old headers for convenience.  The
170   // order should not matter.
171 
172   // Figure out which headers we want to take from new_headers:
173   for (size_t i = 0; i < new_headers.parsed_.size(); ++i) {
174     const HeaderList& new_parsed = new_headers.parsed_;
175 
176     DCHECK(!new_parsed[i].is_continuation());
177 
178     // Locate the start of the next header.
179     size_t k = i;
180     while (++k < new_parsed.size() && new_parsed[k].is_continuation());
181     --k;
182 
183     const std::string::const_iterator& name_begin = new_parsed[i].name_begin;
184     const std::string::const_iterator& name_end = new_parsed[i].name_end;
185     if (ShouldUpdateHeader(name_begin, name_end)) {
186       std::string name(name_begin, name_end);
187       StringToLowerASCII(&name);
188       updated_headers.insert(name);
189 
190       // Preserve this header line in the merged result, making sure there is
191       // a null after the value.
192       new_raw_headers.append(name_begin, new_parsed[k].value_end);
193       new_raw_headers.push_back('\0');
194     }
195 
196     i = k;
197   }
198 
199   // Now, build the new raw headers.
200   MergeWithHeaders(new_raw_headers, updated_headers);
201 }
202 
MergeWithHeaders(const std::string & raw_headers,const HeaderSet & headers_to_remove)203 void HttpResponseHeaders::MergeWithHeaders(const std::string& raw_headers,
204                                            const HeaderSet& headers_to_remove) {
205   std::string new_raw_headers(raw_headers);
206   for (size_t i = 0; i < parsed_.size(); ++i) {
207     DCHECK(!parsed_[i].is_continuation());
208 
209     // Locate the start of the next header.
210     size_t k = i;
211     while (++k < parsed_.size() && parsed_[k].is_continuation());
212     --k;
213 
214     std::string name(parsed_[i].name_begin, parsed_[i].name_end);
215     StringToLowerASCII(&name);
216     if (headers_to_remove.find(name) == headers_to_remove.end()) {
217       // It's ok to preserve this header in the final result.
218       new_raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end);
219       new_raw_headers.push_back('\0');
220     }
221 
222     i = k;
223   }
224   new_raw_headers.push_back('\0');
225 
226   // Make this object hold the new data.
227   raw_headers_.clear();
228   parsed_.clear();
229   Parse(new_raw_headers);
230 }
231 
RemoveHeader(const std::string & name)232 void HttpResponseHeaders::RemoveHeader(const std::string& name) {
233   // Copy up to the null byte.  This just copies the status line.
234   std::string new_raw_headers(raw_headers_.c_str());
235   new_raw_headers.push_back('\0');
236 
237   std::string lowercase_name(name);
238   StringToLowerASCII(&lowercase_name);
239   HeaderSet to_remove;
240   to_remove.insert(lowercase_name);
241   MergeWithHeaders(new_raw_headers, to_remove);
242 }
243 
AddHeader(const std::string & header)244 void HttpResponseHeaders::AddHeader(const std::string& header) {
245   DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
246   DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
247   // Don't copy the last null.
248   std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1);
249   new_raw_headers.append(header);
250   new_raw_headers.push_back('\0');
251   new_raw_headers.push_back('\0');
252 
253   // Make this object hold the new data.
254   raw_headers_.clear();
255   parsed_.clear();
256   Parse(new_raw_headers);
257 }
258 
ReplaceStatusLine(const std::string & new_status)259 void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) {
260   // Copy up to the null byte.  This just copies the status line.
261   std::string new_raw_headers(new_status);
262   new_raw_headers.push_back('\0');
263 
264   HeaderSet empty_to_remove;
265   MergeWithHeaders(new_raw_headers, empty_to_remove);
266 }
267 
Parse(const std::string & raw_input)268 void HttpResponseHeaders::Parse(const std::string& raw_input) {
269   raw_headers_.reserve(raw_input.size());
270 
271   // ParseStatusLine adds a normalized status line to raw_headers_
272   std::string::const_iterator line_begin = raw_input.begin();
273   std::string::const_iterator line_end =
274       find(line_begin, raw_input.end(), '\0');
275   // has_headers = true, if there is any data following the status line.
276   // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0.
277   bool has_headers = line_end != raw_input.end() &&
278       (line_end + 1) != raw_input.end() && *(line_end + 1) != '\0';
279   ParseStatusLine(line_begin, line_end, has_headers);
280 
281   if (line_end == raw_input.end()) {
282     raw_headers_.push_back('\0');
283     return;
284   }
285 
286   // Including a terminating null byte.
287   size_t status_line_len = raw_headers_.size();
288 
289   // Now, we add the rest of the raw headers to raw_headers_, and begin parsing
290   // it (to populate our parsed_ vector).
291   raw_headers_.append(line_end + 1, raw_input.end());
292 
293   // Adjust to point at the null byte following the status line
294   line_end = raw_headers_.begin() + status_line_len - 1;
295 
296   HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(),
297                                     std::string(1, '\0'));
298   while (headers.GetNext()) {
299     AddHeader(headers.name_begin(),
300               headers.name_end(),
301               headers.values_begin(),
302               headers.values_end());
303   }
304 }
305 
306 // Append all of our headers to the final output string.
GetNormalizedHeaders(std::string * output) const307 void HttpResponseHeaders::GetNormalizedHeaders(std::string* output) const {
308   // copy up to the null byte.  this just copies the status line.
309   output->assign(raw_headers_.c_str());
310 
311   // headers may appear multiple times (not necessarily in succession) in the
312   // header data, so we build a map from header name to generated header lines.
313   // to preserve the order of the original headers, the actual values are kept
314   // in a separate list.  finally, the list of headers is flattened to form
315   // the normalized block of headers.
316   //
317   // NOTE: We take special care to preserve the whitespace around any commas
318   // that may occur in the original response headers.  Because our consumer may
319   // be a web app, we cannot be certain of the semantics of commas despite the
320   // fact that RFC 2616 says that they should be regarded as value separators.
321   //
322   typedef base::hash_map<std::string, size_t> HeadersMap;
323   HeadersMap headers_map;
324   HeadersMap::iterator iter = headers_map.end();
325 
326   std::vector<std::string> headers;
327 
328   for (size_t i = 0; i < parsed_.size(); ++i) {
329     DCHECK(!parsed_[i].is_continuation());
330 
331     std::string name(parsed_[i].name_begin, parsed_[i].name_end);
332     std::string lower_name = StringToLowerASCII(name);
333 
334     iter = headers_map.find(lower_name);
335     if (iter == headers_map.end()) {
336       iter = headers_map.insert(
337           HeadersMap::value_type(lower_name, headers.size())).first;
338       headers.push_back(name + ": ");
339     } else {
340       headers[iter->second].append(", ");
341     }
342 
343     std::string::const_iterator value_begin = parsed_[i].value_begin;
344     std::string::const_iterator value_end = parsed_[i].value_end;
345     while (++i < parsed_.size() && parsed_[i].is_continuation())
346       value_end = parsed_[i].value_end;
347     --i;
348 
349     headers[iter->second].append(value_begin, value_end);
350   }
351 
352   for (size_t i = 0; i < headers.size(); ++i) {
353     output->push_back('\n');
354     output->append(headers[i]);
355   }
356 
357   output->push_back('\n');
358 }
359 
GetNormalizedHeader(const std::string & name,std::string * value) const360 bool HttpResponseHeaders::GetNormalizedHeader(const std::string& name,
361                                               std::string* value) const {
362   // If you hit this assertion, please use EnumerateHeader instead!
363   DCHECK(!HttpUtil::IsNonCoalescingHeader(name));
364 
365   value->clear();
366 
367   bool found = false;
368   size_t i = 0;
369   while (i < parsed_.size()) {
370     i = FindHeader(i, name);
371     if (i == std::string::npos)
372       break;
373 
374     found = true;
375 
376     if (!value->empty())
377       value->append(", ");
378 
379     std::string::const_iterator value_begin = parsed_[i].value_begin;
380     std::string::const_iterator value_end = parsed_[i].value_end;
381     while (++i < parsed_.size() && parsed_[i].is_continuation())
382       value_end = parsed_[i].value_end;
383     value->append(value_begin, value_end);
384   }
385 
386   return found;
387 }
388 
GetStatusLine() const389 std::string HttpResponseHeaders::GetStatusLine() const {
390   // copy up to the null byte.
391   return std::string(raw_headers_.c_str());
392 }
393 
GetStatusText() const394 std::string HttpResponseHeaders::GetStatusText() const {
395   // GetStatusLine() is already normalized, so it has the format:
396   // <http_version> SP <response_code> SP <status_text>
397   std::string status_text = GetStatusLine();
398   std::string::const_iterator begin = status_text.begin();
399   std::string::const_iterator end = status_text.end();
400   for (int i = 0; i < 2; ++i)
401     begin = find(begin, end, ' ') + 1;
402   return std::string(begin, end);
403 }
404 
EnumerateHeaderLines(void ** iter,std::string * name,std::string * value) const405 bool HttpResponseHeaders::EnumerateHeaderLines(void** iter,
406                                                std::string* name,
407                                                std::string* value) const {
408   size_t i = reinterpret_cast<size_t>(*iter);
409   if (i == parsed_.size())
410     return false;
411 
412   DCHECK(!parsed_[i].is_continuation());
413 
414   name->assign(parsed_[i].name_begin, parsed_[i].name_end);
415 
416   std::string::const_iterator value_begin = parsed_[i].value_begin;
417   std::string::const_iterator value_end = parsed_[i].value_end;
418   while (++i < parsed_.size() && parsed_[i].is_continuation())
419     value_end = parsed_[i].value_end;
420 
421   value->assign(value_begin, value_end);
422 
423   *iter = reinterpret_cast<void*>(i);
424   return true;
425 }
426 
EnumerateHeader(void ** iter,const std::string & name,std::string * value) const427 bool HttpResponseHeaders::EnumerateHeader(void** iter, const std::string& name,
428                                           std::string* value) const {
429   size_t i;
430   if (!iter || !*iter) {
431     i = FindHeader(0, name);
432   } else {
433     i = reinterpret_cast<size_t>(*iter);
434     if (i >= parsed_.size()) {
435       i = std::string::npos;
436     } else if (!parsed_[i].is_continuation()) {
437       i = FindHeader(i, name);
438     }
439   }
440 
441   if (i == std::string::npos) {
442     value->clear();
443     return false;
444   }
445 
446   if (iter)
447     *iter = reinterpret_cast<void*>(i + 1);
448   value->assign(parsed_[i].value_begin, parsed_[i].value_end);
449   return true;
450 }
451 
HasHeaderValue(const std::string & name,const std::string & value) const452 bool HttpResponseHeaders::HasHeaderValue(const std::string& name,
453                                          const std::string& value) const {
454   // The value has to be an exact match.  This is important since
455   // 'cache-control: no-cache' != 'cache-control: no-cache="foo"'
456   void* iter = NULL;
457   std::string temp;
458   while (EnumerateHeader(&iter, name, &temp)) {
459     if (value.size() == temp.size() &&
460         std::equal(temp.begin(), temp.end(), value.begin(),
461                    CaseInsensitiveCompare<char>()))
462       return true;
463   }
464   return false;
465 }
466 
467 // Note: this implementation implicitly assumes that line_end points at a valid
468 // sentinel character (such as '\0').
469 // static
ParseVersion(std::string::const_iterator line_begin,std::string::const_iterator line_end)470 HttpVersion HttpResponseHeaders::ParseVersion(
471     std::string::const_iterator line_begin,
472     std::string::const_iterator line_end) {
473   std::string::const_iterator p = line_begin;
474 
475   // RFC2616 sec 3.1: HTTP-Version   = "HTTP" "/" 1*DIGIT "." 1*DIGIT
476   // TODO: (1*DIGIT apparently means one or more digits, but we only handle 1).
477   // TODO: handle leading zeros, which is allowed by the rfc1616 sec 3.1.
478 
479   if ((line_end - p < 4) || !LowerCaseEqualsASCII(p, p + 4, "http")) {
480     DLOG(INFO) << "missing status line";
481     return HttpVersion();
482   }
483 
484   p += 4;
485 
486   if (p >= line_end || *p != '/') {
487     DLOG(INFO) << "missing version";
488     return HttpVersion();
489   }
490 
491   std::string::const_iterator dot = find(p, line_end, '.');
492   if (dot == line_end) {
493     DLOG(INFO) << "malformed version";
494     return HttpVersion();
495   }
496 
497   ++p;  // from / to first digit.
498   ++dot;  // from . to second digit.
499 
500   if (!(*p >= '0' && *p <= '9' && *dot >= '0' && *dot <= '9')) {
501     DLOG(INFO) << "malformed version number";
502     return HttpVersion();
503   }
504 
505   uint16 major = *p - '0';
506   uint16 minor = *dot - '0';
507 
508   return HttpVersion(major, minor);
509 }
510 
511 // Note: this implementation implicitly assumes that line_end points at a valid
512 // sentinel character (such as '\0').
ParseStatusLine(std::string::const_iterator line_begin,std::string::const_iterator line_end,bool has_headers)513 void HttpResponseHeaders::ParseStatusLine(
514     std::string::const_iterator line_begin,
515     std::string::const_iterator line_end,
516     bool has_headers) {
517   // Extract the version number
518   parsed_http_version_ = ParseVersion(line_begin, line_end);
519 
520   // Clamp the version number to one of: {0.9, 1.0, 1.1}
521   if (parsed_http_version_ == HttpVersion(0, 9) && !has_headers) {
522     http_version_ = HttpVersion(0, 9);
523     raw_headers_ = "HTTP/0.9";
524   } else if (parsed_http_version_ >= HttpVersion(1, 1)) {
525     http_version_ = HttpVersion(1, 1);
526     raw_headers_ = "HTTP/1.1";
527   } else {
528     // Treat everything else like HTTP 1.0
529     http_version_ = HttpVersion(1, 0);
530     raw_headers_ = "HTTP/1.0";
531   }
532   if (parsed_http_version_ != http_version_) {
533     DLOG(INFO) << "assuming HTTP/" << http_version_.major_value() << "."
534                << http_version_.minor_value();
535   }
536 
537   // TODO(eroman): this doesn't make sense if ParseVersion failed.
538   std::string::const_iterator p = find(line_begin, line_end, ' ');
539 
540   if (p == line_end) {
541     DLOG(INFO) << "missing response status; assuming 200 OK";
542     raw_headers_.append(" 200 OK");
543     raw_headers_.push_back('\0');
544     response_code_ = 200;
545     return;
546   }
547 
548   // Skip whitespace.
549   while (*p == ' ')
550     ++p;
551 
552   std::string::const_iterator code = p;
553   while (*p >= '0' && *p <= '9')
554     ++p;
555 
556   if (p == code) {
557     DLOG(INFO) << "missing response status number; assuming 200";
558     raw_headers_.append(" 200 OK");
559     response_code_ = 200;
560     return;
561   }
562   raw_headers_.push_back(' ');
563   raw_headers_.append(code, p);
564   raw_headers_.push_back(' ');
565   response_code_ = static_cast<int>(StringToInt64(std::string(code, p)));
566 
567   // Skip whitespace.
568   while (*p == ' ')
569     ++p;
570 
571   // Trim trailing whitespace.
572   while (line_end > p && line_end[-1] == ' ')
573     --line_end;
574 
575   if (p == line_end) {
576     DLOG(INFO) << "missing response status text; assuming OK";
577     // Not super critical what we put here. Just use "OK"
578     // even if it isn't descriptive of response_code_.
579     raw_headers_.append("OK");
580   } else {
581     raw_headers_.append(p, line_end);
582   }
583 
584   raw_headers_.push_back('\0');
585 }
586 
FindHeader(size_t from,const std::string & search) const587 size_t HttpResponseHeaders::FindHeader(size_t from,
588                                        const std::string& search) const {
589   for (size_t i = from; i < parsed_.size(); ++i) {
590     if (parsed_[i].is_continuation())
591       continue;
592     const std::string::const_iterator& name_begin = parsed_[i].name_begin;
593     const std::string::const_iterator& name_end = parsed_[i].name_end;
594     if (static_cast<size_t>(name_end - name_begin) == search.size() &&
595         std::equal(name_begin, name_end, search.begin(),
596                    CaseInsensitiveCompare<char>()))
597       return i;
598   }
599 
600   return std::string::npos;
601 }
602 
AddHeader(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator values_begin,std::string::const_iterator values_end)603 void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin,
604                                     std::string::const_iterator name_end,
605                                     std::string::const_iterator values_begin,
606                                     std::string::const_iterator values_end) {
607   // If the header can be coalesced, then we should split it up.
608   if (values_begin == values_end ||
609       HttpUtil::IsNonCoalescingHeader(name_begin, name_end)) {
610     AddToParsed(name_begin, name_end, values_begin, values_end);
611   } else {
612     HttpUtil::ValuesIterator it(values_begin, values_end, ',');
613     while (it.GetNext()) {
614       AddToParsed(name_begin, name_end, it.value_begin(), it.value_end());
615       // clobber these so that subsequent values are treated as continuations
616       name_begin = name_end = raw_headers_.end();
617     }
618   }
619 }
620 
AddToParsed(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator value_begin,std::string::const_iterator value_end)621 void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin,
622                                       std::string::const_iterator name_end,
623                                       std::string::const_iterator value_begin,
624                                       std::string::const_iterator value_end) {
625   ParsedHeader header;
626   header.name_begin = name_begin;
627   header.name_end = name_end;
628   header.value_begin = value_begin;
629   header.value_end = value_end;
630   parsed_.push_back(header);
631 }
632 
AddNonCacheableHeaders(HeaderSet * result) const633 void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const {
634   // Add server specified transients.  Any 'cache-control: no-cache="foo,bar"'
635   // headers present in the response specify additional headers that we should
636   // not store in the cache.
637   const std::string kCacheControl = "cache-control";
638   const std::string kPrefix = "no-cache=\"";
639   std::string value;
640   void* iter = NULL;
641   while (EnumerateHeader(&iter, kCacheControl, &value)) {
642     if (value.size() > kPrefix.size() &&
643         value.compare(0, kPrefix.size(), kPrefix) == 0) {
644       // if it doesn't end with a quote, then treat as malformed
645       if (value[value.size()-1] != '\"')
646         continue;
647 
648       // trim off leading and trailing bits
649       size_t len = value.size() - kPrefix.size() - 1;
650       TrimString(value.substr(kPrefix.size(), len), HTTP_LWS, &value);
651 
652       size_t begin_pos = 0;
653       for (;;) {
654         // find the end of this header name
655         size_t comma_pos = value.find(',', begin_pos);
656         if (comma_pos == std::string::npos)
657           comma_pos = value.size();
658         size_t end = comma_pos;
659         while (end > begin_pos && strchr(HTTP_LWS, value[end - 1]))
660           end--;
661 
662         // assuming the header is not emtpy, lowercase and insert into set
663         if (end > begin_pos) {
664           std::string name = value.substr(begin_pos, end - begin_pos);
665           StringToLowerASCII(&name);
666           result->insert(name);
667         }
668 
669         // repeat
670         begin_pos = comma_pos + 1;
671         while (begin_pos < value.size() && strchr(HTTP_LWS, value[begin_pos]))
672           begin_pos++;
673         if (begin_pos >= value.size())
674           break;
675       }
676     }
677   }
678 }
679 
AddHopByHopHeaders(HeaderSet * result)680 void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) {
681   for (size_t i = 0; i < arraysize(kHopByHopResponseHeaders); ++i)
682     result->insert(std::string(kHopByHopResponseHeaders[i]));
683 }
684 
AddCookieHeaders(HeaderSet * result)685 void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) {
686   for (size_t i = 0; i < arraysize(kCookieResponseHeaders); ++i)
687     result->insert(std::string(kCookieResponseHeaders[i]));
688 }
689 
AddChallengeHeaders(HeaderSet * result)690 void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) {
691   for (size_t i = 0; i < arraysize(kChallengeResponseHeaders); ++i)
692     result->insert(std::string(kChallengeResponseHeaders[i]));
693 }
694 
AddHopContentRangeHeaders(HeaderSet * result)695 void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) {
696   result->insert("content-range");
697 }
698 
GetMimeTypeAndCharset(std::string * mime_type,std::string * charset) const699 void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
700                                                 std::string* charset) const {
701   mime_type->clear();
702   charset->clear();
703 
704   std::string name = "content-type";
705   std::string value;
706 
707   bool had_charset = false;
708 
709   void* iter = NULL;
710   while (EnumerateHeader(&iter, name, &value))
711     HttpUtil::ParseContentType(value, mime_type, charset, &had_charset);
712 }
713 
GetMimeType(std::string * mime_type) const714 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
715   std::string unused;
716   GetMimeTypeAndCharset(mime_type, &unused);
717   return !mime_type->empty();
718 }
719 
GetCharset(std::string * charset) const720 bool HttpResponseHeaders::GetCharset(std::string* charset) const {
721   std::string unused;
722   GetMimeTypeAndCharset(&unused, charset);
723   return !charset->empty();
724 }
725 
IsRedirect(std::string * location) const726 bool HttpResponseHeaders::IsRedirect(std::string* location) const {
727   if (!IsRedirectResponseCode(response_code_))
728     return false;
729 
730   // If we lack a Location header, then we can't treat this as a redirect.
731   // We assume that the first non-empty location value is the target URL that
732   // we want to follow.  TODO(darin): Is this consistent with other browsers?
733   size_t i = std::string::npos;
734   do {
735     i = FindHeader(++i, "location");
736     if (i == std::string::npos)
737       return false;
738     // If the location value is empty, then it doesn't count.
739   } while (parsed_[i].value_begin == parsed_[i].value_end);
740 
741   if (location) {
742     // Escape any non-ASCII characters to preserve them.  The server should
743     // only be returning ASCII here, but for compat we need to do this.
744     *location = EscapeNonASCII(
745         std::string(parsed_[i].value_begin, parsed_[i].value_end));
746   }
747 
748   return true;
749 }
750 
751 // static
IsRedirectResponseCode(int response_code)752 bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) {
753   // Users probably want to see 300 (multiple choice) pages, so we don't count
754   // them as redirects that need to be followed.
755   return (response_code == 301 ||
756           response_code == 302 ||
757           response_code == 303 ||
758           response_code == 307);
759 }
760 
761 // From RFC 2616 section 13.2.4:
762 //
763 // The calculation to determine if a response has expired is quite simple:
764 //
765 //   response_is_fresh = (freshness_lifetime > current_age)
766 //
767 // Of course, there are other factors that can force a response to always be
768 // validated or re-fetched.
769 //
RequiresValidation(const Time & request_time,const Time & response_time,const Time & current_time) const770 bool HttpResponseHeaders::RequiresValidation(const Time& request_time,
771                                              const Time& response_time,
772                                              const Time& current_time) const {
773   TimeDelta lifetime =
774       GetFreshnessLifetime(response_time);
775   if (lifetime == TimeDelta())
776     return true;
777 
778   return lifetime <= GetCurrentAge(request_time, response_time, current_time);
779 }
780 
781 // From RFC 2616 section 13.2.4:
782 //
783 // The max-age directive takes priority over Expires, so if max-age is present
784 // in a response, the calculation is simply:
785 //
786 //   freshness_lifetime = max_age_value
787 //
788 // Otherwise, if Expires is present in the response, the calculation is:
789 //
790 //   freshness_lifetime = expires_value - date_value
791 //
792 // Note that neither of these calculations is vulnerable to clock skew, since
793 // all of the information comes from the origin server.
794 //
795 // Also, if the response does have a Last-Modified time, the heuristic
796 // expiration value SHOULD be no more than some fraction of the interval since
797 // that time. A typical setting of this fraction might be 10%:
798 //
799 //   freshness_lifetime = (date_value - last_modified_value) * 0.10
800 //
GetFreshnessLifetime(const Time & response_time) const801 TimeDelta HttpResponseHeaders::GetFreshnessLifetime(
802     const Time& response_time) const {
803   // Check for headers that force a response to never be fresh.  For backwards
804   // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control:
805   // no-cache" even though RFC 2616 does not specify it.
806   if (HasHeaderValue("cache-control", "no-cache") ||
807       HasHeaderValue("cache-control", "no-store") ||
808       HasHeaderValue("pragma", "no-cache") ||
809       HasHeaderValue("vary", "*"))  // see RFC 2616 section 13.6
810     return TimeDelta();  // not fresh
811 
812   // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the
813   // Expires header after checking for max-age in GetFreshnessLifetime.  This
814   // is important since "Expires: <date in the past>" means not fresh, but
815   // it should not trump a max-age value.
816 
817   TimeDelta max_age_value;
818   if (GetMaxAgeValue(&max_age_value))
819     return max_age_value;
820 
821   // If there is no Date header, then assume that the server response was
822   // generated at the time when we received the response.
823   Time date_value;
824   if (!GetDateValue(&date_value))
825     date_value = response_time;
826 
827   Time expires_value;
828   if (GetExpiresValue(&expires_value)) {
829     // The expires value can be a date in the past!
830     if (expires_value > date_value)
831       return expires_value - date_value;
832 
833     return TimeDelta();  // not fresh
834   }
835 
836   // From RFC 2616 section 13.4:
837   //
838   //   A response received with a status code of 200, 203, 206, 300, 301 or 410
839   //   MAY be stored by a cache and used in reply to a subsequent request,
840   //   subject to the expiration mechanism, unless a cache-control directive
841   //   prohibits caching.
842   //   ...
843   //   A response received with any other status code (e.g. status codes 302
844   //   and 307) MUST NOT be returned in a reply to a subsequent request unless
845   //   there are cache-control directives or another header(s) that explicitly
846   //   allow it.
847   //
848   // From RFC 2616 section 14.9.4:
849   //
850   //   When the must-revalidate directive is present in a response received by
851   //   a cache, that cache MUST NOT use the entry after it becomes stale to
852   //   respond to a subsequent request without first revalidating it with the
853   //   origin server. (I.e., the cache MUST do an end-to-end revalidation every
854   //   time, if, based solely on the origin server's Expires or max-age value,
855   //   the cached response is stale.)
856   //
857   if ((response_code_ == 200 || response_code_ == 203 ||
858        response_code_ == 206) &&
859       !HasHeaderValue("cache-control", "must-revalidate")) {
860     // TODO(darin): Implement a smarter heuristic.
861     Time last_modified_value;
862     if (GetLastModifiedValue(&last_modified_value)) {
863       // The last-modified value can be a date in the past!
864       if (last_modified_value <= date_value)
865         return (date_value - last_modified_value) / 10;
866     }
867   }
868 
869   // These responses are implicitly fresh (unless otherwise overruled):
870   if (response_code_ == 300 || response_code_ == 301 || response_code_ == 410)
871     return TimeDelta::FromMicroseconds(kint64max);
872 
873   return TimeDelta();  // not fresh
874 }
875 
876 // From RFC 2616 section 13.2.3:
877 //
878 // Summary of age calculation algorithm, when a cache receives a response:
879 //
880 //   /*
881 //    * age_value
882 //    *      is the value of Age: header received by the cache with
883 //    *              this response.
884 //    * date_value
885 //    *      is the value of the origin server's Date: header
886 //    * request_time
887 //    *      is the (local) time when the cache made the request
888 //    *              that resulted in this cached response
889 //    * response_time
890 //    *      is the (local) time when the cache received the
891 //    *              response
892 //    * now
893 //    *      is the current (local) time
894 //    */
895 //   apparent_age = max(0, response_time - date_value);
896 //   corrected_received_age = max(apparent_age, age_value);
897 //   response_delay = response_time - request_time;
898 //   corrected_initial_age = corrected_received_age + response_delay;
899 //   resident_time = now - response_time;
900 //   current_age   = corrected_initial_age + resident_time;
901 //
GetCurrentAge(const Time & request_time,const Time & response_time,const Time & current_time) const902 TimeDelta HttpResponseHeaders::GetCurrentAge(const Time& request_time,
903                                              const Time& response_time,
904                                              const Time& current_time) const {
905   // If there is no Date header, then assume that the server response was
906   // generated at the time when we received the response.
907   Time date_value;
908   if (!GetDateValue(&date_value))
909     date_value = response_time;
910 
911   // If there is no Age header, then assume age is zero.  GetAgeValue does not
912   // modify its out param if the value does not exist.
913   TimeDelta age_value;
914   GetAgeValue(&age_value);
915 
916   TimeDelta apparent_age = std::max(TimeDelta(), response_time - date_value);
917   TimeDelta corrected_received_age = std::max(apparent_age, age_value);
918   TimeDelta response_delay = response_time - request_time;
919   TimeDelta corrected_initial_age = corrected_received_age + response_delay;
920   TimeDelta resident_time = current_time - response_time;
921   TimeDelta current_age = corrected_initial_age + resident_time;
922 
923   return current_age;
924 }
925 
GetMaxAgeValue(TimeDelta * result) const926 bool HttpResponseHeaders::GetMaxAgeValue(TimeDelta* result) const {
927   std::string name = "cache-control";
928   std::string value;
929 
930   const char kMaxAgePrefix[] = "max-age=";
931   const size_t kMaxAgePrefixLen = arraysize(kMaxAgePrefix) - 1;
932 
933   void* iter = NULL;
934   while (EnumerateHeader(&iter, name, &value)) {
935     if (value.size() > kMaxAgePrefixLen) {
936       if (LowerCaseEqualsASCII(value.begin(),
937                                value.begin() + kMaxAgePrefixLen,
938                                kMaxAgePrefix)) {
939         *result = TimeDelta::FromSeconds(
940             StringToInt64(value.substr(kMaxAgePrefixLen)));
941         return true;
942       }
943     }
944   }
945 
946   return false;
947 }
948 
GetAgeValue(TimeDelta * result) const949 bool HttpResponseHeaders::GetAgeValue(TimeDelta* result) const {
950   std::string value;
951   if (!EnumerateHeader(NULL, "Age", &value))
952     return false;
953 
954   *result = TimeDelta::FromSeconds(StringToInt64(value));
955   return true;
956 }
957 
GetDateValue(Time * result) const958 bool HttpResponseHeaders::GetDateValue(Time* result) const {
959   return GetTimeValuedHeader("Date", result);
960 }
961 
GetLastModifiedValue(Time * result) const962 bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const {
963   return GetTimeValuedHeader("Last-Modified", result);
964 }
965 
GetExpiresValue(Time * result) const966 bool HttpResponseHeaders::GetExpiresValue(Time* result) const {
967   return GetTimeValuedHeader("Expires", result);
968 }
969 
GetTimeValuedHeader(const std::string & name,Time * result) const970 bool HttpResponseHeaders::GetTimeValuedHeader(const std::string& name,
971                                               Time* result) const {
972   std::string value;
973   if (!EnumerateHeader(NULL, name, &value))
974     return false;
975 
976   std::wstring value_wide(value.begin(), value.end());  // inflate ascii
977   return Time::FromString(value_wide.c_str(), result);
978 }
979 
IsKeepAlive() const980 bool HttpResponseHeaders::IsKeepAlive() const {
981   if (http_version_ < HttpVersion(1, 0))
982     return false;
983 
984   // NOTE: It is perhaps risky to assume that a Proxy-Connection header is
985   // meaningful when we don't know that this response was from a proxy, but
986   // Mozilla also does this, so we'll do the same.
987   std::string connection_val;
988   if (!EnumerateHeader(NULL, "connection", &connection_val))
989     EnumerateHeader(NULL, "proxy-connection", &connection_val);
990 
991   bool keep_alive;
992 
993   if (http_version_ == HttpVersion(1, 0)) {
994     // HTTP/1.0 responses default to NOT keep-alive
995     keep_alive = LowerCaseEqualsASCII(connection_val, "keep-alive");
996   } else {
997     // HTTP/1.1 responses default to keep-alive
998     keep_alive = !LowerCaseEqualsASCII(connection_val, "close");
999   }
1000 
1001   return keep_alive;
1002 }
1003 
HasStrongValidators() const1004 bool HttpResponseHeaders::HasStrongValidators() const {
1005   std::string etag_value;
1006   EnumerateHeader(NULL, "etag", &etag_value);
1007   if (!etag_value.empty()) {
1008     size_t slash = etag_value.find('/');
1009     if (slash == std::string::npos || slash == 0)
1010       return true;
1011 
1012     std::string::const_iterator i = etag_value.begin();
1013     std::string::const_iterator j = etag_value.begin() + slash;
1014     HttpUtil::TrimLWS(&i, &j);
1015     if (!LowerCaseEqualsASCII(i, j, "w"))
1016       return true;
1017   }
1018 
1019   Time last_modified;
1020   if (!GetLastModifiedValue(&last_modified))
1021     return false;
1022 
1023   Time date;
1024   if (!GetDateValue(&date))
1025     return false;
1026 
1027   return ((date - last_modified).InSeconds() >= 60);
1028 }
1029 
1030 // From RFC 2616:
1031 // Content-Length = "Content-Length" ":" 1*DIGIT
GetContentLength() const1032 int64 HttpResponseHeaders::GetContentLength() const {
1033   void* iter = NULL;
1034   std::string content_length_val;
1035   if (!EnumerateHeader(&iter, "content-length", &content_length_val))
1036     return -1;
1037 
1038   if (content_length_val.empty())
1039     return -1;
1040 
1041   if (content_length_val[0] == '+')
1042     return -1;
1043 
1044   int64 result;
1045   bool ok = StringToInt64(content_length_val, &result);
1046   if (!ok || result < 0)
1047     return -1;
1048 
1049   return result;
1050 }
1051 
1052 // From RFC 2616 14.16:
1053 // content-range-spec =
1054 //     bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" )
1055 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*"
1056 // instance-length = 1*DIGIT
1057 // bytes-unit = "bytes"
GetContentRange(int64 * first_byte_position,int64 * last_byte_position,int64 * instance_length) const1058 bool HttpResponseHeaders::GetContentRange(int64* first_byte_position,
1059                                           int64* last_byte_position,
1060                                           int64* instance_length) const {
1061   void* iter = NULL;
1062   std::string content_range_spec;
1063   *first_byte_position = *last_byte_position = *instance_length = -1;
1064   if (!EnumerateHeader(&iter, "content-range", &content_range_spec))
1065     return false;
1066 
1067   // If the header value is empty, we have an invalid header.
1068   if (content_range_spec.empty())
1069     return false;
1070 
1071   size_t space_position = content_range_spec.find(' ');
1072   if (space_position == std::string::npos)
1073     return false;
1074 
1075   // Invalid header if it doesn't contain "bytes-unit".
1076   std::string::const_iterator content_range_spec_begin =
1077       content_range_spec.begin();
1078   std::string::const_iterator content_range_spec_end =
1079       content_range_spec.begin() + space_position;
1080   HttpUtil::TrimLWS(&content_range_spec_begin, &content_range_spec_end);
1081   if (!LowerCaseEqualsASCII(content_range_spec_begin,
1082                             content_range_spec_end,
1083                             "bytes")) {
1084     return false;
1085   }
1086 
1087   size_t slash_position = content_range_spec.find('/', space_position + 1);
1088   if (slash_position == std::string::npos)
1089     return false;
1090 
1091   // Obtain the part behind the space and before slash.
1092   std::string::const_iterator byte_range_resp_spec_begin =
1093       content_range_spec.begin() + space_position + 1;
1094   std::string::const_iterator byte_range_resp_spec_end =
1095       content_range_spec.begin() + slash_position;
1096   HttpUtil::TrimLWS(&byte_range_resp_spec_begin, &byte_range_resp_spec_end);
1097 
1098   // Parse the byte-range-resp-spec part.
1099   std::string byte_range_resp_spec(byte_range_resp_spec_begin,
1100                                    byte_range_resp_spec_end);
1101   // If byte-range-resp-spec != "*".
1102   if (!LowerCaseEqualsASCII(byte_range_resp_spec, "*")) {
1103     size_t minus_position = byte_range_resp_spec.find('-');
1104     if (minus_position != std::string::npos) {
1105       // Obtain first-byte-pos.
1106       std::string::const_iterator first_byte_pos_begin =
1107           byte_range_resp_spec.begin();
1108       std::string::const_iterator first_byte_pos_end =
1109           byte_range_resp_spec.begin() + minus_position;
1110       HttpUtil::TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
1111 
1112       bool ok = StringToInt64(
1113           std::string(first_byte_pos_begin, first_byte_pos_end),
1114           first_byte_position);
1115 
1116       // Obtain last-byte-pos.
1117       std::string::const_iterator last_byte_pos_begin =
1118            byte_range_resp_spec.begin() + minus_position + 1;
1119       std::string::const_iterator last_byte_pos_end =
1120            byte_range_resp_spec.end();
1121       HttpUtil::TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
1122 
1123       ok &= StringToInt64(
1124           std::string(last_byte_pos_begin, last_byte_pos_end),
1125           last_byte_position);
1126       if (!ok) {
1127         *first_byte_position = *last_byte_position = -1;
1128         return false;
1129       }
1130       if (*first_byte_position < 0 || *last_byte_position < 0 ||
1131           *first_byte_position > *last_byte_position)
1132         return false;
1133     } else {
1134       return false;
1135     }
1136   }
1137 
1138   // Parse the instance-length part.
1139   // If instance-length == "*".
1140   std::string::const_iterator instance_length_begin =
1141       content_range_spec.begin() + slash_position + 1;
1142   std::string::const_iterator instance_length_end =
1143       content_range_spec.end();
1144   HttpUtil::TrimLWS(&instance_length_begin, &instance_length_end);
1145 
1146   if (LowerCaseEqualsASCII(instance_length_begin, instance_length_end, "*")) {
1147     return false;
1148   } else if (!StringToInt64(
1149                  std::string(instance_length_begin, instance_length_end),
1150                  instance_length)) {
1151     *instance_length = -1;
1152     return false;
1153   }
1154 
1155   // We have all the values; let's verify that they make sense for a 206
1156   // response.
1157   if (*first_byte_position < 0 || *last_byte_position < 0 ||
1158       *instance_length < 0 || *instance_length - 1 < *last_byte_position)
1159     return false;
1160 
1161   return true;
1162 }
1163 
1164 }  // namespace net
1165