1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // The rules for header parsing were borrowed from Firefox:
6 // http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp
7 // The rules for parsing content-types were also borrowed from Firefox:
8 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
9
10 #include "net/http/http_response_headers.h"
11
12 #include <algorithm>
13
14 #include "base/logging.h"
15 #include "base/metrics/histogram.h"
16 #include "base/pickle.h"
17 #include "base/string_number_conversions.h"
18 #include "base/string_util.h"
19 #include "base/time.h"
20 #include "net/base/escape.h"
21 #include "net/http/http_util.h"
22
23 using base::Time;
24 using base::TimeDelta;
25
26 namespace net {
27
28 //-----------------------------------------------------------------------------
29
30 namespace {
31
// Hop-by-hop response headers as defined by RFC 2616. Caches must not store
// them. All names are lowercase.
const char* const kHopByHopResponseHeaders[] = {
  "connection", "proxy-connection", "keep-alive",
  "trailer", "transfer-encoding", "upgrade",
};
42
// Authentication challenge response headers. Caches must not store them.
// All names are lowercase.
const char* const kChallengeResponseHeaders[] = {
  "www-authenticate", "proxy-authenticate",
};
49
// Response headers that set cookies. They must neither be stored by caches
// nor disclosed otherwise. All names are lowercase.
const char* const kCookieResponseHeaders[] = {
  "set-cookie", "set-cookie2",
};
56
// Headers that Update() never copies from a 304/206 response into the cached
// response headers. The list is based on Mozilla's nsHttpResponseHead.cpp.
// All names are lowercase.
const char* const kNonUpdatedHeaders[] = {
  "connection", "proxy-connection", "keep-alive",
  "www-authenticate", "proxy-authenticate",
  "trailer", "transfer-encoding", "upgrade",

  // These should never change:
  "content-location", "content-md5", "etag",

  // Assume cache-control: no-transform.
  "content-encoding", "content-range", "content-type",

  // Some broken Microsoft servers send 'content-length: 0' with 304s.
  "content-length",
};
79
ShouldUpdateHeader(const std::string::const_iterator & name_begin,const std::string::const_iterator & name_end)80 bool ShouldUpdateHeader(const std::string::const_iterator& name_begin,
81 const std::string::const_iterator& name_end) {
82 for (size_t i = 0; i < arraysize(kNonUpdatedHeaders); ++i) {
83 if (LowerCaseEqualsASCII(name_begin, name_end, kNonUpdatedHeaders[i]))
84 return false;
85 }
86 return true;
87 }
88
// Helpers for initializing the response code histogram. Bucket 0 stands in
// for every response code outside the valid range declared below.
// TODO(gavinp): Greatly prune the collected codes once we learn which
// ones are not sent in practice, to reduce upload size & memory use.

enum {
  HISTOGRAM_MIN_HTTP_RESPONSE_CODE = 100,
  HISTOGRAM_MAX_HTTP_RESPONSE_CODE = 599,
};

// Returns the list of histogram buckets: 0 first, followed by every code in
// the inclusive range [HISTOGRAM_MIN_HTTP_RESPONSE_CODE,
// HISTOGRAM_MAX_HTTP_RESPONSE_CODE] in ascending order.
std::vector<int> GetAllHttpResponseCodes() {
  std::vector<int> buckets;
  // One slot per valid code plus one for the "invalid" bucket.
  buckets.reserve(
      HISTOGRAM_MAX_HTTP_RESPONSE_CODE - HISTOGRAM_MIN_HTTP_RESPONSE_CODE + 2);
  buckets.push_back(0);
  int code = HISTOGRAM_MIN_HTTP_RESPONSE_CODE;
  while (code <= HISTOGRAM_MAX_HTTP_RESPONSE_CODE) {
    buckets.push_back(code);
    ++code;
  }
  return buckets;
}

// Maps |code| to its histogram bucket: the code itself when it lies within
// the valid range, 0 otherwise.
int MapHttpResponseCode(int code) {
  const bool in_range = code >= HISTOGRAM_MIN_HTTP_RESPONSE_CODE &&
                        code <= HISTOGRAM_MAX_HTTP_RESPONSE_CODE;
  return in_range ? code : 0;
}
116
117 } // namespace
118
119 struct HttpResponseHeaders::ParsedHeader {
120 // A header "continuation" contains only a subsequent value for the
121 // preceding header. (Header values are comma separated.)
is_continuationnet::HttpResponseHeaders::ParsedHeader122 bool is_continuation() const { return name_begin == name_end; }
123
124 std::string::const_iterator name_begin;
125 std::string::const_iterator name_end;
126 std::string::const_iterator value_begin;
127 std::string::const_iterator value_end;
128 };
129
130 //-----------------------------------------------------------------------------
131
HttpResponseHeaders(const std::string & raw_input)132 HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input)
133 : response_code_(-1) {
134 Parse(raw_input);
135
136 // The most important thing to do with this histogram is find out
137 // the existence of unusual HTTP response codes. As it happens
138 // right now, there aren't double-constructions of response headers
139 // using this constructor, so our counts should also be accurate,
140 // without instantiating the histogram in two places. It is also
141 // important that this histogram not collect data in the other
142 // constructor, which rebuilds an histogram from a pickle, since
143 // that would actually create a double call between the original
144 // HttpResponseHeader that was serialized, and initialization of the
145 // new object from that pickle.
146 UMA_HISTOGRAM_CUSTOM_ENUMERATION("Net.HttpResponseCode",
147 MapHttpResponseCode(response_code_),
148 // Note the third argument is only
149 // evaluated once, see macro
150 // definition for details.
151 GetAllHttpResponseCodes());
152 }
153
HttpResponseHeaders(const Pickle & pickle,void ** iter)154 HttpResponseHeaders::HttpResponseHeaders(const Pickle& pickle, void** iter)
155 : response_code_(-1) {
156 std::string raw_input;
157 if (pickle.ReadString(iter, &raw_input))
158 Parse(raw_input);
159 }
160
Persist(Pickle * pickle,PersistOptions options)161 void HttpResponseHeaders::Persist(Pickle* pickle, PersistOptions options) {
162 if (options == PERSIST_RAW) {
163 pickle->WriteString(raw_headers_);
164 return; // Done.
165 }
166
167 HeaderSet filter_headers;
168
169 // Construct set of headers to filter out based on options.
170 if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE)
171 AddNonCacheableHeaders(&filter_headers);
172
173 if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES)
174 AddCookieHeaders(&filter_headers);
175
176 if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES)
177 AddChallengeHeaders(&filter_headers);
178
179 if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP)
180 AddHopByHopHeaders(&filter_headers);
181
182 if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES)
183 AddHopContentRangeHeaders(&filter_headers);
184
185 std::string blob;
186 blob.reserve(raw_headers_.size());
187
188 // This copies the status line w/ terminator null.
189 // Note raw_headers_ has embedded nulls instead of \n,
190 // so this just copies the first header line.
191 blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1);
192
193 for (size_t i = 0; i < parsed_.size(); ++i) {
194 DCHECK(!parsed_[i].is_continuation());
195
196 // Locate the start of the next header.
197 size_t k = i;
198 while (++k < parsed_.size() && parsed_[k].is_continuation()) {}
199 --k;
200
201 std::string header_name(parsed_[i].name_begin, parsed_[i].name_end);
202 StringToLowerASCII(&header_name);
203
204 if (filter_headers.find(header_name) == filter_headers.end()) {
205 // Make sure there is a null after the value.
206 blob.append(parsed_[i].name_begin, parsed_[k].value_end);
207 blob.push_back('\0');
208 }
209
210 i = k;
211 }
212 blob.push_back('\0');
213
214 pickle->WriteString(blob);
215 }
216
Update(const HttpResponseHeaders & new_headers)217 void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) {
218 DCHECK(new_headers.response_code() == 304 ||
219 new_headers.response_code() == 206);
220
221 // Copy up to the null byte. This just copies the status line.
222 std::string new_raw_headers(raw_headers_.c_str());
223 new_raw_headers.push_back('\0');
224
225 HeaderSet updated_headers;
226
227 // NOTE: we write the new headers then the old headers for convenience. The
228 // order should not matter.
229
230 // Figure out which headers we want to take from new_headers:
231 for (size_t i = 0; i < new_headers.parsed_.size(); ++i) {
232 const HeaderList& new_parsed = new_headers.parsed_;
233
234 DCHECK(!new_parsed[i].is_continuation());
235
236 // Locate the start of the next header.
237 size_t k = i;
238 while (++k < new_parsed.size() && new_parsed[k].is_continuation()) {}
239 --k;
240
241 const std::string::const_iterator& name_begin = new_parsed[i].name_begin;
242 const std::string::const_iterator& name_end = new_parsed[i].name_end;
243 if (ShouldUpdateHeader(name_begin, name_end)) {
244 std::string name(name_begin, name_end);
245 StringToLowerASCII(&name);
246 updated_headers.insert(name);
247
248 // Preserve this header line in the merged result, making sure there is
249 // a null after the value.
250 new_raw_headers.append(name_begin, new_parsed[k].value_end);
251 new_raw_headers.push_back('\0');
252 }
253
254 i = k;
255 }
256
257 // Now, build the new raw headers.
258 MergeWithHeaders(new_raw_headers, updated_headers);
259 }
260
MergeWithHeaders(const std::string & raw_headers,const HeaderSet & headers_to_remove)261 void HttpResponseHeaders::MergeWithHeaders(const std::string& raw_headers,
262 const HeaderSet& headers_to_remove) {
263 std::string new_raw_headers(raw_headers);
264 for (size_t i = 0; i < parsed_.size(); ++i) {
265 DCHECK(!parsed_[i].is_continuation());
266
267 // Locate the start of the next header.
268 size_t k = i;
269 while (++k < parsed_.size() && parsed_[k].is_continuation()) {}
270 --k;
271
272 std::string name(parsed_[i].name_begin, parsed_[i].name_end);
273 StringToLowerASCII(&name);
274 if (headers_to_remove.find(name) == headers_to_remove.end()) {
275 // It's ok to preserve this header in the final result.
276 new_raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end);
277 new_raw_headers.push_back('\0');
278 }
279
280 i = k;
281 }
282 new_raw_headers.push_back('\0');
283
284 // Make this object hold the new data.
285 raw_headers_.clear();
286 parsed_.clear();
287 Parse(new_raw_headers);
288 }
289
RemoveHeader(const std::string & name)290 void HttpResponseHeaders::RemoveHeader(const std::string& name) {
291 // Copy up to the null byte. This just copies the status line.
292 std::string new_raw_headers(raw_headers_.c_str());
293 new_raw_headers.push_back('\0');
294
295 std::string lowercase_name(name);
296 StringToLowerASCII(&lowercase_name);
297 HeaderSet to_remove;
298 to_remove.insert(lowercase_name);
299 MergeWithHeaders(new_raw_headers, to_remove);
300 }
301
AddHeader(const std::string & header)302 void HttpResponseHeaders::AddHeader(const std::string& header) {
303 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
304 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
305 // Don't copy the last null.
306 std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1);
307 new_raw_headers.append(header);
308 new_raw_headers.push_back('\0');
309 new_raw_headers.push_back('\0');
310
311 // Make this object hold the new data.
312 raw_headers_.clear();
313 parsed_.clear();
314 Parse(new_raw_headers);
315 }
316
ReplaceStatusLine(const std::string & new_status)317 void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) {
318 // Copy up to the null byte. This just copies the status line.
319 std::string new_raw_headers(new_status);
320 new_raw_headers.push_back('\0');
321
322 HeaderSet empty_to_remove;
323 MergeWithHeaders(new_raw_headers, empty_to_remove);
324 }
325
Parse(const std::string & raw_input)326 void HttpResponseHeaders::Parse(const std::string& raw_input) {
327 raw_headers_.reserve(raw_input.size());
328
329 // ParseStatusLine adds a normalized status line to raw_headers_
330 std::string::const_iterator line_begin = raw_input.begin();
331 std::string::const_iterator line_end =
332 find(line_begin, raw_input.end(), '\0');
333 // has_headers = true, if there is any data following the status line.
334 // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0.
335 bool has_headers = (line_end != raw_input.end() &&
336 (line_end + 1) != raw_input.end() &&
337 *(line_end + 1) != '\0');
338 ParseStatusLine(line_begin, line_end, has_headers);
339
340 if (line_end == raw_input.end()) {
341 raw_headers_.push_back('\0');
342 return;
343 }
344
345 // Including a terminating null byte.
346 size_t status_line_len = raw_headers_.size();
347
348 // Now, we add the rest of the raw headers to raw_headers_, and begin parsing
349 // it (to populate our parsed_ vector).
350 raw_headers_.append(line_end + 1, raw_input.end());
351
352 // Adjust to point at the null byte following the status line
353 line_end = raw_headers_.begin() + status_line_len - 1;
354
355 HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(),
356 std::string(1, '\0'));
357 while (headers.GetNext()) {
358 AddHeader(headers.name_begin(),
359 headers.name_end(),
360 headers.values_begin(),
361 headers.values_end());
362 }
363 }
364
365 // Append all of our headers to the final output string.
GetNormalizedHeaders(std::string * output) const366 void HttpResponseHeaders::GetNormalizedHeaders(std::string* output) const {
367 // copy up to the null byte. this just copies the status line.
368 output->assign(raw_headers_.c_str());
369
370 // headers may appear multiple times (not necessarily in succession) in the
371 // header data, so we build a map from header name to generated header lines.
372 // to preserve the order of the original headers, the actual values are kept
373 // in a separate list. finally, the list of headers is flattened to form
374 // the normalized block of headers.
375 //
376 // NOTE: We take special care to preserve the whitespace around any commas
377 // that may occur in the original response headers. Because our consumer may
378 // be a web app, we cannot be certain of the semantics of commas despite the
379 // fact that RFC 2616 says that they should be regarded as value separators.
380 //
381 typedef base::hash_map<std::string, size_t> HeadersMap;
382 HeadersMap headers_map;
383 HeadersMap::iterator iter = headers_map.end();
384
385 std::vector<std::string> headers;
386
387 for (size_t i = 0; i < parsed_.size(); ++i) {
388 DCHECK(!parsed_[i].is_continuation());
389
390 std::string name(parsed_[i].name_begin, parsed_[i].name_end);
391 std::string lower_name = StringToLowerASCII(name);
392
393 iter = headers_map.find(lower_name);
394 if (iter == headers_map.end()) {
395 iter = headers_map.insert(
396 HeadersMap::value_type(lower_name, headers.size())).first;
397 headers.push_back(name + ": ");
398 } else {
399 headers[iter->second].append(", ");
400 }
401
402 std::string::const_iterator value_begin = parsed_[i].value_begin;
403 std::string::const_iterator value_end = parsed_[i].value_end;
404 while (++i < parsed_.size() && parsed_[i].is_continuation())
405 value_end = parsed_[i].value_end;
406 --i;
407
408 headers[iter->second].append(value_begin, value_end);
409 }
410
411 for (size_t i = 0; i < headers.size(); ++i) {
412 output->push_back('\n');
413 output->append(headers[i]);
414 }
415
416 output->push_back('\n');
417 }
418
GetNormalizedHeader(const std::string & name,std::string * value) const419 bool HttpResponseHeaders::GetNormalizedHeader(const std::string& name,
420 std::string* value) const {
421 // If you hit this assertion, please use EnumerateHeader instead!
422 DCHECK(!HttpUtil::IsNonCoalescingHeader(name));
423
424 value->clear();
425
426 bool found = false;
427 size_t i = 0;
428 while (i < parsed_.size()) {
429 i = FindHeader(i, name);
430 if (i == std::string::npos)
431 break;
432
433 found = true;
434
435 if (!value->empty())
436 value->append(", ");
437
438 std::string::const_iterator value_begin = parsed_[i].value_begin;
439 std::string::const_iterator value_end = parsed_[i].value_end;
440 while (++i < parsed_.size() && parsed_[i].is_continuation())
441 value_end = parsed_[i].value_end;
442 value->append(value_begin, value_end);
443 }
444
445 return found;
446 }
447
GetStatusLine() const448 std::string HttpResponseHeaders::GetStatusLine() const {
449 // copy up to the null byte.
450 return std::string(raw_headers_.c_str());
451 }
452
GetStatusText() const453 std::string HttpResponseHeaders::GetStatusText() const {
454 // GetStatusLine() is already normalized, so it has the format:
455 // <http_version> SP <response_code> SP <status_text>
456 std::string status_text = GetStatusLine();
457 std::string::const_iterator begin = status_text.begin();
458 std::string::const_iterator end = status_text.end();
459 for (int i = 0; i < 2; ++i)
460 begin = find(begin, end, ' ') + 1;
461 return std::string(begin, end);
462 }
463
EnumerateHeaderLines(void ** iter,std::string * name,std::string * value) const464 bool HttpResponseHeaders::EnumerateHeaderLines(void** iter,
465 std::string* name,
466 std::string* value) const {
467 size_t i = reinterpret_cast<size_t>(*iter);
468 if (i == parsed_.size())
469 return false;
470
471 DCHECK(!parsed_[i].is_continuation());
472
473 name->assign(parsed_[i].name_begin, parsed_[i].name_end);
474
475 std::string::const_iterator value_begin = parsed_[i].value_begin;
476 std::string::const_iterator value_end = parsed_[i].value_end;
477 while (++i < parsed_.size() && parsed_[i].is_continuation())
478 value_end = parsed_[i].value_end;
479
480 value->assign(value_begin, value_end);
481
482 *iter = reinterpret_cast<void*>(i);
483 return true;
484 }
485
EnumerateHeader(void ** iter,const std::string & name,std::string * value) const486 bool HttpResponseHeaders::EnumerateHeader(void** iter, const std::string& name,
487 std::string* value) const {
488 size_t i;
489 if (!iter || !*iter) {
490 i = FindHeader(0, name);
491 } else {
492 i = reinterpret_cast<size_t>(*iter);
493 if (i >= parsed_.size()) {
494 i = std::string::npos;
495 } else if (!parsed_[i].is_continuation()) {
496 i = FindHeader(i, name);
497 }
498 }
499
500 if (i == std::string::npos) {
501 value->clear();
502 return false;
503 }
504
505 if (iter)
506 *iter = reinterpret_cast<void*>(i + 1);
507 value->assign(parsed_[i].value_begin, parsed_[i].value_end);
508 return true;
509 }
510
HasHeaderValue(const std::string & name,const std::string & value) const511 bool HttpResponseHeaders::HasHeaderValue(const std::string& name,
512 const std::string& value) const {
513 // The value has to be an exact match. This is important since
514 // 'cache-control: no-cache' != 'cache-control: no-cache="foo"'
515 void* iter = NULL;
516 std::string temp;
517 while (EnumerateHeader(&iter, name, &temp)) {
518 if (value.size() == temp.size() &&
519 std::equal(temp.begin(), temp.end(), value.begin(),
520 base::CaseInsensitiveCompare<char>()))
521 return true;
522 }
523 return false;
524 }
525
HasHeader(const std::string & name) const526 bool HttpResponseHeaders::HasHeader(const std::string& name) const {
527 return FindHeader(0, name) != std::string::npos;
528 }
529
HttpResponseHeaders()530 HttpResponseHeaders::HttpResponseHeaders() : response_code_(-1) {
531 }
532
~HttpResponseHeaders()533 HttpResponseHeaders::~HttpResponseHeaders() {
534 }
535
536 // Note: this implementation implicitly assumes that line_end points at a valid
537 // sentinel character (such as '\0').
538 // static
ParseVersion(std::string::const_iterator line_begin,std::string::const_iterator line_end)539 HttpVersion HttpResponseHeaders::ParseVersion(
540 std::string::const_iterator line_begin,
541 std::string::const_iterator line_end) {
542 std::string::const_iterator p = line_begin;
543
544 // RFC2616 sec 3.1: HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT
545 // TODO: (1*DIGIT apparently means one or more digits, but we only handle 1).
546 // TODO: handle leading zeros, which is allowed by the rfc1616 sec 3.1.
547
548 if ((line_end - p < 4) || !LowerCaseEqualsASCII(p, p + 4, "http")) {
549 DVLOG(1) << "missing status line";
550 return HttpVersion();
551 }
552
553 p += 4;
554
555 if (p >= line_end || *p != '/') {
556 DVLOG(1) << "missing version";
557 return HttpVersion();
558 }
559
560 std::string::const_iterator dot = find(p, line_end, '.');
561 if (dot == line_end) {
562 DVLOG(1) << "malformed version";
563 return HttpVersion();
564 }
565
566 ++p; // from / to first digit.
567 ++dot; // from . to second digit.
568
569 if (!(*p >= '0' && *p <= '9' && *dot >= '0' && *dot <= '9')) {
570 DVLOG(1) << "malformed version number";
571 return HttpVersion();
572 }
573
574 uint16 major = *p - '0';
575 uint16 minor = *dot - '0';
576
577 return HttpVersion(major, minor);
578 }
579
580 // Note: this implementation implicitly assumes that line_end points at a valid
581 // sentinel character (such as '\0').
ParseStatusLine(std::string::const_iterator line_begin,std::string::const_iterator line_end,bool has_headers)582 void HttpResponseHeaders::ParseStatusLine(
583 std::string::const_iterator line_begin,
584 std::string::const_iterator line_end,
585 bool has_headers) {
586 // Extract the version number
587 parsed_http_version_ = ParseVersion(line_begin, line_end);
588
589 // Clamp the version number to one of: {0.9, 1.0, 1.1}
590 if (parsed_http_version_ == HttpVersion(0, 9) && !has_headers) {
591 http_version_ = HttpVersion(0, 9);
592 raw_headers_ = "HTTP/0.9";
593 } else if (parsed_http_version_ >= HttpVersion(1, 1)) {
594 http_version_ = HttpVersion(1, 1);
595 raw_headers_ = "HTTP/1.1";
596 } else {
597 // Treat everything else like HTTP 1.0
598 http_version_ = HttpVersion(1, 0);
599 raw_headers_ = "HTTP/1.0";
600 }
601 if (parsed_http_version_ != http_version_) {
602 DVLOG(1) << "assuming HTTP/" << http_version_.major_value() << "."
603 << http_version_.minor_value();
604 }
605
606 // TODO(eroman): this doesn't make sense if ParseVersion failed.
607 std::string::const_iterator p = find(line_begin, line_end, ' ');
608
609 if (p == line_end) {
610 DVLOG(1) << "missing response status; assuming 200 OK";
611 raw_headers_.append(" 200 OK");
612 raw_headers_.push_back('\0');
613 response_code_ = 200;
614 return;
615 }
616
617 // Skip whitespace.
618 while (*p == ' ')
619 ++p;
620
621 std::string::const_iterator code = p;
622 while (*p >= '0' && *p <= '9')
623 ++p;
624
625 if (p == code) {
626 DVLOG(1) << "missing response status number; assuming 200";
627 raw_headers_.append(" 200 OK");
628 response_code_ = 200;
629 return;
630 }
631 raw_headers_.push_back(' ');
632 raw_headers_.append(code, p);
633 raw_headers_.push_back(' ');
634 base::StringToInt(code, p, &response_code_);
635
636 // Skip whitespace.
637 while (*p == ' ')
638 ++p;
639
640 // Trim trailing whitespace.
641 while (line_end > p && line_end[-1] == ' ')
642 --line_end;
643
644 if (p == line_end) {
645 DVLOG(1) << "missing response status text; assuming OK";
646 // Not super critical what we put here. Just use "OK"
647 // even if it isn't descriptive of response_code_.
648 raw_headers_.append("OK");
649 } else {
650 raw_headers_.append(p, line_end);
651 }
652
653 raw_headers_.push_back('\0');
654 }
655
FindHeader(size_t from,const std::string & search) const656 size_t HttpResponseHeaders::FindHeader(size_t from,
657 const std::string& search) const {
658 for (size_t i = from; i < parsed_.size(); ++i) {
659 if (parsed_[i].is_continuation())
660 continue;
661 const std::string::const_iterator& name_begin = parsed_[i].name_begin;
662 const std::string::const_iterator& name_end = parsed_[i].name_end;
663 if (static_cast<size_t>(name_end - name_begin) == search.size() &&
664 std::equal(name_begin, name_end, search.begin(),
665 base::CaseInsensitiveCompare<char>()))
666 return i;
667 }
668
669 return std::string::npos;
670 }
671
AddHeader(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator values_begin,std::string::const_iterator values_end)672 void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin,
673 std::string::const_iterator name_end,
674 std::string::const_iterator values_begin,
675 std::string::const_iterator values_end) {
676 // If the header can be coalesced, then we should split it up.
677 if (values_begin == values_end ||
678 HttpUtil::IsNonCoalescingHeader(name_begin, name_end)) {
679 AddToParsed(name_begin, name_end, values_begin, values_end);
680 } else {
681 HttpUtil::ValuesIterator it(values_begin, values_end, ',');
682 while (it.GetNext()) {
683 AddToParsed(name_begin, name_end, it.value_begin(), it.value_end());
684 // clobber these so that subsequent values are treated as continuations
685 name_begin = name_end = raw_headers_.end();
686 }
687 }
688 }
689
AddToParsed(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator value_begin,std::string::const_iterator value_end)690 void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin,
691 std::string::const_iterator name_end,
692 std::string::const_iterator value_begin,
693 std::string::const_iterator value_end) {
694 ParsedHeader header;
695 header.name_begin = name_begin;
696 header.name_end = name_end;
697 header.value_begin = value_begin;
698 header.value_end = value_end;
699 parsed_.push_back(header);
700 }
701
AddNonCacheableHeaders(HeaderSet * result) const702 void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const {
703 // Add server specified transients. Any 'cache-control: no-cache="foo,bar"'
704 // headers present in the response specify additional headers that we should
705 // not store in the cache.
706 const std::string kCacheControl = "cache-control";
707 const std::string kPrefix = "no-cache=\"";
708 std::string value;
709 void* iter = NULL;
710 while (EnumerateHeader(&iter, kCacheControl, &value)) {
711 if (value.size() > kPrefix.size() &&
712 value.compare(0, kPrefix.size(), kPrefix) == 0) {
713 // if it doesn't end with a quote, then treat as malformed
714 if (value[value.size()-1] != '\"')
715 continue;
716
717 // trim off leading and trailing bits
718 size_t len = value.size() - kPrefix.size() - 1;
719 TrimString(value.substr(kPrefix.size(), len), HTTP_LWS, &value);
720
721 size_t begin_pos = 0;
722 for (;;) {
723 // find the end of this header name
724 size_t comma_pos = value.find(',', begin_pos);
725 if (comma_pos == std::string::npos)
726 comma_pos = value.size();
727 size_t end = comma_pos;
728 while (end > begin_pos && strchr(HTTP_LWS, value[end - 1]))
729 end--;
730
731 // assuming the header is not emtpy, lowercase and insert into set
732 if (end > begin_pos) {
733 std::string name = value.substr(begin_pos, end - begin_pos);
734 StringToLowerASCII(&name);
735 result->insert(name);
736 }
737
738 // repeat
739 begin_pos = comma_pos + 1;
740 while (begin_pos < value.size() && strchr(HTTP_LWS, value[begin_pos]))
741 begin_pos++;
742 if (begin_pos >= value.size())
743 break;
744 }
745 }
746 }
747 }
748
AddHopByHopHeaders(HeaderSet * result)749 void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) {
750 for (size_t i = 0; i < arraysize(kHopByHopResponseHeaders); ++i)
751 result->insert(std::string(kHopByHopResponseHeaders[i]));
752 }
753
AddCookieHeaders(HeaderSet * result)754 void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) {
755 for (size_t i = 0; i < arraysize(kCookieResponseHeaders); ++i)
756 result->insert(std::string(kCookieResponseHeaders[i]));
757 }
758
AddChallengeHeaders(HeaderSet * result)759 void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) {
760 for (size_t i = 0; i < arraysize(kChallengeResponseHeaders); ++i)
761 result->insert(std::string(kChallengeResponseHeaders[i]));
762 }
763
AddHopContentRangeHeaders(HeaderSet * result)764 void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) {
765 result->insert("content-range");
766 }
767
GetMimeTypeAndCharset(std::string * mime_type,std::string * charset) const768 void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
769 std::string* charset) const {
770 mime_type->clear();
771 charset->clear();
772
773 std::string name = "content-type";
774 std::string value;
775
776 bool had_charset = false;
777
778 void* iter = NULL;
779 while (EnumerateHeader(&iter, name, &value))
780 HttpUtil::ParseContentType(value, mime_type, charset, &had_charset);
781 }
782
GetMimeType(std::string * mime_type) const783 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
784 std::string unused;
785 GetMimeTypeAndCharset(mime_type, &unused);
786 return !mime_type->empty();
787 }
788
GetCharset(std::string * charset) const789 bool HttpResponseHeaders::GetCharset(std::string* charset) const {
790 std::string unused;
791 GetMimeTypeAndCharset(&unused, charset);
792 return !charset->empty();
793 }
794
IsRedirect(std::string * location) const795 bool HttpResponseHeaders::IsRedirect(std::string* location) const {
796 if (!IsRedirectResponseCode(response_code_))
797 return false;
798
799 // If we lack a Location header, then we can't treat this as a redirect.
800 // We assume that the first non-empty location value is the target URL that
801 // we want to follow. TODO(darin): Is this consistent with other browsers?
802 size_t i = std::string::npos;
803 do {
804 i = FindHeader(++i, "location");
805 if (i == std::string::npos)
806 return false;
807 // If the location value is empty, then it doesn't count.
808 } while (parsed_[i].value_begin == parsed_[i].value_end);
809
810 if (location) {
811 // Escape any non-ASCII characters to preserve them. The server should
812 // only be returning ASCII here, but for compat we need to do this.
813 *location = EscapeNonASCII(
814 std::string(parsed_[i].value_begin, parsed_[i].value_end));
815 }
816
817 return true;
818 }
819
820 // static
IsRedirectResponseCode(int response_code)821 bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) {
822 // Users probably want to see 300 (multiple choice) pages, so we don't count
823 // them as redirects that need to be followed.
824 return (response_code == 301 ||
825 response_code == 302 ||
826 response_code == 303 ||
827 response_code == 307);
828 }
829
830 // From RFC 2616 section 13.2.4:
831 //
832 // The calculation to determine if a response has expired is quite simple:
833 //
834 // response_is_fresh = (freshness_lifetime > current_age)
835 //
836 // Of course, there are other factors that can force a response to always be
837 // validated or re-fetched.
838 //
RequiresValidation(const Time & request_time,const Time & response_time,const Time & current_time) const839 bool HttpResponseHeaders::RequiresValidation(const Time& request_time,
840 const Time& response_time,
841 const Time& current_time) const {
842 TimeDelta lifetime =
843 GetFreshnessLifetime(response_time);
844 if (lifetime == TimeDelta())
845 return true;
846
847 return lifetime <= GetCurrentAge(request_time, response_time, current_time);
848 }
849
850 // From RFC 2616 section 13.2.4:
851 //
852 // The max-age directive takes priority over Expires, so if max-age is present
853 // in a response, the calculation is simply:
854 //
855 // freshness_lifetime = max_age_value
856 //
857 // Otherwise, if Expires is present in the response, the calculation is:
858 //
859 // freshness_lifetime = expires_value - date_value
860 //
861 // Note that neither of these calculations is vulnerable to clock skew, since
862 // all of the information comes from the origin server.
863 //
864 // Also, if the response does have a Last-Modified time, the heuristic
865 // expiration value SHOULD be no more than some fraction of the interval since
866 // that time. A typical setting of this fraction might be 10%:
867 //
868 // freshness_lifetime = (date_value - last_modified_value) * 0.10
869 //
GetFreshnessLifetime(const Time & response_time) const870 TimeDelta HttpResponseHeaders::GetFreshnessLifetime(
871 const Time& response_time) const {
872 // Check for headers that force a response to never be fresh. For backwards
873 // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control:
874 // no-cache" even though RFC 2616 does not specify it.
875 if (HasHeaderValue("cache-control", "no-cache") ||
876 HasHeaderValue("cache-control", "no-store") ||
877 HasHeaderValue("pragma", "no-cache") ||
878 HasHeaderValue("vary", "*")) // see RFC 2616 section 13.6
879 return TimeDelta(); // not fresh
880
881 // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the
882 // Expires header after checking for max-age in GetFreshnessLifetime. This
883 // is important since "Expires: <date in the past>" means not fresh, but
884 // it should not trump a max-age value.
885
886 TimeDelta max_age_value;
887 if (GetMaxAgeValue(&max_age_value))
888 return max_age_value;
889
890 // If there is no Date header, then assume that the server response was
891 // generated at the time when we received the response.
892 Time date_value;
893 if (!GetDateValue(&date_value))
894 date_value = response_time;
895
896 Time expires_value;
897 if (GetExpiresValue(&expires_value)) {
898 // The expires value can be a date in the past!
899 if (expires_value > date_value)
900 return expires_value - date_value;
901
902 return TimeDelta(); // not fresh
903 }
904
905 // From RFC 2616 section 13.4:
906 //
907 // A response received with a status code of 200, 203, 206, 300, 301 or 410
908 // MAY be stored by a cache and used in reply to a subsequent request,
909 // subject to the expiration mechanism, unless a cache-control directive
910 // prohibits caching.
911 // ...
912 // A response received with any other status code (e.g. status codes 302
913 // and 307) MUST NOT be returned in a reply to a subsequent request unless
914 // there are cache-control directives or another header(s) that explicitly
915 // allow it.
916 //
917 // From RFC 2616 section 14.9.4:
918 //
919 // When the must-revalidate directive is present in a response received by
920 // a cache, that cache MUST NOT use the entry after it becomes stale to
921 // respond to a subsequent request without first revalidating it with the
922 // origin server. (I.e., the cache MUST do an end-to-end revalidation every
923 // time, if, based solely on the origin server's Expires or max-age value,
924 // the cached response is stale.)
925 //
926 if ((response_code_ == 200 || response_code_ == 203 ||
927 response_code_ == 206) &&
928 !HasHeaderValue("cache-control", "must-revalidate")) {
929 // TODO(darin): Implement a smarter heuristic.
930 Time last_modified_value;
931 if (GetLastModifiedValue(&last_modified_value)) {
932 // The last-modified value can be a date in the past!
933 if (last_modified_value <= date_value)
934 return (date_value - last_modified_value) / 10;
935 }
936 }
937
938 // These responses are implicitly fresh (unless otherwise overruled):
939 if (response_code_ == 300 || response_code_ == 301 || response_code_ == 410)
940 return TimeDelta::FromMicroseconds(kint64max);
941
942 return TimeDelta(); // not fresh
943 }
944
945 // From RFC 2616 section 13.2.3:
946 //
947 // Summary of age calculation algorithm, when a cache receives a response:
948 //
949 // /*
950 // * age_value
951 // * is the value of Age: header received by the cache with
952 // * this response.
953 // * date_value
954 // * is the value of the origin server's Date: header
955 // * request_time
956 // * is the (local) time when the cache made the request
957 // * that resulted in this cached response
958 // * response_time
959 // * is the (local) time when the cache received the
960 // * response
961 // * now
962 // * is the current (local) time
963 // */
964 // apparent_age = max(0, response_time - date_value);
965 // corrected_received_age = max(apparent_age, age_value);
966 // response_delay = response_time - request_time;
967 // corrected_initial_age = corrected_received_age + response_delay;
968 // resident_time = now - response_time;
969 // current_age = corrected_initial_age + resident_time;
970 //
GetCurrentAge(const Time & request_time,const Time & response_time,const Time & current_time) const971 TimeDelta HttpResponseHeaders::GetCurrentAge(const Time& request_time,
972 const Time& response_time,
973 const Time& current_time) const {
974 // If there is no Date header, then assume that the server response was
975 // generated at the time when we received the response.
976 Time date_value;
977 if (!GetDateValue(&date_value))
978 date_value = response_time;
979
980 // If there is no Age header, then assume age is zero. GetAgeValue does not
981 // modify its out param if the value does not exist.
982 TimeDelta age_value;
983 GetAgeValue(&age_value);
984
985 TimeDelta apparent_age = std::max(TimeDelta(), response_time - date_value);
986 TimeDelta corrected_received_age = std::max(apparent_age, age_value);
987 TimeDelta response_delay = response_time - request_time;
988 TimeDelta corrected_initial_age = corrected_received_age + response_delay;
989 TimeDelta resident_time = current_time - response_time;
990 TimeDelta current_age = corrected_initial_age + resident_time;
991
992 return current_age;
993 }
994
GetMaxAgeValue(TimeDelta * result) const995 bool HttpResponseHeaders::GetMaxAgeValue(TimeDelta* result) const {
996 std::string name = "cache-control";
997 std::string value;
998
999 const char kMaxAgePrefix[] = "max-age=";
1000 const size_t kMaxAgePrefixLen = arraysize(kMaxAgePrefix) - 1;
1001
1002 void* iter = NULL;
1003 while (EnumerateHeader(&iter, name, &value)) {
1004 if (value.size() > kMaxAgePrefixLen) {
1005 if (LowerCaseEqualsASCII(value.begin(),
1006 value.begin() + kMaxAgePrefixLen,
1007 kMaxAgePrefix)) {
1008 int64 seconds;
1009 base::StringToInt64(value.begin() + kMaxAgePrefixLen,
1010 value.end(),
1011 &seconds);
1012 *result = TimeDelta::FromSeconds(seconds);
1013 return true;
1014 }
1015 }
1016 }
1017
1018 return false;
1019 }
1020
GetAgeValue(TimeDelta * result) const1021 bool HttpResponseHeaders::GetAgeValue(TimeDelta* result) const {
1022 std::string value;
1023 if (!EnumerateHeader(NULL, "Age", &value))
1024 return false;
1025
1026 int64 seconds;
1027 base::StringToInt64(value, &seconds);
1028 *result = TimeDelta::FromSeconds(seconds);
1029 return true;
1030 }
1031
GetDateValue(Time * result) const1032 bool HttpResponseHeaders::GetDateValue(Time* result) const {
1033 return GetTimeValuedHeader("Date", result);
1034 }
1035
GetLastModifiedValue(Time * result) const1036 bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const {
1037 return GetTimeValuedHeader("Last-Modified", result);
1038 }
1039
GetExpiresValue(Time * result) const1040 bool HttpResponseHeaders::GetExpiresValue(Time* result) const {
1041 return GetTimeValuedHeader("Expires", result);
1042 }
1043
GetTimeValuedHeader(const std::string & name,Time * result) const1044 bool HttpResponseHeaders::GetTimeValuedHeader(const std::string& name,
1045 Time* result) const {
1046 std::string value;
1047 if (!EnumerateHeader(NULL, name, &value))
1048 return false;
1049
1050 std::wstring value_wide(value.begin(), value.end()); // inflate ascii
1051 return Time::FromString(value_wide.c_str(), result);
1052 }
1053
IsKeepAlive() const1054 bool HttpResponseHeaders::IsKeepAlive() const {
1055 if (http_version_ < HttpVersion(1, 0))
1056 return false;
1057
1058 // NOTE: It is perhaps risky to assume that a Proxy-Connection header is
1059 // meaningful when we don't know that this response was from a proxy, but
1060 // Mozilla also does this, so we'll do the same.
1061 std::string connection_val;
1062 if (!EnumerateHeader(NULL, "connection", &connection_val))
1063 EnumerateHeader(NULL, "proxy-connection", &connection_val);
1064
1065 bool keep_alive;
1066
1067 if (http_version_ == HttpVersion(1, 0)) {
1068 // HTTP/1.0 responses default to NOT keep-alive
1069 keep_alive = LowerCaseEqualsASCII(connection_val, "keep-alive");
1070 } else {
1071 // HTTP/1.1 responses default to keep-alive
1072 keep_alive = !LowerCaseEqualsASCII(connection_val, "close");
1073 }
1074
1075 return keep_alive;
1076 }
1077
HasStrongValidators() const1078 bool HttpResponseHeaders::HasStrongValidators() const {
1079 std::string etag_value;
1080 EnumerateHeader(NULL, "etag", &etag_value);
1081 if (!etag_value.empty()) {
1082 size_t slash = etag_value.find('/');
1083 if (slash == std::string::npos || slash == 0)
1084 return true;
1085
1086 std::string::const_iterator i = etag_value.begin();
1087 std::string::const_iterator j = etag_value.begin() + slash;
1088 HttpUtil::TrimLWS(&i, &j);
1089 if (!LowerCaseEqualsASCII(i, j, "w"))
1090 return true;
1091 }
1092
1093 Time last_modified;
1094 if (!GetLastModifiedValue(&last_modified))
1095 return false;
1096
1097 Time date;
1098 if (!GetDateValue(&date))
1099 return false;
1100
1101 return ((date - last_modified).InSeconds() >= 60);
1102 }
1103
1104 // From RFC 2616:
1105 // Content-Length = "Content-Length" ":" 1*DIGIT
GetContentLength() const1106 int64 HttpResponseHeaders::GetContentLength() const {
1107 void* iter = NULL;
1108 std::string content_length_val;
1109 if (!EnumerateHeader(&iter, "content-length", &content_length_val))
1110 return -1;
1111
1112 if (content_length_val.empty())
1113 return -1;
1114
1115 if (content_length_val[0] == '+')
1116 return -1;
1117
1118 int64 result;
1119 bool ok = base::StringToInt64(content_length_val, &result);
1120 if (!ok || result < 0)
1121 return -1;
1122
1123 return result;
1124 }
1125
1126 // From RFC 2616 14.16:
1127 // content-range-spec =
1128 // bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" )
1129 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*"
1130 // instance-length = 1*DIGIT
1131 // bytes-unit = "bytes"
GetContentRange(int64 * first_byte_position,int64 * last_byte_position,int64 * instance_length) const1132 bool HttpResponseHeaders::GetContentRange(int64* first_byte_position,
1133 int64* last_byte_position,
1134 int64* instance_length) const {
1135 void* iter = NULL;
1136 std::string content_range_spec;
1137 *first_byte_position = *last_byte_position = *instance_length = -1;
1138 if (!EnumerateHeader(&iter, "content-range", &content_range_spec))
1139 return false;
1140
1141 // If the header value is empty, we have an invalid header.
1142 if (content_range_spec.empty())
1143 return false;
1144
1145 size_t space_position = content_range_spec.find(' ');
1146 if (space_position == std::string::npos)
1147 return false;
1148
1149 // Invalid header if it doesn't contain "bytes-unit".
1150 std::string::const_iterator content_range_spec_begin =
1151 content_range_spec.begin();
1152 std::string::const_iterator content_range_spec_end =
1153 content_range_spec.begin() + space_position;
1154 HttpUtil::TrimLWS(&content_range_spec_begin, &content_range_spec_end);
1155 if (!LowerCaseEqualsASCII(content_range_spec_begin,
1156 content_range_spec_end,
1157 "bytes")) {
1158 return false;
1159 }
1160
1161 size_t slash_position = content_range_spec.find('/', space_position + 1);
1162 if (slash_position == std::string::npos)
1163 return false;
1164
1165 // Obtain the part behind the space and before slash.
1166 std::string::const_iterator byte_range_resp_spec_begin =
1167 content_range_spec.begin() + space_position + 1;
1168 std::string::const_iterator byte_range_resp_spec_end =
1169 content_range_spec.begin() + slash_position;
1170 HttpUtil::TrimLWS(&byte_range_resp_spec_begin, &byte_range_resp_spec_end);
1171
1172 // Parse the byte-range-resp-spec part.
1173 std::string byte_range_resp_spec(byte_range_resp_spec_begin,
1174 byte_range_resp_spec_end);
1175 // If byte-range-resp-spec != "*".
1176 if (!LowerCaseEqualsASCII(byte_range_resp_spec, "*")) {
1177 size_t minus_position = byte_range_resp_spec.find('-');
1178 if (minus_position != std::string::npos) {
1179 // Obtain first-byte-pos.
1180 std::string::const_iterator first_byte_pos_begin =
1181 byte_range_resp_spec.begin();
1182 std::string::const_iterator first_byte_pos_end =
1183 byte_range_resp_spec.begin() + minus_position;
1184 HttpUtil::TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
1185
1186 bool ok = base::StringToInt64(first_byte_pos_begin,
1187 first_byte_pos_end,
1188 first_byte_position);
1189
1190 // Obtain last-byte-pos.
1191 std::string::const_iterator last_byte_pos_begin =
1192 byte_range_resp_spec.begin() + minus_position + 1;
1193 std::string::const_iterator last_byte_pos_end =
1194 byte_range_resp_spec.end();
1195 HttpUtil::TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
1196
1197 ok &= base::StringToInt64(last_byte_pos_begin,
1198 last_byte_pos_end,
1199 last_byte_position);
1200 if (!ok) {
1201 *first_byte_position = *last_byte_position = -1;
1202 return false;
1203 }
1204 if (*first_byte_position < 0 || *last_byte_position < 0 ||
1205 *first_byte_position > *last_byte_position)
1206 return false;
1207 } else {
1208 return false;
1209 }
1210 }
1211
1212 // Parse the instance-length part.
1213 // If instance-length == "*".
1214 std::string::const_iterator instance_length_begin =
1215 content_range_spec.begin() + slash_position + 1;
1216 std::string::const_iterator instance_length_end =
1217 content_range_spec.end();
1218 HttpUtil::TrimLWS(&instance_length_begin, &instance_length_end);
1219
1220 if (LowerCaseEqualsASCII(instance_length_begin, instance_length_end, "*")) {
1221 return false;
1222 } else if (!base::StringToInt64(instance_length_begin,
1223 instance_length_end,
1224 instance_length)) {
1225 *instance_length = -1;
1226 return false;
1227 }
1228
1229 // We have all the values; let's verify that they make sense for a 206
1230 // response.
1231 if (*first_byte_position < 0 || *last_byte_position < 0 ||
1232 *instance_length < 0 || *instance_length - 1 < *last_byte_position)
1233 return false;
1234
1235 return true;
1236 }
1237
1238 } // namespace net
1239