1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // The rules for header parsing were borrowed from Firefox:
6 // http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp
7 // The rules for parsing content-types were also borrowed from Firefox:
8 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
9
10 #include "net/http/http_response_headers.h"
11
12 #include <algorithm>
13
14 #include "base/logging.h"
15 #include "base/pickle.h"
16 #include "base/string_util.h"
17 #include "base/time.h"
18 #include "net/base/escape.h"
19 #include "net/http/http_util.h"
20
21 using base::Time;
22 using base::TimeDelta;
23
24 namespace net {
25
26 //-----------------------------------------------------------------------------
27
28 namespace {
29
// Lowercase names of the RFC 2616 hop-by-hop response headers. These are not
// to be stored by caches. AddHopByHopHeaders() inserts every entry of this
// table into a HeaderSet.
const char* const kHopByHopResponseHeaders[] = {
  "connection",
  "proxy-connection",
  "keep-alive",
  "trailer",
  "transfer-encoding",
  "upgrade"
};
40
// Lowercase names of the authentication challenge response headers. These are
// not to be stored by caches. AddChallengeHeaders() inserts every entry of
// this table into a HeaderSet.
const char* const kChallengeResponseHeaders[] = {
  "www-authenticate",
  "proxy-authenticate"
};
47
// Lowercase names of the cookie setting response headers. These are not to be
// stored by caches or disclosed otherwise. AddCookieHeaders() inserts every
// entry of this table into a HeaderSet.
const char* const kCookieResponseHeaders[] = {
  "set-cookie",
  "set-cookie2"
};
54
// Lowercase names of the response headers that are not copied from a 304/206
// response to the cached response headers. This list is based on Mozilla's
// nsHttpResponseHead.cpp. ShouldUpdateHeader() consults this table.
const char* const kNonUpdatedHeaders[] = {
  "connection",
  "proxy-connection",
  "keep-alive",
  "www-authenticate",
  "proxy-authenticate",
  "trailer",
  "transfer-encoding",
  "upgrade",
  // These should never change:
  "content-location",
  "content-md5",
  "etag",
  // Assume cache-control: no-transform.
  "content-encoding",
  "content-range",
  "content-type",
  // Some broken Microsoft servers send 'content-length: 0' with 304s.
  "content-length"
};
77
ShouldUpdateHeader(const std::string::const_iterator & name_begin,const std::string::const_iterator & name_end)78 bool ShouldUpdateHeader(const std::string::const_iterator& name_begin,
79 const std::string::const_iterator& name_end) {
80 for (size_t i = 0; i < arraysize(kNonUpdatedHeaders); ++i) {
81 if (LowerCaseEqualsASCII(name_begin, name_end, kNonUpdatedHeaders[i]))
82 return false;
83 }
84 return true;
85 }
86
87 } // namespace
88
89 //-----------------------------------------------------------------------------
90
HttpResponseHeaders(const std::string & raw_input)91 HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input)
92 : response_code_(-1) {
93 Parse(raw_input);
94 }
95
HttpResponseHeaders(const Pickle & pickle,void ** iter)96 HttpResponseHeaders::HttpResponseHeaders(const Pickle& pickle, void** iter)
97 : response_code_(-1) {
98 std::string raw_input;
99 if (pickle.ReadString(iter, &raw_input))
100 Parse(raw_input);
101 }
102
Persist(Pickle * pickle,PersistOptions options)103 void HttpResponseHeaders::Persist(Pickle* pickle, PersistOptions options) {
104 if (options == PERSIST_RAW) {
105 pickle->WriteString(raw_headers_);
106 return; // Done.
107 }
108
109 HeaderSet filter_headers;
110
111 // Construct set of headers to filter out based on options.
112 if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE)
113 AddNonCacheableHeaders(&filter_headers);
114
115 if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES)
116 AddCookieHeaders(&filter_headers);
117
118 if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES)
119 AddChallengeHeaders(&filter_headers);
120
121 if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP)
122 AddHopByHopHeaders(&filter_headers);
123
124 if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES)
125 AddHopContentRangeHeaders(&filter_headers);
126
127 std::string blob;
128 blob.reserve(raw_headers_.size());
129
130 // This copies the status line w/ terminator null.
131 // Note raw_headers_ has embedded nulls instead of \n,
132 // so this just copies the first header line.
133 blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1);
134
135 for (size_t i = 0; i < parsed_.size(); ++i) {
136 DCHECK(!parsed_[i].is_continuation());
137
138 // Locate the start of the next header.
139 size_t k = i;
140 while (++k < parsed_.size() && parsed_[k].is_continuation());
141 --k;
142
143 std::string header_name(parsed_[i].name_begin, parsed_[i].name_end);
144 StringToLowerASCII(&header_name);
145
146 if (filter_headers.find(header_name) == filter_headers.end()) {
147 // Make sure there is a null after the value.
148 blob.append(parsed_[i].name_begin, parsed_[k].value_end);
149 blob.push_back('\0');
150 }
151
152 i = k;
153 }
154 blob.push_back('\0');
155
156 pickle->WriteString(blob);
157 }
158
Update(const HttpResponseHeaders & new_headers)159 void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) {
160 DCHECK(new_headers.response_code() == 304 ||
161 new_headers.response_code() == 206);
162
163 // Copy up to the null byte. This just copies the status line.
164 std::string new_raw_headers(raw_headers_.c_str());
165 new_raw_headers.push_back('\0');
166
167 HeaderSet updated_headers;
168
169 // NOTE: we write the new headers then the old headers for convenience. The
170 // order should not matter.
171
172 // Figure out which headers we want to take from new_headers:
173 for (size_t i = 0; i < new_headers.parsed_.size(); ++i) {
174 const HeaderList& new_parsed = new_headers.parsed_;
175
176 DCHECK(!new_parsed[i].is_continuation());
177
178 // Locate the start of the next header.
179 size_t k = i;
180 while (++k < new_parsed.size() && new_parsed[k].is_continuation());
181 --k;
182
183 const std::string::const_iterator& name_begin = new_parsed[i].name_begin;
184 const std::string::const_iterator& name_end = new_parsed[i].name_end;
185 if (ShouldUpdateHeader(name_begin, name_end)) {
186 std::string name(name_begin, name_end);
187 StringToLowerASCII(&name);
188 updated_headers.insert(name);
189
190 // Preserve this header line in the merged result, making sure there is
191 // a null after the value.
192 new_raw_headers.append(name_begin, new_parsed[k].value_end);
193 new_raw_headers.push_back('\0');
194 }
195
196 i = k;
197 }
198
199 // Now, build the new raw headers.
200 MergeWithHeaders(new_raw_headers, updated_headers);
201 }
202
MergeWithHeaders(const std::string & raw_headers,const HeaderSet & headers_to_remove)203 void HttpResponseHeaders::MergeWithHeaders(const std::string& raw_headers,
204 const HeaderSet& headers_to_remove) {
205 std::string new_raw_headers(raw_headers);
206 for (size_t i = 0; i < parsed_.size(); ++i) {
207 DCHECK(!parsed_[i].is_continuation());
208
209 // Locate the start of the next header.
210 size_t k = i;
211 while (++k < parsed_.size() && parsed_[k].is_continuation());
212 --k;
213
214 std::string name(parsed_[i].name_begin, parsed_[i].name_end);
215 StringToLowerASCII(&name);
216 if (headers_to_remove.find(name) == headers_to_remove.end()) {
217 // It's ok to preserve this header in the final result.
218 new_raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end);
219 new_raw_headers.push_back('\0');
220 }
221
222 i = k;
223 }
224 new_raw_headers.push_back('\0');
225
226 // Make this object hold the new data.
227 raw_headers_.clear();
228 parsed_.clear();
229 Parse(new_raw_headers);
230 }
231
RemoveHeader(const std::string & name)232 void HttpResponseHeaders::RemoveHeader(const std::string& name) {
233 // Copy up to the null byte. This just copies the status line.
234 std::string new_raw_headers(raw_headers_.c_str());
235 new_raw_headers.push_back('\0');
236
237 std::string lowercase_name(name);
238 StringToLowerASCII(&lowercase_name);
239 HeaderSet to_remove;
240 to_remove.insert(lowercase_name);
241 MergeWithHeaders(new_raw_headers, to_remove);
242 }
243
AddHeader(const std::string & header)244 void HttpResponseHeaders::AddHeader(const std::string& header) {
245 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
246 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
247 // Don't copy the last null.
248 std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1);
249 new_raw_headers.append(header);
250 new_raw_headers.push_back('\0');
251 new_raw_headers.push_back('\0');
252
253 // Make this object hold the new data.
254 raw_headers_.clear();
255 parsed_.clear();
256 Parse(new_raw_headers);
257 }
258
ReplaceStatusLine(const std::string & new_status)259 void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) {
260 // Copy up to the null byte. This just copies the status line.
261 std::string new_raw_headers(new_status);
262 new_raw_headers.push_back('\0');
263
264 HeaderSet empty_to_remove;
265 MergeWithHeaders(new_raw_headers, empty_to_remove);
266 }
267
Parse(const std::string & raw_input)268 void HttpResponseHeaders::Parse(const std::string& raw_input) {
269 raw_headers_.reserve(raw_input.size());
270
271 // ParseStatusLine adds a normalized status line to raw_headers_
272 std::string::const_iterator line_begin = raw_input.begin();
273 std::string::const_iterator line_end =
274 find(line_begin, raw_input.end(), '\0');
275 // has_headers = true, if there is any data following the status line.
276 // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0.
277 bool has_headers = line_end != raw_input.end() &&
278 (line_end + 1) != raw_input.end() && *(line_end + 1) != '\0';
279 ParseStatusLine(line_begin, line_end, has_headers);
280
281 if (line_end == raw_input.end()) {
282 raw_headers_.push_back('\0');
283 return;
284 }
285
286 // Including a terminating null byte.
287 size_t status_line_len = raw_headers_.size();
288
289 // Now, we add the rest of the raw headers to raw_headers_, and begin parsing
290 // it (to populate our parsed_ vector).
291 raw_headers_.append(line_end + 1, raw_input.end());
292
293 // Adjust to point at the null byte following the status line
294 line_end = raw_headers_.begin() + status_line_len - 1;
295
296 HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(),
297 std::string(1, '\0'));
298 while (headers.GetNext()) {
299 AddHeader(headers.name_begin(),
300 headers.name_end(),
301 headers.values_begin(),
302 headers.values_end());
303 }
304 }
305
306 // Append all of our headers to the final output string.
GetNormalizedHeaders(std::string * output) const307 void HttpResponseHeaders::GetNormalizedHeaders(std::string* output) const {
308 // copy up to the null byte. this just copies the status line.
309 output->assign(raw_headers_.c_str());
310
311 // headers may appear multiple times (not necessarily in succession) in the
312 // header data, so we build a map from header name to generated header lines.
313 // to preserve the order of the original headers, the actual values are kept
314 // in a separate list. finally, the list of headers is flattened to form
315 // the normalized block of headers.
316 //
317 // NOTE: We take special care to preserve the whitespace around any commas
318 // that may occur in the original response headers. Because our consumer may
319 // be a web app, we cannot be certain of the semantics of commas despite the
320 // fact that RFC 2616 says that they should be regarded as value separators.
321 //
322 typedef base::hash_map<std::string, size_t> HeadersMap;
323 HeadersMap headers_map;
324 HeadersMap::iterator iter = headers_map.end();
325
326 std::vector<std::string> headers;
327
328 for (size_t i = 0; i < parsed_.size(); ++i) {
329 DCHECK(!parsed_[i].is_continuation());
330
331 std::string name(parsed_[i].name_begin, parsed_[i].name_end);
332 std::string lower_name = StringToLowerASCII(name);
333
334 iter = headers_map.find(lower_name);
335 if (iter == headers_map.end()) {
336 iter = headers_map.insert(
337 HeadersMap::value_type(lower_name, headers.size())).first;
338 headers.push_back(name + ": ");
339 } else {
340 headers[iter->second].append(", ");
341 }
342
343 std::string::const_iterator value_begin = parsed_[i].value_begin;
344 std::string::const_iterator value_end = parsed_[i].value_end;
345 while (++i < parsed_.size() && parsed_[i].is_continuation())
346 value_end = parsed_[i].value_end;
347 --i;
348
349 headers[iter->second].append(value_begin, value_end);
350 }
351
352 for (size_t i = 0; i < headers.size(); ++i) {
353 output->push_back('\n');
354 output->append(headers[i]);
355 }
356
357 output->push_back('\n');
358 }
359
GetNormalizedHeader(const std::string & name,std::string * value) const360 bool HttpResponseHeaders::GetNormalizedHeader(const std::string& name,
361 std::string* value) const {
362 // If you hit this assertion, please use EnumerateHeader instead!
363 DCHECK(!HttpUtil::IsNonCoalescingHeader(name));
364
365 value->clear();
366
367 bool found = false;
368 size_t i = 0;
369 while (i < parsed_.size()) {
370 i = FindHeader(i, name);
371 if (i == std::string::npos)
372 break;
373
374 found = true;
375
376 if (!value->empty())
377 value->append(", ");
378
379 std::string::const_iterator value_begin = parsed_[i].value_begin;
380 std::string::const_iterator value_end = parsed_[i].value_end;
381 while (++i < parsed_.size() && parsed_[i].is_continuation())
382 value_end = parsed_[i].value_end;
383 value->append(value_begin, value_end);
384 }
385
386 return found;
387 }
388
GetStatusLine() const389 std::string HttpResponseHeaders::GetStatusLine() const {
390 // copy up to the null byte.
391 return std::string(raw_headers_.c_str());
392 }
393
GetStatusText() const394 std::string HttpResponseHeaders::GetStatusText() const {
395 // GetStatusLine() is already normalized, so it has the format:
396 // <http_version> SP <response_code> SP <status_text>
397 std::string status_text = GetStatusLine();
398 std::string::const_iterator begin = status_text.begin();
399 std::string::const_iterator end = status_text.end();
400 for (int i = 0; i < 2; ++i)
401 begin = find(begin, end, ' ') + 1;
402 return std::string(begin, end);
403 }
404
EnumerateHeaderLines(void ** iter,std::string * name,std::string * value) const405 bool HttpResponseHeaders::EnumerateHeaderLines(void** iter,
406 std::string* name,
407 std::string* value) const {
408 size_t i = reinterpret_cast<size_t>(*iter);
409 if (i == parsed_.size())
410 return false;
411
412 DCHECK(!parsed_[i].is_continuation());
413
414 name->assign(parsed_[i].name_begin, parsed_[i].name_end);
415
416 std::string::const_iterator value_begin = parsed_[i].value_begin;
417 std::string::const_iterator value_end = parsed_[i].value_end;
418 while (++i < parsed_.size() && parsed_[i].is_continuation())
419 value_end = parsed_[i].value_end;
420
421 value->assign(value_begin, value_end);
422
423 *iter = reinterpret_cast<void*>(i);
424 return true;
425 }
426
EnumerateHeader(void ** iter,const std::string & name,std::string * value) const427 bool HttpResponseHeaders::EnumerateHeader(void** iter, const std::string& name,
428 std::string* value) const {
429 size_t i;
430 if (!iter || !*iter) {
431 i = FindHeader(0, name);
432 } else {
433 i = reinterpret_cast<size_t>(*iter);
434 if (i >= parsed_.size()) {
435 i = std::string::npos;
436 } else if (!parsed_[i].is_continuation()) {
437 i = FindHeader(i, name);
438 }
439 }
440
441 if (i == std::string::npos) {
442 value->clear();
443 return false;
444 }
445
446 if (iter)
447 *iter = reinterpret_cast<void*>(i + 1);
448 value->assign(parsed_[i].value_begin, parsed_[i].value_end);
449 return true;
450 }
451
HasHeaderValue(const std::string & name,const std::string & value) const452 bool HttpResponseHeaders::HasHeaderValue(const std::string& name,
453 const std::string& value) const {
454 // The value has to be an exact match. This is important since
455 // 'cache-control: no-cache' != 'cache-control: no-cache="foo"'
456 void* iter = NULL;
457 std::string temp;
458 while (EnumerateHeader(&iter, name, &temp)) {
459 if (value.size() == temp.size() &&
460 std::equal(temp.begin(), temp.end(), value.begin(),
461 CaseInsensitiveCompare<char>()))
462 return true;
463 }
464 return false;
465 }
466
467 // Note: this implementation implicitly assumes that line_end points at a valid
468 // sentinel character (such as '\0').
469 // static
ParseVersion(std::string::const_iterator line_begin,std::string::const_iterator line_end)470 HttpVersion HttpResponseHeaders::ParseVersion(
471 std::string::const_iterator line_begin,
472 std::string::const_iterator line_end) {
473 std::string::const_iterator p = line_begin;
474
475 // RFC2616 sec 3.1: HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT
476 // TODO: (1*DIGIT apparently means one or more digits, but we only handle 1).
477 // TODO: handle leading zeros, which is allowed by the rfc1616 sec 3.1.
478
479 if ((line_end - p < 4) || !LowerCaseEqualsASCII(p, p + 4, "http")) {
480 DLOG(INFO) << "missing status line";
481 return HttpVersion();
482 }
483
484 p += 4;
485
486 if (p >= line_end || *p != '/') {
487 DLOG(INFO) << "missing version";
488 return HttpVersion();
489 }
490
491 std::string::const_iterator dot = find(p, line_end, '.');
492 if (dot == line_end) {
493 DLOG(INFO) << "malformed version";
494 return HttpVersion();
495 }
496
497 ++p; // from / to first digit.
498 ++dot; // from . to second digit.
499
500 if (!(*p >= '0' && *p <= '9' && *dot >= '0' && *dot <= '9')) {
501 DLOG(INFO) << "malformed version number";
502 return HttpVersion();
503 }
504
505 uint16 major = *p - '0';
506 uint16 minor = *dot - '0';
507
508 return HttpVersion(major, minor);
509 }
510
511 // Note: this implementation implicitly assumes that line_end points at a valid
512 // sentinel character (such as '\0').
ParseStatusLine(std::string::const_iterator line_begin,std::string::const_iterator line_end,bool has_headers)513 void HttpResponseHeaders::ParseStatusLine(
514 std::string::const_iterator line_begin,
515 std::string::const_iterator line_end,
516 bool has_headers) {
517 // Extract the version number
518 parsed_http_version_ = ParseVersion(line_begin, line_end);
519
520 // Clamp the version number to one of: {0.9, 1.0, 1.1}
521 if (parsed_http_version_ == HttpVersion(0, 9) && !has_headers) {
522 http_version_ = HttpVersion(0, 9);
523 raw_headers_ = "HTTP/0.9";
524 } else if (parsed_http_version_ >= HttpVersion(1, 1)) {
525 http_version_ = HttpVersion(1, 1);
526 raw_headers_ = "HTTP/1.1";
527 } else {
528 // Treat everything else like HTTP 1.0
529 http_version_ = HttpVersion(1, 0);
530 raw_headers_ = "HTTP/1.0";
531 }
532 if (parsed_http_version_ != http_version_) {
533 DLOG(INFO) << "assuming HTTP/" << http_version_.major_value() << "."
534 << http_version_.minor_value();
535 }
536
537 // TODO(eroman): this doesn't make sense if ParseVersion failed.
538 std::string::const_iterator p = find(line_begin, line_end, ' ');
539
540 if (p == line_end) {
541 DLOG(INFO) << "missing response status; assuming 200 OK";
542 raw_headers_.append(" 200 OK");
543 raw_headers_.push_back('\0');
544 response_code_ = 200;
545 return;
546 }
547
548 // Skip whitespace.
549 while (*p == ' ')
550 ++p;
551
552 std::string::const_iterator code = p;
553 while (*p >= '0' && *p <= '9')
554 ++p;
555
556 if (p == code) {
557 DLOG(INFO) << "missing response status number; assuming 200";
558 raw_headers_.append(" 200 OK");
559 response_code_ = 200;
560 return;
561 }
562 raw_headers_.push_back(' ');
563 raw_headers_.append(code, p);
564 raw_headers_.push_back(' ');
565 response_code_ = static_cast<int>(StringToInt64(std::string(code, p)));
566
567 // Skip whitespace.
568 while (*p == ' ')
569 ++p;
570
571 // Trim trailing whitespace.
572 while (line_end > p && line_end[-1] == ' ')
573 --line_end;
574
575 if (p == line_end) {
576 DLOG(INFO) << "missing response status text; assuming OK";
577 // Not super critical what we put here. Just use "OK"
578 // even if it isn't descriptive of response_code_.
579 raw_headers_.append("OK");
580 } else {
581 raw_headers_.append(p, line_end);
582 }
583
584 raw_headers_.push_back('\0');
585 }
586
FindHeader(size_t from,const std::string & search) const587 size_t HttpResponseHeaders::FindHeader(size_t from,
588 const std::string& search) const {
589 for (size_t i = from; i < parsed_.size(); ++i) {
590 if (parsed_[i].is_continuation())
591 continue;
592 const std::string::const_iterator& name_begin = parsed_[i].name_begin;
593 const std::string::const_iterator& name_end = parsed_[i].name_end;
594 if (static_cast<size_t>(name_end - name_begin) == search.size() &&
595 std::equal(name_begin, name_end, search.begin(),
596 CaseInsensitiveCompare<char>()))
597 return i;
598 }
599
600 return std::string::npos;
601 }
602
AddHeader(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator values_begin,std::string::const_iterator values_end)603 void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin,
604 std::string::const_iterator name_end,
605 std::string::const_iterator values_begin,
606 std::string::const_iterator values_end) {
607 // If the header can be coalesced, then we should split it up.
608 if (values_begin == values_end ||
609 HttpUtil::IsNonCoalescingHeader(name_begin, name_end)) {
610 AddToParsed(name_begin, name_end, values_begin, values_end);
611 } else {
612 HttpUtil::ValuesIterator it(values_begin, values_end, ',');
613 while (it.GetNext()) {
614 AddToParsed(name_begin, name_end, it.value_begin(), it.value_end());
615 // clobber these so that subsequent values are treated as continuations
616 name_begin = name_end = raw_headers_.end();
617 }
618 }
619 }
620
AddToParsed(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator value_begin,std::string::const_iterator value_end)621 void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin,
622 std::string::const_iterator name_end,
623 std::string::const_iterator value_begin,
624 std::string::const_iterator value_end) {
625 ParsedHeader header;
626 header.name_begin = name_begin;
627 header.name_end = name_end;
628 header.value_begin = value_begin;
629 header.value_end = value_end;
630 parsed_.push_back(header);
631 }
632
AddNonCacheableHeaders(HeaderSet * result) const633 void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const {
634 // Add server specified transients. Any 'cache-control: no-cache="foo,bar"'
635 // headers present in the response specify additional headers that we should
636 // not store in the cache.
637 const std::string kCacheControl = "cache-control";
638 const std::string kPrefix = "no-cache=\"";
639 std::string value;
640 void* iter = NULL;
641 while (EnumerateHeader(&iter, kCacheControl, &value)) {
642 if (value.size() > kPrefix.size() &&
643 value.compare(0, kPrefix.size(), kPrefix) == 0) {
644 // if it doesn't end with a quote, then treat as malformed
645 if (value[value.size()-1] != '\"')
646 continue;
647
648 // trim off leading and trailing bits
649 size_t len = value.size() - kPrefix.size() - 1;
650 TrimString(value.substr(kPrefix.size(), len), HTTP_LWS, &value);
651
652 size_t begin_pos = 0;
653 for (;;) {
654 // find the end of this header name
655 size_t comma_pos = value.find(',', begin_pos);
656 if (comma_pos == std::string::npos)
657 comma_pos = value.size();
658 size_t end = comma_pos;
659 while (end > begin_pos && strchr(HTTP_LWS, value[end - 1]))
660 end--;
661
662 // assuming the header is not emtpy, lowercase and insert into set
663 if (end > begin_pos) {
664 std::string name = value.substr(begin_pos, end - begin_pos);
665 StringToLowerASCII(&name);
666 result->insert(name);
667 }
668
669 // repeat
670 begin_pos = comma_pos + 1;
671 while (begin_pos < value.size() && strchr(HTTP_LWS, value[begin_pos]))
672 begin_pos++;
673 if (begin_pos >= value.size())
674 break;
675 }
676 }
677 }
678 }
679
AddHopByHopHeaders(HeaderSet * result)680 void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) {
681 for (size_t i = 0; i < arraysize(kHopByHopResponseHeaders); ++i)
682 result->insert(std::string(kHopByHopResponseHeaders[i]));
683 }
684
AddCookieHeaders(HeaderSet * result)685 void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) {
686 for (size_t i = 0; i < arraysize(kCookieResponseHeaders); ++i)
687 result->insert(std::string(kCookieResponseHeaders[i]));
688 }
689
AddChallengeHeaders(HeaderSet * result)690 void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) {
691 for (size_t i = 0; i < arraysize(kChallengeResponseHeaders); ++i)
692 result->insert(std::string(kChallengeResponseHeaders[i]));
693 }
694
AddHopContentRangeHeaders(HeaderSet * result)695 void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) {
696 result->insert("content-range");
697 }
698
GetMimeTypeAndCharset(std::string * mime_type,std::string * charset) const699 void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
700 std::string* charset) const {
701 mime_type->clear();
702 charset->clear();
703
704 std::string name = "content-type";
705 std::string value;
706
707 bool had_charset = false;
708
709 void* iter = NULL;
710 while (EnumerateHeader(&iter, name, &value))
711 HttpUtil::ParseContentType(value, mime_type, charset, &had_charset);
712 }
713
GetMimeType(std::string * mime_type) const714 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
715 std::string unused;
716 GetMimeTypeAndCharset(mime_type, &unused);
717 return !mime_type->empty();
718 }
719
GetCharset(std::string * charset) const720 bool HttpResponseHeaders::GetCharset(std::string* charset) const {
721 std::string unused;
722 GetMimeTypeAndCharset(&unused, charset);
723 return !charset->empty();
724 }
725
IsRedirect(std::string * location) const726 bool HttpResponseHeaders::IsRedirect(std::string* location) const {
727 if (!IsRedirectResponseCode(response_code_))
728 return false;
729
730 // If we lack a Location header, then we can't treat this as a redirect.
731 // We assume that the first non-empty location value is the target URL that
732 // we want to follow. TODO(darin): Is this consistent with other browsers?
733 size_t i = std::string::npos;
734 do {
735 i = FindHeader(++i, "location");
736 if (i == std::string::npos)
737 return false;
738 // If the location value is empty, then it doesn't count.
739 } while (parsed_[i].value_begin == parsed_[i].value_end);
740
741 if (location) {
742 // Escape any non-ASCII characters to preserve them. The server should
743 // only be returning ASCII here, but for compat we need to do this.
744 *location = EscapeNonASCII(
745 std::string(parsed_[i].value_begin, parsed_[i].value_end));
746 }
747
748 return true;
749 }
750
751 // static
IsRedirectResponseCode(int response_code)752 bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) {
753 // Users probably want to see 300 (multiple choice) pages, so we don't count
754 // them as redirects that need to be followed.
755 return (response_code == 301 ||
756 response_code == 302 ||
757 response_code == 303 ||
758 response_code == 307);
759 }
760
761 // From RFC 2616 section 13.2.4:
762 //
763 // The calculation to determine if a response has expired is quite simple:
764 //
765 // response_is_fresh = (freshness_lifetime > current_age)
766 //
767 // Of course, there are other factors that can force a response to always be
768 // validated or re-fetched.
769 //
RequiresValidation(const Time & request_time,const Time & response_time,const Time & current_time) const770 bool HttpResponseHeaders::RequiresValidation(const Time& request_time,
771 const Time& response_time,
772 const Time& current_time) const {
773 TimeDelta lifetime =
774 GetFreshnessLifetime(response_time);
775 if (lifetime == TimeDelta())
776 return true;
777
778 return lifetime <= GetCurrentAge(request_time, response_time, current_time);
779 }
780
781 // From RFC 2616 section 13.2.4:
782 //
783 // The max-age directive takes priority over Expires, so if max-age is present
784 // in a response, the calculation is simply:
785 //
786 // freshness_lifetime = max_age_value
787 //
788 // Otherwise, if Expires is present in the response, the calculation is:
789 //
790 // freshness_lifetime = expires_value - date_value
791 //
792 // Note that neither of these calculations is vulnerable to clock skew, since
793 // all of the information comes from the origin server.
794 //
795 // Also, if the response does have a Last-Modified time, the heuristic
796 // expiration value SHOULD be no more than some fraction of the interval since
797 // that time. A typical setting of this fraction might be 10%:
798 //
799 // freshness_lifetime = (date_value - last_modified_value) * 0.10
800 //
GetFreshnessLifetime(const Time & response_time) const801 TimeDelta HttpResponseHeaders::GetFreshnessLifetime(
802 const Time& response_time) const {
803 // Check for headers that force a response to never be fresh. For backwards
804 // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control:
805 // no-cache" even though RFC 2616 does not specify it.
806 if (HasHeaderValue("cache-control", "no-cache") ||
807 HasHeaderValue("cache-control", "no-store") ||
808 HasHeaderValue("pragma", "no-cache") ||
809 HasHeaderValue("vary", "*")) // see RFC 2616 section 13.6
810 return TimeDelta(); // not fresh
811
812 // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the
813 // Expires header after checking for max-age in GetFreshnessLifetime. This
814 // is important since "Expires: <date in the past>" means not fresh, but
815 // it should not trump a max-age value.
816
817 TimeDelta max_age_value;
818 if (GetMaxAgeValue(&max_age_value))
819 return max_age_value;
820
821 // If there is no Date header, then assume that the server response was
822 // generated at the time when we received the response.
823 Time date_value;
824 if (!GetDateValue(&date_value))
825 date_value = response_time;
826
827 Time expires_value;
828 if (GetExpiresValue(&expires_value)) {
829 // The expires value can be a date in the past!
830 if (expires_value > date_value)
831 return expires_value - date_value;
832
833 return TimeDelta(); // not fresh
834 }
835
836 // From RFC 2616 section 13.4:
837 //
838 // A response received with a status code of 200, 203, 206, 300, 301 or 410
839 // MAY be stored by a cache and used in reply to a subsequent request,
840 // subject to the expiration mechanism, unless a cache-control directive
841 // prohibits caching.
842 // ...
843 // A response received with any other status code (e.g. status codes 302
844 // and 307) MUST NOT be returned in a reply to a subsequent request unless
845 // there are cache-control directives or another header(s) that explicitly
846 // allow it.
847 //
848 // From RFC 2616 section 14.9.4:
849 //
850 // When the must-revalidate directive is present in a response received by
851 // a cache, that cache MUST NOT use the entry after it becomes stale to
852 // respond to a subsequent request without first revalidating it with the
853 // origin server. (I.e., the cache MUST do an end-to-end revalidation every
854 // time, if, based solely on the origin server's Expires or max-age value,
855 // the cached response is stale.)
856 //
857 if ((response_code_ == 200 || response_code_ == 203 ||
858 response_code_ == 206) &&
859 !HasHeaderValue("cache-control", "must-revalidate")) {
860 // TODO(darin): Implement a smarter heuristic.
861 Time last_modified_value;
862 if (GetLastModifiedValue(&last_modified_value)) {
863 // The last-modified value can be a date in the past!
864 if (last_modified_value <= date_value)
865 return (date_value - last_modified_value) / 10;
866 }
867 }
868
869 // These responses are implicitly fresh (unless otherwise overruled):
870 if (response_code_ == 300 || response_code_ == 301 || response_code_ == 410)
871 return TimeDelta::FromMicroseconds(kint64max);
872
873 return TimeDelta(); // not fresh
874 }
875
876 // From RFC 2616 section 13.2.3:
877 //
878 // Summary of age calculation algorithm, when a cache receives a response:
879 //
880 // /*
881 // * age_value
882 // * is the value of Age: header received by the cache with
883 // * this response.
884 // * date_value
885 // * is the value of the origin server's Date: header
886 // * request_time
887 // * is the (local) time when the cache made the request
888 // * that resulted in this cached response
889 // * response_time
890 // * is the (local) time when the cache received the
891 // * response
892 // * now
893 // * is the current (local) time
894 // */
895 // apparent_age = max(0, response_time - date_value);
896 // corrected_received_age = max(apparent_age, age_value);
897 // response_delay = response_time - request_time;
898 // corrected_initial_age = corrected_received_age + response_delay;
899 // resident_time = now - response_time;
900 // current_age = corrected_initial_age + resident_time;
901 //
GetCurrentAge(const Time & request_time,const Time & response_time,const Time & current_time) const902 TimeDelta HttpResponseHeaders::GetCurrentAge(const Time& request_time,
903 const Time& response_time,
904 const Time& current_time) const {
905 // If there is no Date header, then assume that the server response was
906 // generated at the time when we received the response.
907 Time date_value;
908 if (!GetDateValue(&date_value))
909 date_value = response_time;
910
911 // If there is no Age header, then assume age is zero. GetAgeValue does not
912 // modify its out param if the value does not exist.
913 TimeDelta age_value;
914 GetAgeValue(&age_value);
915
916 TimeDelta apparent_age = std::max(TimeDelta(), response_time - date_value);
917 TimeDelta corrected_received_age = std::max(apparent_age, age_value);
918 TimeDelta response_delay = response_time - request_time;
919 TimeDelta corrected_initial_age = corrected_received_age + response_delay;
920 TimeDelta resident_time = current_time - response_time;
921 TimeDelta current_age = corrected_initial_age + resident_time;
922
923 return current_age;
924 }
925
GetMaxAgeValue(TimeDelta * result) const926 bool HttpResponseHeaders::GetMaxAgeValue(TimeDelta* result) const {
927 std::string name = "cache-control";
928 std::string value;
929
930 const char kMaxAgePrefix[] = "max-age=";
931 const size_t kMaxAgePrefixLen = arraysize(kMaxAgePrefix) - 1;
932
933 void* iter = NULL;
934 while (EnumerateHeader(&iter, name, &value)) {
935 if (value.size() > kMaxAgePrefixLen) {
936 if (LowerCaseEqualsASCII(value.begin(),
937 value.begin() + kMaxAgePrefixLen,
938 kMaxAgePrefix)) {
939 *result = TimeDelta::FromSeconds(
940 StringToInt64(value.substr(kMaxAgePrefixLen)));
941 return true;
942 }
943 }
944 }
945
946 return false;
947 }
948
GetAgeValue(TimeDelta * result) const949 bool HttpResponseHeaders::GetAgeValue(TimeDelta* result) const {
950 std::string value;
951 if (!EnumerateHeader(NULL, "Age", &value))
952 return false;
953
954 *result = TimeDelta::FromSeconds(StringToInt64(value));
955 return true;
956 }
957
GetDateValue(Time * result) const958 bool HttpResponseHeaders::GetDateValue(Time* result) const {
959 return GetTimeValuedHeader("Date", result);
960 }
961
GetLastModifiedValue(Time * result) const962 bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const {
963 return GetTimeValuedHeader("Last-Modified", result);
964 }
965
GetExpiresValue(Time * result) const966 bool HttpResponseHeaders::GetExpiresValue(Time* result) const {
967 return GetTimeValuedHeader("Expires", result);
968 }
969
GetTimeValuedHeader(const std::string & name,Time * result) const970 bool HttpResponseHeaders::GetTimeValuedHeader(const std::string& name,
971 Time* result) const {
972 std::string value;
973 if (!EnumerateHeader(NULL, name, &value))
974 return false;
975
976 std::wstring value_wide(value.begin(), value.end()); // inflate ascii
977 return Time::FromString(value_wide.c_str(), result);
978 }
979
IsKeepAlive() const980 bool HttpResponseHeaders::IsKeepAlive() const {
981 if (http_version_ < HttpVersion(1, 0))
982 return false;
983
984 // NOTE: It is perhaps risky to assume that a Proxy-Connection header is
985 // meaningful when we don't know that this response was from a proxy, but
986 // Mozilla also does this, so we'll do the same.
987 std::string connection_val;
988 if (!EnumerateHeader(NULL, "connection", &connection_val))
989 EnumerateHeader(NULL, "proxy-connection", &connection_val);
990
991 bool keep_alive;
992
993 if (http_version_ == HttpVersion(1, 0)) {
994 // HTTP/1.0 responses default to NOT keep-alive
995 keep_alive = LowerCaseEqualsASCII(connection_val, "keep-alive");
996 } else {
997 // HTTP/1.1 responses default to keep-alive
998 keep_alive = !LowerCaseEqualsASCII(connection_val, "close");
999 }
1000
1001 return keep_alive;
1002 }
1003
HasStrongValidators() const1004 bool HttpResponseHeaders::HasStrongValidators() const {
1005 std::string etag_value;
1006 EnumerateHeader(NULL, "etag", &etag_value);
1007 if (!etag_value.empty()) {
1008 size_t slash = etag_value.find('/');
1009 if (slash == std::string::npos || slash == 0)
1010 return true;
1011
1012 std::string::const_iterator i = etag_value.begin();
1013 std::string::const_iterator j = etag_value.begin() + slash;
1014 HttpUtil::TrimLWS(&i, &j);
1015 if (!LowerCaseEqualsASCII(i, j, "w"))
1016 return true;
1017 }
1018
1019 Time last_modified;
1020 if (!GetLastModifiedValue(&last_modified))
1021 return false;
1022
1023 Time date;
1024 if (!GetDateValue(&date))
1025 return false;
1026
1027 return ((date - last_modified).InSeconds() >= 60);
1028 }
1029
1030 // From RFC 2616:
1031 // Content-Length = "Content-Length" ":" 1*DIGIT
GetContentLength() const1032 int64 HttpResponseHeaders::GetContentLength() const {
1033 void* iter = NULL;
1034 std::string content_length_val;
1035 if (!EnumerateHeader(&iter, "content-length", &content_length_val))
1036 return -1;
1037
1038 if (content_length_val.empty())
1039 return -1;
1040
1041 if (content_length_val[0] == '+')
1042 return -1;
1043
1044 int64 result;
1045 bool ok = StringToInt64(content_length_val, &result);
1046 if (!ok || result < 0)
1047 return -1;
1048
1049 return result;
1050 }
1051
1052 // From RFC 2616 14.16:
1053 // content-range-spec =
1054 // bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" )
1055 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*"
1056 // instance-length = 1*DIGIT
1057 // bytes-unit = "bytes"
GetContentRange(int64 * first_byte_position,int64 * last_byte_position,int64 * instance_length) const1058 bool HttpResponseHeaders::GetContentRange(int64* first_byte_position,
1059 int64* last_byte_position,
1060 int64* instance_length) const {
1061 void* iter = NULL;
1062 std::string content_range_spec;
1063 *first_byte_position = *last_byte_position = *instance_length = -1;
1064 if (!EnumerateHeader(&iter, "content-range", &content_range_spec))
1065 return false;
1066
1067 // If the header value is empty, we have an invalid header.
1068 if (content_range_spec.empty())
1069 return false;
1070
1071 size_t space_position = content_range_spec.find(' ');
1072 if (space_position == std::string::npos)
1073 return false;
1074
1075 // Invalid header if it doesn't contain "bytes-unit".
1076 std::string::const_iterator content_range_spec_begin =
1077 content_range_spec.begin();
1078 std::string::const_iterator content_range_spec_end =
1079 content_range_spec.begin() + space_position;
1080 HttpUtil::TrimLWS(&content_range_spec_begin, &content_range_spec_end);
1081 if (!LowerCaseEqualsASCII(content_range_spec_begin,
1082 content_range_spec_end,
1083 "bytes")) {
1084 return false;
1085 }
1086
1087 size_t slash_position = content_range_spec.find('/', space_position + 1);
1088 if (slash_position == std::string::npos)
1089 return false;
1090
1091 // Obtain the part behind the space and before slash.
1092 std::string::const_iterator byte_range_resp_spec_begin =
1093 content_range_spec.begin() + space_position + 1;
1094 std::string::const_iterator byte_range_resp_spec_end =
1095 content_range_spec.begin() + slash_position;
1096 HttpUtil::TrimLWS(&byte_range_resp_spec_begin, &byte_range_resp_spec_end);
1097
1098 // Parse the byte-range-resp-spec part.
1099 std::string byte_range_resp_spec(byte_range_resp_spec_begin,
1100 byte_range_resp_spec_end);
1101 // If byte-range-resp-spec != "*".
1102 if (!LowerCaseEqualsASCII(byte_range_resp_spec, "*")) {
1103 size_t minus_position = byte_range_resp_spec.find('-');
1104 if (minus_position != std::string::npos) {
1105 // Obtain first-byte-pos.
1106 std::string::const_iterator first_byte_pos_begin =
1107 byte_range_resp_spec.begin();
1108 std::string::const_iterator first_byte_pos_end =
1109 byte_range_resp_spec.begin() + minus_position;
1110 HttpUtil::TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
1111
1112 bool ok = StringToInt64(
1113 std::string(first_byte_pos_begin, first_byte_pos_end),
1114 first_byte_position);
1115
1116 // Obtain last-byte-pos.
1117 std::string::const_iterator last_byte_pos_begin =
1118 byte_range_resp_spec.begin() + minus_position + 1;
1119 std::string::const_iterator last_byte_pos_end =
1120 byte_range_resp_spec.end();
1121 HttpUtil::TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
1122
1123 ok &= StringToInt64(
1124 std::string(last_byte_pos_begin, last_byte_pos_end),
1125 last_byte_position);
1126 if (!ok) {
1127 *first_byte_position = *last_byte_position = -1;
1128 return false;
1129 }
1130 if (*first_byte_position < 0 || *last_byte_position < 0 ||
1131 *first_byte_position > *last_byte_position)
1132 return false;
1133 } else {
1134 return false;
1135 }
1136 }
1137
1138 // Parse the instance-length part.
1139 // If instance-length == "*".
1140 std::string::const_iterator instance_length_begin =
1141 content_range_spec.begin() + slash_position + 1;
1142 std::string::const_iterator instance_length_end =
1143 content_range_spec.end();
1144 HttpUtil::TrimLWS(&instance_length_begin, &instance_length_end);
1145
1146 if (LowerCaseEqualsASCII(instance_length_begin, instance_length_end, "*")) {
1147 return false;
1148 } else if (!StringToInt64(
1149 std::string(instance_length_begin, instance_length_end),
1150 instance_length)) {
1151 *instance_length = -1;
1152 return false;
1153 }
1154
1155 // We have all the values; let's verify that they make sense for a 206
1156 // response.
1157 if (*first_byte_position < 0 || *last_byte_position < 0 ||
1158 *instance_length < 0 || *instance_length - 1 < *last_byte_position)
1159 return false;
1160
1161 return true;
1162 }
1163
1164 } // namespace net
1165