1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 // The rules for parsing content-types were borrowed from Firefox:
11 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
12
13 #include "net/http/http_util.h"
14
15 #include <algorithm>
16 #include <optional>
17 #include <string>
18 #include <string_view>
19
20 #include "base/check_op.h"
21 #include "base/strings/strcat.h"
22 #include "base/strings/string_number_conversions.h"
23 #include "base/strings/string_split.h"
24 #include "base/strings/string_tokenizer.h"
25 #include "base/strings/string_util.h"
26 #include "base/strings/stringprintf.h"
27 #include "base/time/time.h"
28 #include "net/base/features.h"
29 #include "net/base/mime_util.h"
30 #include "net/base/parse_number.h"
31 #include "net/base/url_util.h"
32 #include "net/http/http_response_headers.h"
33
34 namespace net {
35
36 namespace {
37
38 template <typename ConstIterator>
TrimLWSImplementation(ConstIterator * begin,ConstIterator * end)39 void TrimLWSImplementation(ConstIterator* begin, ConstIterator* end) {
40 // leading whitespace
41 while (*begin < *end && HttpUtil::IsLWS((*begin)[0]))
42 ++(*begin);
43
44 // trailing whitespace
45 while (*begin < *end && HttpUtil::IsLWS((*end)[-1]))
46 --(*end);
47 }
48
49 // Helper class that builds the list of languages for the Accept-Language
50 // headers.
51 // The output is a comma-separated list of languages as string.
52 // Duplicates are removed.
53 class AcceptLanguageBuilder {
54 public:
55 // Adds a language to the string.
56 // Duplicates are ignored.
AddLanguageCode(const std::string & language)57 void AddLanguageCode(const std::string& language) {
58 // No Q score supported, only supports ASCII.
59 DCHECK_EQ(std::string::npos, language.find_first_of("; "));
60 DCHECK(base::IsStringASCII(language));
61 if (seen_.find(language) == seen_.end()) {
62 if (str_.empty()) {
63 base::StringAppendF(&str_, "%s", language.c_str());
64 } else {
65 base::StringAppendF(&str_, ",%s", language.c_str());
66 }
67 seen_.insert(language);
68 }
69 }
70
71 // Returns the string constructed up to this point.
GetString() const72 std::string GetString() const { return str_; }
73
74 private:
75 // The string that contains the list of languages, comma-separated.
76 std::string str_;
77 // Set the remove duplicates.
78 std::unordered_set<std::string> seen_;
79 };
80
81 // Extract the base language code from a language code.
82 // If there is no '-' in the code, the original code is returned.
GetBaseLanguageCode(const std::string & language_code)83 std::string GetBaseLanguageCode(const std::string& language_code) {
84 std::vector<std::string> tokens = base::SplitString(
85 language_code, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
86 return tokens.empty() ? "" : std::move(tokens[0]);
87 }
88
89 } // namespace
90
91 // HttpUtil -------------------------------------------------------------------
92
GenerateRequestLine(std::string_view method,const GURL & url,bool is_for_get_to_http_proxy)93 std::string HttpUtil::GenerateRequestLine(std::string_view method,
94 const GURL& url,
95 bool is_for_get_to_http_proxy) {
96 static constexpr char kSuffix[] = " HTTP/1.1\r\n";
97 const std::string path = is_for_get_to_http_proxy
98 ? HttpUtil::SpecForRequest(url)
99 : url.PathForRequest();
100 return base::StrCat({method, " ", path, kSuffix});
101 }
102
103 // static
SpecForRequest(const GURL & url)104 std::string HttpUtil::SpecForRequest(const GURL& url) {
105 DCHECK(url.is_valid() &&
106 (url.SchemeIsHTTPOrHTTPS() || url.SchemeIsWSOrWSS()));
107 return SimplifyUrlForRequest(url).spec();
108 }
109
110 // static
ParseContentType(std::string_view content_type_str,std::string * mime_type,std::string * charset,bool * had_charset,std::string * boundary)111 void HttpUtil::ParseContentType(std::string_view content_type_str,
112 std::string* mime_type,
113 std::string* charset,
114 bool* had_charset,
115 std::string* boundary) {
116 std::string mime_type_value;
117 base::StringPairs params;
118 bool result = ParseMimeType(content_type_str, &mime_type_value, ¶ms);
119 // If the server sent "*/*", it is meaningless, so do not store it.
120 // Also, reject a mime-type if it does not include a slash.
121 // Some servers give junk after the charset parameter, which may
122 // include a comma, so this check makes us a bit more tolerant.
123 if (!result || content_type_str == "*/*")
124 return;
125
126 std::string charset_value;
127 bool type_has_charset = false;
128 bool type_has_boundary = false;
129 for (const auto& param : params) {
130 // Trim LWS from param value, ParseMimeType() leaves WS for quoted-string.
131 // TODO(mmenke): Check that name has only valid characters.
132 if (!type_has_charset &&
133 base::EqualsCaseInsensitiveASCII(param.first, "charset")) {
134 type_has_charset = true;
135 charset_value = std::string(HttpUtil::TrimLWS(param.second));
136 continue;
137 }
138
139 if (boundary && !type_has_boundary &&
140 base::EqualsCaseInsensitiveASCII(param.first, "boundary")) {
141 type_has_boundary = true;
142 *boundary = std::string(HttpUtil::TrimLWS(param.second));
143 continue;
144 }
145 }
146
147 // If `mime_type_value` is the same as `mime_type`, then just update
148 // `charset`. However, if `charset` is empty and `mime_type` hasn't changed,
149 // then don't wipe-out an existing `charset`.
150 bool eq = base::EqualsCaseInsensitiveASCII(mime_type_value, *mime_type);
151 if (!eq) {
152 *mime_type = base::ToLowerASCII(mime_type_value);
153 }
154 if ((!eq && *had_charset) || type_has_charset) {
155 *had_charset = true;
156 *charset = base::ToLowerASCII(charset_value);
157 }
158 }
159
160 // static
ParseRangeHeader(const std::string & ranges_specifier,std::vector<HttpByteRange> * ranges)161 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier,
162 std::vector<HttpByteRange>* ranges) {
163 size_t equal_char_offset = ranges_specifier.find('=');
164 if (equal_char_offset == std::string::npos)
165 return false;
166
167 // Try to extract bytes-unit part.
168 std::string_view bytes_unit =
169 std::string_view(ranges_specifier).substr(0, equal_char_offset);
170
171 // "bytes" unit identifier is not found.
172 bytes_unit = TrimLWS(bytes_unit);
173 if (!base::EqualsCaseInsensitiveASCII(bytes_unit, "bytes")) {
174 return false;
175 }
176
177 std::string::const_iterator byte_range_set_begin =
178 ranges_specifier.begin() + equal_char_offset + 1;
179 std::string::const_iterator byte_range_set_end = ranges_specifier.end();
180
181 ValuesIterator byte_range_set_iterator(
182 std::string_view(byte_range_set_begin, byte_range_set_end),
183 /*delimiter=*/',');
184 while (byte_range_set_iterator.GetNext()) {
185 std::string_view value = byte_range_set_iterator.value();
186 size_t minus_char_offset = value.find('-');
187 // If '-' character is not found, reports failure.
188 if (minus_char_offset == std::string::npos)
189 return false;
190
191 std::string_view first_byte_pos = value.substr(0, minus_char_offset);
192 first_byte_pos = TrimLWS(first_byte_pos);
193
194 HttpByteRange range;
195 // Try to obtain first-byte-pos.
196 if (!first_byte_pos.empty()) {
197 int64_t first_byte_position = -1;
198 if (!base::StringToInt64(first_byte_pos, &first_byte_position))
199 return false;
200 range.set_first_byte_position(first_byte_position);
201 }
202
203 std::string_view last_byte_pos = value.substr(minus_char_offset + 1);
204 last_byte_pos = TrimLWS(last_byte_pos);
205
206 // We have last-byte-pos or suffix-byte-range-spec in this case.
207 if (!last_byte_pos.empty()) {
208 int64_t last_byte_position;
209 if (!base::StringToInt64(last_byte_pos, &last_byte_position))
210 return false;
211 if (range.HasFirstBytePosition())
212 range.set_last_byte_position(last_byte_position);
213 else
214 range.set_suffix_length(last_byte_position);
215 } else if (!range.HasFirstBytePosition()) {
216 return false;
217 }
218
219 // Do a final check on the HttpByteRange object.
220 if (!range.IsValid())
221 return false;
222 ranges->push_back(range);
223 }
224 return !ranges->empty();
225 }
226
227 // static
228 // From RFC 2616 14.16:
229 // content-range-spec =
230 // bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" )
231 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*"
232 // instance-length = 1*DIGIT
233 // bytes-unit = "bytes"
ParseContentRangeHeaderFor206(std::string_view content_range_spec,int64_t * first_byte_position,int64_t * last_byte_position,int64_t * instance_length)234 bool HttpUtil::ParseContentRangeHeaderFor206(
235 std::string_view content_range_spec,
236 int64_t* first_byte_position,
237 int64_t* last_byte_position,
238 int64_t* instance_length) {
239 *first_byte_position = *last_byte_position = *instance_length = -1;
240 content_range_spec = TrimLWS(content_range_spec);
241
242 size_t space_position = content_range_spec.find(' ');
243 if (space_position == std::string_view::npos) {
244 return false;
245 }
246
247 // Invalid header if it doesn't contain "bytes-unit".
248 if (!base::EqualsCaseInsensitiveASCII(
249 TrimLWS(content_range_spec.substr(0, space_position)), "bytes")) {
250 return false;
251 }
252
253 size_t minus_position = content_range_spec.find('-', space_position + 1);
254 if (minus_position == std::string_view::npos) {
255 return false;
256 }
257 size_t slash_position = content_range_spec.find('/', minus_position + 1);
258 if (slash_position == std::string_view::npos) {
259 return false;
260 }
261
262 if (base::StringToInt64(
263 TrimLWS(content_range_spec.substr(
264 space_position + 1, minus_position - (space_position + 1))),
265 first_byte_position) &&
266 *first_byte_position >= 0 &&
267 base::StringToInt64(
268 TrimLWS(content_range_spec.substr(
269 minus_position + 1, slash_position - (minus_position + 1))),
270 last_byte_position) &&
271 *last_byte_position >= *first_byte_position &&
272 base::StringToInt64(
273 TrimLWS(content_range_spec.substr(slash_position + 1)),
274 instance_length) &&
275 *instance_length > *last_byte_position) {
276 return true;
277 }
278 *first_byte_position = *last_byte_position = *instance_length = -1;
279 return false;
280 }
281
282 // static
ParseRetryAfterHeader(const std::string & retry_after_string,base::Time now,base::TimeDelta * retry_after)283 bool HttpUtil::ParseRetryAfterHeader(const std::string& retry_after_string,
284 base::Time now,
285 base::TimeDelta* retry_after) {
286 uint32_t seconds;
287 base::Time time;
288 base::TimeDelta interval;
289
290 if (ParseUint32(retry_after_string, ParseIntFormat::NON_NEGATIVE, &seconds)) {
291 interval = base::Seconds(seconds);
292 } else if (base::Time::FromUTCString(retry_after_string.c_str(), &time)) {
293 interval = time - now;
294 } else {
295 return false;
296 }
297
298 if (interval < base::Seconds(0))
299 return false;
300
301 *retry_after = interval;
302 return true;
303 }
304
305 // static
TimeFormatHTTP(base::Time time)306 std::string HttpUtil::TimeFormatHTTP(base::Time time) {
307 static constexpr char kWeekdayName[7][4] = {"Sun", "Mon", "Tue", "Wed",
308 "Thu", "Fri", "Sat"};
309 static constexpr char kMonthName[12][4] = {"Jan", "Feb", "Mar", "Apr",
310 "May", "Jun", "Jul", "Aug",
311 "Sep", "Oct", "Nov", "Dec"};
312 base::Time::Exploded exploded;
313 time.UTCExplode(&exploded);
314 return base::StringPrintf(
315 "%s, %02d %s %04d %02d:%02d:%02d GMT", kWeekdayName[exploded.day_of_week],
316 exploded.day_of_month, kMonthName[exploded.month - 1], exploded.year,
317 exploded.hour, exploded.minute, exploded.second);
318 }
319
320 namespace {
321
// Header names that are always rejected by HttpUtil::IsSafeHeader(),
// compared case-insensitively. The list comes from the fetch standard.
constexpr const char* kForbiddenHeaderFields[] = {
    "accept-charset",
    "accept-encoding",
    "access-control-request-headers",
    "access-control-request-method",
    "access-control-request-private-network",
    "connection",
    "content-length",
    "cookie",
    "cookie2",
    "date",
    "dnt",
    "expect",
    "host",
    "keep-alive",
    "origin",
    "referer",
    "set-cookie",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    // TODO(mmenke): This is no longer banned, but still here due to issues
    // mentioned in https://crbug.com/571722.
    "user-agent",
    "via",
};
351
// Header names that HttpUtil::IsSafeHeader() rejects only when their value
// lists one of the forbidden method names. The list comes from the fetch
// standard.
constexpr const char* kForbiddenHeaderFieldsWithForbiddenMethod[] = {
    "x-http-method",
    "x-http-method-override",
    "x-method-override",
};
360
// The forbidden method names defined in the fetch standard. They are matched
// against the values of the headers listed in
// kForbiddenHeaderFieldsWithForbiddenMethod.
constexpr const char* kForbiddenMethods[] = {
    "connect",
    "trace",
    "track",
};
368
369 } // namespace
370
371 // static
IsMethodSafe(std::string_view method)372 bool HttpUtil::IsMethodSafe(std::string_view method) {
373 return method == "GET" || method == "HEAD" || method == "OPTIONS" ||
374 method == "TRACE";
375 }
376
377 // static
IsMethodIdempotent(std::string_view method)378 bool HttpUtil::IsMethodIdempotent(std::string_view method) {
379 return IsMethodSafe(method) || method == "PUT" || method == "DELETE";
380 }
381
382 // static
IsSafeHeader(std::string_view name,std::string_view value)383 bool HttpUtil::IsSafeHeader(std::string_view name, std::string_view value) {
384 if (base::StartsWith(name, "proxy-", base::CompareCase::INSENSITIVE_ASCII) ||
385 base::StartsWith(name, "sec-", base::CompareCase::INSENSITIVE_ASCII))
386 return false;
387
388 for (const char* field : kForbiddenHeaderFields) {
389 if (base::EqualsCaseInsensitiveASCII(name, field))
390 return false;
391 }
392
393 bool is_forbidden_header_fields_with_forbidden_method = false;
394 for (const char* field : kForbiddenHeaderFieldsWithForbiddenMethod) {
395 if (base::EqualsCaseInsensitiveASCII(name, field)) {
396 is_forbidden_header_fields_with_forbidden_method = true;
397 break;
398 }
399 }
400 if (is_forbidden_header_fields_with_forbidden_method) {
401 ValuesIterator method_iterator(value, ',');
402 while (method_iterator.GetNext()) {
403 std::string_view method = method_iterator.value();
404 for (const char* forbidden_method : kForbiddenMethods) {
405 if (base::EqualsCaseInsensitiveASCII(method, forbidden_method))
406 return false;
407 }
408 }
409 }
410 return true;
411 }
412
413 // static
IsValidHeaderName(std::string_view name)414 bool HttpUtil::IsValidHeaderName(std::string_view name) {
415 // Check whether the header name is RFC 2616-compliant.
416 return HttpUtil::IsToken(name);
417 }
418
419 // static
IsValidHeaderValue(std::string_view value)420 bool HttpUtil::IsValidHeaderValue(std::string_view value) {
421 // Just a sanity check: disallow NUL, CR and LF.
422 for (char c : value) {
423 if (c == '\0' || c == '\r' || c == '\n')
424 return false;
425 }
426 return true;
427 }
428
429 // static
IsNonCoalescingHeader(std::string_view name)430 bool HttpUtil::IsNonCoalescingHeader(std::string_view name) {
431 // NOTE: "set-cookie2" headers do not support expires attributes, so we don't
432 // have to list them here.
433 // As of 2023, using FlatSet here actually makes the lookup slower, and
434 // unordered_set is even slower than that.
435 static constexpr std::string_view kNonCoalescingHeaders[] = {
436 "date", "expires", "last-modified",
437 "location", // See bug 1050541 for details
438 "retry-after", "set-cookie",
439 // The format of auth-challenges mixes both space separated tokens and
440 // comma separated properties, so coalescing on comma won't work.
441 "www-authenticate", "proxy-authenticate",
442 // STS specifies that UAs must not process any STS headers after the first
443 // one.
444 "strict-transport-security"};
445
446 for (std::string_view header : kNonCoalescingHeaders) {
447 if (base::EqualsCaseInsensitiveASCII(name, header)) {
448 return true;
449 }
450 }
451 return false;
452 }
453
454 // static
TrimLWS(std::string::const_iterator * begin,std::string::const_iterator * end)455 void HttpUtil::TrimLWS(std::string::const_iterator* begin,
456 std::string::const_iterator* end) {
457 TrimLWSImplementation(begin, end);
458 }
459
460 // static
TrimLWS(std::string_view string)461 std::string_view HttpUtil::TrimLWS(std::string_view string) {
462 const char* begin = string.data();
463 const char* end = string.data() + string.size();
464 TrimLWSImplementation(&begin, &end);
465 return std::string_view(begin, end - begin);
466 }
467
IsTokenChar(char c)468 bool HttpUtil::IsTokenChar(char c) {
469 return !(c >= 0x7F || c <= 0x20 || c == '(' || c == ')' || c == '<' ||
470 c == '>' || c == '@' || c == ',' || c == ';' || c == ':' ||
471 c == '\\' || c == '"' || c == '/' || c == '[' || c == ']' ||
472 c == '?' || c == '=' || c == '{' || c == '}');
473 }
474
475 // See RFC 7230 Sec 3.2.6 for the definition of |token|.
IsToken(std::string_view string)476 bool HttpUtil::IsToken(std::string_view string) {
477 if (string.empty())
478 return false;
479 for (char c : string) {
480 if (!IsTokenChar(c))
481 return false;
482 }
483 return true;
484 }
485
486 // See RFC 5987 Sec 3.2.1 for the definition of |parmname|.
IsParmName(std::string_view str)487 bool HttpUtil::IsParmName(std::string_view str) {
488 if (str.empty())
489 return false;
490 for (char c : str) {
491 if (!IsTokenChar(c) || c == '*' || c == '\'' || c == '%')
492 return false;
493 }
494 return true;
495 }
496
497 namespace {
498
// Returns true if `c` is the double-quote character.
bool IsQuote(char c) {
  return '"' == c;
}

// Removes the surrounding double quotes from `str` and resolves quoted-pair
// escapes (RFC 2616 section 2.2), writing the result to `*out`.
//
// Returns false, leaving `*out` untouched, when `str` does not both start and
// end with a double quote (which requires at least two characters). With
// `strict_quotes` set, it also returns false if the content contains an
// unescaped double quote or ends with a dangling backslash. Without it, an
// inner unescaped quote is copied through and a dangling backslash is
// dropped.
bool UnquoteImpl(std::string_view str, bool strict_quotes, std::string* out) {
  // Need an opening and a separate closing quote mark.
  if (str.size() < 2 || !IsQuote(str.front()) || !IsQuote(str.back())) {
    return false;
  }

  // The text between the two quote marks.
  const std::string_view content = str.substr(1, str.size() - 2);

  std::string result;
  bool escaped = false;
  for (const char c : content) {
    if (escaped) {
      // Whatever follows a backslash is taken literally.
      result.push_back(c);
      escaped = false;
      continue;
    }
    if (c == '\\') {
      escaped = true;
      continue;
    }
    if (strict_quotes && IsQuote(c)) {
      return false;
    }
    result.push_back(c);
  }

  // A trailing backslash means the terminal quote was itself escaped.
  if (strict_quotes && escaped) {
    return false;
  }

  *out = std::move(result);
  return true;
}
540
541 } // anonymous namespace
542
543 // static
Unquote(std::string_view str)544 std::string HttpUtil::Unquote(std::string_view str) {
545 std::string result;
546 if (!UnquoteImpl(str, false, &result))
547 return std::string(str);
548
549 return result;
550 }
551
552 // static
StrictUnquote(std::string_view str,std::string * out)553 bool HttpUtil::StrictUnquote(std::string_view str, std::string* out) {
554 return UnquoteImpl(str, true, out);
555 }
556
557 // static
Quote(std::string_view str)558 std::string HttpUtil::Quote(std::string_view str) {
559 std::string escaped;
560 escaped.reserve(2 + str.size());
561
562 // Esape any backslashes or quotemarks within the string, and
563 // then surround with quotes.
564 escaped.push_back('"');
565 for (char c : str) {
566 if (c == '"' || c == '\\')
567 escaped.push_back('\\');
568 escaped.push_back(c);
569 }
570 escaped.push_back('"');
571 return escaped;
572 }
573
574 // Find the "http" substring in a status line. This allows for
575 // some slop at the start. If the "http" string could not be found
576 // then returns std::string::npos.
577 // static
LocateStartOfStatusLine(base::span<const uint8_t> buf)578 size_t HttpUtil::LocateStartOfStatusLine(base::span<const uint8_t> buf) {
579 const size_t slop = 4;
580 const size_t http_len = 4;
581
582 if (buf.size() >= http_len) {
583 size_t i_max = std::min(buf.size() - http_len, slop);
584 for (size_t i = 0; i <= i_max; ++i) {
585 if (base::EqualsCaseInsensitiveASCII(
586 base::as_string_view(buf.subspan(i, http_len)), "http")) {
587 return i;
588 }
589 }
590 }
591 return std::string::npos; // Not found
592 }
593
LocateEndOfHeadersHelper(base::span<const uint8_t> buf,size_t i,bool accept_empty_header_list)594 static size_t LocateEndOfHeadersHelper(base::span<const uint8_t> buf,
595 size_t i,
596 bool accept_empty_header_list) {
597 char last_c = '\0';
598 bool was_lf = false;
599 if (accept_empty_header_list) {
600 // Normally two line breaks signal the end of a header list. An empty header
601 // list ends with a single line break at the start of the buffer.
602 last_c = '\n';
603 was_lf = true;
604 }
605
606 for (; i < buf.size(); ++i) {
607 char c = buf[i];
608 if (c == '\n') {
609 if (was_lf)
610 return i + 1;
611 was_lf = true;
612 } else if (c != '\r' || last_c != '\n') {
613 was_lf = false;
614 }
615 last_c = c;
616 }
617 return std::string::npos;
618 }
619
LocateEndOfAdditionalHeaders(base::span<const uint8_t> buf,size_t i)620 size_t HttpUtil::LocateEndOfAdditionalHeaders(base::span<const uint8_t> buf,
621 size_t i) {
622 return LocateEndOfHeadersHelper(buf, i, true);
623 }
624
LocateEndOfHeaders(base::span<const uint8_t> buf,size_t i)625 size_t HttpUtil::LocateEndOfHeaders(base::span<const uint8_t> buf, size_t i) {
626 return LocateEndOfHeadersHelper(buf, i, false);
627 }
628
629 // In order for a line to be continuable, it must specify a
630 // non-blank header-name. Line continuations are specifically for
631 // header values -- do not allow headers names to span lines.
IsLineSegmentContinuable(std::string_view line)632 static bool IsLineSegmentContinuable(std::string_view line) {
633 if (line.empty())
634 return false;
635
636 size_t colon = line.find(':');
637 if (colon == std::string_view::npos) {
638 return false;
639 }
640
641 std::string_view name = line.substr(0, colon);
642
643 // Name can't be empty.
644 if (name.empty())
645 return false;
646
647 // Can't start with LWS (this would imply the segment is a continuation)
648 if (HttpUtil::IsLWS(name[0]))
649 return false;
650
651 return true;
652 }
653
// Helper used by AssembleRawHeaders: returns the offset of the first CR or
// LF in `str`, or `str.size()` if it contains neither.
static size_t FindStatusLineEnd(std::string_view str) {
  const size_t line_break = str.find_first_of("\r\n");
  return line_break == std::string_view::npos ? str.size() : line_break;
}
662
663 // Helper used by AssembleRawHeaders, to skip past leading LWS.
RemoveLeadingNonLWS(std::string_view str)664 static std::string_view RemoveLeadingNonLWS(std::string_view str) {
665 for (size_t i = 0; i < str.size(); i++) {
666 if (!HttpUtil::IsLWS(str[i]))
667 return str.substr(i);
668 }
669 return std::string_view(); // Remove everything.
670 }
671
AssembleRawHeaders(std::string_view input)672 std::string HttpUtil::AssembleRawHeaders(std::string_view input) {
673 std::string raw_headers;
674 raw_headers.reserve(input.size());
675
676 // Skip any leading slop, since the consumers of this output
677 // (HttpResponseHeaders) don't deal with it.
678 size_t status_begin_offset =
679 LocateStartOfStatusLine(base::as_byte_span(input));
680 if (status_begin_offset != std::string::npos)
681 input.remove_prefix(status_begin_offset);
682
683 // Copy the status line.
684 size_t status_line_end = FindStatusLineEnd(input);
685 raw_headers.append(input.data(), status_line_end);
686 input.remove_prefix(status_line_end);
687
688 // After the status line, every subsequent line is a header line segment.
689 // Should a segment start with LWS, it is a continuation of the previous
690 // line's field-value.
691
692 // TODO(ericroman): is this too permissive? (delimits on [\r\n]+)
693 base::CStringTokenizer lines(input.data(), input.data() + input.size(),
694 "\r\n");
695
696 // This variable is true when the previous line was continuable.
697 bool prev_line_continuable = false;
698
699 while (lines.GetNext()) {
700 std::string_view line = lines.token_piece();
701
702 if (prev_line_continuable && IsLWS(line[0])) {
703 // Join continuation; reduce the leading LWS to a single SP.
704 base::StrAppend(&raw_headers, {" ", RemoveLeadingNonLWS(line)});
705 } else {
706 // Terminate the previous line and copy the raw data to output.
707 base::StrAppend(&raw_headers, {"\n", line});
708
709 // Check if the current line can be continued.
710 prev_line_continuable = IsLineSegmentContinuable(line);
711 }
712 }
713
714 raw_headers.append("\n\n", 2);
715
716 // Use '\0' as the canonical line terminator. If the input already contained
717 // any embeded '\0' characters we will strip them first to avoid interpreting
718 // them as line breaks.
719 std::erase(raw_headers, '\0');
720
721 std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0');
722
723 return raw_headers;
724 }
725
ConvertHeadersBackToHTTPResponse(const std::string & str)726 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) {
727 std::string disassembled_headers;
728 base::StringTokenizer tokenizer(str, std::string(1, '\0'));
729 while (tokenizer.GetNext()) {
730 base::StrAppend(&disassembled_headers, {tokenizer.token_piece(), "\r\n"});
731 }
732 disassembled_headers.append("\r\n");
733
734 return disassembled_headers;
735 }
736
ExpandLanguageList(const std::string & language_prefs)737 std::string HttpUtil::ExpandLanguageList(const std::string& language_prefs) {
738 const std::vector<std::string> languages = base::SplitString(
739 language_prefs, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
740
741 if (languages.empty())
742 return "";
743
744 AcceptLanguageBuilder builder;
745
746 const size_t size = languages.size();
747 for (size_t i = 0; i < size; ++i) {
748 const std::string& language = languages[i];
749 builder.AddLanguageCode(language);
750
751 // Extract the primary language subtag.
752 const std::string& base_language = GetBaseLanguageCode(language);
753
754 // Skip 'x' and 'i' as a primary language subtag per RFC 5646 section 2.1.1.
755 if (base_language == "x" || base_language == "i")
756 continue;
757
758 // Look ahead and add the primary language subtag as a language if the next
759 // language is not part of the same family. This may not be perfect because
760 // an input of "en-US,fr,en" will yield "en-US,en,fr,en" and later make "en"
761 // a higher priority than "fr" despite the original preference.
762 const size_t j = i + 1;
763 if (j >= size || GetBaseLanguageCode(languages[j]) != base_language) {
764 builder.AddLanguageCode(base_language);
765 }
766 }
767
768 return builder.GetString();
769 }
770
771 // TODO(jungshik): This function assumes that the input is a comma separated
772 // list without any whitespace. As long as it comes from the preference and
773 // a user does not manually edit the preference file, it's the case. Still,
774 // we may have to make it more robust.
GenerateAcceptLanguageHeader(const std::string & raw_language_list)775 std::string HttpUtil::GenerateAcceptLanguageHeader(
776 const std::string& raw_language_list) {
777 // We use integers for qvalue and qvalue decrement that are 10 times
778 // larger than actual values to avoid a problem with comparing
779 // two floating point numbers.
780 const unsigned int kQvalueDecrement10 = 1;
781 unsigned int qvalue10 = 10;
782 base::StringTokenizer t(raw_language_list, ",");
783 std::string lang_list_with_q;
784 while (t.GetNext()) {
785 std::string language = t.token();
786 if (qvalue10 == 10) {
787 // q=1.0 is implicit.
788 lang_list_with_q = language;
789 } else {
790 DCHECK_LT(qvalue10, 10U);
791 base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(),
792 qvalue10);
793 }
794 // It does not make sense to have 'q=0'.
795 if (qvalue10 > kQvalueDecrement10)
796 qvalue10 -= kQvalueDecrement10;
797 }
798 return lang_list_with_q;
799 }
800
HasStrongValidators(HttpVersion version,std::optional<std::string_view> etag_header,std::optional<std::string_view> last_modified_header,std::optional<std::string_view> date_header)801 bool HttpUtil::HasStrongValidators(
802 HttpVersion version,
803 std::optional<std::string_view> etag_header,
804 std::optional<std::string_view> last_modified_header,
805 std::optional<std::string_view> date_header) {
806 if (version < HttpVersion(1, 1))
807 return false;
808
809 if (etag_header && !etag_header->empty()) {
810 size_t slash = etag_header->find('/');
811 if (slash == std::string_view::npos || slash == 0) {
812 return true;
813 }
814
815 std::string_view trimmed_etag = TrimLWS(etag_header->substr(0, slash));
816 if (!base::EqualsCaseInsensitiveASCII(trimmed_etag, "w")) {
817 return true;
818 }
819 }
820
821 base::Time last_modified;
822 if (!last_modified_header ||
823 !base::Time::FromString(std::string(*last_modified_header).c_str(),
824 &last_modified)) {
825 return false;
826 }
827
828 base::Time date;
829 if (!date_header ||
830 !base::Time::FromString(std::string(*date_header).c_str(), &date)) {
831 return false;
832 }
833
834 // Last-Modified is implicitly weak unless it is at least 60 seconds before
835 // the Date value.
836 return ((date - last_modified).InSeconds() >= 60);
837 }
838
HasValidators(HttpVersion version,std::optional<std::string_view> etag_header,std::optional<std::string_view> last_modified_header)839 bool HttpUtil::HasValidators(
840 HttpVersion version,
841 std::optional<std::string_view> etag_header,
842 std::optional<std::string_view> last_modified_header) {
843 if (version < HttpVersion(1, 0))
844 return false;
845
846 base::Time last_modified;
847 // Have to construct a C-style string here, since that's what
848 // base::Time::FromString requires.
849 if (last_modified_header &&
850 base::Time::FromString(std::string(*last_modified_header).c_str(),
851 &last_modified)) {
852 return true;
853 }
854
855 // It is OK to consider an empty string in etag_header to be a missing header
856 // since valid ETags are always quoted-strings (see RFC 2616 3.11) and thus
857 // empty ETags aren't empty strings (i.e., an empty ETag might be "\"\"").
858 return version >= HttpVersion(1, 1) && etag_header && !etag_header->empty();
859 }
860
861 // Functions for histogram initialization. The code 0 is put in the map to
862 // track status codes that are invalid.
863 // TODO(gavinp): Greatly prune the collected codes once we learn which
864 // ones are not sent in practice, to reduce upload size & memory use.
865
// Inclusive bounds of the status codes that get their own histogram bucket;
// every code outside this range is mapped to 0.
enum : int {
  // Lowest status code tracked individually.
  HISTOGRAM_MIN_HTTP_STATUS_CODE = 100,
  // Highest status code tracked individually.
  HISTOGRAM_MAX_HTTP_STATUS_CODE = 599,
};
870
871 // static
GetStatusCodesForHistogram()872 std::vector<int> HttpUtil::GetStatusCodesForHistogram() {
873 std::vector<int> codes;
874 codes.reserve(
875 HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2);
876 codes.push_back(0);
877 for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE;
878 i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i)
879 codes.push_back(i);
880 return codes;
881 }
882
883 // static
MapStatusCodeForHistogram(int code)884 int HttpUtil::MapStatusCodeForHistogram(int code) {
885 if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code &&
886 code <= HISTOGRAM_MAX_HTTP_STATUS_CODE)
887 return code;
888 return 0;
889 }
890
891 // BNF from section 4.2 of RFC 2616:
892 //
893 // message-header = field-name ":" [ field-value ]
894 // field-name = token
895 // field-value = *( field-content | LWS )
896 // field-content = <the OCTETs making up the field-value
897 // and consisting of either *TEXT or combinations
898 // of token, separators, and quoted-string>
899 //
900
// Sets up iteration over the header block spanning
// [headers_begin, headers_end). The range and `line_delimiter` are handed to
// `lines_`, which GetNext() uses to obtain one line at a time. GetNext()
// stores iterators taken from `lines_` in the name/value members.
HttpUtil::HeadersIterator::HeadersIterator(
    std::string::const_iterator headers_begin,
    std::string::const_iterator headers_end,
    const std::string& line_delimiter)
    : lines_(headers_begin, headers_end, line_delimiter) {
}
907
// Compiler-generated destructor, defined out of line.
HttpUtil::HeadersIterator::~HeadersIterator() = default;
909
GetNext()910 bool HttpUtil::HeadersIterator::GetNext() {
911 while (lines_.GetNext()) {
912 name_begin_ = lines_.token_begin();
913 values_end_ = lines_.token_end();
914
915 std::string::const_iterator colon(std::find(name_begin_, values_end_, ':'));
916 if (colon == values_end_)
917 continue; // skip malformed header
918
919 name_end_ = colon;
920
921 // If the name starts with LWS, it is an invalid line.
922 // Leading LWS implies a line continuation, and these should have
923 // already been joined by AssembleRawHeaders().
924 if (name_begin_ == name_end_ || IsLWS(*name_begin_))
925 continue;
926
927 TrimLWS(&name_begin_, &name_end_);
928 DCHECK(name_begin_ < name_end_);
929 if (!IsToken(base::MakeStringPiece(name_begin_, name_end_)))
930 continue; // skip malformed header
931
932 values_begin_ = colon + 1;
933 TrimLWS(&values_begin_, &values_end_);
934
935 // if we got a header name, then we are done.
936 return true;
937 }
938 return false;
939 }
940
AdvanceTo(const char * name)941 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) {
942 DCHECK(name != nullptr);
943 DCHECK_EQ(0, base::ToLowerASCII(name).compare(name))
944 << "the header name must be in all lower case";
945
946 while (GetNext()) {
947 if (base::EqualsCaseInsensitiveASCII(
948 base::MakeStringPiece(name_begin_, name_end_), name)) {
949 return true;
950 }
951 }
952
953 return false;
954 }
955
ValuesIterator(std::string_view values,char delimiter,bool ignore_empty_values)956 HttpUtil::ValuesIterator::ValuesIterator(std::string_view values,
957 char delimiter,
958 bool ignore_empty_values)
959 : values_(values, std::string(1, delimiter)),
960 ignore_empty_values_(ignore_empty_values) {
961 values_.set_quote_chars("\"");
962 // Could set this unconditionally, since code below has to check for empty
963 // values after trimming, anyways, but may provide a minor performance
964 // improvement.
965 if (!ignore_empty_values_)
966 values_.set_options(base::StringTokenizer::RETURN_EMPTY_TOKENS);
967 }
968
// Compiler-generated copy constructor, defined out of line.
HttpUtil::ValuesIterator::ValuesIterator(const ValuesIterator& other) = default;

// Compiler-generated destructor, defined out of line.
HttpUtil::ValuesIterator::~ValuesIterator() = default;
972
GetNext()973 bool HttpUtil::ValuesIterator::GetNext() {
974 while (values_.GetNext()) {
975 value_ = TrimLWS(values_.token());
976
977 if (!ignore_empty_values_ || !value_.empty()) {
978 return true;
979 }
980 }
981 return false;
982 }
983
// Sets up iteration over the name/value pairs in `value`, which `props_`
// separates on `delimiter`.
//   optional_values: Values::NOT_REQUIRED sets `values_optional_`, which
//                    lets ParseNameValuePair() accept a bare name with no
//                    '='.
//   strict_quotes:   Quotes::STRICT_QUOTES sets `strict_quotes_`, which makes
//                    ParseNameValuePair() unquote values with StrictUnquote().
HttpUtil::NameValuePairsIterator::NameValuePairsIterator(std::string_view value,
                                                         char delimiter,
                                                         Values optional_values,
                                                         Quotes strict_quotes)
    : props_(value, delimiter),
      values_optional_(optional_values == Values::NOT_REQUIRED),
      strict_quotes_(strict_quotes == Quotes::STRICT_QUOTES) {}
991
// Compiler-generated copy constructor, defined out of line.
HttpUtil::NameValuePairsIterator::NameValuePairsIterator(
    const NameValuePairsIterator& other) = default;

// Compiler-generated destructor, defined out of line.
HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() = default;
996
997 // We expect properties to be formatted as one of:
998 // name="value"
999 // name='value'
1000 // name='\'value\''
1001 // name=value
1002 // name = value
1003 // name (if values_optional_ is true)
1004 // Due to buggy implementations found in some embedded devices, we also
1005 // accept values with missing close quotemark (http://crbug.com/39836):
1006 // name="value
GetNext()1007 bool HttpUtil::NameValuePairsIterator::GetNext() {
1008 CHECK(valid_);
1009 // Not an error, but nothing left to do.
1010 if (props_.GetNext()) {
1011 // State only becomes invalid if there's another element, but parsing it
1012 // fails.
1013 valid_ = ParseNameValuePair(props_.value());
1014 if (valid_) {
1015 return true;
1016 }
1017 }
1018
1019 // Clear all fields when returning false, regardless of whether `valid` is
1020 // true or not, since any populated data is no longer valid.
1021 name_ = std::string_view();
1022 value_ = std::string_view();
1023 unquoted_value_.clear();
1024 value_is_quoted_ = false;
1025 return false;
1026 }
1027
ParseNameValuePair(std::string_view name_value_pair)1028 bool HttpUtil::NameValuePairsIterator::ParseNameValuePair(
1029 std::string_view name_value_pair) {
1030 // Scan for the equals sign.
1031 const size_t equals = name_value_pair.find('=');
1032 if (equals == 0) {
1033 return false; // Malformed, no name
1034 }
1035 const bool has_value = (equals != std::string_view::npos);
1036 if (!has_value && !values_optional_) {
1037 return false; // Malformed, no equals sign and values are required
1038 }
1039
1040 // Make `name_` everything up until the equals sign.
1041 name_ = TrimLWS(name_value_pair.substr(0, equals));
1042 // Clear rest of state.
1043 value_ = std::string_view();
1044 value_is_quoted_ = false;
1045 unquoted_value_.clear();
1046
1047 // If there is a value, do additional checking and calculate the value.
1048 if (has_value) {
1049 // Check that no quote appears before the equals sign.
1050 if (base::ranges::any_of(name_, IsQuote)) {
1051 return false;
1052 }
1053
1054 // Value consists of everything after the equals sign, with whitespace
1055 // trimmed.
1056 value_ = TrimLWS(name_value_pair.substr(equals + 1));
1057 if (value_.empty()) {
1058 // Malformed; value is empty
1059 return false;
1060 }
1061 }
1062
1063 if (has_value && IsQuote(value_.front())) {
1064 value_is_quoted_ = true;
1065
1066 if (strict_quotes_) {
1067 return HttpUtil::StrictUnquote(value_, &unquoted_value_);
1068 }
1069
1070 // Trim surrounding quotemarks off the value
1071 if (value_.front() != value_.back() || value_.size() == 1) {
1072 // NOTE: This is not as graceful as it sounds:
1073 // * quoted-pairs will no longer be unquoted
1074 // (["\"hello] should give ["hello]).
1075 // * Does not detect when the final quote is escaped
1076 // (["value\"] should give [value"])
1077 value_is_quoted_ = false;
1078 value_ = value_.substr(1); // Gracefully recover from mismatching quotes.
1079 } else {
1080 // Do not store iterators into this. See declaration of `unquoted_value_`.
1081 unquoted_value_ = HttpUtil::Unquote(value_);
1082 }
1083 }
1084
1085 return true;
1086 }
1087
ParseAcceptEncoding(const std::string & accept_encoding,std::set<std::string> * allowed_encodings)1088 bool HttpUtil::ParseAcceptEncoding(const std::string& accept_encoding,
1089 std::set<std::string>* allowed_encodings) {
1090 DCHECK(allowed_encodings);
1091 if (accept_encoding.find_first_of("\"") != std::string::npos)
1092 return false;
1093 allowed_encodings->clear();
1094
1095 base::StringTokenizer tokenizer(accept_encoding.begin(),
1096 accept_encoding.end(), ",");
1097 while (tokenizer.GetNext()) {
1098 std::string_view entry = tokenizer.token_piece();
1099 entry = TrimLWS(entry);
1100 size_t semicolon_pos = entry.find(';');
1101 if (semicolon_pos == std::string_view::npos) {
1102 if (entry.find_first_of(HTTP_LWS) != std::string_view::npos) {
1103 return false;
1104 }
1105 allowed_encodings->insert(base::ToLowerASCII(entry));
1106 continue;
1107 }
1108 std::string_view encoding = entry.substr(0, semicolon_pos);
1109 encoding = TrimLWS(encoding);
1110 if (encoding.find_first_of(HTTP_LWS) != std::string_view::npos) {
1111 return false;
1112 }
1113 std::string_view params = entry.substr(semicolon_pos + 1);
1114 params = TrimLWS(params);
1115 size_t equals_pos = params.find('=');
1116 if (equals_pos == std::string_view::npos) {
1117 return false;
1118 }
1119 std::string_view param_name = params.substr(0, equals_pos);
1120 param_name = TrimLWS(param_name);
1121 if (!base::EqualsCaseInsensitiveASCII(param_name, "q"))
1122 return false;
1123 std::string_view qvalue = params.substr(equals_pos + 1);
1124 qvalue = TrimLWS(qvalue);
1125 if (qvalue.empty())
1126 return false;
1127 if (qvalue[0] == '1') {
1128 if (std::string_view("1.000").starts_with(qvalue)) {
1129 allowed_encodings->insert(base::ToLowerASCII(encoding));
1130 continue;
1131 }
1132 return false;
1133 }
1134 if (qvalue[0] != '0')
1135 return false;
1136 if (qvalue.length() == 1)
1137 continue;
1138 if (qvalue.length() <= 2 || qvalue.length() > 5)
1139 return false;
1140 if (qvalue[1] != '.')
1141 return false;
1142 bool nonzero_number = false;
1143 for (size_t i = 2; i < qvalue.length(); ++i) {
1144 if (!base::IsAsciiDigit(qvalue[i]))
1145 return false;
1146 if (qvalue[i] != '0')
1147 nonzero_number = true;
1148 }
1149 if (nonzero_number)
1150 allowed_encodings->insert(base::ToLowerASCII(encoding));
1151 }
1152
1153 // RFC 7231 5.3.4 "A request without an Accept-Encoding header field implies
1154 // that the user agent has no preferences regarding content-codings."
1155 if (allowed_encodings->empty()) {
1156 allowed_encodings->insert("*");
1157 return true;
1158 }
1159
1160 // Any browser must support "identity".
1161 allowed_encodings->insert("identity");
1162
1163 // RFC says gzip == x-gzip; mirror it here for easier matching.
1164 if (allowed_encodings->find("gzip") != allowed_encodings->end())
1165 allowed_encodings->insert("x-gzip");
1166 if (allowed_encodings->find("x-gzip") != allowed_encodings->end())
1167 allowed_encodings->insert("gzip");
1168
1169 // RFC says compress == x-compress; mirror it here for easier matching.
1170 if (allowed_encodings->find("compress") != allowed_encodings->end())
1171 allowed_encodings->insert("x-compress");
1172 if (allowed_encodings->find("x-compress") != allowed_encodings->end())
1173 allowed_encodings->insert("compress");
1174 return true;
1175 }
1176
ParseContentEncoding(const std::string & content_encoding,std::set<std::string> * used_encodings)1177 bool HttpUtil::ParseContentEncoding(const std::string& content_encoding,
1178 std::set<std::string>* used_encodings) {
1179 DCHECK(used_encodings);
1180 if (content_encoding.find_first_of("\"=;*") != std::string::npos)
1181 return false;
1182 used_encodings->clear();
1183
1184 base::StringTokenizer encoding_tokenizer(content_encoding.begin(),
1185 content_encoding.end(), ",");
1186 while (encoding_tokenizer.GetNext()) {
1187 std::string_view encoding = TrimLWS(encoding_tokenizer.token_piece());
1188 if (encoding.find_first_of(HTTP_LWS) != std::string_view::npos) {
1189 return false;
1190 }
1191 used_encodings->insert(base::ToLowerASCII(encoding));
1192 }
1193 return true;
1194 }
1195
HeadersContainMultipleCopiesOfField(const HttpResponseHeaders & headers,const std::string & field_name)1196 bool HttpUtil::HeadersContainMultipleCopiesOfField(
1197 const HttpResponseHeaders& headers,
1198 const std::string& field_name) {
1199 size_t it = 0;
1200 std::optional<std::string_view> field_value =
1201 headers.EnumerateHeader(&it, field_name);
1202 if (!field_value) {
1203 return false;
1204 }
1205 // There's at least one `field_name` header. Check if there are any more
1206 // such headers, and if so, return true if they have different values.
1207 std::optional<std::string_view> field_value2;
1208 while ((field_value2 = headers.EnumerateHeader(&it, field_name))) {
1209 if (field_value != field_value2)
1210 return true;
1211 }
1212 return false;
1213 }
1214
1215 } // namespace net
1216