• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 // The rules for parsing content-types were borrowed from Firefox:
11 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
12 
13 #include "net/http/http_util.h"
14 
15 #include <algorithm>
16 #include <optional>
17 #include <string>
18 #include <string_view>
19 
20 #include "base/check_op.h"
21 #include "base/strings/strcat.h"
22 #include "base/strings/string_number_conversions.h"
23 #include "base/strings/string_split.h"
24 #include "base/strings/string_tokenizer.h"
25 #include "base/strings/string_util.h"
26 #include "base/strings/stringprintf.h"
27 #include "base/time/time.h"
28 #include "net/base/features.h"
29 #include "net/base/mime_util.h"
30 #include "net/base/parse_number.h"
31 #include "net/base/url_util.h"
32 #include "net/http/http_response_headers.h"
33 
34 namespace net {
35 
36 namespace {
37 
38 template <typename ConstIterator>
TrimLWSImplementation(ConstIterator * begin,ConstIterator * end)39 void TrimLWSImplementation(ConstIterator* begin, ConstIterator* end) {
40   // leading whitespace
41   while (*begin < *end && HttpUtil::IsLWS((*begin)[0]))
42     ++(*begin);
43 
44   // trailing whitespace
45   while (*begin < *end && HttpUtil::IsLWS((*end)[-1]))
46     --(*end);
47 }
48 
49 // Helper class that builds the list of languages for the Accept-Language
50 // headers.
51 // The output is a comma-separated list of languages as string.
52 // Duplicates are removed.
53 class AcceptLanguageBuilder {
54  public:
55   // Adds a language to the string.
56   // Duplicates are ignored.
AddLanguageCode(const std::string & language)57   void AddLanguageCode(const std::string& language) {
58     // No Q score supported, only supports ASCII.
59     DCHECK_EQ(std::string::npos, language.find_first_of("; "));
60     DCHECK(base::IsStringASCII(language));
61     if (seen_.find(language) == seen_.end()) {
62       if (str_.empty()) {
63         base::StringAppendF(&str_, "%s", language.c_str());
64       } else {
65         base::StringAppendF(&str_, ",%s", language.c_str());
66       }
67       seen_.insert(language);
68     }
69   }
70 
71   // Returns the string constructed up to this point.
GetString() const72   std::string GetString() const { return str_; }
73 
74  private:
75   // The string that contains the list of languages, comma-separated.
76   std::string str_;
77   // Set the remove duplicates.
78   std::unordered_set<std::string> seen_;
79 };
80 
81 // Extract the base language code from a language code.
82 // If there is no '-' in the code, the original code is returned.
GetBaseLanguageCode(const std::string & language_code)83 std::string GetBaseLanguageCode(const std::string& language_code) {
84   std::vector<std::string> tokens = base::SplitString(
85       language_code, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
86   return tokens.empty() ? "" : std::move(tokens[0]);
87 }
88 
89 }  // namespace
90 
91 // HttpUtil -------------------------------------------------------------------
92 
GenerateRequestLine(std::string_view method,const GURL & url,bool is_for_get_to_http_proxy)93 std::string HttpUtil::GenerateRequestLine(std::string_view method,
94                                           const GURL& url,
95                                           bool is_for_get_to_http_proxy) {
96   static constexpr char kSuffix[] = " HTTP/1.1\r\n";
97   const std::string path = is_for_get_to_http_proxy
98                                ? HttpUtil::SpecForRequest(url)
99                                : url.PathForRequest();
100   return base::StrCat({method, " ", path, kSuffix});
101 }
102 
103 // static
SpecForRequest(const GURL & url)104 std::string HttpUtil::SpecForRequest(const GURL& url) {
105   DCHECK(url.is_valid() &&
106          (url.SchemeIsHTTPOrHTTPS() || url.SchemeIsWSOrWSS()));
107   return SimplifyUrlForRequest(url).spec();
108 }
109 
110 // static
ParseContentType(std::string_view content_type_str,std::string * mime_type,std::string * charset,bool * had_charset,std::string * boundary)111 void HttpUtil::ParseContentType(std::string_view content_type_str,
112                                 std::string* mime_type,
113                                 std::string* charset,
114                                 bool* had_charset,
115                                 std::string* boundary) {
116   std::string mime_type_value;
117   base::StringPairs params;
118   bool result = ParseMimeType(content_type_str, &mime_type_value, &params);
119   // If the server sent "*/*", it is meaningless, so do not store it.
120   // Also, reject a mime-type if it does not include a slash.
121   // Some servers give junk after the charset parameter, which may
122   // include a comma, so this check makes us a bit more tolerant.
123   if (!result || content_type_str == "*/*")
124     return;
125 
126   std::string charset_value;
127   bool type_has_charset = false;
128   bool type_has_boundary = false;
129   for (const auto& param : params) {
130     // Trim LWS from param value, ParseMimeType() leaves WS for quoted-string.
131     // TODO(mmenke): Check that name has only valid characters.
132     if (!type_has_charset &&
133         base::EqualsCaseInsensitiveASCII(param.first, "charset")) {
134       type_has_charset = true;
135       charset_value = std::string(HttpUtil::TrimLWS(param.second));
136       continue;
137     }
138 
139     if (boundary && !type_has_boundary &&
140         base::EqualsCaseInsensitiveASCII(param.first, "boundary")) {
141       type_has_boundary = true;
142       *boundary = std::string(HttpUtil::TrimLWS(param.second));
143       continue;
144     }
145   }
146 
147   // If `mime_type_value` is the same as `mime_type`, then just update
148   // `charset`. However, if `charset` is empty and `mime_type` hasn't changed,
149   // then don't wipe-out an existing `charset`.
150   bool eq = base::EqualsCaseInsensitiveASCII(mime_type_value, *mime_type);
151   if (!eq) {
152     *mime_type = base::ToLowerASCII(mime_type_value);
153   }
154   if ((!eq && *had_charset) || type_has_charset) {
155     *had_charset = true;
156     *charset = base::ToLowerASCII(charset_value);
157   }
158 }
159 
160 // static
ParseRangeHeader(const std::string & ranges_specifier,std::vector<HttpByteRange> * ranges)161 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier,
162                                 std::vector<HttpByteRange>* ranges) {
163   size_t equal_char_offset = ranges_specifier.find('=');
164   if (equal_char_offset == std::string::npos)
165     return false;
166 
167   // Try to extract bytes-unit part.
168   std::string_view bytes_unit =
169       std::string_view(ranges_specifier).substr(0, equal_char_offset);
170 
171   // "bytes" unit identifier is not found.
172   bytes_unit = TrimLWS(bytes_unit);
173   if (!base::EqualsCaseInsensitiveASCII(bytes_unit, "bytes")) {
174     return false;
175   }
176 
177   std::string::const_iterator byte_range_set_begin =
178       ranges_specifier.begin() + equal_char_offset + 1;
179   std::string::const_iterator byte_range_set_end = ranges_specifier.end();
180 
181   ValuesIterator byte_range_set_iterator(
182       std::string_view(byte_range_set_begin, byte_range_set_end),
183       /*delimiter=*/',');
184   while (byte_range_set_iterator.GetNext()) {
185     std::string_view value = byte_range_set_iterator.value();
186     size_t minus_char_offset = value.find('-');
187     // If '-' character is not found, reports failure.
188     if (minus_char_offset == std::string::npos)
189       return false;
190 
191     std::string_view first_byte_pos = value.substr(0, minus_char_offset);
192     first_byte_pos = TrimLWS(first_byte_pos);
193 
194     HttpByteRange range;
195     // Try to obtain first-byte-pos.
196     if (!first_byte_pos.empty()) {
197       int64_t first_byte_position = -1;
198       if (!base::StringToInt64(first_byte_pos, &first_byte_position))
199         return false;
200       range.set_first_byte_position(first_byte_position);
201     }
202 
203     std::string_view last_byte_pos = value.substr(minus_char_offset + 1);
204     last_byte_pos = TrimLWS(last_byte_pos);
205 
206     // We have last-byte-pos or suffix-byte-range-spec in this case.
207     if (!last_byte_pos.empty()) {
208       int64_t last_byte_position;
209       if (!base::StringToInt64(last_byte_pos, &last_byte_position))
210         return false;
211       if (range.HasFirstBytePosition())
212         range.set_last_byte_position(last_byte_position);
213       else
214         range.set_suffix_length(last_byte_position);
215     } else if (!range.HasFirstBytePosition()) {
216       return false;
217     }
218 
219     // Do a final check on the HttpByteRange object.
220     if (!range.IsValid())
221       return false;
222     ranges->push_back(range);
223   }
224   return !ranges->empty();
225 }
226 
227 // static
228 // From RFC 2616 14.16:
229 // content-range-spec =
230 //     bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" )
231 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*"
232 // instance-length = 1*DIGIT
233 // bytes-unit = "bytes"
ParseContentRangeHeaderFor206(std::string_view content_range_spec,int64_t * first_byte_position,int64_t * last_byte_position,int64_t * instance_length)234 bool HttpUtil::ParseContentRangeHeaderFor206(
235     std::string_view content_range_spec,
236     int64_t* first_byte_position,
237     int64_t* last_byte_position,
238     int64_t* instance_length) {
239   *first_byte_position = *last_byte_position = *instance_length = -1;
240   content_range_spec = TrimLWS(content_range_spec);
241 
242   size_t space_position = content_range_spec.find(' ');
243   if (space_position == std::string_view::npos) {
244     return false;
245   }
246 
247   // Invalid header if it doesn't contain "bytes-unit".
248   if (!base::EqualsCaseInsensitiveASCII(
249           TrimLWS(content_range_spec.substr(0, space_position)), "bytes")) {
250     return false;
251   }
252 
253   size_t minus_position = content_range_spec.find('-', space_position + 1);
254   if (minus_position == std::string_view::npos) {
255     return false;
256   }
257   size_t slash_position = content_range_spec.find('/', minus_position + 1);
258   if (slash_position == std::string_view::npos) {
259     return false;
260   }
261 
262   if (base::StringToInt64(
263           TrimLWS(content_range_spec.substr(
264               space_position + 1, minus_position - (space_position + 1))),
265           first_byte_position) &&
266       *first_byte_position >= 0 &&
267       base::StringToInt64(
268           TrimLWS(content_range_spec.substr(
269               minus_position + 1, slash_position - (minus_position + 1))),
270           last_byte_position) &&
271       *last_byte_position >= *first_byte_position &&
272       base::StringToInt64(
273           TrimLWS(content_range_spec.substr(slash_position + 1)),
274           instance_length) &&
275       *instance_length > *last_byte_position) {
276     return true;
277   }
278   *first_byte_position = *last_byte_position = *instance_length = -1;
279   return false;
280 }
281 
282 // static
ParseRetryAfterHeader(const std::string & retry_after_string,base::Time now,base::TimeDelta * retry_after)283 bool HttpUtil::ParseRetryAfterHeader(const std::string& retry_after_string,
284                                      base::Time now,
285                                      base::TimeDelta* retry_after) {
286   uint32_t seconds;
287   base::Time time;
288   base::TimeDelta interval;
289 
290   if (ParseUint32(retry_after_string, ParseIntFormat::NON_NEGATIVE, &seconds)) {
291     interval = base::Seconds(seconds);
292   } else if (base::Time::FromUTCString(retry_after_string.c_str(), &time)) {
293     interval = time - now;
294   } else {
295     return false;
296   }
297 
298   if (interval < base::Seconds(0))
299     return false;
300 
301   *retry_after = interval;
302   return true;
303 }
304 
305 // static
TimeFormatHTTP(base::Time time)306 std::string HttpUtil::TimeFormatHTTP(base::Time time) {
307   static constexpr char kWeekdayName[7][4] = {"Sun", "Mon", "Tue", "Wed",
308                                               "Thu", "Fri", "Sat"};
309   static constexpr char kMonthName[12][4] = {"Jan", "Feb", "Mar", "Apr",
310                                              "May", "Jun", "Jul", "Aug",
311                                              "Sep", "Oct", "Nov", "Dec"};
312   base::Time::Exploded exploded;
313   time.UTCExplode(&exploded);
314   return base::StringPrintf(
315       "%s, %02d %s %04d %02d:%02d:%02d GMT", kWeekdayName[exploded.day_of_week],
316       exploded.day_of_month, kMonthName[exploded.month - 1], exploded.year,
317       exploded.hour, exploded.minute, exploded.second);
318 }
319 
320 namespace {
321 
// A header string containing any of the following fields will cause
// an error. The list comes from the fetch standard.
// IsSafeHeader() compares a header name against every entry, ignoring ASCII
// case, and reports the header as unsafe on the first match. Entries are
// written in lower case.
const char* const kForbiddenHeaderFields[] = {
    "accept-charset",
    "accept-encoding",
    "access-control-request-headers",
    "access-control-request-method",
    "access-control-request-private-network",
    "connection",
    "content-length",
    "cookie",
    "cookie2",
    "date",
    "dnt",
    "expect",
    "host",
    "keep-alive",
    "origin",
    "referer",
    "set-cookie",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    // TODO(mmenke): This is no longer banned, but still here due to issues
    // mentioned in https://crbug.com/571722.
    "user-agent",
    "via",
};
351 
// A header string containing any of the following fields with a forbidden
// method name in the value will cause an error. The list comes from the fetch
// standard.
// IsSafeHeader() matches the header name against these entries ignoring ASCII
// case; only on a match does it inspect the comma-separated header value for
// an entry of kForbiddenMethods.
const char* const kForbiddenHeaderFieldsWithForbiddenMethod[] = {
    "x-http-method",
    "x-http-method-override",
    "x-method-override",
};
360 
// The forbidden method names that are defined in the fetch standard, and used
// to check the values of the headers listed in
// kForbiddenHeaderFieldsWithForbiddenMethod above. IsSafeHeader() compares
// each value against these entries ignoring ASCII case.
const char* const kForbiddenMethods[] = {
    "connect",
    "trace",
    "track",
};
368 
369 }  // namespace
370 
371 // static
IsMethodSafe(std::string_view method)372 bool HttpUtil::IsMethodSafe(std::string_view method) {
373   return method == "GET" || method == "HEAD" || method == "OPTIONS" ||
374          method == "TRACE";
375 }
376 
377 // static
IsMethodIdempotent(std::string_view method)378 bool HttpUtil::IsMethodIdempotent(std::string_view method) {
379   return IsMethodSafe(method) || method == "PUT" || method == "DELETE";
380 }
381 
382 // static
IsSafeHeader(std::string_view name,std::string_view value)383 bool HttpUtil::IsSafeHeader(std::string_view name, std::string_view value) {
384   if (base::StartsWith(name, "proxy-", base::CompareCase::INSENSITIVE_ASCII) ||
385       base::StartsWith(name, "sec-", base::CompareCase::INSENSITIVE_ASCII))
386     return false;
387 
388   for (const char* field : kForbiddenHeaderFields) {
389     if (base::EqualsCaseInsensitiveASCII(name, field))
390       return false;
391   }
392 
393   bool is_forbidden_header_fields_with_forbidden_method = false;
394   for (const char* field : kForbiddenHeaderFieldsWithForbiddenMethod) {
395     if (base::EqualsCaseInsensitiveASCII(name, field)) {
396       is_forbidden_header_fields_with_forbidden_method = true;
397       break;
398     }
399   }
400   if (is_forbidden_header_fields_with_forbidden_method) {
401     ValuesIterator method_iterator(value, ',');
402     while (method_iterator.GetNext()) {
403       std::string_view method = method_iterator.value();
404       for (const char* forbidden_method : kForbiddenMethods) {
405         if (base::EqualsCaseInsensitiveASCII(method, forbidden_method))
406           return false;
407       }
408     }
409   }
410   return true;
411 }
412 
413 // static
IsValidHeaderName(std::string_view name)414 bool HttpUtil::IsValidHeaderName(std::string_view name) {
415   // Check whether the header name is RFC 2616-compliant.
416   return HttpUtil::IsToken(name);
417 }
418 
419 // static
IsValidHeaderValue(std::string_view value)420 bool HttpUtil::IsValidHeaderValue(std::string_view value) {
421   // Just a sanity check: disallow NUL, CR and LF.
422   for (char c : value) {
423     if (c == '\0' || c == '\r' || c == '\n')
424       return false;
425   }
426   return true;
427 }
428 
429 // static
IsNonCoalescingHeader(std::string_view name)430 bool HttpUtil::IsNonCoalescingHeader(std::string_view name) {
431   // NOTE: "set-cookie2" headers do not support expires attributes, so we don't
432   // have to list them here.
433   // As of 2023, using FlatSet here actually makes the lookup slower, and
434   // unordered_set is even slower than that.
435   static constexpr std::string_view kNonCoalescingHeaders[] = {
436       "date", "expires", "last-modified",
437       "location",  // See bug 1050541 for details
438       "retry-after", "set-cookie",
439       // The format of auth-challenges mixes both space separated tokens and
440       // comma separated properties, so coalescing on comma won't work.
441       "www-authenticate", "proxy-authenticate",
442       // STS specifies that UAs must not process any STS headers after the first
443       // one.
444       "strict-transport-security"};
445 
446   for (std::string_view header : kNonCoalescingHeaders) {
447     if (base::EqualsCaseInsensitiveASCII(name, header)) {
448       return true;
449     }
450   }
451   return false;
452 }
453 
454 // static
TrimLWS(std::string::const_iterator * begin,std::string::const_iterator * end)455 void HttpUtil::TrimLWS(std::string::const_iterator* begin,
456                        std::string::const_iterator* end) {
457   TrimLWSImplementation(begin, end);
458 }
459 
460 // static
TrimLWS(std::string_view string)461 std::string_view HttpUtil::TrimLWS(std::string_view string) {
462   const char* begin = string.data();
463   const char* end = string.data() + string.size();
464   TrimLWSImplementation(&begin, &end);
465   return std::string_view(begin, end - begin);
466 }
467 
IsTokenChar(char c)468 bool HttpUtil::IsTokenChar(char c) {
469   return !(c >= 0x7F || c <= 0x20 || c == '(' || c == ')' || c == '<' ||
470            c == '>' || c == '@' || c == ',' || c == ';' || c == ':' ||
471            c == '\\' || c == '"' || c == '/' || c == '[' || c == ']' ||
472            c == '?' || c == '=' || c == '{' || c == '}');
473 }
474 
475 // See RFC 7230 Sec 3.2.6 for the definition of |token|.
IsToken(std::string_view string)476 bool HttpUtil::IsToken(std::string_view string) {
477   if (string.empty())
478     return false;
479   for (char c : string) {
480     if (!IsTokenChar(c))
481       return false;
482   }
483   return true;
484 }
485 
486 // See RFC 5987 Sec 3.2.1 for the definition of |parmname|.
IsParmName(std::string_view str)487 bool HttpUtil::IsParmName(std::string_view str) {
488   if (str.empty())
489     return false;
490   for (char c : str) {
491     if (!IsTokenChar(c) || c == '*' || c == '\'' || c == '%')
492       return false;
493   }
494   return true;
495 }
496 
497 namespace {
498 
// Returns true if `c` is the double-quote character, the only quote mark
// recognized by the unquoting helpers.
bool IsQuote(char c) {
  constexpr char kQuoteMark = '"';
  return kQuoteMark == c;
}
502 
UnquoteImpl(std::string_view str,bool strict_quotes,std::string * out)503 bool UnquoteImpl(std::string_view str, bool strict_quotes, std::string* out) {
504   if (str.empty())
505     return false;
506 
507   // Nothing to unquote.
508   if (!IsQuote(str[0]))
509     return false;
510 
511   // No terminal quote mark.
512   if (str.size() < 2 || str.front() != str.back())
513     return false;
514 
515   // Strip quotemarks
516   str.remove_prefix(1);
517   str.remove_suffix(1);
518 
519   // Unescape quoted-pair (defined in RFC 2616 section 2.2)
520   bool prev_escape = false;
521   std::string unescaped;
522   for (char c : str) {
523     if (c == '\\' && !prev_escape) {
524       prev_escape = true;
525       continue;
526     }
527     if (strict_quotes && !prev_escape && IsQuote(c))
528       return false;
529     prev_escape = false;
530     unescaped.push_back(c);
531   }
532 
533   // Terminal quote is escaped.
534   if (strict_quotes && prev_escape)
535     return false;
536 
537   *out = std::move(unescaped);
538   return true;
539 }
540 
541 }  // anonymous namespace
542 
543 // static
Unquote(std::string_view str)544 std::string HttpUtil::Unquote(std::string_view str) {
545   std::string result;
546   if (!UnquoteImpl(str, false, &result))
547     return std::string(str);
548 
549   return result;
550 }
551 
552 // static
StrictUnquote(std::string_view str,std::string * out)553 bool HttpUtil::StrictUnquote(std::string_view str, std::string* out) {
554   return UnquoteImpl(str, true, out);
555 }
556 
557 // static
Quote(std::string_view str)558 std::string HttpUtil::Quote(std::string_view str) {
559   std::string escaped;
560   escaped.reserve(2 + str.size());
561 
562   // Esape any backslashes or quotemarks within the string, and
563   // then surround with quotes.
564   escaped.push_back('"');
565   for (char c : str) {
566     if (c == '"' || c == '\\')
567       escaped.push_back('\\');
568     escaped.push_back(c);
569   }
570   escaped.push_back('"');
571   return escaped;
572 }
573 
574 // Find the "http" substring in a status line. This allows for
575 // some slop at the start. If the "http" string could not be found
576 // then returns std::string::npos.
577 // static
LocateStartOfStatusLine(base::span<const uint8_t> buf)578 size_t HttpUtil::LocateStartOfStatusLine(base::span<const uint8_t> buf) {
579   const size_t slop = 4;
580   const size_t http_len = 4;
581 
582   if (buf.size() >= http_len) {
583     size_t i_max = std::min(buf.size() - http_len, slop);
584     for (size_t i = 0; i <= i_max; ++i) {
585       if (base::EqualsCaseInsensitiveASCII(
586               base::as_string_view(buf.subspan(i, http_len)), "http")) {
587         return i;
588       }
589     }
590   }
591   return std::string::npos;  // Not found
592 }
593 
LocateEndOfHeadersHelper(base::span<const uint8_t> buf,size_t i,bool accept_empty_header_list)594 static size_t LocateEndOfHeadersHelper(base::span<const uint8_t> buf,
595                                        size_t i,
596                                        bool accept_empty_header_list) {
597   char last_c = '\0';
598   bool was_lf = false;
599   if (accept_empty_header_list) {
600     // Normally two line breaks signal the end of a header list. An empty header
601     // list ends with a single line break at the start of the buffer.
602     last_c = '\n';
603     was_lf = true;
604   }
605 
606   for (; i < buf.size(); ++i) {
607     char c = buf[i];
608     if (c == '\n') {
609       if (was_lf)
610         return i + 1;
611       was_lf = true;
612     } else if (c != '\r' || last_c != '\n') {
613       was_lf = false;
614     }
615     last_c = c;
616   }
617   return std::string::npos;
618 }
619 
LocateEndOfAdditionalHeaders(base::span<const uint8_t> buf,size_t i)620 size_t HttpUtil::LocateEndOfAdditionalHeaders(base::span<const uint8_t> buf,
621                                               size_t i) {
622   return LocateEndOfHeadersHelper(buf, i, true);
623 }
624 
LocateEndOfHeaders(base::span<const uint8_t> buf,size_t i)625 size_t HttpUtil::LocateEndOfHeaders(base::span<const uint8_t> buf, size_t i) {
626   return LocateEndOfHeadersHelper(buf, i, false);
627 }
628 
629 // In order for a line to be continuable, it must specify a
630 // non-blank header-name. Line continuations are specifically for
631 // header values -- do not allow headers names to span lines.
IsLineSegmentContinuable(std::string_view line)632 static bool IsLineSegmentContinuable(std::string_view line) {
633   if (line.empty())
634     return false;
635 
636   size_t colon = line.find(':');
637   if (colon == std::string_view::npos) {
638     return false;
639   }
640 
641   std::string_view name = line.substr(0, colon);
642 
643   // Name can't be empty.
644   if (name.empty())
645     return false;
646 
647   // Can't start with LWS (this would imply the segment is a continuation)
648   if (HttpUtil::IsLWS(name[0]))
649     return false;
650 
651   return true;
652 }
653 
// Helper used by AssembleRawHeaders, to find the end of the status line.
// Returns the offset of the first CR or LF in `str`, or str.size() when `str`
// contains neither.
static size_t FindStatusLineEnd(std::string_view str) {
  const size_t line_break = str.find_first_of("\r\n");
  return line_break == std::string_view::npos ? str.size() : line_break;
}
662 
663 // Helper used by AssembleRawHeaders, to skip past leading LWS.
RemoveLeadingNonLWS(std::string_view str)664 static std::string_view RemoveLeadingNonLWS(std::string_view str) {
665   for (size_t i = 0; i < str.size(); i++) {
666     if (!HttpUtil::IsLWS(str[i]))
667       return str.substr(i);
668   }
669   return std::string_view();  // Remove everything.
670 }
671 
AssembleRawHeaders(std::string_view input)672 std::string HttpUtil::AssembleRawHeaders(std::string_view input) {
673   std::string raw_headers;
674   raw_headers.reserve(input.size());
675 
676   // Skip any leading slop, since the consumers of this output
677   // (HttpResponseHeaders) don't deal with it.
678   size_t status_begin_offset =
679       LocateStartOfStatusLine(base::as_byte_span(input));
680   if (status_begin_offset != std::string::npos)
681     input.remove_prefix(status_begin_offset);
682 
683   // Copy the status line.
684   size_t status_line_end = FindStatusLineEnd(input);
685   raw_headers.append(input.data(), status_line_end);
686   input.remove_prefix(status_line_end);
687 
688   // After the status line, every subsequent line is a header line segment.
689   // Should a segment start with LWS, it is a continuation of the previous
690   // line's field-value.
691 
692   // TODO(ericroman): is this too permissive? (delimits on [\r\n]+)
693   base::CStringTokenizer lines(input.data(), input.data() + input.size(),
694                                "\r\n");
695 
696   // This variable is true when the previous line was continuable.
697   bool prev_line_continuable = false;
698 
699   while (lines.GetNext()) {
700     std::string_view line = lines.token_piece();
701 
702     if (prev_line_continuable && IsLWS(line[0])) {
703       // Join continuation; reduce the leading LWS to a single SP.
704       base::StrAppend(&raw_headers, {" ", RemoveLeadingNonLWS(line)});
705     } else {
706       // Terminate the previous line and copy the raw data to output.
707       base::StrAppend(&raw_headers, {"\n", line});
708 
709       // Check if the current line can be continued.
710       prev_line_continuable = IsLineSegmentContinuable(line);
711     }
712   }
713 
714   raw_headers.append("\n\n", 2);
715 
716   // Use '\0' as the canonical line terminator. If the input already contained
717   // any embeded '\0' characters we will strip them first to avoid interpreting
718   // them as line breaks.
719   std::erase(raw_headers, '\0');
720 
721   std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0');
722 
723   return raw_headers;
724 }
725 
ConvertHeadersBackToHTTPResponse(const std::string & str)726 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) {
727   std::string disassembled_headers;
728   base::StringTokenizer tokenizer(str, std::string(1, '\0'));
729   while (tokenizer.GetNext()) {
730     base::StrAppend(&disassembled_headers, {tokenizer.token_piece(), "\r\n"});
731   }
732   disassembled_headers.append("\r\n");
733 
734   return disassembled_headers;
735 }
736 
ExpandLanguageList(const std::string & language_prefs)737 std::string HttpUtil::ExpandLanguageList(const std::string& language_prefs) {
738   const std::vector<std::string> languages = base::SplitString(
739       language_prefs, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
740 
741   if (languages.empty())
742     return "";
743 
744   AcceptLanguageBuilder builder;
745 
746   const size_t size = languages.size();
747   for (size_t i = 0; i < size; ++i) {
748     const std::string& language = languages[i];
749     builder.AddLanguageCode(language);
750 
751     // Extract the primary language subtag.
752     const std::string& base_language = GetBaseLanguageCode(language);
753 
754     // Skip 'x' and 'i' as a primary language subtag per RFC 5646 section 2.1.1.
755     if (base_language == "x" || base_language == "i")
756       continue;
757 
758     // Look ahead and add the primary language subtag as a language if the next
759     // language is not part of the same family. This may not be perfect because
760     // an input of "en-US,fr,en" will yield "en-US,en,fr,en" and later make "en"
761     // a higher priority than "fr" despite the original preference.
762     const size_t j = i + 1;
763     if (j >= size || GetBaseLanguageCode(languages[j]) != base_language) {
764       builder.AddLanguageCode(base_language);
765     }
766   }
767 
768   return builder.GetString();
769 }
770 
771 // TODO(jungshik): This function assumes that the input is a comma separated
772 // list without any whitespace. As long as it comes from the preference and
773 // a user does not manually edit the preference file, it's the case. Still,
774 // we may have to make it more robust.
GenerateAcceptLanguageHeader(const std::string & raw_language_list)775 std::string HttpUtil::GenerateAcceptLanguageHeader(
776     const std::string& raw_language_list) {
777   // We use integers for qvalue and qvalue decrement that are 10 times
778   // larger than actual values to avoid a problem with comparing
779   // two floating point numbers.
780   const unsigned int kQvalueDecrement10 = 1;
781   unsigned int qvalue10 = 10;
782   base::StringTokenizer t(raw_language_list, ",");
783   std::string lang_list_with_q;
784   while (t.GetNext()) {
785     std::string language = t.token();
786     if (qvalue10 == 10) {
787       // q=1.0 is implicit.
788       lang_list_with_q = language;
789     } else {
790       DCHECK_LT(qvalue10, 10U);
791       base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(),
792                           qvalue10);
793     }
794     // It does not make sense to have 'q=0'.
795     if (qvalue10 > kQvalueDecrement10)
796       qvalue10 -= kQvalueDecrement10;
797   }
798   return lang_list_with_q;
799 }
800 
HasStrongValidators(HttpVersion version,std::optional<std::string_view> etag_header,std::optional<std::string_view> last_modified_header,std::optional<std::string_view> date_header)801 bool HttpUtil::HasStrongValidators(
802     HttpVersion version,
803     std::optional<std::string_view> etag_header,
804     std::optional<std::string_view> last_modified_header,
805     std::optional<std::string_view> date_header) {
806   if (version < HttpVersion(1, 1))
807     return false;
808 
809   if (etag_header && !etag_header->empty()) {
810     size_t slash = etag_header->find('/');
811     if (slash == std::string_view::npos || slash == 0) {
812       return true;
813     }
814 
815     std::string_view trimmed_etag = TrimLWS(etag_header->substr(0, slash));
816     if (!base::EqualsCaseInsensitiveASCII(trimmed_etag, "w")) {
817       return true;
818     }
819   }
820 
821   base::Time last_modified;
822   if (!last_modified_header ||
823       !base::Time::FromString(std::string(*last_modified_header).c_str(),
824                               &last_modified)) {
825     return false;
826   }
827 
828   base::Time date;
829   if (!date_header ||
830       !base::Time::FromString(std::string(*date_header).c_str(), &date)) {
831     return false;
832   }
833 
834   // Last-Modified is implicitly weak unless it is at least 60 seconds before
835   // the Date value.
836   return ((date - last_modified).InSeconds() >= 60);
837 }
838 
HasValidators(HttpVersion version,std::optional<std::string_view> etag_header,std::optional<std::string_view> last_modified_header)839 bool HttpUtil::HasValidators(
840     HttpVersion version,
841     std::optional<std::string_view> etag_header,
842     std::optional<std::string_view> last_modified_header) {
843   if (version < HttpVersion(1, 0))
844     return false;
845 
846   base::Time last_modified;
847   // Have to construct a C-style string here, since that's what
848   // base::Time::FromString requires.
849   if (last_modified_header &&
850       base::Time::FromString(std::string(*last_modified_header).c_str(),
851                              &last_modified)) {
852     return true;
853   }
854 
855   // It is OK to consider an empty string in etag_header to be a missing header
856   // since valid ETags are always quoted-strings (see RFC 2616 3.11) and thus
857   // empty ETags aren't empty strings (i.e., an empty ETag might be "\"\"").
858   return version >= HttpVersion(1, 1) && etag_header && !etag_header->empty();
859 }
860 
861 // Functions for histogram initialization.  The code 0 is put in the map to
862 // track status codes that are invalid.
863 // TODO(gavinp): Greatly prune the collected codes once we learn which
864 // ones are not sent in practice, to reduce upload size & memory use.
865 
// Inclusive bounds of the status codes that get their own histogram bucket.
constexpr int HISTOGRAM_MIN_HTTP_STATUS_CODE = 100;
constexpr int HISTOGRAM_MAX_HTTP_STATUS_CODE = 599;
870 
871 // static
GetStatusCodesForHistogram()872 std::vector<int> HttpUtil::GetStatusCodesForHistogram() {
873   std::vector<int> codes;
874   codes.reserve(
875       HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2);
876   codes.push_back(0);
877   for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE;
878        i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i)
879     codes.push_back(i);
880   return codes;
881 }
882 
883 // static
MapStatusCodeForHistogram(int code)884 int HttpUtil::MapStatusCodeForHistogram(int code) {
885   if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code &&
886       code <= HISTOGRAM_MAX_HTTP_STATUS_CODE)
887     return code;
888   return 0;
889 }
890 
891 // BNF from section 4.2 of RFC 2616:
892 //
893 //   message-header = field-name ":" [ field-value ]
894 //   field-name     = token
895 //   field-value    = *( field-content | LWS )
896 //   field-content  = <the OCTETs making up the field-value
897 //                     and consisting of either *TEXT or combinations
898 //                     of token, separators, and quoted-string>
899 //
900 
HeadersIterator(std::string::const_iterator headers_begin,std::string::const_iterator headers_end,const std::string & line_delimiter)901 HttpUtil::HeadersIterator::HeadersIterator(
902     std::string::const_iterator headers_begin,
903     std::string::const_iterator headers_end,
904     const std::string& line_delimiter)
905     : lines_(headers_begin, headers_end, line_delimiter) {
906 }
907 
908 HttpUtil::HeadersIterator::~HeadersIterator() = default;
909 
GetNext()910 bool HttpUtil::HeadersIterator::GetNext() {
911   while (lines_.GetNext()) {
912     name_begin_ = lines_.token_begin();
913     values_end_ = lines_.token_end();
914 
915     std::string::const_iterator colon(std::find(name_begin_, values_end_, ':'));
916     if (colon == values_end_)
917       continue;  // skip malformed header
918 
919     name_end_ = colon;
920 
921     // If the name starts with LWS, it is an invalid line.
922     // Leading LWS implies a line continuation, and these should have
923     // already been joined by AssembleRawHeaders().
924     if (name_begin_ == name_end_ || IsLWS(*name_begin_))
925       continue;
926 
927     TrimLWS(&name_begin_, &name_end_);
928     DCHECK(name_begin_ < name_end_);
929     if (!IsToken(base::MakeStringPiece(name_begin_, name_end_)))
930       continue;  // skip malformed header
931 
932     values_begin_ = colon + 1;
933     TrimLWS(&values_begin_, &values_end_);
934 
935     // if we got a header name, then we are done.
936     return true;
937   }
938   return false;
939 }
940 
AdvanceTo(const char * name)941 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) {
942   DCHECK(name != nullptr);
943   DCHECK_EQ(0, base::ToLowerASCII(name).compare(name))
944       << "the header name must be in all lower case";
945 
946   while (GetNext()) {
947     if (base::EqualsCaseInsensitiveASCII(
948             base::MakeStringPiece(name_begin_, name_end_), name)) {
949       return true;
950     }
951   }
952 
953   return false;
954 }
955 
ValuesIterator(std::string_view values,char delimiter,bool ignore_empty_values)956 HttpUtil::ValuesIterator::ValuesIterator(std::string_view values,
957                                          char delimiter,
958                                          bool ignore_empty_values)
959     : values_(values, std::string(1, delimiter)),
960       ignore_empty_values_(ignore_empty_values) {
961   values_.set_quote_chars("\"");
962   // Could set this unconditionally, since code below has to check for empty
963   // values after trimming, anyways, but may provide a minor performance
964   // improvement.
965   if (!ignore_empty_values_)
966     values_.set_options(base::StringTokenizer::RETURN_EMPTY_TOKENS);
967 }
968 
969 HttpUtil::ValuesIterator::ValuesIterator(const ValuesIterator& other) = default;
970 
971 HttpUtil::ValuesIterator::~ValuesIterator() = default;
972 
GetNext()973 bool HttpUtil::ValuesIterator::GetNext() {
974   while (values_.GetNext()) {
975     value_ = TrimLWS(values_.token());
976 
977     if (!ignore_empty_values_ || !value_.empty()) {
978       return true;
979     }
980   }
981   return false;
982 }
983 
NameValuePairsIterator(std::string_view value,char delimiter,Values optional_values,Quotes strict_quotes)984 HttpUtil::NameValuePairsIterator::NameValuePairsIterator(std::string_view value,
985                                                          char delimiter,
986                                                          Values optional_values,
987                                                          Quotes strict_quotes)
988     : props_(value, delimiter),
989       values_optional_(optional_values == Values::NOT_REQUIRED),
990       strict_quotes_(strict_quotes == Quotes::STRICT_QUOTES) {}
991 
992 HttpUtil::NameValuePairsIterator::NameValuePairsIterator(
993     const NameValuePairsIterator& other) = default;
994 
995 HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() = default;
996 
997 // We expect properties to be formatted as one of:
998 //   name="value"
999 //   name='value'
1000 //   name='\'value\''
1001 //   name=value
1002 //   name = value
1003 //   name (if values_optional_ is true)
1004 // Due to buggy implementations found in some embedded devices, we also
1005 // accept values with missing close quotemark (http://crbug.com/39836):
1006 //   name="value
GetNext()1007 bool HttpUtil::NameValuePairsIterator::GetNext() {
1008   CHECK(valid_);
1009   // Not an error, but nothing left to do.
1010   if (props_.GetNext()) {
1011     // State only becomes invalid if there's another element, but parsing it
1012     // fails.
1013     valid_ = ParseNameValuePair(props_.value());
1014     if (valid_) {
1015       return true;
1016     }
1017   }
1018 
1019   // Clear all fields when returning false, regardless of whether `valid` is
1020   // true or not, since any populated data is no longer valid.
1021   name_ = std::string_view();
1022   value_ = std::string_view();
1023   unquoted_value_.clear();
1024   value_is_quoted_ = false;
1025   return false;
1026 }
1027 
ParseNameValuePair(std::string_view name_value_pair)1028 bool HttpUtil::NameValuePairsIterator::ParseNameValuePair(
1029     std::string_view name_value_pair) {
1030   // Scan for the equals sign.
1031   const size_t equals = name_value_pair.find('=');
1032   if (equals == 0) {
1033     return false;  // Malformed, no name
1034   }
1035   const bool has_value = (equals != std::string_view::npos);
1036   if (!has_value && !values_optional_) {
1037     return false;  // Malformed, no equals sign and values are required
1038   }
1039 
1040   // Make `name_` everything up until the equals sign.
1041   name_ = TrimLWS(name_value_pair.substr(0, equals));
1042   // Clear rest of state.
1043   value_ = std::string_view();
1044   value_is_quoted_ = false;
1045   unquoted_value_.clear();
1046 
1047   // If there is a value, do additional checking and calculate the value.
1048   if (has_value) {
1049     // Check that no quote appears before the equals sign.
1050     if (base::ranges::any_of(name_, IsQuote)) {
1051       return false;
1052     }
1053 
1054     // Value consists of everything after the equals sign, with whitespace
1055     // trimmed.
1056     value_ = TrimLWS(name_value_pair.substr(equals + 1));
1057     if (value_.empty()) {
1058       // Malformed; value is empty
1059       return false;
1060     }
1061   }
1062 
1063   if (has_value && IsQuote(value_.front())) {
1064     value_is_quoted_ = true;
1065 
1066     if (strict_quotes_) {
1067       return HttpUtil::StrictUnquote(value_, &unquoted_value_);
1068     }
1069 
1070     // Trim surrounding quotemarks off the value
1071     if (value_.front() != value_.back() || value_.size() == 1) {
1072       // NOTE: This is not as graceful as it sounds:
1073       // * quoted-pairs will no longer be unquoted
1074       //   (["\"hello] should give ["hello]).
1075       // * Does not detect when the final quote is escaped
1076       //   (["value\"] should give [value"])
1077       value_is_quoted_ = false;
1078       value_ = value_.substr(1);  // Gracefully recover from mismatching quotes.
1079     } else {
1080       // Do not store iterators into this. See declaration of `unquoted_value_`.
1081       unquoted_value_ = HttpUtil::Unquote(value_);
1082     }
1083   }
1084 
1085   return true;
1086 }
1087 
ParseAcceptEncoding(const std::string & accept_encoding,std::set<std::string> * allowed_encodings)1088 bool HttpUtil::ParseAcceptEncoding(const std::string& accept_encoding,
1089                                    std::set<std::string>* allowed_encodings) {
1090   DCHECK(allowed_encodings);
1091   if (accept_encoding.find_first_of("\"") != std::string::npos)
1092     return false;
1093   allowed_encodings->clear();
1094 
1095   base::StringTokenizer tokenizer(accept_encoding.begin(),
1096                                   accept_encoding.end(), ",");
1097   while (tokenizer.GetNext()) {
1098     std::string_view entry = tokenizer.token_piece();
1099     entry = TrimLWS(entry);
1100     size_t semicolon_pos = entry.find(';');
1101     if (semicolon_pos == std::string_view::npos) {
1102       if (entry.find_first_of(HTTP_LWS) != std::string_view::npos) {
1103         return false;
1104       }
1105       allowed_encodings->insert(base::ToLowerASCII(entry));
1106       continue;
1107     }
1108     std::string_view encoding = entry.substr(0, semicolon_pos);
1109     encoding = TrimLWS(encoding);
1110     if (encoding.find_first_of(HTTP_LWS) != std::string_view::npos) {
1111       return false;
1112     }
1113     std::string_view params = entry.substr(semicolon_pos + 1);
1114     params = TrimLWS(params);
1115     size_t equals_pos = params.find('=');
1116     if (equals_pos == std::string_view::npos) {
1117       return false;
1118     }
1119     std::string_view param_name = params.substr(0, equals_pos);
1120     param_name = TrimLWS(param_name);
1121     if (!base::EqualsCaseInsensitiveASCII(param_name, "q"))
1122       return false;
1123     std::string_view qvalue = params.substr(equals_pos + 1);
1124     qvalue = TrimLWS(qvalue);
1125     if (qvalue.empty())
1126       return false;
1127     if (qvalue[0] == '1') {
1128       if (std::string_view("1.000").starts_with(qvalue)) {
1129         allowed_encodings->insert(base::ToLowerASCII(encoding));
1130         continue;
1131       }
1132       return false;
1133     }
1134     if (qvalue[0] != '0')
1135       return false;
1136     if (qvalue.length() == 1)
1137       continue;
1138     if (qvalue.length() <= 2 || qvalue.length() > 5)
1139       return false;
1140     if (qvalue[1] != '.')
1141       return false;
1142     bool nonzero_number = false;
1143     for (size_t i = 2; i < qvalue.length(); ++i) {
1144       if (!base::IsAsciiDigit(qvalue[i]))
1145         return false;
1146       if (qvalue[i] != '0')
1147         nonzero_number = true;
1148     }
1149     if (nonzero_number)
1150       allowed_encodings->insert(base::ToLowerASCII(encoding));
1151   }
1152 
1153   // RFC 7231 5.3.4 "A request without an Accept-Encoding header field implies
1154   // that the user agent has no preferences regarding content-codings."
1155   if (allowed_encodings->empty()) {
1156     allowed_encodings->insert("*");
1157     return true;
1158   }
1159 
1160   // Any browser must support "identity".
1161   allowed_encodings->insert("identity");
1162 
1163   // RFC says gzip == x-gzip; mirror it here for easier matching.
1164   if (allowed_encodings->find("gzip") != allowed_encodings->end())
1165     allowed_encodings->insert("x-gzip");
1166   if (allowed_encodings->find("x-gzip") != allowed_encodings->end())
1167     allowed_encodings->insert("gzip");
1168 
1169   // RFC says compress == x-compress; mirror it here for easier matching.
1170   if (allowed_encodings->find("compress") != allowed_encodings->end())
1171     allowed_encodings->insert("x-compress");
1172   if (allowed_encodings->find("x-compress") != allowed_encodings->end())
1173     allowed_encodings->insert("compress");
1174   return true;
1175 }
1176 
ParseContentEncoding(const std::string & content_encoding,std::set<std::string> * used_encodings)1177 bool HttpUtil::ParseContentEncoding(const std::string& content_encoding,
1178                                     std::set<std::string>* used_encodings) {
1179   DCHECK(used_encodings);
1180   if (content_encoding.find_first_of("\"=;*") != std::string::npos)
1181     return false;
1182   used_encodings->clear();
1183 
1184   base::StringTokenizer encoding_tokenizer(content_encoding.begin(),
1185                                            content_encoding.end(), ",");
1186   while (encoding_tokenizer.GetNext()) {
1187     std::string_view encoding = TrimLWS(encoding_tokenizer.token_piece());
1188     if (encoding.find_first_of(HTTP_LWS) != std::string_view::npos) {
1189       return false;
1190     }
1191     used_encodings->insert(base::ToLowerASCII(encoding));
1192   }
1193   return true;
1194 }
1195 
HeadersContainMultipleCopiesOfField(const HttpResponseHeaders & headers,const std::string & field_name)1196 bool HttpUtil::HeadersContainMultipleCopiesOfField(
1197     const HttpResponseHeaders& headers,
1198     const std::string& field_name) {
1199   size_t it = 0;
1200   std::optional<std::string_view> field_value =
1201       headers.EnumerateHeader(&it, field_name);
1202   if (!field_value) {
1203     return false;
1204   }
1205   // There's at least one `field_name` header.  Check if there are any more
1206   // such headers, and if so, return true if they have different values.
1207   std::optional<std::string_view> field_value2;
1208   while ((field_value2 = headers.EnumerateHeader(&it, field_name))) {
1209     if (field_value != field_value2)
1210       return true;
1211   }
1212   return false;
1213 }
1214 
1215 }  // namespace net
1216