1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_HTTP_HTTP_UTIL_H_ 6 #define NET_HTTP_HTTP_UTIL_H_ 7 #pragma once 8 9 #include <string> 10 #include <vector> 11 12 #include "base/memory/ref_counted.h" 13 #include "base/string_tokenizer.h" 14 #include "googleurl/src/gurl.h" 15 #include "net/http/http_byte_range.h" 16 17 // This is a macro to support extending this string literal at compile time. 18 // Please excuse me polluting your global namespace! 19 #define HTTP_LWS " \t" 20 21 namespace net { 22 23 class UploadDataStream; 24 25 class HttpUtil { 26 public: 27 // Returns the absolute path of the URL, to be used for the http request. 28 // The absolute path starts with a '/' and may contain a query. 29 static std::string PathForRequest(const GURL& url); 30 31 // Returns the absolute URL, to be used for the http request. This url is 32 // made up of the protocol, host, [port], path, [query]. Everything else 33 // is stripped (username, password, reference). 34 static std::string SpecForRequest(const GURL& url); 35 36 // Locates the next occurance of delimiter in line, skipping over quoted 37 // strings (e.g., commas will not be treated as delimiters if they appear 38 // within a quoted string). Returns the offset of the found delimiter or 39 // line.size() if no delimiter was found. 40 static size_t FindDelimiter(const std::string& line, 41 size_t search_start, 42 char delimiter); 43 44 // Parses the value of a Content-Type header. The resulting mime_type and 45 // charset values are normalized to lowercase. The mime_type and charset 46 // output values are only modified if the content_type_str contains a mime 47 // type and charset value, respectively. 48 static void ParseContentType(const std::string& content_type_str, 49 std::string* mime_type, 50 std::string* charset, 51 bool *had_charset); 52 53 // Scans the headers and look for the first "Range" header in |headers|, 54 // if "Range" exists and the first one of it is well formatted then returns 55 // true, |ranges| will contain a list of valid ranges. If return 56 // value is false then values in |ranges| should not be used. The format of 57 // "Range" header is defined in RFC 2616 Section 14.35.1. 58 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1 59 static bool ParseRanges(const std::string& headers, 60 std::vector<HttpByteRange>* ranges); 61 62 // Same thing as ParseRanges except the Range header is known and its value 63 // is directly passed in, rather than requiring searching through a string. 64 static bool ParseRangeHeader(const std::string& range_specifier, 65 std::vector<HttpByteRange>* ranges); 66 67 // Scans the '\r\n'-delimited headers for the given header name. Returns 68 // true if a match is found. Input is assumed to be well-formed. 69 // TODO(darin): kill this 70 static bool HasHeader(const std::string& headers, const char* name); 71 72 // Strips all header lines from |headers| whose name matches 73 // |headers_to_remove|. |headers_to_remove| is a list of null-terminated 74 // lower-case header names, with array length |headers_to_remove_len|. 75 // Returns the stripped header lines list, separated by "\r\n". 76 static std::string StripHeaders(const std::string& headers, 77 const char* const headers_to_remove[], 78 size_t headers_to_remove_len); 79 80 // Multiple occurances of some headers cannot be coalesced into a comma- 81 // separated list since their values are (or contain) unquoted HTTP-date 82 // values, which may contain a comma (see RFC 2616 section 3.3.1). 83 static bool IsNonCoalescingHeader(std::string::const_iterator name_begin, 84 std::string::const_iterator name_end); IsNonCoalescingHeader(const std::string & name)85 static bool IsNonCoalescingHeader(const std::string& name) { 86 return IsNonCoalescingHeader(name.begin(), name.end()); 87 } 88 89 // Return true if the character is HTTP "linear white space" (SP | HT). 90 // This definition corresponds with the HTTP_LWS macro, and does not match 91 // newlines. 92 static bool IsLWS(char c); 93 94 // Trim HTTP_LWS chars from the beginning and end of the string. 95 static void TrimLWS(std::string::const_iterator* begin, 96 std::string::const_iterator* end); 97 98 // Whether the character is the start of a quotation mark. 99 static bool IsQuote(char c); 100 101 // RFC 2616 Sec 2.2: 102 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) 103 // Unquote() strips the surrounding quotemarks off a string, and unescapes 104 // any quoted-pair to obtain the value contained by the quoted-string. 105 // If the input is not quoted, then it works like the identity function. 106 static std::string Unquote(std::string::const_iterator begin, 107 std::string::const_iterator end); 108 109 // Same as above. 110 static std::string Unquote(const std::string& str); 111 112 // The reverse of Unquote() -- escapes and surrounds with " 113 static std::string Quote(const std::string& str); 114 115 // Returns the start of the status line, or -1 if no status line was found. 116 // This allows for 4 bytes of junk to precede the status line (which is what 117 // mozilla does too). 118 static int LocateStartOfStatusLine(const char* buf, int buf_len); 119 120 // Returns index beyond the end-of-headers marker or -1 if not found. RFC 121 // 2616 defines the end-of-headers marker as a double CRLF; however, some 122 // servers only send back LFs (e.g., Unix-based CGI scripts written using the 123 // ASIS Apache module). This function therefore accepts the pattern LF[CR]LF 124 // as end-of-headers (just like Mozilla). 125 // The parameter |i| is the offset within |buf| to begin searching from. 126 static int LocateEndOfHeaders(const char* buf, int buf_len, int i = 0); 127 128 // Assemble "raw headers" in the format required by HttpResponseHeaders. 129 // This involves normalizing line terminators, converting [CR]LF to \0 and 130 // handling HTTP line continuations (i.e., lines starting with LWS are 131 // continuations of the previous line). |buf_len| indicates the position of 132 // the end-of-headers marker as defined by LocateEndOfHeaders. 133 static std::string AssembleRawHeaders(const char* buf, int buf_len); 134 135 // Given a comma separated ordered list of language codes, return 136 // the list with a qvalue appended to each language. 137 // The way qvalues are assigned is rather simple. The qvalue 138 // starts with 1.0 and is decremented by 0.2 for each successive entry 139 // in the list until it reaches 0.2. All the entries after that are 140 // assigned the same qvalue of 0.2. Also, note that the 1st language 141 // will not have a qvalue added because the absence of a qvalue implicitly 142 // means q=1.0. 143 // 144 // When making a http request, this should be used to determine what 145 // to put in Accept-Language header. If a comma separated list of language 146 // codes *without* qvalue is sent, web servers regard all 147 // of them as having q=1.0 and pick one of them even though it may not 148 // be at the beginning of the list (see http://crbug.com/5899). 149 static std::string GenerateAcceptLanguageHeader( 150 const std::string& raw_language_list); 151 152 // Given a charset, return the list with a qvalue. If charset is utf-8, 153 // it will return 'utf-8,*;q=0.5'. Otherwise (e.g. 'euc-jp'), it'll return 154 // 'euc-jp,utf-8;q=0.7,*;q=0.3'. 155 static std::string GenerateAcceptCharsetHeader(const std::string& charset); 156 157 // Helper. If |*headers| already contains |header_name| do nothing, 158 // otherwise add <header_name> ": " <header_value> to the end of the list. 159 static void AppendHeaderIfMissing(const char* header_name, 160 const std::string& header_value, 161 std::string* headers); 162 163 // Used to iterate over the name/value pairs of HTTP headers. To iterate 164 // over the values in a multi-value header, use ValuesIterator. 165 // See AssembleRawHeaders for joining line continuations (this iterator 166 // does not expect any). 167 class HeadersIterator { 168 public: 169 HeadersIterator(std::string::const_iterator headers_begin, 170 std::string::const_iterator headers_end, 171 const std::string& line_delimiter); 172 ~HeadersIterator(); 173 174 // Advances the iterator to the next header, if any. Returns true if there 175 // is a next header. Use name* and values* methods to access the resultant 176 // header name and values. 177 bool GetNext(); 178 179 // Iterates through the list of headers, starting with the current position 180 // and looks for the specified header. Note that the name _must_ be 181 // lower cased. 182 // If the header was found, the return value will be true and the current 183 // position points to the header. If the return value is false, the 184 // current position will be at the end of the headers. 185 bool AdvanceTo(const char* lowercase_name); 186 Reset()187 void Reset() { 188 lines_.Reset(); 189 } 190 name_begin()191 std::string::const_iterator name_begin() const { 192 return name_begin_; 193 } name_end()194 std::string::const_iterator name_end() const { 195 return name_end_; 196 } name()197 std::string name() const { 198 return std::string(name_begin_, name_end_); 199 } 200 values_begin()201 std::string::const_iterator values_begin() const { 202 return values_begin_; 203 } values_end()204 std::string::const_iterator values_end() const { 205 return values_end_; 206 } values()207 std::string values() const { 208 return std::string(values_begin_, values_end_); 209 } 210 211 private: 212 StringTokenizer lines_; 213 std::string::const_iterator name_begin_; 214 std::string::const_iterator name_end_; 215 std::string::const_iterator values_begin_; 216 std::string::const_iterator values_end_; 217 }; 218 219 // Iterates over delimited values in an HTTP header. HTTP LWS is 220 // automatically trimmed from the resulting values. 221 // 222 // When using this class to iterate over response header values, be aware that 223 // for some headers (e.g., Last-Modified), commas are not used as delimiters. 224 // This iterator should be avoided for headers like that which are considered 225 // non-coalescing (see IsNonCoalescingHeader). 226 // 227 // This iterator is careful to skip over delimiters found inside an HTTP 228 // quoted string. 229 // 230 class ValuesIterator { 231 public: 232 ValuesIterator(std::string::const_iterator values_begin, 233 std::string::const_iterator values_end, 234 char delimiter); 235 ~ValuesIterator(); 236 237 // Advances the iterator to the next value, if any. Returns true if there 238 // is a next value. Use value* methods to access the resultant value. 239 bool GetNext(); 240 value_begin()241 std::string::const_iterator value_begin() const { 242 return value_begin_; 243 } value_end()244 std::string::const_iterator value_end() const { 245 return value_end_; 246 } value()247 std::string value() const { 248 return std::string(value_begin_, value_end_); 249 } 250 251 private: 252 StringTokenizer values_; 253 std::string::const_iterator value_begin_; 254 std::string::const_iterator value_end_; 255 }; 256 257 // Iterates over a delimited sequence of name-value pairs in an HTTP header. 258 // Each pair consists of a token (the name), an equals sign, and either a 259 // token or quoted-string (the value). Arbitrary HTTP LWS is permitted outside 260 // of and between names, values, and delimiters. 261 // 262 // String iterators returned from this class' methods may be invalidated upon 263 // calls to GetNext() or after the NameValuePairsIterator is destroyed. 264 class NameValuePairsIterator { 265 public: 266 NameValuePairsIterator(std::string::const_iterator begin, 267 std::string::const_iterator end, 268 char delimiter); 269 ~NameValuePairsIterator(); 270 271 // Advances the iterator to the next pair, if any. Returns true if there 272 // is a next pair. Use name* and value* methods to access the resultant 273 // value. 274 bool GetNext(); 275 276 // Returns false if there was a parse error. valid()277 bool valid() const { return valid_; } 278 279 // The name of the current name-value pair. name_begin()280 std::string::const_iterator name_begin() const { return name_begin_; } name_end()281 std::string::const_iterator name_end() const { return name_end_; } name()282 std::string name() const { return std::string(name_begin_, name_end_); } 283 284 // The value of the current name-value pair. value_begin()285 std::string::const_iterator value_begin() const { 286 return value_is_quoted_ ? unquoted_value_.begin() : value_begin_; 287 } value_end()288 std::string::const_iterator value_end() const { 289 return value_is_quoted_ ? unquoted_value_.end() : value_end_; 290 } value()291 std::string value() const { 292 return value_is_quoted_ ? unquoted_value_ : std::string(value_begin_, 293 value_end_); 294 } 295 296 private: 297 HttpUtil::ValuesIterator props_; 298 bool valid_; 299 300 std::string::const_iterator begin_; 301 std::string::const_iterator end_; 302 303 std::string::const_iterator name_begin_; 304 std::string::const_iterator name_end_; 305 306 std::string::const_iterator value_begin_; 307 std::string::const_iterator value_end_; 308 309 // Do not store iterators into this string. The NameValuePairsIterator 310 // is copyable/assignable, and if copied the copy's iterators would point 311 // into the original's unquoted_value_ member. 312 std::string unquoted_value_; 313 314 bool value_is_quoted_; 315 }; 316 }; 317 318 } // namespace net 319 320 #endif // NET_HTTP_HTTP_UTIL_H_ 321