• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_BASE_NET_UTIL_H_
6 #define NET_BASE_NET_UTIL_H_
7 #pragma once
8 
9 #include "build/build_config.h"
10 
11 #if defined(OS_WIN)
12 #include <windows.h>
13 #include <ws2tcpip.h>
14 #elif defined(OS_POSIX)
15 #include <sys/socket.h>
16 #endif
17 
18 #include <list>
19 #include <string>
20 #include <set>
21 #include <vector>
22 
23 #include "base/basictypes.h"
24 #include "base/string16.h"
25 #include "net/base/escape.h"
26 
27 struct addrinfo;
28 class FilePath;
29 class GURL;
30 
31 namespace base {
32 class Time;
33 }
34 
35 namespace url_canon {
36 struct CanonHostInfo;
37 }
38 
39 namespace url_parse {
40 struct Parsed;
41 }
42 
43 namespace net {
44 
45 // Used by FormatUrl to specify handling of certain parts of the url.
46 typedef uint32 FormatUrlType;
47 typedef uint32 FormatUrlTypes;
48 
49 // Used by GetHeaderParamValue to determine how to handle quotes in the value.
50 class QuoteRule {
51  public:
52   enum Type {
53     KEEP_OUTER_QUOTES,    // The value's outer quotes are kept.
54     REMOVE_OUTER_QUOTES,  // The value's outer quotes are removed.
55   };
56 
57  private:
58   QuoteRule();  // Private: QuoteRule only serves as a scope for Type.
59 };
60 
61 // Nothing is omitted.
62 extern const FormatUrlType kFormatUrlOmitNothing;
63 
64 // If set, any username and password are removed.
65 extern const FormatUrlType kFormatUrlOmitUsernamePassword;
66 
67 // If the scheme is 'http://', it's removed.
68 extern const FormatUrlType kFormatUrlOmitHTTP;
69 
70 // Omits the path if it is just a slash and there is no query or ref.  This is
71 // meaningful for non-file "standard" URLs.
72 extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
73 
74 // Convenience for omitting all unnecessary types.
75 extern const FormatUrlType kFormatUrlOmitAll;
76 
77 // Holds a list of ports that should be accepted despite bans.
78 extern std::multiset<int> explicitly_allowed_ports;
79 
80 // Given the full path to a file name, creates a file: URL. The returned URL
81 // may not be valid if the input is malformed.
82 GURL FilePathToFileURL(const FilePath& path);
83 
84 // Converts a file: URL back to a filename that can be passed to the OS. The
85 // file URL must be well-formed (GURL::is_valid() must return true); we don't
86 // handle degenerate cases here. Returns true on success, false if it isn't a
87 // valid file URL. On failure, *file_path will be empty.
88 bool FileURLToFilePath(const GURL& url, FilePath* file_path);
89 
90 // Splits an input of the form <host>[":"<port>] into its constituent parts.
91 // Saves the result into |*host| and |*port|. If the input did not have
92 // the optional port, sets |*port| to -1.
93 // Returns true if the parsing was successful, false otherwise.
94 // The returned host is NOT canonicalized, and may be invalid. If <host> is
95 // an IPv6 literal address, the returned host includes the square brackets.
96 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
97                       std::string::const_iterator host_and_port_end,
98                       std::string* host,
99                       int* port);
100 bool ParseHostAndPort(const std::string& host_and_port,
101                       std::string* host,
102                       int* port);
103 
104 // Returns a host:port string for the given URL.
105 std::string GetHostAndPort(const GURL& url);
106 
107 // Returns a host[:port] string for the given URL, where the port is omitted
108 // if it is the default for the URL's scheme.
109 std::string GetHostAndOptionalPort(const GURL& url);
110 
111 // Returns the string representation of an address, like "192.168.0.1".
112 // Returns empty string on failure.
113 std::string NetAddressToString(const struct addrinfo* net_address);
114 std::string NetAddressToString(const struct sockaddr* net_address,
115                                socklen_t address_len);
116 
117 // Same as NetAddressToString, but additionally includes the port number. For
118 // example: "192.168.0.1:99" or "[::1]:80".
119 std::string NetAddressToStringWithPort(const struct addrinfo* net_address);
120 std::string NetAddressToStringWithPort(const struct sockaddr* net_address,
121                                        socklen_t address_len);
122 
123 // Returns the hostname of the current system. Returns empty string on failure.
124 std::string GetHostName();
125 
126 // Extracts the unescaped username/password from |url|, saving the results
127 // into |*username| and |*password|.
128 void GetIdentityFromURL(const GURL& url,
129                         string16* username,
130                         string16* password);
131 
132 // Returns either the host from |url|, or, if the host is empty, the full spec.
133 std::string GetHostOrSpecFromURL(const GURL& url);
134 
135 // Return the value of the HTTP response header with name 'name'.  'headers'
136 // should be in the format that URLRequest::GetResponseHeaders() returns.
137 // Returns the empty string if the header is not found.
138 std::wstring GetSpecificHeader(const std::wstring& headers,
139                                const std::wstring& name);
140 std::string GetSpecificHeader(const std::string& headers,
141                               const std::string& name);
142 
143 // Return the value of the HTTP response header field's parameter named
144 // 'param_name'.  Returns the empty string if the parameter is not found or is
145 // improperly formatted.
146 std::wstring GetHeaderParamValue(const std::wstring& field,
147                                  const std::wstring& param_name,
148                                  QuoteRule::Type quote_rule);
149 std::string GetHeaderParamValue(const std::string& field,
150                                 const std::string& param_name,
151                                 QuoteRule::Type quote_rule);
152 
153 // Return the filename extracted from Content-Disposition header. The following
154 // formats are tried in order listed below:
155 //
156 // 1. RFC 5987
157 // 2. RFC 2047
158 // 3. Raw-8bit-characters :
159 //    a. UTF-8, b. referrer_charset, c. default os codepage.
160 // 4. %-escaped UTF-8.
161 //
162 // In step 3, if referrer_charset is empty(i.e. unknown), 3b is skipped.
163 // In step 4, the fallback charsets tried in step 3 are not tried. We
164 // can consider doing that later.
165 //
166 // When a param value is ASCII, but is not in format #2 or format #4 above,
167 // it is returned as it is unless it's pretty close to two supported
168 // formats but not well-formed. In that case, an empty string is returned.
169 //
170 // In any case, a caller must check for the empty return value and resort to
171 // another means to get a filename (e.g. url).
172 //
173 // This function does not do any escaping and callers are responsible for
174 // escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
175 //
176 // TODO(jungshik): revisit this issue. At the moment, the only caller is
177 // net_util::GetSuggestedFilename, and it calls ReplaceIllegalCharacters.  The
178 // other caller is a unit test. Need to figure out how to expose this function
179 // only to net_util_unittest.
180 //
181 std::string GetFileNameFromCD(const std::string& header,
182                               const std::string& referrer_charset);
183 
184 // Converts the given host name to unicode characters. This can be called for
185 // any host name, if the input is not IDN or is invalid in some way, we'll just
186 // return the ASCII source so it is still usable.
187 //
188 // The input should be the canonicalized ASCII host name from GURL. This
189 // function does NOT accept UTF-8! Its length must also be given (this is
190 // designed to work on the substring of the host out of a URL spec).
191 //
192 // |languages| is a comma separated list of ISO 639 language codes. It
193 // is used to determine whether a hostname is 'comprehensible' to a user
194 // who understands languages listed. |host| will be converted to a
195 // human-readable form (Unicode) ONLY when each component of |host| is
196 // regarded as 'comprehensible'. Script-mixing is not allowed except that
197 // Latin letters in the ASCII range can be mixed with a limited set of
198 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
199 // When |languages| is empty, even that mixing is not allowed.
200 //
201 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original
202 // |url|'s spec(); each offset will be adjusted to point at the same logical
203 // place in the result strings during decoding.  If this isn't possible because
204 // an offset points past the end of |host| or into the middle of a punycode
205 // sequence, the offending offset will be set to std::wstring::npos.
206 // |offset[s]_for_adjustment| may be NULL.
207 std::wstring IDNToUnicode(const char* host,
208                           size_t host_len,
209                           const std::wstring& languages,
210                           size_t* offset_for_adjustment);
211 std::wstring IDNToUnicodeWithOffsets(
212     const char* host,
213     size_t host_len,
214     const std::wstring& languages,
215     std::vector<size_t>* offsets_for_adjustment);
216 
217 // Canonicalizes |host| and returns it.  Also fills |host_info| with
218 // IP address information.  |host_info| must not be NULL.
219 std::string CanonicalizeHost(const std::string& host,
220                              url_canon::CanonHostInfo* host_info);
221 std::string CanonicalizeHost(const std::wstring& host,
222                              url_canon::CanonHostInfo* host_info);
223 
224 // Returns true if |host| is not an IP address and is compliant with a set of
225 // rules based on RFC 1738 and tweaked to be compatible with the real world.
226 // The rules are:
227 //   * One or more components separated by '.'
228 //   * Each component begins and ends with an alphanumeric character
229 //   * Each component contains only alphanumeric characters and '-' or '_'
230 //   * The last component does not begin with a digit
231 //   * Optional trailing dot after last component (means "treat as FQDN")
232 // If |desired_tld| is non-empty, the host will only be considered invalid if
233 // appending it as a trailing component still results in an invalid host.  This
234 // helps us avoid marking as "invalid" user attempts to open "www.401k.com" by
235 // typing 4-0-1-k-<ctrl>+<enter>.
236 //
237 // NOTE: You should only pass in hosts that have been returned from
238 // CanonicalizeHost(), or you may not get accurate results.
239 bool IsCanonicalizedHostCompliant(const std::string& host,
240                                   const std::string& desired_tld);
241 
242 // Call these functions to get the html snippet for a directory listing.
243 // The return values of both functions are in UTF-8.
244 std::string GetDirectoryListingHeader(const string16& title);
245 
246 // Given the name of a file in a directory (ftp or local) and
247 // other information (is_dir, size, modification time), it returns
248 // the html snippet to add the entry for the file to the directory listing.
249 // Currently, it's a script tag containing a call to a Javascript function
250 // |addRow|.
251 //
252 // |name| is the file name to be displayed. |raw_bytes| will be used
253 // as the actual target of the link (so for example, ftp links should use
254 // server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name|
255 // will be used.
256 //
257 // Both |name| and |raw_bytes| are escaped internally.
258 std::string GetDirectoryListingEntry(const string16& name,
259                                      const std::string& raw_bytes,
260                                      bool is_dir, int64 size,
261                                      base::Time modified);
262 
263 // If text starts with "www." it is removed, otherwise text is returned
264 // unmodified.
265 string16 StripWWW(const string16& text);
266 
267 // Gets the filename from the raw Content-Disposition header (as read from the
268 // network).  Otherwise uses the last path component name or hostname from
269 // |url|. If there is no filename or it can't be used, the given |default_name|
270 // will be used unless it is empty.
271 //
272 // Note: it's possible for the suggested filename to be empty (e.g.,
273 // file:///). |referrer_charset| is used as one of the charsets
274 // to interpret a raw 8-bit string in the C-D header (after interpreting
275 // it as UTF-8 fails). See the comment for GetFileNameFromCD for more details.
276 string16 GetSuggestedFilename(const GURL& url,
277                               const std::string& content_disposition,
278                               const std::string& referrer_charset,
279                               const string16& default_name);
280 
281 // Checks the given port against a list of ports which are restricted by
282 // default.  Returns true if the port is allowed, false if it is restricted.
283 bool IsPortAllowedByDefault(int port);
284 
285 // Checks the given port against a list of ports which are restricted by the
286 // FTP protocol.  Returns true if the port is allowed, false if it is
287 // restricted.
288 bool IsPortAllowedByFtp(int port);
289 
290 // Checks whether the banned |port| has been overridden by an entry in
291 // |explicitly_allowed_ports|.
292 bool IsPortAllowedByOverride(int port);
293 
294 // Sets the socket |fd| to non-blocking mode.
295 int SetNonBlocking(int fd);
296 
297 // Appends the given part of the original URL to the output string formatted for
298 // the user. The given parsed structure will be updated. The host name formatter
299 // also takes the same accept languages component as ElideURL. |new_parsed| may
300 // be null.
301 //
302 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original
303 // |url|'s spec(); each offset will be adjusted to point at the same logical
304 // place in the result strings after reformatting of the host.  If this isn't
305 // possible because an offset points past the end of the host or into the middle
306 // of a multi-character sequence, the offending offset will be set to
307 // std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
308 void AppendFormattedHost(const GURL& url,
309                          const std::wstring& languages,
310                          std::wstring* output,
311                          url_parse::Parsed* new_parsed,
312                          size_t* offset_for_adjustment);
313 void AppendFormattedHostWithOffsets(
314     const GURL& url,
315     const std::wstring& languages,
316     std::wstring* output,
317     url_parse::Parsed* new_parsed,
318     std::vector<size_t>* offsets_for_adjustment);
319 
320 // Creates a string representation of |url|. The IDN host name may be in Unicode
321 // if |languages| accepts the Unicode representation. |format_types| is a
322 // bitmask of FormatUrlType flags (see above). |unescape_rules| defines how to
323 // clean the URL for human readability. You will generally want
324 // |UnescapeRule::SPACES| for display to the user if you can handle spaces, or
325 // |UnescapeRule::NORMAL| if not. If the path part and the query part seem to
326 // be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
327 //
328 // The last three parameters may be NULL.
329 // |new_parsed| will be set to the parsing parameters of the resultant URL.
330 // |prefix_end| will be the length before the hostname of the resultant URL.
331 //
332 // (|offset[s]_for_adjustment|) specifies one or more offsets into the original
333 // |url|'s spec(); each offset will be modified to reflect changes this function
334 // makes to the output string. For example, if |url| is "http://a:b@c.com/",
335 // |format_types| includes kFormatUrlOmitUsernamePassword, and an offset is 12
336 // (the offset of '.'), then on return the output string will be
337 // "http://c.com/" and the offset will be 8.  If an offset cannot be
338 // successfully adjusted (e.g. because it points into the middle of a component
339 // that was entirely removed, past the end of the string, or into the middle of
340 // an encoding sequence), it will be set to string16::npos.
341 string16 FormatUrl(const GURL& url,
342                    const std::string& languages,
343                    FormatUrlTypes format_types,
344                    UnescapeRule::Type unescape_rules,
345                    url_parse::Parsed* new_parsed,
346                    size_t* prefix_end,
347                    size_t* offset_for_adjustment);
348 string16 FormatUrlWithOffsets(const GURL& url,
349                               const std::string& languages,
350                               FormatUrlTypes format_types,
351                               UnescapeRule::Type unescape_rules,
352                               url_parse::Parsed* new_parsed,
353                               size_t* prefix_end,
354                               std::vector<size_t>* offsets_for_adjustment);
355 
356 // Convenience overload of FormatUrl() that uses
357 // format_types = kFormatUrlOmitAll and unescape_rules = UnescapeRule::SPACES,
358 // the typical flags for "URLs to display to the user".  Be cautious about using
359 // the result for URLs which will be parsed or sent to other applications.
360 inline string16 FormatUrl(const GURL& url, const std::string& languages) {
361   return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
362                    NULL, NULL, NULL);  // No parsed/prefix/offset outputs wanted.
363 }
364 
365 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a
366 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
367 bool CanStripTrailingSlash(const GURL& url);
368 
369 // Strip the portions of |url| that aren't core to the network request.
370 //   - user name / password
371 //   - reference section
372 GURL SimplifyUrlForRequest(const GURL& url);
373 
374 void SetExplicitlyAllowedPorts(const std::string& allowed_ports);
375 
376 class ScopedPortException {  // NOTE(review): presumably allows |port| while alive -- confirm in the .cc.
377  public:
378   explicit ScopedPortException(int port);  // explicit: no implicit int conversion.
379   ~ScopedPortException();
380 
381  private:
382   int port_;  // NOTE(review): presumably the |port| given at construction -- confirm.
383 
384   DISALLOW_COPY_AND_ASSIGN(ScopedPortException);
385 };
386 
387 // Perform a simplistic test to see if IPv6 is supported by trying to create an
388 // IPv6 socket.
389 // TODO(jar): Make test more in-depth as needed.
390 bool IPv6Supported();
391 
392 // Returns true if it can determine that only loopback addresses are configured.
393 // i.e. if only 127.0.0.1 and ::1 are routable.
394 bool HaveOnlyLoopbackAddresses();
395 
396 // IPAddressNumber is used to represent an IP address's numeric value as an
397 // array of bytes, from most significant to least significant. This is the
398 // network byte ordering.
399 //
400 // IPv4 addresses will have length 4, while IPv6 addresses will have length 16.
401 typedef std::vector<unsigned char> IPAddressNumber;
402 
403 static const size_t kIPv4AddressSize = 4;
404 static const size_t kIPv6AddressSize = 16;
405 
406 // Parses an IP address literal (either IPv4 or IPv6) to its numeric value.
407 // Returns true on success and fills |ip_number| with the numeric value.
408 bool ParseIPLiteralToNumber(const std::string& ip_literal,
409                             IPAddressNumber* ip_number);
410 
411 // Converts an IPv4 address to an IPv4-mapped IPv6 address.
412 // For example 192.168.0.1 would be converted to ::ffff:192.168.0.1.
413 IPAddressNumber ConvertIPv4NumberToIPv6Number(
414     const IPAddressNumber& ipv4_number);
415 
416 // Parses an IP block specifier from CIDR notation to an
417 // (IP address, prefix length) pair. Returns true on success and fills
418 // |*ip_number| with the numeric value of the IP address and sets
419 // |*prefix_length_in_bits| with the length of the prefix.
420 //
421 // CIDR notation literals can use either IPv4 or IPv6 literals. Some examples:
422 //
423 //    10.10.3.1/20
424 //    a:b:c::/46
425 //    ::1/128
426 bool ParseCIDRBlock(const std::string& cidr_literal,
427                     IPAddressNumber* ip_number,
428                     size_t* prefix_length_in_bits);
429 
430 // Compares an IP address to see if it falls within the specified IP block.
431 // Returns true if it does, false otherwise.
432 //
433 // The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any
434 // IP address whose |prefix_length_in_bits| most significant bits match
435 // |ip_prefix| will be matched.
436 //
437 // In cases when an IPv4 address is being compared to an IPv6 address prefix
438 // and vice versa, the IPv4 addresses will be converted to IPv4-mapped
439 // (IPv6) addresses.
440 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
441                            const IPAddressNumber& ip_prefix,
442                            size_t prefix_length_in_bits);
443 
444 // Makes a copy of |info|. The dynamically-allocated parts are copied as well.
445 // If |recursive| is true, chained entries via ai_next are copied too.
446 // The copy returned by this function should be freed using
447 // FreeCopyOfAddrinfo(), and NOT freeaddrinfo().
448 struct addrinfo* CreateCopyOfAddrinfo(const struct addrinfo* info,
449                                       bool recursive);
450 
451 // Frees an addrinfo that was created by CreateCopyOfAddrinfo().
452 void FreeCopyOfAddrinfo(struct addrinfo* info);
453 
454 // Returns the port field of the sockaddr in |info|.
455 const uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info);
456 uint16* GetPortFieldFromAddrinfo(struct addrinfo* info);
457 
458 // Returns the value of |info|'s port (in host byte ordering).
459 int GetPortFromAddrinfo(const struct addrinfo* info);
460 
461 // Same except for struct sockaddr.
462 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address,
463                                        socklen_t address_len);
464 int GetPortFromSockaddr(const struct sockaddr* address,
465                         socklen_t address_len);
466 
467 // Returns true if |host| is one of the names (e.g. "localhost") or IP
468 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback.
469 //
470 // Note that this function does not check for IP addresses other than
471 // the above, although other IP addresses may point to the local
472 // machine.
473 bool IsLocalhost(const std::string& host);
474 
475 // struct that is used by GetNetworkList() to represent a network
476 // interface.
477 struct NetworkInterface {
478   NetworkInterface();
479   NetworkInterface(const std::string& name, const IPAddressNumber& address);
480   ~NetworkInterface();
481 
482   std::string name;         // Name of the interface.
483   IPAddressNumber address;  // One address of the interface, as raw bytes.
484 };
485 
486 typedef std::list<NetworkInterface> NetworkInterfaceList;
487 
488 // Returns list of network interfaces except loopback interface. If an
489 // interface has more than one address, a separate entry is added to
490 // the list for each address.
491 // Can be called only on a thread that allows IO.
492 bool GetNetworkList(NetworkInterfaceList* networks);
493 
494 // Private adjustment function called by std::transform which sets the offset
495 // to npos if the offset occurs at or before |component_start|; otherwise the
496 // offset is left unaltered. Exposed here for unit testing.
497 struct ClampComponentOffset {
498   explicit ClampComponentOffset(size_t component_start);
499   size_t operator()(size_t offset);  // Returns the (possibly npos) offset.
500 
501   const size_t component_start;  // Boundary that offsets are compared against.
502 };
503 
504 }  // namespace net
505 
506 #endif  // NET_BASE_NET_UTIL_H_
507