1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef NET_BASE_NET_UTIL_H_
6 #define NET_BASE_NET_UTIL_H_
7
8 #include "build/build_config.h"
9
10 #if defined(OS_WIN)
11 #include <windows.h>
12 #include <ws2tcpip.h>
13 #elif defined(OS_POSIX)
14 #include <sys/types.h>
15 #include <sys/socket.h>
16 #endif
17
18 #include <string>
19 #include <vector>
20
21 #include "base/basictypes.h"
22 #include "base/strings/string16.h"
23 #include "base/strings/utf_offset_string_conversions.h"
24 #include "net/base/address_family.h"
25 #include "net/base/escape.h"
26 #include "net/base/net_export.h"
27 #include "net/base/network_change_notifier.h"
28
29 class GURL;
30
31 namespace base {
32 class Time;
33 }
34
35 namespace url {
36 struct CanonHostInfo;
37 struct Parsed;
38 }
39
40 namespace net {
41
42 // Flag types used by FormatUrl() to specify handling of certain parts of the URL.
43 typedef uint32 FormatUrlType;  // Type of the individual kFormatUrl* constants.
44 typedef uint32 FormatUrlTypes;  // Type of FormatUrl()'s |format_types| bitmask.
45
46 // IPAddressNumber is used to represent an IP address's numeric value as an
47 // array of bytes, from most significant to least significant. This is the
48 // network byte ordering.
49 //
50 // IPv4 addresses have length 4, whereas IPv6 addresses have length 16.
51 typedef std::vector<unsigned char> IPAddressNumber;
52 typedef std::vector<IPAddressNumber> IPAddressList;  // A list of IP addresses.
53
54 static const size_t kIPv4AddressSize = 4;  // Length of an IPv4 IPAddressNumber.
55 static const size_t kIPv6AddressSize = 16;  // Length of an IPv6 IPAddressNumber.
56 #if defined(OS_WIN)
57 // Bluetooth address size. Windows Bluetooth is supported via winsock, so this
58 static const size_t kBluetoothAddressSize = 6;  // is only defined for OS_WIN.
59 #endif
60
61 // Nothing is omitted.
62 NET_EXPORT extern const FormatUrlType kFormatUrlOmitNothing;
63
64 // If set, any username and password are removed.
65 NET_EXPORT extern const FormatUrlType kFormatUrlOmitUsernamePassword;
66
67 // If the scheme is 'http://', it is removed.
68 NET_EXPORT extern const FormatUrlType kFormatUrlOmitHTTP;
69
70 // Omits the path if it is just a slash and there is no query or ref. This is
71 // meaningful for non-file "standard" URLs.
72 NET_EXPORT extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
73
74 // Convenience value for omitting all unnecessary parts of the URL.
75 NET_EXPORT extern const FormatUrlType kFormatUrlOmitAll;
76
77 // Returns the number of explicitly allowed ports. Intended for testing only.
78 NET_EXPORT_PRIVATE extern size_t GetCountOfExplicitlyAllowedPorts();
79
80 // Splits an input of the form <host>[":"<port>] into its constituent parts.
81 // Saves the result into |*host| and |*port|. If the input did not have
82 // the optional port, sets |*port| to -1.
83 // Returns true if the parsing was successful, false otherwise.
84 // The returned host is NOT canonicalized, and may be invalid. If <host> is
85 // an IPv6 literal address, the returned host includes the square brackets.
86 NET_EXPORT bool ParseHostAndPort(  // Input given as an iterator range.
87 std::string::const_iterator host_and_port_begin,
88 std::string::const_iterator host_and_port_end,
89 std::string* host,
90 int* port);
91 NET_EXPORT bool ParseHostAndPort(  // Input given as a whole string.
92 const std::string& host_and_port,
93 std::string* host,
94 int* port);
95
96 // Returns a host:port string for |url| (compare GetHostAndOptionalPort()).
97 NET_EXPORT std::string GetHostAndPort(const GURL& url);
98
99 // Returns a host[:port] string for |url|, where the port is omitted
100 // if it is the default port for the URL's scheme.
101 NET_EXPORT_PRIVATE std::string GetHostAndOptionalPort(const GURL& url);
102
103 // Returns true if |hostname| contains a non-registerable or non-assignable
104 // domain name (e.g. a gTLD that has not been assigned by IANA) or an IP address
105 // that falls in an IANA-reserved range.
106 NET_EXPORT bool IsHostnameNonUnique(const std::string& hostname);
107
108 // Returns true if the IP address |address| is in a range reserved by the IANA.
109 // Works with both IPv4 and IPv6 addresses, and only compares against a given
110 // protocol's reserved ranges.
111 NET_EXPORT bool IsIPAddressReserved(const IPAddressNumber& address);
112
113 // Convenience struct for when you need a |struct sockaddr| plus its backing storage.
114 struct SockaddrStorage {
SockaddrStorageSockaddrStorage115 SockaddrStorage() : addr_len(sizeof(addr_storage)),  // Length starts at full capacity.
116 addr(reinterpret_cast<struct sockaddr*>(&addr_storage)) {}  // |addr| aliases |addr_storage|.
117 struct sockaddr_storage addr_storage;  // Buffer that |addr| points into.
118 socklen_t addr_len;  // Initialized to sizeof(addr_storage).
119 struct sockaddr* const addr;  // NOTE(review): a memberwise copy would still point at the source object's |addr_storage| - avoid copying.
120 };
121
122 // Extracts the IP address and port portions of a sockaddr into |*address|,
123 // |*address_len| and |*port|. |port| is optional and is not filled in if NULL.
124 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr,
125 socklen_t sock_addr_len,
126 const unsigned char** address,
127 size_t* address_len,
128 uint16* port);  // NOTE(review): the bool result is undocumented; presumably true on success - confirm.
129
130 // Returns the string representation of the IP address stored at |address|.
131 // For example: "192.168.0.1" or "::1".
132 NET_EXPORT std::string IPAddressToString(const uint8* address,
133 size_t address_len);
134
135 // Returns the string representation of the IP address at |address| together with
136 // |port|. For example: "192.168.0.1:99" or "[::1]:80".
137 NET_EXPORT std::string IPAddressToStringWithPort(const uint8* address,
138 size_t address_len,
139 uint16 port);
140
141 // Same as IPAddressToString() but takes a sockaddr. The output does not include
142 // the IPv6 scope ID.
143 NET_EXPORT std::string NetAddressToString(const struct sockaddr* sa,
144 socklen_t sock_addr_len);
145
146 // Same as IPAddressToStringWithPort() but takes a sockaddr. The output does not
147 // include the IPv6 scope ID.
148 NET_EXPORT std::string NetAddressToStringWithPort(const struct sockaddr* sa,
149 socklen_t sock_addr_len);
150
151 // Same as the pointer/length IPAddressToString() but takes an IPAddressNumber.
152 NET_EXPORT std::string IPAddressToString(const IPAddressNumber& addr);
153
154 // Same as the pointer/length IPAddressToStringWithPort() but takes an IPAddressNumber.
155 NET_EXPORT std::string IPAddressToStringWithPort(
156 const IPAddressNumber& addr, uint16 port);
157
158 // Returns |addr| as a sequence of bytes in network byte order, held in a std::string.
159 NET_EXPORT std::string IPAddressToPackedString(const IPAddressNumber& addr);
160
161 // Returns the hostname of the current system, or an empty string on failure.
162 NET_EXPORT std::string GetHostName();
163
164 // Extracts the unescaped username and password from |url|, saving the results
165 // into |*username| and |*password| respectively.
166 NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
167 base::string16* username,
168 base::string16* password);
169
170 // Returns the host from |url| or, if the host is empty, the full spec of |url|.
171 NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
172
173 // Returns the value of the HTTP response header named |name|. |headers|
174 // should be in the format that URLRequest::GetResponseHeaders() returns.
175 // Returns the empty string if the header is not found.
176 NET_EXPORT std::string GetSpecificHeader(const std::string& headers,
177 const std::string& name);
178
179 // Converts the given host name to Unicode characters. This can be called for
180 // any host name; if the input is not IDN or is invalid in some way, we'll just
181 // return the ASCII source so it is still usable.
182 //
183 // The input should be the canonicalized ASCII host name from GURL. This
184 // function does NOT accept UTF-8!
185 //
186 // |languages| is a comma separated list of ISO 639 language codes. It
187 // is used to determine whether a hostname is 'comprehensible' to a user
188 // who understands the languages listed. |host| will be converted to a
189 // human-readable form (Unicode) ONLY when each component of |host| is
190 // regarded as 'comprehensible'. Script-mixing is not allowed except that
191 // Latin letters in the ASCII range can be mixed with a limited set of
192 // script-language pairs (currently Han, Kana and Hangul for zh, ja and ko).
193 // When |languages| is empty, even that mixing is not allowed.
194 NET_EXPORT base::string16 IDNToUnicode(const std::string& host,
195 const std::string& languages);
196
197 // Canonicalizes |host| and returns the result. Also fills |*host_info| with
198 // IP address information. |host_info| must not be NULL.
199 NET_EXPORT std::string CanonicalizeHost(const std::string& host,
200 url::CanonHostInfo* host_info);
201
202 // Returns true if |host| is not an IP address and is compliant with a set of
203 // rules based on RFC 1738 and tweaked to be compatible with the real world.
204 // The rules are:
205 // * One or more components separated by '.'
206 // * Each component begins with an alphanumeric character or '-'
207 // * Each component contains only alphanumeric characters and '-' or '_'
208 // * Each component ends with an alphanumeric character or '-'
209 // * The last component begins with an alphanumeric character
210 // * Optional trailing dot after last component (means "treat as FQDN")
211 // If |desired_tld| is non-NULL, the host will only be considered invalid if
212 // appending it as a trailing component still results in an invalid host. This
213 // helps us avoid marking as "invalid" user attempts to open, say, "www.-9.com"
214 // by typing -, 9, <ctrl>+<enter>.
215 //
216 // NOTE: You should only pass in hosts that have been returned from
217 // CanonicalizeHost(), or you may not get accurate results.
218 NET_EXPORT bool IsCanonicalizedHostCompliant(const std::string& host,
219 const std::string& desired_tld);  // NOTE(review): a reference cannot be NULL; "non-NULL" presumably means non-empty - confirm.
220
221 // GetDirectoryListingHeader() and GetDirectoryListingEntry() produce the HTML
222 // snippets for a directory listing. The return values of both are in UTF-8.
223 NET_EXPORT std::string GetDirectoryListingHeader(const base::string16& title);
224
225 // Given the name of a file in a directory (ftp or local) and
226 // other information (is_dir, size, modification time), returns
227 // the HTML snippet that adds the entry for the file to the directory listing.
228 // Currently, it's a script tag containing a call to a JavaScript function
229 // |addRow|.
230 //
231 // |name| is the file name to be displayed. |raw_bytes| will be used
232 // as the actual target of the link (so for example, ftp links should use
233 // the server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded
234 // |name| will be used.
235 //
236 // Both |name| and |raw_bytes| are escaped internally.
237 NET_EXPORT std::string GetDirectoryListingEntry(const base::string16& name,
238 const std::string& raw_bytes,
239 bool is_dir, int64 size,
240 base::Time modified);
241
242 // If |text| starts with "www." that prefix is removed; otherwise |text| is
243 // returned unmodified.
244 NET_EXPORT base::string16 StripWWW(const base::string16& text);
245
246 // Runs the host of |url| through StripWWW() and returns it. |url| must be valid.
247 NET_EXPORT base::string16 StripWWWFromHost(const GURL& url);
248
249 // Checks |port| against the list of ports which are restricted by default.
250 // Returns true if |port| is allowed, false if it is restricted.
251 NET_EXPORT bool IsPortAllowedByDefault(int port);
252
253 // Checks |port| against the list of ports which are restricted by the FTP
254 // protocol. Returns true if |port| is allowed, false if it is restricted.
255 NET_EXPORT_PRIVATE bool IsPortAllowedByFtp(int port);
256
257 // Checks whether the banned |port| has been overridden by an entry in
258 // |explicitly_allowed_ports_|.
259 NET_EXPORT_PRIVATE bool IsPortAllowedByOverride(int port);
260
261 // Sets the socket |fd| to non-blocking mode.
262 NET_EXPORT int SetNonBlocking(int fd);  // NOTE(review): meaning of the int result is undocumented - confirm.
263
264 // Formats the host in |url| and appends it to |*output|. The host formatter
265 // takes the same accept-languages component (|languages|) as ElideURL().
266 NET_EXPORT void AppendFormattedHost(const GURL& url,
267 const std::string& languages,
268 base::string16* output);
269
270 // Creates a string representation of |url|. The IDN host name may be in Unicode
271 // if |languages| accepts the Unicode representation. |format_types| is a bitmask
272 // of the kFormatUrl* flags; see their definitions for details. |unescape_rules|
273 // defines how to clean the URL for human readability. You will generally want
274 // |UnescapeRule::SPACES| for display to the user if you can handle spaces, or
275 // |UnescapeRule::NORMAL| if not. If the path part and the query part seem to be
276 // encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
277 //
278 // The last three parameters may be NULL.
279 //
280 // |new_parsed| will be set to the parsing parameters of the resultant URL.
281 //
282 // |prefix_end| will be the length before the hostname of the resultant URL.
283 //
284 // |offset[s]_for_adjustment| specifies one or more offsets into the original
285 // URL, representing insertion or selection points between characters: if the
286 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
287 // between the scheme and the host, and offset 15 is after the end of the URL.
288 // Valid input offsets range from 0 to the length of the input URL string. On
289 // exit, each offset will have been modified to reflect any changes made to the
290 // output string. For example, if |url| is "http://a:b@c.com/", |format_types|
291 // includes kFormatUrlOmitUsernamePassword, and an offset is 12 (pointing between
292 // 'c' and '.'), then on return the output string will be "http://c.com/" and the
293 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because
294 // it points into the middle of a component that was entirely removed or into
295 // the middle of an encoding sequence), it will be set to base::string16::npos.
296 // For consistency, if an input offset points between the scheme and the
297 // username/password, and both are removed, on output this offset will be 0
298 // rather than npos; this means that offsets at the starts and ends of removed
299 // components are always transformed the same way regardless of what other
300 // components are adjacent.
301 NET_EXPORT base::string16 FormatUrl(const GURL& url,
302 const std::string& languages,
303 FormatUrlTypes format_types,
304 UnescapeRule::Type unescape_rules,
305 url::Parsed* new_parsed,
306 size_t* prefix_end,
307 size_t* offset_for_adjustment);
308 NET_EXPORT base::string16 FormatUrlWithOffsets(
309 const GURL& url,
310 const std::string& languages,
311 FormatUrlTypes format_types,
312 UnescapeRule::Type unescape_rules,
313 url::Parsed* new_parsed,
314 size_t* prefix_end,
315 std::vector<size_t>* offsets_for_adjustment);
316 // Like FormatUrl() and FormatUrlWithOffsets(), except it takes |adjustments|
317 // rather than |offset[s]_for_adjustment|. |adjustments| will be set to reflect
318 // all the transformations that happened to |url| to convert it into the returned
319 // value.
320 NET_EXPORT base::string16 FormatUrlWithAdjustments(
321 const GURL& url,
322 const std::string& languages,
323 FormatUrlTypes format_types,
324 UnescapeRule::Type unescape_rules,
325 url::Parsed* new_parsed,
326 size_t* prefix_end,
327 base::OffsetAdjuster::Adjustments* adjustments);
328
329 // Convenience overload of FormatUrl() that passes format_types = kFormatUrlOmitAll,
330 // unescape_rules = UnescapeRule::SPACES and NULL for the three optional
331 // out-parameters. This is the typical set of flags for "URLs to display to the user".
332 // Be cautious about using this for URLs which will be parsed or sent to other applications.
FormatUrl(const GURL & url,const std::string & languages)333 inline base::string16 FormatUrl(const GURL& url, const std::string& languages) {
334 return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
335 NULL, NULL, NULL);
336 }
337
338 // Returns whether FormatUrl() would strip a trailing slash from |url|, given
339 // format flags that include kFormatUrlOmitTrailingSlashOnBareHostname.
340 NET_EXPORT bool CanStripTrailingSlash(const GURL& url);
341
342 // Returns |url| with the portions that aren't core to the network request removed:
343 // - user name / password
344 // - reference section
345 NET_EXPORT_PRIVATE GURL SimplifyUrlForRequest(const GURL& url);
346
347 NET_EXPORT void SetExplicitlyAllowedPorts(const std::string& allowed_ports);  // NOTE(review): undocumented; the expected format of |allowed_ports| is not stated here - confirm.
348
349 class NET_EXPORT ScopedPortException {  // NOTE(review): undocumented; presumably allows |port| while an instance is alive - confirm.
350 public:
351 explicit ScopedPortException(int port);
352 ~ScopedPortException();
353
354 private:
355 int port_;  // Presumably the port passed to the constructor - confirm.
356
357 DISALLOW_COPY_AND_ASSIGN(ScopedPortException);
358 };
359
360 // Returns true if it can determine that only loopback addresses are configured,
361 // i.e. if only 127.0.0.1 and ::1 are routable.
362 // Returns false otherwise, including when it cannot determine this.
363 bool HaveOnlyLoopbackAddresses();
364
365 // Returns the AddressFamily of |address|.
366 NET_EXPORT_PRIVATE AddressFamily GetAddressFamily(
367 const IPAddressNumber& address);
368
369 // Maps |address_family| to one of AF_INET, AF_INET6 or AF_UNSPEC.
370 NET_EXPORT_PRIVATE int ConvertAddressFamily(AddressFamily address_family);
371
372 // Parses an IP address literal (either IPv4 or IPv6) to its numeric value.
373 // Returns true on success and fills |*ip_number| with the numeric value.
374 NET_EXPORT_PRIVATE bool ParseIPLiteralToNumber(const std::string& ip_literal,
375 IPAddressNumber* ip_number);
376
377 // Converts the IPv4 address |ipv4_number| to an IPv4-mapped IPv6 address.
378 // For example, 192.168.0.1 would be converted to ::ffff:192.168.0.1.
379 NET_EXPORT_PRIVATE IPAddressNumber ConvertIPv4NumberToIPv6Number(
380 const IPAddressNumber& ipv4_number);
381
382 // Returns true if and only if |address| is an IPv4-mapped IPv6 address.
383 NET_EXPORT_PRIVATE bool IsIPv4Mapped(const IPAddressNumber& address);
384
385 // Converts an IPv4-mapped IPv6 address to an IPv4 address. Should only be
386 // called on IPv4-mapped IPv6 addresses (see IsIPv4Mapped()).
387 NET_EXPORT_PRIVATE IPAddressNumber ConvertIPv4MappedToIPv4(
388 const IPAddressNumber& address);
389
390 // Parses an IP block specifier from CIDR notation to an
391 // (IP address, prefix length) pair. Returns true on success and fills
392 // |*ip_number| with the numeric value of the IP address and sets
393 // |*prefix_length_in_bits| to the length of the prefix.
394 //
395 // CIDR notation literals can use either IPv4 or IPv6 literals. Some examples:
396 //
397 // 10.10.3.1/20
398 // a:b:c::/46
399 // ::1/128
400 NET_EXPORT bool ParseCIDRBlock(const std::string& cidr_literal,
401 IPAddressNumber* ip_number,
402 size_t* prefix_length_in_bits);
403
404 // Checks whether the IP address |ip_number| falls within the specified IP block.
405 // Returns true if it does, false otherwise.
406 //
407 // The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any
408 // IP address whose |prefix_length_in_bits| most significant bits match
409 // |ip_prefix| will be matched.
410 //
411 // In cases when an IPv4 address is being compared to an IPv6 address prefix
412 // and vice versa, the IPv4 addresses will be converted to IPv4-mapped
413 // (IPv6) addresses.
414 NET_EXPORT_PRIVATE bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
415 const IPAddressNumber& ip_prefix,
416 size_t prefix_length_in_bits);
417
418 // Returns a pointer to the port field of the sockaddr |address|.
419 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address,
420 socklen_t address_len);
421 // Returns the value of the port in the sockaddr |address| (in host byte ordering).
422 NET_EXPORT_PRIVATE int GetPortFromSockaddr(const struct sockaddr* address,
423 socklen_t address_len);
424
425 // Returns true if |host| is one of the names (e.g. "localhost") or IP
426 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback.
427 //
428 // Note that this function does not check for IP addresses other than
429 // the above, although other IP addresses may also point to the local
430 // machine.
431 NET_EXPORT_PRIVATE bool IsLocalhost(const std::string& host);
432
433 // Struct that is used by GetNetworkList() to represent a network
434 // interface. Each instance carries a single |address| of that interface.
435 struct NET_EXPORT NetworkInterface {
436 NetworkInterface();
437 NetworkInterface(const std::string& name,
438 const std::string& friendly_name,
439 uint32 interface_index,
440 NetworkChangeNotifier::ConnectionType type,
441 const IPAddressNumber& address,
442 size_t network_prefix);
443 ~NetworkInterface();
444
445 std::string name;
446 std::string friendly_name; // Same as |name| on non-Windows.
447 uint32 interface_index; // Always 0 on Android.
448 NetworkChangeNotifier::ConnectionType type;
449 IPAddressNumber address;  // One address of the interface.
450 size_t network_prefix;  // NOTE(review): presumably the prefix length in bits - confirm.
451 };
452
453 typedef std::vector<NetworkInterface> NetworkInterfaceList;  // List type filled in by GetNetworkList().
454
455 // Policy settings to include/exclude network interfaces. NOTE(review): values look like bit flags for GetNetworkList()'s |policy| - confirm.
456 enum HostAddressSelectionPolicy {
457 INCLUDE_HOST_SCOPE_VIRTUAL_INTERFACES = 0x0,
458 EXCLUDE_HOST_SCOPE_VIRTUAL_INTERFACES = 0x1,
459 // Include the temp address only when the interface has both permanent and
460 // temp addresses.
461 INCLUDE_ONLY_TEMP_IPV6_ADDRESS_IF_POSSIBLE = 0x2,
462 };
463
464 // Fills |*networks| with the list of network interfaces, excluding the loopback
465 // interface. If an interface has more than one address, a separate entry is
466 // added to the list for each address.
467 // Can be called only on a thread that allows IO.
468 NET_EXPORT bool GetNetworkList(NetworkInterfaceList* networks,
469 int policy);
470
471 // General category of the IEEE 802.11 (wifi) physical layer operating mode.
472 enum WifiPHYLayerProtocol {
473 // No wifi support or no associated AP.
474 WIFI_PHY_LAYER_PROTOCOL_NONE,
475 // Obsolete modes introduced by the original 802.11, e.g. IR, FHSS.
476 WIFI_PHY_LAYER_PROTOCOL_ANCIENT,
477 // 802.11a, OFDM-based rates.
478 WIFI_PHY_LAYER_PROTOCOL_A,
479 // 802.11b, DSSS or HR DSSS.
480 WIFI_PHY_LAYER_PROTOCOL_B,
481 // 802.11g, same rates as 802.11a but compatible with 802.11b.
482 WIFI_PHY_LAYER_PROTOCOL_G,
483 // 802.11n, HT rates.
484 WIFI_PHY_LAYER_PROTOCOL_N,
485 // Unclassified mode or failure to identify.
486 WIFI_PHY_LAYER_PROTOCOL_UNKNOWN
487 };
488
489 // Characterizes the PHY mode of the currently associated access point.
490 // Currently only available on OS_WIN.
491 NET_EXPORT WifiPHYLayerProtocol GetWifiPHYLayerProtocol();
492
493 // Returns the number of matching initial bits between the addresses |a1| and |a2|.
494 unsigned CommonPrefixLength(const IPAddressNumber& a1,
495 const IPAddressNumber& a2);
496
497 // Computes and returns the number of leading 1-bits in |mask|.
498 unsigned MaskPrefixLength(const IPAddressNumber& mask);
499
500 // Differentiated Services Code Point values.
501 // See http://tools.ietf.org/html/rfc2474 for details.
502 enum DiffServCodePoint {
503 DSCP_NO_CHANGE = -1,  // NOTE(review): presumably "leave the current DSCP as is" - confirm.
504 DSCP_FIRST = DSCP_NO_CHANGE,  // Alias for the smallest enumerator value.
505 DSCP_DEFAULT = 0, // Same as DSCP_CS0
506 DSCP_CS0 = 0, // The default
507 DSCP_CS1 = 8, // Bulk/background traffic
508 DSCP_AF11 = 10,
509 DSCP_AF12 = 12,
510 DSCP_AF13 = 14,
511 DSCP_CS2 = 16,
512 DSCP_AF21 = 18,
513 DSCP_AF22 = 20,
514 DSCP_AF23 = 22,
515 DSCP_CS3 = 24,
516 DSCP_AF31 = 26,
517 DSCP_AF32 = 28,
518 DSCP_AF33 = 30,
519 DSCP_CS4 = 32,
520 DSCP_AF41 = 34, // Video
521 DSCP_AF42 = 36, // Video
522 DSCP_AF43 = 38, // Video
523 DSCP_CS5 = 40, // Video
524 DSCP_EF = 46, // Voice
525 DSCP_CS6 = 48, // Voice
526 DSCP_CS7 = 56, // Control messages
527 DSCP_LAST = DSCP_CS7  // Alias for the largest enumerator value.
528 };
529
530 } // namespace net
531
532 #endif // NET_BASE_NET_UTIL_H_
533