1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 use std::cmp;
10 use std::fmt::{self, Formatter};
11 use std::net::{Ipv4Addr, Ipv6Addr};
12
13 use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14 #[cfg(feature = "serde")]
15 use serde::{Deserialize, Serialize};
16
17 use crate::parser::{ParseError, ParseResult};
18
/// Crate-internal, `Copy` summary of a host.
///
/// Unlike [`Host`], the `Domain` variant carries no string; only IP addresses
/// are stored inline. (Presumably the domain text is kept by whoever owns this
/// value — confirm against the users of this type.)
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host. The `From<Host<String>>` conversion yields this for an empty domain.
    None,
    /// A domain host; the domain text itself is not stored in this variant.
    Domain,
    /// An IPv4 address host.
    Ipv4(Ipv4Addr),
    /// An IPv6 address host.
    Ipv6(Ipv6Addr),
}
27
28 impl From<Host<String>> for HostInternal {
from(host: Host<String>) -> HostInternal29 fn from(host: Host<String>) -> HostInternal {
30 match host {
31 Host::Domain(ref s) if s.is_empty() => HostInternal::None,
32 Host::Domain(_) => HostInternal::Domain,
33 Host::Ipv4(address) => HostInternal::Ipv4(address),
34 Host::Ipv6(address) => HostInternal::Ipv6(address),
35 }
36 }
37 }
38
/// The host name of a URL.
///
/// The type parameter `S` is the string type holding a domain name and
/// defaults to `String`; this file also provides methods for `Host<&str>`.
///
/// `PartialEq` is not derived: it is implemented by hand further down in this
/// file so that hosts with different string types can be compared.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
61
62 impl<'a> Host<&'a str> {
63 /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> Host<String>64 pub fn to_owned(&self) -> Host<String> {
65 match *self {
66 Host::Domain(domain) => Host::Domain(domain.to_owned()),
67 Host::Ipv4(address) => Host::Ipv4(address),
68 Host::Ipv6(address) => Host::Ipv6(address),
69 }
70 }
71 }
72
73 impl Host<String> {
74 /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75 ///
76 /// <https://url.spec.whatwg.org/#host-parsing>
parse(input: &str) -> Result<Self, ParseError>77 pub fn parse(input: &str) -> Result<Self, ParseError> {
78 if input.starts_with('[') {
79 if !input.ends_with(']') {
80 return Err(ParseError::InvalidIpv6Address);
81 }
82 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83 }
84 let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85 let domain = idna::domain_to_ascii(&domain)?;
86 if domain.is_empty() {
87 return Err(ParseError::EmptyHost);
88 }
89
90 let is_invalid_domain_char = |c| {
91 matches!(
92 c,
93 '\0' | '\t'
94 | '\n'
95 | '\r'
96 | ' '
97 | '#'
98 | '%'
99 | '/'
100 | ':'
101 | '<'
102 | '>'
103 | '?'
104 | '@'
105 | '['
106 | '\\'
107 | ']'
108 | '^'
109 )
110 };
111
112 if domain.find(is_invalid_domain_char).is_some() {
113 Err(ParseError::InvalidDomainCharacter)
114 } else if let Some(address) = parse_ipv4addr(&domain)? {
115 Ok(Host::Ipv4(address))
116 } else {
117 Ok(Host::Domain(domain))
118 }
119 }
120
121 // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
parse_opaque(input: &str) -> Result<Self, ParseError>122 pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
123 if input.starts_with('[') {
124 if !input.ends_with(']') {
125 return Err(ParseError::InvalidIpv6Address);
126 }
127 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
128 }
129
130 let is_invalid_host_char = |c| {
131 matches!(
132 c,
133 '\0' | '\t'
134 | '\n'
135 | '\r'
136 | ' '
137 | '#'
138 | '/'
139 | ':'
140 | '<'
141 | '>'
142 | '?'
143 | '@'
144 | '['
145 | '\\'
146 | ']'
147 | '^'
148 )
149 };
150
151 if input.find(is_invalid_host_char).is_some() {
152 Err(ParseError::InvalidDomainCharacter)
153 } else {
154 Ok(Host::Domain(
155 utf8_percent_encode(input, CONTROLS).to_string(),
156 ))
157 }
158 }
159 }
160
161 impl<S: AsRef<str>> fmt::Display for Host<S> {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result162 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
163 match *self {
164 Host::Domain(ref domain) => domain.as_ref().fmt(f),
165 Host::Ipv4(ref addr) => addr.fmt(f),
166 Host::Ipv6(ref addr) => {
167 f.write_str("[")?;
168 write_ipv6(addr, f)?;
169 f.write_str("]")
170 }
171 }
172 }
173 }
174
175 impl<S, T> PartialEq<Host<T>> for Host<S>
176 where
177 S: PartialEq<T>,
178 {
eq(&self, other: &Host<T>) -> bool179 fn eq(&self, other: &Host<T>) -> bool {
180 match (self, other) {
181 (Host::Domain(a), Host::Domain(b)) => a == b,
182 (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
183 (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
184 (_, _) => false,
185 }
186 }
187 }
188
write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result189 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
190 let segments = addr.segments();
191 let (compress_start, compress_end) = longest_zero_sequence(&segments);
192 let mut i = 0;
193 while i < 8 {
194 if i == compress_start {
195 f.write_str(":")?;
196 if i == 0 {
197 f.write_str(":")?;
198 }
199 if compress_end < 8 {
200 i = compress_end;
201 } else {
202 break;
203 }
204 }
205 write!(f, "{:x}", segments[i as usize])?;
206 if i < 7 {
207 f.write_str(":")?;
208 }
209 i += 1;
210 }
211 Ok(())
212 }
213
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
/// Locates the first longest run of consecutive zero pieces.
///
/// Returns `(start, end)` with `end` exclusive when that run is at least two
/// pieces long, and the sentinel `(-1, -2)` otherwise.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    // Best run seen so far as (start, length); only a strictly longer run
    // replaces it, so the earliest of several equally long runs wins.
    let mut best: Option<(isize, isize)> = None;
    let mut run_start: Option<isize> = None;

    // A non-zero terminator is appended so that a run reaching the last piece
    // is closed by the same code path as any other run.
    let terminated = pieces.iter().copied().chain(std::iter::once(1));
    for (index, piece) in terminated.enumerate() {
        let index = index as isize;
        if piece == 0 {
            if run_start.is_none() {
                run_start = Some(index);
            }
        } else if let Some(begin) = run_start.take() {
            let run_length = index - begin;
            if best.map_or(true, |(_, best_length)| run_length > best_length) {
                best = Some((begin, run_length));
            }
        }
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    match best {
        Some((begin, run_length)) if run_length >= 2 => (begin, begin + run_length),
        _ => (-1, -2),
    }
}
249
/// Parses one dot-separated part of a candidate IPv4 address.
///
/// A `0x` / `0X` prefix selects hexadecimal, any other leading `0` followed by
/// at least one more character selects octal, and everything else is decimal.
///
/// Returns:
/// * `Ok(Some(n))` — the part is a number that fits in a `u32`; if nothing is
///   left after the radix prefix (e.g. `"0x"`) the value is `0`.
/// * `Ok(None)` — the part contains a character that is not a digit of the
///   selected radix, i.e. it is not a number at all.
/// * `Err(())` — the part is a well-formed number that overflows `u32`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(input: &str) -> Result<Option<u32>, ()> {
    let (digits, radix) = if input.starts_with("0x") || input.starts_with("0X") {
        (&input[2..], 16)
    } else if input.len() >= 2 && input.starts_with('0') {
        (&input[1..], 8)
    } else {
        (input, 10)
    };

    // Validate the digits up front so that the only way `from_str_radix` can
    // still fail below is overflow. This also rejects a leading `+`, which
    // `from_str_radix` would otherwise accept.
    if !digits.chars().all(|c| c.is_digit(radix)) {
        return Ok(None);
    }
    if digits.is_empty() {
        return Ok(Some(0));
    }
    u32::from_str_radix(digits, radix).map(Some).map_err(|_| ())
}
289
290 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>>291 fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
292 if input.is_empty() {
293 return Ok(None);
294 }
295 let mut parts: Vec<&str> = input.split('.').collect();
296 if parts.last() == Some(&"") {
297 parts.pop();
298 }
299 if parts.len() > 4 {
300 return Ok(None);
301 }
302 let mut numbers: Vec<u32> = Vec::new();
303 let mut overflow = false;
304 for part in parts {
305 if part.is_empty() {
306 return Ok(None);
307 }
308 match parse_ipv4number(part) {
309 Ok(Some(n)) => numbers.push(n),
310 Ok(None) => return Ok(None),
311 Err(()) => overflow = true,
312 };
313 }
314 if overflow {
315 return Err(ParseError::InvalidIpv4Address);
316 }
317 let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
318 // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
319 if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
320 return Err(ParseError::InvalidIpv4Address);
321 }
322 if numbers.iter().any(|x| *x > 255) {
323 return Err(ParseError::InvalidIpv4Address);
324 }
325 for (counter, n) in numbers.iter().enumerate() {
326 ipv4 += n << (8 * (3 - counter as u32))
327 }
328 Ok(Some(Ipv4Addr::from(ipv4)))
329 }
330
/// Parses the textual form of an IPv6 address, without the surrounding `[` `]`.
///
/// Accepts up to eight colon-separated groups of one to four hexadecimal
/// digits, at most one `::` compression, and an optional trailing dotted
/// decimal IPv4 part that fills the last two pieces.
///
/// Every failure is reported as `ParseError::InvalidIpv6Address`.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    // Set once a `.` is seen after a group of digits: the rest is dotted IPv4.
    let mut is_ip_v4 = false;
    // The eight 16-bit pieces of the address being built.
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index of the next piece to fill.
    let mut piece_pointer = 0;
    // Piece index at which `::` was seen, if any.
    let mut compress_pointer = None;
    // Byte offset of the next unread input byte.
    let mut i = 0;

    // Rejecting anything shorter than two bytes up front also makes the
    // `input[1]` access below safe.
    if len < 2 {
        return Err(ParseError::InvalidIpv6Address);
    }

    // A leading ':' is only valid as the first half of a leading "::".
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address);
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    while i < len {
        // More input although all eight pieces are already filled.
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // A ':' at the start of a group is the second colon of "::";
        // only one compression is allowed.
        if input[i] == b':' {
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address);
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue;
        }
        // Read up to four hexadecimal digits into `value`.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                }
                None => break,
            }
        }
        if i < len {
            match input[i] {
                b'.' => {
                    // The digits just read were really the first IPv4 number:
                    // there must have been at least one, and the IPv4 part
                    // needs two free pieces. Rewind so they are re-read as decimal.
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    i = start;
                    if piece_pointer > 6 {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    is_ip_v4 = true;
                }
                b':' => {
                    // Group separator; the address may not end right after it.
                    i += 1;
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                }
                _ => return Err(ParseError::InvalidIpv6Address),
            }
        }
        if is_ip_v4 {
            break;
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    if is_ip_v4 {
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // Count of dotted decimal numbers consumed so far (exactly four are required).
        let mut numbers_seen = 0;
        while i < len {
            // Every number after the first must be preceded by a '.',
            // and there may be no more than four numbers.
            if numbers_seen > 0 {
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
                    i += 1
                } else {
                    return Err(ParseError::InvalidIpv6Address);
                }
            }

            // Accumulate one decimal number in the range 0..=255.
            let mut ipv4_piece = None;
            while i < len {
                let digit = match input[i] {
                    c @ b'0'..=b'9' => c - b'0',
                    _ => break,
                };
                match ipv4_piece {
                    None => ipv4_piece = Some(digit as u16),
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
                    Some(ref mut v) => {
                        *v = *v * 10 + digit as u16;
                        if *v > 255 {
                            return Err(ParseError::InvalidIpv6Address);
                        }
                    }
                }
                i += 1;
            }

            // Two IPv4 numbers are packed into each 16-bit piece, high byte first.
            // No digit at all where a number was expected is an error.
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
                pieces[piece_pointer] * 0x100 + v
            } else {
                return Err(ParseError::InvalidIpv6Address);
            };
            numbers_seen += 1;

            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }

        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address);
        }
    }

    // Trailing input that nothing above consumed.
    if i < len {
        return Err(ParseError::InvalidIpv6Address);
    }

    match compress_pointer {
        Some(compress_pointer) => {
            // Move the pieces that were parsed after "::" to the end of the
            // array, working backwards, so the zeros end up in the gap.
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        _ => {
            // Without "::" all eight pieces must have been given explicitly.
            if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address);
            }
        }
    }
    Ok(Ipv6Addr::new(
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
    ))
}
481