• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2013-2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
8 
9 use std::cmp;
10 use std::fmt::{self, Formatter};
11 use std::net::{Ipv4Addr, Ipv6Addr};
12 
13 use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14 #[cfg(feature = "serde")]
15 use serde::{Deserialize, Serialize};
16 
17 use crate::parser::{ParseError, ParseResult};
18 
/// Crate-private, `Copy` summary of a host: which kind it is, plus the address
/// itself for the two IP variants. A domain's text is not stored here.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum HostInternal {
    /// No host (an empty domain converts to this variant).
    None,
    /// A non-empty domain name.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
27 
28 impl From<Host<String>> for HostInternal {
from(host: Host<String>) -> HostInternal29     fn from(host: Host<String>) -> HostInternal {
30         match host {
31             Host::Domain(ref s) if s.is_empty() => HostInternal::None,
32             Host::Domain(_) => HostInternal::Domain,
33             Host::Ipv4(address) => HostInternal::Ipv4(address),
34             Host::Ipv6(address) => HostInternal::Ipv6(address),
35         }
36     }
37 }
38 
39 /// The host name of an URL.
40 #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
41 #[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
42 pub enum Host<S = String> {
43     /// A DNS domain name, as '.' dot-separated labels.
44     /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
45     /// a special URL, or percent encoded for non-special URLs. Hosts for
46     /// non-special URLs are also called opaque hosts.
47     Domain(S),
48 
49     /// An IPv4 address.
50     /// `Url::host_str` returns the serialization of this address,
51     /// as four decimal integers separated by `.` dots.
52     Ipv4(Ipv4Addr),
53 
54     /// An IPv6 address.
55     /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
56     /// in the format per [RFC 5952 *A Recommendation
57     /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
58     /// lowercase hexadecimal with maximal `::` compression.
59     Ipv6(Ipv6Addr),
60 }
61 
62 impl<'a> Host<&'a str> {
63     /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> Host<String>64     pub fn to_owned(&self) -> Host<String> {
65         match *self {
66             Host::Domain(domain) => Host::Domain(domain.to_owned()),
67             Host::Ipv4(address) => Host::Ipv4(address),
68             Host::Ipv6(address) => Host::Ipv6(address),
69         }
70     }
71 }
72 
73 impl Host<String> {
74     /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75     ///
76     /// <https://url.spec.whatwg.org/#host-parsing>
parse(input: &str) -> Result<Self, ParseError>77     pub fn parse(input: &str) -> Result<Self, ParseError> {
78         if input.starts_with('[') {
79             if !input.ends_with(']') {
80                 return Err(ParseError::InvalidIpv6Address);
81             }
82             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83         }
84         let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85 
86         let domain = Self::domain_to_ascii(&domain)?;
87 
88         if domain.is_empty() {
89             return Err(ParseError::EmptyHost);
90         }
91 
92         let is_invalid_domain_char = |c| {
93             matches!(
94                 c,
95                 '\0'..='\u{001F}'
96                     | ' '
97                     | '#'
98                     | '%'
99                     | '/'
100                     | ':'
101                     | '<'
102                     | '>'
103                     | '?'
104                     | '@'
105                     | '['
106                     | '\\'
107                     | ']'
108                     | '^'
109                     | '\u{007F}'
110                     | '|'
111             )
112         };
113 
114         if domain.find(is_invalid_domain_char).is_some() {
115             Err(ParseError::InvalidDomainCharacter)
116         } else if ends_in_a_number(&domain) {
117             let address = parse_ipv4addr(&domain)?;
118             Ok(Host::Ipv4(address))
119         } else {
120             Ok(Host::Domain(domain))
121         }
122     }
123 
124     // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
parse_opaque(input: &str) -> Result<Self, ParseError>125     pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
126         if input.starts_with('[') {
127             if !input.ends_with(']') {
128                 return Err(ParseError::InvalidIpv6Address);
129             }
130             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
131         }
132 
133         let is_invalid_host_char = |c| {
134             matches!(
135                 c,
136                 '\0' | '\t'
137                     | '\n'
138                     | '\r'
139                     | ' '
140                     | '#'
141                     | '/'
142                     | ':'
143                     | '<'
144                     | '>'
145                     | '?'
146                     | '@'
147                     | '['
148                     | '\\'
149                     | ']'
150                     | '^'
151                     | '|'
152             )
153         };
154 
155         if input.find(is_invalid_host_char).is_some() {
156             Err(ParseError::InvalidDomainCharacter)
157         } else {
158             Ok(Host::Domain(
159                 utf8_percent_encode(input, CONTROLS).to_string(),
160             ))
161         }
162     }
163 
164     /// convert domain with idna
domain_to_ascii(domain: &str) -> Result<String, ParseError>165     fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
166         idna::domain_to_ascii(domain).map_err(Into::into)
167     }
168 }
169 
170 impl<S: AsRef<str>> fmt::Display for Host<S> {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result171     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
172         match *self {
173             Host::Domain(ref domain) => domain.as_ref().fmt(f),
174             Host::Ipv4(ref addr) => addr.fmt(f),
175             Host::Ipv6(ref addr) => {
176                 f.write_str("[")?;
177                 write_ipv6(addr, f)?;
178                 f.write_str("]")
179             }
180         }
181     }
182 }
183 
184 impl<S, T> PartialEq<Host<T>> for Host<S>
185 where
186     S: PartialEq<T>,
187 {
eq(&self, other: &Host<T>) -> bool188     fn eq(&self, other: &Host<T>) -> bool {
189         match (self, other) {
190             (Host::Domain(a), Host::Domain(b)) => a == b,
191             (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
192             (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
193             (_, _) => false,
194         }
195     }
196 }
197 
write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result198 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
199     let segments = addr.segments();
200     let (compress_start, compress_end) = longest_zero_sequence(&segments);
201     let mut i = 0;
202     while i < 8 {
203         if i == compress_start {
204             f.write_str(":")?;
205             if i == 0 {
206                 f.write_str(":")?;
207             }
208             if compress_end < 8 {
209                 i = compress_end;
210             } else {
211                 break;
212             }
213         }
214         write!(f, "{:x}", segments[i as usize])?;
215         if i < 7 {
216             f.write_str(":")?;
217         }
218         i += 1;
219     }
220     Ok(())
221 }
222 
/// Finds the first longest run of consecutive zero groups in `pieces`.
///
/// Returns `(start, end)` with `end` exclusive when that run has at least two
/// groups. Otherwise returns `(-1, -2)`, a pair that matches no group index.
///
/// Implements steps 2 and 3 of
/// <https://url.spec.whatwg.org/#concept-ipv6-serializer>.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    // (start, length) of the best run found so far.
    let mut best: Option<(isize, isize)> = None;
    // Start index of the zero run currently being scanned, if any.
    let mut run_start: Option<isize> = None;

    // A non-zero sentinel after the last group closes a run that reaches the end.
    let with_sentinel = pieces.iter().copied().chain(std::iter::once(1));
    for (position, piece) in with_sentinel.enumerate() {
        let position = position as isize;
        if piece == 0 {
            if run_start.is_none() {
                run_start = Some(position);
            }
        } else if let Some(begin) = run_start.take() {
            let length = position - begin;
            // Strictly longer only, so the earliest of equal-length runs wins.
            if best.map_or(true, |(_, best_length)| length > best_length) {
                best = Some((begin, length));
            }
        }
    }

    match best {
        // Step 3 of the serializer: a lone zero is not compressed.
        Some((begin, length)) if length >= 2 => (begin, begin + length),
        _ => (-1, -2),
    }
}
258 
259 /// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
ends_in_a_number(input: &str) -> bool260 fn ends_in_a_number(input: &str) -> bool {
261     let mut parts = input.rsplit('.');
262     let last = parts.next().unwrap();
263     let last = if last.is_empty() {
264         if let Some(last) = parts.next() {
265             last
266         } else {
267             return false;
268         }
269     } else {
270         last
271     };
272     if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
273         return true;
274     }
275 
276     parse_ipv4number(last).is_ok()
277 }
278 
/// Parses one dot-separated part of an IPv4 address.
///
/// A `0x`/`0X` prefix selects hexadecimal, a leading `0` followed by at least
/// one more character selects octal, and anything else is decimal. A prefix
/// with nothing after it is the number zero.
///
/// Returns `Err(())` if `input` is empty or contains a character that is not a
/// digit of the selected radix, `Ok(None)` if the input is a valid number that
/// overflows a `u32`, and `Ok(Some(n))` otherwise.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    // Pick the radix from the prefix and strip that prefix off.
    let radix: u32 = match input.as_bytes() {
        [b'0', b'x', ..] | [b'0', b'X', ..] => {
            input = &input[2..];
            16
        }
        [b'0', _, ..] => {
            input = &input[1..];
            8
        }
        _ => 10,
    };

    if input.is_empty() {
        return Ok(Some(0));
    }

    // Reject signs and stray characters up front, so the only way the
    // conversion below can fail is by overflowing.
    if !input.bytes().all(|b| (b as char).is_digit(radix)) {
        return Err(());
    }

    Ok(u32::from_str_radix(input, radix).ok())
}
315 
316 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr>317 fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
318     let mut parts: Vec<&str> = input.split('.').collect();
319     if parts.last() == Some(&"") {
320         parts.pop();
321     }
322     if parts.len() > 4 {
323         return Err(ParseError::InvalidIpv4Address);
324     }
325     let mut numbers: Vec<u32> = Vec::new();
326     for part in parts {
327         match parse_ipv4number(part) {
328             Ok(Some(n)) => numbers.push(n),
329             Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
330             Err(()) => return Err(ParseError::InvalidIpv4Address),
331         };
332     }
333     let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
334     // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
335     if ipv4 > u32::MAX >> (8 * numbers.len() as u32) {
336         return Err(ParseError::InvalidIpv4Address);
337     }
338     if numbers.iter().any(|x| *x > 255) {
339         return Err(ParseError::InvalidIpv4Address);
340     }
341     for (counter, n) in numbers.iter().enumerate() {
342         ipv4 += n << (8 * (3 - counter as u32))
343     }
344     Ok(Ipv4Addr::from(ipv4))
345 }
346 
347 /// <https://url.spec.whatwg.org/#concept-ipv6-parser>
parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr>348 fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
349     let input = input.as_bytes();
350     let len = input.len();
351     let mut is_ip_v4 = false;
352     let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
353     let mut piece_pointer = 0;
354     let mut compress_pointer = None;
355     let mut i = 0;
356 
357     if len < 2 {
358         return Err(ParseError::InvalidIpv6Address);
359     }
360 
361     if input[0] == b':' {
362         if input[1] != b':' {
363             return Err(ParseError::InvalidIpv6Address);
364         }
365         i = 2;
366         piece_pointer = 1;
367         compress_pointer = Some(1);
368     }
369 
370     while i < len {
371         if piece_pointer == 8 {
372             return Err(ParseError::InvalidIpv6Address);
373         }
374         if input[i] == b':' {
375             if compress_pointer.is_some() {
376                 return Err(ParseError::InvalidIpv6Address);
377             }
378             i += 1;
379             piece_pointer += 1;
380             compress_pointer = Some(piece_pointer);
381             continue;
382         }
383         let start = i;
384         let end = cmp::min(len, start + 4);
385         let mut value = 0u16;
386         while i < end {
387             match (input[i] as char).to_digit(16) {
388                 Some(digit) => {
389                     value = value * 0x10 + digit as u16;
390                     i += 1;
391                 }
392                 None => break,
393             }
394         }
395         if i < len {
396             match input[i] {
397                 b'.' => {
398                     if i == start {
399                         return Err(ParseError::InvalidIpv6Address);
400                     }
401                     i = start;
402                     if piece_pointer > 6 {
403                         return Err(ParseError::InvalidIpv6Address);
404                     }
405                     is_ip_v4 = true;
406                 }
407                 b':' => {
408                     i += 1;
409                     if i == len {
410                         return Err(ParseError::InvalidIpv6Address);
411                     }
412                 }
413                 _ => return Err(ParseError::InvalidIpv6Address),
414             }
415         }
416         if is_ip_v4 {
417             break;
418         }
419         pieces[piece_pointer] = value;
420         piece_pointer += 1;
421     }
422 
423     if is_ip_v4 {
424         if piece_pointer > 6 {
425             return Err(ParseError::InvalidIpv6Address);
426         }
427         let mut numbers_seen = 0;
428         while i < len {
429             if numbers_seen > 0 {
430                 if numbers_seen < 4 && (i < len && input[i] == b'.') {
431                     i += 1
432                 } else {
433                     return Err(ParseError::InvalidIpv6Address);
434                 }
435             }
436 
437             let mut ipv4_piece = None;
438             while i < len {
439                 let digit = match input[i] {
440                     c @ b'0'..=b'9' => c - b'0',
441                     _ => break,
442                 };
443                 match ipv4_piece {
444                     None => ipv4_piece = Some(digit as u16),
445                     Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
446                     Some(ref mut v) => {
447                         *v = *v * 10 + digit as u16;
448                         if *v > 255 {
449                             return Err(ParseError::InvalidIpv6Address);
450                         }
451                     }
452                 }
453                 i += 1;
454             }
455 
456             pieces[piece_pointer] = if let Some(v) = ipv4_piece {
457                 pieces[piece_pointer] * 0x100 + v
458             } else {
459                 return Err(ParseError::InvalidIpv6Address);
460             };
461             numbers_seen += 1;
462 
463             if numbers_seen == 2 || numbers_seen == 4 {
464                 piece_pointer += 1;
465             }
466         }
467 
468         if numbers_seen != 4 {
469             return Err(ParseError::InvalidIpv6Address);
470         }
471     }
472 
473     if i < len {
474         return Err(ParseError::InvalidIpv6Address);
475     }
476 
477     match compress_pointer {
478         Some(compress_pointer) => {
479             let mut swaps = piece_pointer - compress_pointer;
480             piece_pointer = 7;
481             while swaps > 0 {
482                 pieces.swap(piece_pointer, compress_pointer + swaps - 1);
483                 swaps -= 1;
484                 piece_pointer -= 1;
485             }
486         }
487         _ => {
488             if piece_pointer != 8 {
489                 return Err(ParseError::InvalidIpv6Address);
490             }
491         }
492     }
493     Ok(Ipv6Addr::new(
494         pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
495     ))
496 }
497