• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 /*!
10 
11 rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12 for the [Rust](http://rust-lang.org/) programming language.
13 
14 
15 # URL parsing and data structures
16 
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18 
19 ```
20 use url::{Url, ParseError};
21 
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24 
25 Let’s parse a valid URL and look at its components.
26 
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34 
35 
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44         Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53 
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57 
58 ```
59 use url::Url;
60 # use url::ParseError;
61 
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64 
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75 
76 ## Serde
77 
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79 
80 # Base URL
81 
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83 
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87 
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89 
90 ```
91 use url::{Url, ParseError};
92 
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95 
96 Use the `join` method on an `Url` to use it as a base URL:
97 
98 ```
99 use url::Url;
100 # use url::ParseError;
101 
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110 
111 # Feature: `serde`
112 
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117 
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121 
122 */
123 
124 #![doc(html_root_url = "https://docs.rs/url/2.3.1")]
125 #![cfg_attr(
126     feature = "debugger_visualizer",
127     feature(debugger_visualizer),
128     debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
129 )]
130 
131 pub use form_urlencoded;
132 
133 #[cfg(feature = "serde")]
134 extern crate serde;
135 
136 use crate::host::HostInternal;
137 use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
138 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
139 use std::borrow::Borrow;
140 use std::cmp;
141 use std::fmt::{self, Write};
142 use std::hash;
143 use std::io;
144 use std::mem;
145 use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
146 use std::ops::{Range, RangeFrom, RangeTo};
147 use std::path::{Path, PathBuf};
148 use std::str;
149 
150 use std::convert::TryFrom;
151 
152 pub use crate::host::Host;
153 pub use crate::origin::{OpaqueOrigin, Origin};
154 pub use crate::parser::{ParseError, SyntaxViolation};
155 pub use crate::path_segments::PathSegmentsMut;
156 pub use crate::slicing::Position;
157 pub use form_urlencoded::EncodingOverride;
158 
159 mod host;
160 mod origin;
161 mod parser;
162 mod path_segments;
163 mod slicing;
164 
165 #[doc(hidden)]
166 pub mod quirks;
167 
/// A parsed URL record.
///
/// The URL is kept as one serialized string plus a set of byte offsets into
/// that string which mark where the individual components begin or end.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components (byte offsets into `serialization`, unless noted otherwise)
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    host_start: u32,   // First byte of the serialized host
    host_end: u32,     // Just past the serialized host; a ':' sits here when a port is serialized
    host: HostInternal, // Parsed form of the host (not an offset)
    port: Option<u16>, // Parsed port; `None` when no port is serialized
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
193 
/// Full configuration for the URL parser.
///
/// A value with every option unset is returned by `Url::options()`; the
/// builder-style methods on this type then replace individual options.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser, if any.
    base_url: Option<&'a Url>,
    // Encoding override handed to the parser for query strings.
    encoding_override: EncodingOverride<'a>,
    // Callback handed to the parser for reporting `SyntaxViolation`s, if any.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
201 
202 impl<'a> ParseOptions<'a> {
203     /// Change the base URL
base_url(mut self, new: Option<&'a Url>) -> Self204     pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
205         self.base_url = new;
206         self
207     }
208 
209     /// Override the character encoding of query strings.
210     /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self211     pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
212         self.encoding_override = new;
213         self
214     }
215 
216     /// Call the provided function or closure for a non-fatal `SyntaxViolation`
217     /// when it occurs during parsing. Note that since the provided function is
218     /// `Fn`, the caller might need to utilize _interior mutability_, such as with
219     /// a `RefCell`, to collect the violations.
220     ///
221     /// ## Example
222     /// ```
223     /// use std::cell::RefCell;
224     /// use url::{Url, SyntaxViolation};
225     /// # use url::ParseError;
226     /// # fn run() -> Result<(), url::ParseError> {
227     /// let violations = RefCell::new(Vec::new());
228     /// let url = Url::options()
229     ///     .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
230     ///     .parse("https:////example.com")?;
231     /// assert_eq!(url.as_str(), "https://example.com/");
232     /// assert_eq!(violations.into_inner(),
233     ///            vec!(SyntaxViolation::ExpectedDoubleSlash));
234     /// # Ok(())
235     /// # }
236     /// # run().unwrap();
237     /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self238     pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
239         self.violation_fn = new;
240         self
241     }
242 
243     /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>244     pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
245         Parser {
246             serialization: String::with_capacity(input.len()),
247             base_url: self.base_url,
248             query_encoding_override: self.encoding_override,
249             violation_fn: self.violation_fn,
250             context: Context::UrlParser,
251         }
252         .parse_url(input)
253     }
254 }
255 
256 impl Url {
257     /// Parse an absolute URL from a string.
258     ///
259     /// # Examples
260     ///
261     /// ```rust
262     /// use url::Url;
263     /// # use url::ParseError;
264     ///
265     /// # fn run() -> Result<(), ParseError> {
266     /// let url = Url::parse("https://example.net")?;
267     /// # Ok(())
268     /// # }
269     /// # run().unwrap();
270     /// ```
271     ///
272     /// # Errors
273     ///
274     /// If the function can not parse an absolute URL from the given string,
275     /// a [`ParseError`] variant will be returned.
276     ///
277     /// [`ParseError`]: enum.ParseError.html
278     #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>279     pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
280         Url::options().parse(input)
281     }
282 
283     /// Parse an absolute URL from a string and add params to its query string.
284     ///
285     /// Existing params are not removed.
286     ///
287     /// # Examples
288     ///
289     /// ```rust
290     /// use url::Url;
291     /// # use url::ParseError;
292     ///
293     /// # fn run() -> Result<(), ParseError> {
294     /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
295     ///                                  &[("lang", "rust"), ("browser", "servo")])?;
296     /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
297     /// # Ok(())
298     /// # }
299     /// # run().unwrap();
300     /// ```
301     ///
302     /// # Errors
303     ///
304     /// If the function can not parse an absolute URL from the given string,
305     /// a [`ParseError`] variant will be returned.
306     ///
307     /// [`ParseError`]: enum.ParseError.html
308     #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,309     pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
310     where
311         I: IntoIterator,
312         I::Item: Borrow<(K, V)>,
313         K: AsRef<str>,
314         V: AsRef<str>,
315     {
316         let mut url = Url::options().parse(input);
317 
318         if let Ok(ref mut url) = url {
319             url.query_pairs_mut().extend_pairs(iter);
320         }
321 
322         url
323     }
324 
325     /// Parse a string as an URL, with this URL as the base URL.
326     ///
327     /// The inverse of this is [`make_relative`].
328     ///
329     /// Note: a trailing slash is significant.
330     /// Without it, the last path component is considered to be a “file” name
331     /// to be removed to get at the “directory” that is used as the base:
332     ///
333     /// # Examples
334     ///
335     /// ```rust
336     /// use url::Url;
337     /// # use url::ParseError;
338     ///
339     /// # fn run() -> Result<(), ParseError> {
340     /// let base = Url::parse("https://example.net/a/b.html")?;
341     /// let url = base.join("c.png")?;
342     /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
343     ///
344     /// let base = Url::parse("https://example.net/a/b/")?;
345     /// let url = base.join("c.png")?;
346     /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
347     /// # Ok(())
348     /// # }
349     /// # run().unwrap();
350     /// ```
351     ///
352     /// # Errors
353     ///
354     /// If the function can not parse an URL from the given string
355     /// with this URL as the base URL, a [`ParseError`] variant will be returned.
356     ///
357     /// [`ParseError`]: enum.ParseError.html
358     /// [`make_relative`]: #method.make_relative
359     #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>360     pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
361         Url::options().base_url(Some(self)).parse(input)
362     }
363 
364     /// Creates a relative URL if possible, with this URL as the base URL.
365     ///
366     /// This is the inverse of [`join`].
367     ///
368     /// # Examples
369     ///
370     /// ```rust
371     /// use url::Url;
372     /// # use url::ParseError;
373     ///
374     /// # fn run() -> Result<(), ParseError> {
375     /// let base = Url::parse("https://example.net/a/b.html")?;
376     /// let url = Url::parse("https://example.net/a/c.png")?;
377     /// let relative = base.make_relative(&url);
378     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
379     ///
380     /// let base = Url::parse("https://example.net/a/b/")?;
381     /// let url = Url::parse("https://example.net/a/b/c.png")?;
382     /// let relative = base.make_relative(&url);
383     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
384     ///
385     /// let base = Url::parse("https://example.net/a/b/")?;
386     /// let url = Url::parse("https://example.net/a/d/c.png")?;
387     /// let relative = base.make_relative(&url);
388     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
389     ///
390     /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
391     /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
392     /// let relative = base.make_relative(&url);
393     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
394     /// # Ok(())
395     /// # }
396     /// # run().unwrap();
397     /// ```
398     ///
399     /// # Errors
400     ///
401     /// If this URL can't be a base for the given URL, `None` is returned.
402     /// This is for example the case if the scheme, host or port are not the same.
403     ///
404     /// [`join`]: #method.join
make_relative(&self, url: &Url) -> Option<String>405     pub fn make_relative(&self, url: &Url) -> Option<String> {
406         if self.cannot_be_a_base() {
407             return None;
408         }
409 
410         // Scheme, host and port need to be the same
411         if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
412             return None;
413         }
414 
415         // We ignore username/password at this point
416 
417         // The path has to be transformed
418         let mut relative = String::new();
419 
420         // Extract the filename of both URIs, these need to be handled separately
421         fn extract_path_filename(s: &str) -> (&str, &str) {
422             let last_slash_idx = s.rfind('/').unwrap_or(0);
423             let (path, filename) = s.split_at(last_slash_idx);
424             if filename.is_empty() {
425                 (path, "")
426             } else {
427                 (path, &filename[1..])
428             }
429         }
430 
431         let (base_path, base_filename) = extract_path_filename(self.path());
432         let (url_path, url_filename) = extract_path_filename(url.path());
433 
434         let mut base_path = base_path.split('/').peekable();
435         let mut url_path = url_path.split('/').peekable();
436 
437         // Skip over the common prefix
438         while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
439             base_path.next();
440             url_path.next();
441         }
442 
443         // Add `..` segments for the remainder of the base path
444         for base_path_segment in base_path {
445             // Skip empty last segments
446             if base_path_segment.is_empty() {
447                 break;
448             }
449 
450             if !relative.is_empty() {
451                 relative.push('/');
452             }
453 
454             relative.push_str("..");
455         }
456 
457         // Append the remainder of the other URI
458         for url_path_segment in url_path {
459             if !relative.is_empty() {
460                 relative.push('/');
461             }
462 
463             relative.push_str(url_path_segment);
464         }
465 
466         // Add the filename if they are not the same
467         if !relative.is_empty() || base_filename != url_filename {
468             // If the URIs filename is empty this means that it was a directory
469             // so we'll have to append a '/'.
470             //
471             // Otherwise append it directly as the new filename.
472             if url_filename.is_empty() {
473                 relative.push('/');
474             } else {
475                 if !relative.is_empty() {
476                     relative.push('/');
477                 }
478                 relative.push_str(url_filename);
479             }
480         }
481 
482         // Query and fragment are only taken from the other URI
483         if let Some(query) = url.query() {
484             relative.push('?');
485             relative.push_str(query);
486         }
487 
488         if let Some(fragment) = url.fragment() {
489             relative.push('#');
490             relative.push_str(fragment);
491         }
492 
493         Some(relative)
494     }
495 
496     /// Return a default `ParseOptions` that can fully configure the URL parser.
497     ///
498     /// # Examples
499     ///
500     /// Get default `ParseOptions`, then change base url
501     ///
502     /// ```rust
503     /// use url::Url;
504     /// # use url::ParseError;
505     /// # fn run() -> Result<(), ParseError> {
506     /// let options = Url::options();
507     /// let api = Url::parse("https://api.example.com")?;
508     /// let base_url = options.base_url(Some(&api));
509     /// let version_url = base_url.parse("version.json")?;
510     /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
511     /// # Ok(())
512     /// # }
513     /// # run().unwrap();
514     /// ```
options<'a>() -> ParseOptions<'a>515     pub fn options<'a>() -> ParseOptions<'a> {
516         ParseOptions {
517             base_url: None,
518             encoding_override: None,
519             violation_fn: None,
520         }
521     }
522 
523     /// Return the serialization of this URL.
524     ///
525     /// This is fast since that serialization is already stored in the `Url` struct.
526     ///
527     /// # Examples
528     ///
529     /// ```rust
530     /// use url::Url;
531     /// # use url::ParseError;
532     ///
533     /// # fn run() -> Result<(), ParseError> {
534     /// let url_str = "https://example.net/";
535     /// let url = Url::parse(url_str)?;
536     /// assert_eq!(url.as_str(), url_str);
537     /// # Ok(())
538     /// # }
539     /// # run().unwrap();
540     /// ```
541     #[inline]
as_str(&self) -> &str542     pub fn as_str(&self) -> &str {
543         &self.serialization
544     }
545 
546     /// Return the serialization of this URL.
547     ///
548     /// This consumes the `Url` and takes ownership of the `String` stored in it.
549     ///
550     /// # Examples
551     ///
552     /// ```rust
553     /// use url::Url;
554     /// # use url::ParseError;
555     ///
556     /// # fn run() -> Result<(), ParseError> {
557     /// let url_str = "https://example.net/";
558     /// let url = Url::parse(url_str)?;
559     /// assert_eq!(String::from(url), url_str);
560     /// # Ok(())
561     /// # }
562     /// # run().unwrap();
563     /// ```
564     #[inline]
565     #[deprecated(since = "2.3.0", note = "use Into<String>")]
into_string(self) -> String566     pub fn into_string(self) -> String {
567         self.into()
568     }
569 
    /// For internal testing, not part of the public API.
    ///
    /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns an `Err` describing
    /// the first one that is not met.
    /// This is for testing rust-url itself.
    ///
    /// # Panics
    ///
    /// Panics (rather than returning `Err`) if the serialized port cannot be
    /// parsed as a `u16`, or if the serialization of this URL cannot be
    /// parsed again.
    #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // These local macros shadow the standard `assert!`/`assert_eq!` for the
        // rest of this function: a failed check returns `Err` with a message
        // instead of panicking.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: one ASCII letter, then letters, digits, '+', '-' or '.',
        // terminated by ':'.
        assert!(self.scheme_end >= 1);
        assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                match self.byte_at(self.username_end) {
                    // A password follows the username and ends with '@'.
                    b':' => {
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    // Username without password: the host starts right after '@'.
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // No userinfo: `username_end` sits right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host must agree with the parsed `host` field.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            // Either the path starts right after the host (no port), or
            // ":port" sits between the host and the path.
            if self.path_start == self.host_end {
                assert_eq!(self.port, None);
            } else {
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            assert_eq!(self.path_start, self.scheme_end + 1);
        }
        // Query and fragment offsets point at their '?' / '#' delimiters, and
        // the fragment comes after the query.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round trip: parsing our own serialization must give the same record.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
692 
693     /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
694     ///
695     /// Note: this returns an opaque origin for `file:` URLs, which causes
696     /// `url.origin() != url.origin()`.
697     ///
698     /// # Examples
699     ///
700     /// URL with `ftp` scheme:
701     ///
702     /// ```rust
703     /// use url::{Host, Origin, Url};
704     /// # use url::ParseError;
705     ///
706     /// # fn run() -> Result<(), ParseError> {
707     /// let url = Url::parse("ftp://example.com/foo")?;
708     /// assert_eq!(url.origin(),
709     ///            Origin::Tuple("ftp".into(),
710     ///                          Host::Domain("example.com".into()),
711     ///                          21));
712     /// # Ok(())
713     /// # }
714     /// # run().unwrap();
715     /// ```
716     ///
717     /// URL with `blob` scheme:
718     ///
719     /// ```rust
720     /// use url::{Host, Origin, Url};
721     /// # use url::ParseError;
722     ///
723     /// # fn run() -> Result<(), ParseError> {
724     /// let url = Url::parse("blob:https://example.com/foo")?;
725     /// assert_eq!(url.origin(),
726     ///            Origin::Tuple("https".into(),
727     ///                          Host::Domain("example.com".into()),
728     ///                          443));
729     /// # Ok(())
730     /// # }
731     /// # run().unwrap();
732     /// ```
733     ///
734     /// URL with `file` scheme:
735     ///
736     /// ```rust
737     /// use url::{Host, Origin, Url};
738     /// # use url::ParseError;
739     ///
740     /// # fn run() -> Result<(), ParseError> {
741     /// let url = Url::parse("file:///tmp/foo")?;
742     /// assert!(!url.origin().is_tuple());
743     ///
744     /// let other_url = Url::parse("file:///tmp/foo")?;
745     /// assert!(url.origin() != other_url.origin());
746     /// # Ok(())
747     /// # }
748     /// # run().unwrap();
749     /// ```
750     ///
751     /// URL with other scheme:
752     ///
753     /// ```rust
754     /// use url::{Host, Origin, Url};
755     /// # use url::ParseError;
756     ///
757     /// # fn run() -> Result<(), ParseError> {
758     /// let url = Url::parse("foo:bar")?;
759     /// assert!(!url.origin().is_tuple());
760     /// # Ok(())
761     /// # }
762     /// # run().unwrap();
763     /// ```
764     #[inline]
origin(&self) -> Origin765     pub fn origin(&self) -> Origin {
766         origin::url_origin(self)
767     }
768 
769     /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
770     ///
771     /// # Examples
772     ///
773     /// ```
774     /// use url::Url;
775     /// # use url::ParseError;
776     ///
777     /// # fn run() -> Result<(), ParseError> {
778     /// let url = Url::parse("file:///tmp/foo")?;
779     /// assert_eq!(url.scheme(), "file");
780     /// # Ok(())
781     /// # }
782     /// # run().unwrap();
783     /// ```
784     #[inline]
scheme(&self) -> &str785     pub fn scheme(&self) -> &str {
786         self.slice(..self.scheme_end)
787     }
788 
789     /// Return whether the URL has an 'authority',
790     /// which can contain a username, password, host, and port number.
791     ///
792     /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
793     /// or cannot-be-a-base like `data:text/plain,Stuff`.
794     ///
795     /// # Examples
796     ///
797     /// ```
798     /// use url::Url;
799     /// # use url::ParseError;
800     ///
801     /// # fn run() -> Result<(), ParseError> {
802     /// let url = Url::parse("ftp://rms@example.com")?;
803     /// assert!(url.has_authority());
804     ///
805     /// let url = Url::parse("unix:/run/foo.socket")?;
806     /// assert!(!url.has_authority());
807     ///
808     /// let url = Url::parse("data:text/plain,Stuff")?;
809     /// assert!(!url.has_authority());
810     /// # Ok(())
811     /// # }
812     /// # run().unwrap();
813     /// ```
814     #[inline]
has_authority(&self) -> bool815     pub fn has_authority(&self) -> bool {
816         debug_assert!(self.byte_at(self.scheme_end) == b':');
817         self.slice(self.scheme_end..).starts_with("://")
818     }
819 
820     /// Return whether this URL is a cannot-be-a-base URL,
821     /// meaning that parsing a relative URL string with this URL as the base will return an error.
822     ///
823     /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
824     /// as is typically the case of `data:` and `mailto:` URLs.
825     ///
826     /// # Examples
827     ///
828     /// ```
829     /// use url::Url;
830     /// # use url::ParseError;
831     ///
832     /// # fn run() -> Result<(), ParseError> {
833     /// let url = Url::parse("ftp://rms@example.com")?;
834     /// assert!(!url.cannot_be_a_base());
835     ///
836     /// let url = Url::parse("unix:/run/foo.socket")?;
837     /// assert!(!url.cannot_be_a_base());
838     ///
839     /// let url = Url::parse("data:text/plain,Stuff")?;
840     /// assert!(url.cannot_be_a_base());
841     /// # Ok(())
842     /// # }
843     /// # run().unwrap();
844     /// ```
845     #[inline]
cannot_be_a_base(&self) -> bool846     pub fn cannot_be_a_base(&self) -> bool {
847         !self.slice(self.scheme_end + 1..).starts_with('/')
848     }
849 
850     /// Return the username for this URL (typically the empty string)
851     /// as a percent-encoded ASCII string.
852     ///
853     /// # Examples
854     ///
855     /// ```
856     /// use url::Url;
857     /// # use url::ParseError;
858     ///
859     /// # fn run() -> Result<(), ParseError> {
860     /// let url = Url::parse("ftp://rms@example.com")?;
861     /// assert_eq!(url.username(), "rms");
862     ///
863     /// let url = Url::parse("ftp://:secret123@example.com")?;
864     /// assert_eq!(url.username(), "");
865     ///
866     /// let url = Url::parse("https://example.com")?;
867     /// assert_eq!(url.username(), "");
868     /// # Ok(())
869     /// # }
870     /// # run().unwrap();
871     /// ```
username(&self) -> &str872     pub fn username(&self) -> &str {
873         let scheme_separator_len = "://".len() as u32;
874         if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
875             self.slice(self.scheme_end + scheme_separator_len..self.username_end)
876         } else {
877             ""
878         }
879     }
880 
881     /// Return the password for this URL, if any, as a percent-encoded ASCII string.
882     ///
883     /// # Examples
884     ///
885     /// ```
886     /// use url::Url;
887     /// # use url::ParseError;
888     ///
889     /// # fn run() -> Result<(), ParseError> {
890     /// let url = Url::parse("ftp://rms:secret123@example.com")?;
891     /// assert_eq!(url.password(), Some("secret123"));
892     ///
893     /// let url = Url::parse("ftp://:secret123@example.com")?;
894     /// assert_eq!(url.password(), Some("secret123"));
895     ///
896     /// let url = Url::parse("ftp://rms@example.com")?;
897     /// assert_eq!(url.password(), None);
898     ///
899     /// let url = Url::parse("https://example.com")?;
900     /// assert_eq!(url.password(), None);
901     /// # Ok(())
902     /// # }
903     /// # run().unwrap();
904     /// ```
password(&self) -> Option<&str>905     pub fn password(&self) -> Option<&str> {
906         // This ':' is not the one marking a port number since a host can not be empty.
907         // (Except for file: URLs, which do not have port numbers.)
908         if self.has_authority()
909             && self.username_end != self.serialization.len() as u32
910             && self.byte_at(self.username_end) == b':'
911         {
912             debug_assert!(self.byte_at(self.host_start - 1) == b'@');
913             Some(self.slice(self.username_end + 1..self.host_start - 1))
914         } else {
915             None
916         }
917     }
918 
919     /// Equivalent to `url.host().is_some()`.
920     ///
921     /// # Examples
922     ///
923     /// ```
924     /// use url::Url;
925     /// # use url::ParseError;
926     ///
927     /// # fn run() -> Result<(), ParseError> {
928     /// let url = Url::parse("ftp://rms@example.com")?;
929     /// assert!(url.has_host());
930     ///
931     /// let url = Url::parse("unix:/run/foo.socket")?;
932     /// assert!(!url.has_host());
933     ///
934     /// let url = Url::parse("data:text/plain,Stuff")?;
935     /// assert!(!url.has_host());
936     /// # Ok(())
937     /// # }
938     /// # run().unwrap();
939     /// ```
has_host(&self) -> bool940     pub fn has_host(&self) -> bool {
941         !matches!(self.host, HostInternal::None)
942     }
943 
944     /// Return the string representation of the host (domain or IP address) for this URL, if any.
945     ///
946     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
947     /// of a special URL, or percent encoded for non-special URLs.
948     /// IPv6 addresses are given between `[` and `]` brackets.
949     ///
950     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
951     /// don’t have a host.
952     ///
953     /// See also the `host` method.
954     ///
955     /// # Examples
956     ///
957     /// ```
958     /// use url::Url;
959     /// # use url::ParseError;
960     ///
961     /// # fn run() -> Result<(), ParseError> {
962     /// let url = Url::parse("https://127.0.0.1/index.html")?;
963     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
964     ///
965     /// let url = Url::parse("ftp://rms@example.com")?;
966     /// assert_eq!(url.host_str(), Some("example.com"));
967     ///
968     /// let url = Url::parse("unix:/run/foo.socket")?;
969     /// assert_eq!(url.host_str(), None);
970     ///
971     /// let url = Url::parse("data:text/plain,Stuff")?;
972     /// assert_eq!(url.host_str(), None);
973     /// # Ok(())
974     /// # }
975     /// # run().unwrap();
976     /// ```
host_str(&self) -> Option<&str>977     pub fn host_str(&self) -> Option<&str> {
978         if self.has_host() {
979             Some(self.slice(self.host_start..self.host_end))
980         } else {
981             None
982         }
983     }
984 
985     /// Return the parsed representation of the host for this URL.
986     /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
987     /// of a special URL, or percent encoded for non-special URLs.
988     ///
989     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
990     /// don’t have a host.
991     ///
992     /// See also the `host_str` method.
993     ///
994     /// # Examples
995     ///
996     /// ```
997     /// use url::Url;
998     /// # use url::ParseError;
999     ///
1000     /// # fn run() -> Result<(), ParseError> {
1001     /// let url = Url::parse("https://127.0.0.1/index.html")?;
1002     /// assert!(url.host().is_some());
1003     ///
1004     /// let url = Url::parse("ftp://rms@example.com")?;
1005     /// assert!(url.host().is_some());
1006     ///
1007     /// let url = Url::parse("unix:/run/foo.socket")?;
1008     /// assert!(url.host().is_none());
1009     ///
1010     /// let url = Url::parse("data:text/plain,Stuff")?;
1011     /// assert!(url.host().is_none());
1012     /// # Ok(())
1013     /// # }
1014     /// # run().unwrap();
1015     /// ```
host(&self) -> Option<Host<&str>>1016     pub fn host(&self) -> Option<Host<&str>> {
1017         match self.host {
1018             HostInternal::None => None,
1019             HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1020             HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1021             HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1022         }
1023     }
1024 
1025     /// If this URL has a host and it is a domain name (not an IP address), return it.
1026     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1027     /// of a special URL, or percent encoded for non-special URLs.
1028     ///
1029     /// # Examples
1030     ///
1031     /// ```
1032     /// use url::Url;
1033     /// # use url::ParseError;
1034     ///
1035     /// # fn run() -> Result<(), ParseError> {
1036     /// let url = Url::parse("https://127.0.0.1/")?;
1037     /// assert_eq!(url.domain(), None);
1038     ///
1039     /// let url = Url::parse("mailto:rms@example.net")?;
1040     /// assert_eq!(url.domain(), None);
1041     ///
1042     /// let url = Url::parse("https://example.com/")?;
1043     /// assert_eq!(url.domain(), Some("example.com"));
1044     /// # Ok(())
1045     /// # }
1046     /// # run().unwrap();
1047     /// ```
domain(&self) -> Option<&str>1048     pub fn domain(&self) -> Option<&str> {
1049         match self.host {
1050             HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1051             _ => None,
1052         }
1053     }
1054 
1055     /// Return the port number for this URL, if any.
1056     ///
1057     /// Note that default port numbers are never reflected by the serialization,
1058     /// use the `port_or_known_default()` method if you want a default port number returned.
1059     ///
1060     /// # Examples
1061     ///
1062     /// ```
1063     /// use url::Url;
1064     /// # use url::ParseError;
1065     ///
1066     /// # fn run() -> Result<(), ParseError> {
1067     /// let url = Url::parse("https://example.com")?;
1068     /// assert_eq!(url.port(), None);
1069     ///
1070     /// let url = Url::parse("https://example.com:443/")?;
1071     /// assert_eq!(url.port(), None);
1072     ///
1073     /// let url = Url::parse("ssh://example.com:22")?;
1074     /// assert_eq!(url.port(), Some(22));
1075     /// # Ok(())
1076     /// # }
1077     /// # run().unwrap();
1078     /// ```
1079     #[inline]
    pub fn port(&self) -> Option<u16> {
        // Plain accessor: hands back the stored `port` field unchanged.
        self.port
    }
1083 
1084     /// Return the port number for this URL, or the default port number if it is known.
1085     ///
1086     /// This method only knows the default port number
1087     /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1088     ///
1089     /// For URLs in these schemes, this method always returns `Some(_)`.
1090     /// For other schemes, it is the same as `Url::port()`.
1091     ///
1092     /// # Examples
1093     ///
1094     /// ```
1095     /// use url::Url;
1096     /// # use url::ParseError;
1097     ///
1098     /// # fn run() -> Result<(), ParseError> {
1099     /// let url = Url::parse("foo://example.com")?;
1100     /// assert_eq!(url.port_or_known_default(), None);
1101     ///
1102     /// let url = Url::parse("foo://example.com:1456")?;
1103     /// assert_eq!(url.port_or_known_default(), Some(1456));
1104     ///
1105     /// let url = Url::parse("https://example.com")?;
1106     /// assert_eq!(url.port_or_known_default(), Some(443));
1107     /// # Ok(())
1108     /// # }
1109     /// # run().unwrap();
1110     /// ```
1111     #[inline]
port_or_known_default(&self) -> Option<u16>1112     pub fn port_or_known_default(&self) -> Option<u16> {
1113         self.port.or_else(|| parser::default_port(self.scheme()))
1114     }
1115 
1116     /// Resolve a URL’s host and port number to `SocketAddr`.
1117     ///
1118     /// If the URL has the default port number of a scheme that is unknown to this library,
1119     /// `default_port_number` provides an opportunity to provide the actual port number.
1120     /// In non-example code this should be implemented either simply as `|| None`,
1121     /// or by matching on the URL’s `.scheme()`.
1122     ///
1123     /// If the host is a domain, it is resolved using the standard library’s DNS support.
1124     ///
1125     /// # Examples
1126     ///
1127     /// ```no_run
1128     /// let url = url::Url::parse("https://example.net/").unwrap();
1129     /// let addrs = url.socket_addrs(|| None).unwrap();
1130     /// std::net::TcpStream::connect(&*addrs)
1131     /// # ;
1132     /// ```
1133     ///
1134     /// ```
1135     /// /// With application-specific known default port numbers
1136     /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1137     ///     url.socket_addrs(|| match url.scheme() {
1138     ///         "socks5" | "socks5h" => Some(1080),
1139     ///         _ => None,
1140     ///     })
1141     /// }
1142     /// ```
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1143     pub fn socket_addrs(
1144         &self,
1145         default_port_number: impl Fn() -> Option<u16>,
1146     ) -> io::Result<Vec<SocketAddr>> {
1147         // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1148         // causes borrowck issues because the return value borrows `default_port_number`:
1149         //
1150         // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1151         //
1152         // > This RFC proposes that *all* type parameters are considered in scope
1153         // > for `impl Trait` in return position
1154 
1155         fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1156             opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1157         }
1158 
1159         let host = io_result(self.host(), "No host name in the URL")?;
1160         let port = io_result(
1161             self.port_or_known_default().or_else(default_port_number),
1162             "No port number in the URL",
1163         )?;
1164         Ok(match host {
1165             Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1166             Host::Ipv4(ip) => vec![(ip, port).into()],
1167             Host::Ipv6(ip) => vec![(ip, port).into()],
1168         })
1169     }
1170 
1171     /// Return the path for this URL, as a percent-encoded ASCII string.
1172     /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1173     /// For other URLs, this starts with a '/' slash
1174     /// and continues with slash-separated path segments.
1175     ///
1176     /// # Examples
1177     ///
1178     /// ```rust
1179     /// use url::{Url, ParseError};
1180     ///
1181     /// # fn run() -> Result<(), ParseError> {
1182     /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1183     /// assert_eq!(url.path(), "/api/versions");
1184     ///
1185     /// let url = Url::parse("https://example.com")?;
1186     /// assert_eq!(url.path(), "/");
1187     ///
1188     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1189     /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1190     /// # Ok(())
1191     /// # }
1192     /// # run().unwrap();
1193     /// ```
path(&self) -> &str1194     pub fn path(&self) -> &str {
1195         match (self.query_start, self.fragment_start) {
1196             (None, None) => self.slice(self.path_start..),
1197             (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1198                 self.slice(self.path_start..next_component_start)
1199             }
1200         }
1201     }
1202 
1203     /// Unless this URL is cannot-be-a-base,
1204     /// return an iterator of '/' slash-separated path segments,
1205     /// each as a percent-encoded ASCII string.
1206     ///
1207     /// Return `None` for cannot-be-a-base URLs.
1208     ///
1209     /// When `Some` is returned, the iterator always contains at least one string
1210     /// (which may be empty).
1211     ///
1212     /// # Examples
1213     ///
1214     /// ```
1215     /// use url::Url;
1216     /// # use std::error::Error;
1217     ///
1218     /// # fn run() -> Result<(), Box<dyn Error>> {
1219     /// let url = Url::parse("https://example.com/foo/bar")?;
1220     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1221     /// assert_eq!(path_segments.next(), Some("foo"));
1222     /// assert_eq!(path_segments.next(), Some("bar"));
1223     /// assert_eq!(path_segments.next(), None);
1224     ///
1225     /// let url = Url::parse("https://example.com")?;
1226     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1227     /// assert_eq!(path_segments.next(), Some(""));
1228     /// assert_eq!(path_segments.next(), None);
1229     ///
1230     /// let url = Url::parse("data:text/plain,HelloWorld")?;
1231     /// assert!(url.path_segments().is_none());
1232     ///
1233     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1234     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1235     /// assert_eq!(path_segments.next(), Some("countries"));
1236     /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1237     /// # Ok(())
1238     /// # }
1239     /// # run().unwrap();
1240     /// ```
path_segments(&self) -> Option<str::Split<'_, char>>1241     pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1242         let path = self.path();
1243         path.strip_prefix('/').map(|remainder| remainder.split('/'))
1244     }
1245 
1246     /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1247     ///
1248     /// # Examples
1249     ///
1250     /// ```rust
1251     /// use url::Url;
1252     /// # use url::ParseError;
1253     ///
    /// # fn run() -> Result<(), ParseError> {
1255     /// let url = Url::parse("https://example.com/products?page=2")?;
1256     /// let query = url.query();
1257     /// assert_eq!(query, Some("page=2"));
1258     ///
1259     /// let url = Url::parse("https://example.com/products")?;
1260     /// let query = url.query();
1261     /// assert!(query.is_none());
1262     ///
1263     /// let url = Url::parse("https://example.com/?country=español")?;
1264     /// let query = url.query();
1265     /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1266     /// # Ok(())
1267     /// # }
1268     /// # run().unwrap();
1269     /// ```
query(&self) -> Option<&str>1270     pub fn query(&self) -> Option<&str> {
1271         match (self.query_start, self.fragment_start) {
1272             (None, _) => None,
1273             (Some(query_start), None) => {
1274                 debug_assert!(self.byte_at(query_start) == b'?');
1275                 Some(self.slice(query_start + 1..))
1276             }
1277             (Some(query_start), Some(fragment_start)) => {
1278                 debug_assert!(self.byte_at(query_start) == b'?');
1279                 Some(self.slice(query_start + 1..fragment_start))
1280             }
1281         }
1282     }
1283 
1284     /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1285     /// and return an iterator of (key, value) pairs.
1286     ///
1287     /// # Examples
1288     ///
1289     /// ```rust
1290     /// use std::borrow::Cow;
1291     ///
1292     /// use url::Url;
1293     /// # use url::ParseError;
1294     ///
1295     /// # fn run() -> Result<(), ParseError> {
1296     /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1297     /// let mut pairs = url.query_pairs();
1298     ///
1299     /// assert_eq!(pairs.count(), 2);
1300     ///
1301     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1302     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1303     /// # Ok(())
1304     /// # }
1305     /// # run().unwrap();
    /// ```
1308     #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1309     pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1310         form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1311     }
1312 
1313     /// Return this URL’s fragment identifier, if any.
1314     ///
1315     /// A fragment is the part of the URL after the `#` symbol.
1316     /// The fragment is optional and, if present, contains a fragment identifier
1317     /// that identifies a secondary resource, such as a section heading
1318     /// of a document.
1319     ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
1321     /// that is scrolled to on load. Browsers typically will not send the fragment portion
1322     /// of a URL to the server.
1323     ///
1324     /// **Note:** the parser did *not* percent-encode this component,
1325     /// but the input may have been percent-encoded already.
1326     ///
1327     /// # Examples
1328     ///
1329     /// ```rust
1330     /// use url::Url;
1331     /// # use url::ParseError;
1332     ///
1333     /// # fn run() -> Result<(), ParseError> {
1334     /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1335     ///
1336     /// assert_eq!(url.fragment(), Some("row=4"));
1337     ///
1338     /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1339     ///
1340     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1341     /// # Ok(())
1342     /// # }
1343     /// # run().unwrap();
1344     /// ```
fragment(&self) -> Option<&str>1345     pub fn fragment(&self) -> Option<&str> {
1346         self.fragment_start.map(|start| {
1347             debug_assert!(self.byte_at(start) == b'#');
1348             self.slice(start + 1..)
1349         })
1350     }
1351 
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1352     fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1353         let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1354         let result = f(&mut parser);
1355         self.serialization = parser.serialization;
1356         result
1357     }
1358 
1359     /// Change this URL’s fragment identifier.
1360     ///
1361     /// # Examples
1362     ///
1363     /// ```rust
1364     /// use url::Url;
1365     /// # use url::ParseError;
1366     ///
1367     /// # fn run() -> Result<(), ParseError> {
1368     /// let mut url = Url::parse("https://example.com/data.csv")?;
1369     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
1371     /// url.set_fragment(Some("cell=4,1-6,2"));
1372     /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1373     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1374     ///
1375     /// url.set_fragment(None);
1376     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1377     /// assert!(url.fragment().is_none());
1378     /// # Ok(())
1379     /// # }
1380     /// # run().unwrap();
1381     /// ```
set_fragment(&mut self, fragment: Option<&str>)1382     pub fn set_fragment(&mut self, fragment: Option<&str>) {
1383         // Remove any previous fragment
1384         if let Some(start) = self.fragment_start {
1385             debug_assert!(self.byte_at(start) == b'#');
1386             self.serialization.truncate(start as usize);
1387         }
1388         // Write the new one
1389         if let Some(input) = fragment {
1390             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1391             self.serialization.push('#');
1392             self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
1393         } else {
1394             self.fragment_start = None
1395         }
1396     }
1397 
take_fragment(&mut self) -> Option<String>1398     fn take_fragment(&mut self) -> Option<String> {
1399         self.fragment_start.take().map(|start| {
1400             debug_assert!(self.byte_at(start) == b'#');
1401             let fragment = self.slice(start + 1..).to_owned();
1402             self.serialization.truncate(start as usize);
1403             fragment
1404         })
1405     }
1406 
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1407     fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1408         if let Some(ref fragment) = fragment {
1409             assert!(self.fragment_start.is_none());
1410             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1411             self.serialization.push('#');
1412             self.serialization.push_str(fragment);
1413         }
1414     }
1415 
1416     /// Change this URL’s query string.
1417     ///
1418     /// # Examples
1419     ///
1420     /// ```rust
1421     /// use url::Url;
1422     /// # use url::ParseError;
1423     ///
1424     /// # fn run() -> Result<(), ParseError> {
1425     /// let mut url = Url::parse("https://example.com/products")?;
1426     /// assert_eq!(url.as_str(), "https://example.com/products");
1427     ///
1428     /// url.set_query(Some("page=2"));
1429     /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1430     /// assert_eq!(url.query(), Some("page=2"));
1431     /// # Ok(())
1432     /// # }
1433     /// # run().unwrap();
1434     /// ```
set_query(&mut self, query: Option<&str>)1435     pub fn set_query(&mut self, query: Option<&str>) {
1436         let fragment = self.take_fragment();
1437 
1438         // Remove any previous query
1439         if let Some(start) = self.query_start.take() {
1440             debug_assert!(self.byte_at(start) == b'?');
1441             self.serialization.truncate(start as usize);
1442         }
1443         // Write the new query, if any
1444         if let Some(input) = query {
1445             self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1446             self.serialization.push('?');
1447             let scheme_type = SchemeType::from(self.scheme());
1448             let scheme_end = self.scheme_end;
1449             self.mutate(|parser| {
1450                 let vfn = parser.violation_fn;
1451                 parser.parse_query(
1452                     scheme_type,
1453                     scheme_end,
1454                     parser::Input::trim_tab_and_newlines(input, vfn),
1455                 )
1456             });
1457         }
1458 
1459         self.restore_already_parsed_fragment(fragment);
1460     }
1461 
1462     /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1463     /// in `application/x-www-form-urlencoded` syntax.
1464     ///
1465     /// The return value has a method-chaining API:
1466     ///
1467     /// ```rust
1468     /// # use url::{Url, ParseError};
1469     ///
1470     /// # fn run() -> Result<(), ParseError> {
1471     /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1472     /// assert_eq!(url.query(), Some("lang=fr"));
1473     ///
1474     /// url.query_pairs_mut().append_pair("foo", "bar");
1475     /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1476     /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1477     ///
1478     /// url.query_pairs_mut()
1479     ///     .clear()
1480     ///     .append_pair("foo", "bar & baz")
1481     ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1482     /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1483     /// assert_eq!(url.as_str(),
1484     ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1485     /// # Ok(())
1486     /// # }
1487     /// # run().unwrap();
1488     /// ```
1489     ///
1490     /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1491     /// not `url.set_query(None)`.
1492     ///
1493     /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1494     pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1495         let fragment = self.take_fragment();
1496 
1497         let query_start;
1498         if let Some(start) = self.query_start {
1499             debug_assert!(self.byte_at(start) == b'?');
1500             query_start = start as usize;
1501         } else {
1502             query_start = self.serialization.len();
1503             self.query_start = Some(to_u32(query_start).unwrap());
1504             self.serialization.push('?');
1505         }
1506 
1507         let query = UrlQuery {
1508             url: Some(self),
1509             fragment,
1510         };
1511         form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1512     }
1513 
take_after_path(&mut self) -> String1514     fn take_after_path(&mut self) -> String {
1515         match (self.query_start, self.fragment_start) {
1516             (Some(i), _) | (None, Some(i)) => {
1517                 let after_path = self.slice(i..).to_owned();
1518                 self.serialization.truncate(i as usize);
1519                 after_path
1520             }
1521             (None, None) => String::new(),
1522         }
1523     }
1524 
1525     /// Change this URL’s path.
1526     ///
1527     /// # Examples
1528     ///
1529     /// ```rust
1530     /// use url::Url;
1531     /// # use url::ParseError;
1532     ///
1533     /// # fn run() -> Result<(), ParseError> {
1534     /// let mut url = Url::parse("https://example.com")?;
1535     /// url.set_path("api/comments");
1536     /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1537     /// assert_eq!(url.path(), "/api/comments");
1538     ///
1539     /// let mut url = Url::parse("https://example.com/api")?;
1540     /// url.set_path("data/report.csv");
1541     /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1542     /// assert_eq!(url.path(), "/data/report.csv");
1543     ///
1544     /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1545     /// let mut url = Url::parse("https://example.com")?;
1546     /// url.set_path("api/some comments");
1547     /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1548     /// assert_eq!(url.path(), "/api/some%20comments");
1549     ///
1550     /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1551     /// let mut url = Url::parse("https://example.com")?;
1552     /// url.set_path("api/some%20comments");
1553     /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1554     /// assert_eq!(url.path(), "/api/some%20comments");
1555     ///
1556     /// # Ok(())
1557     /// # }
1558     /// # run().unwrap();
1559     /// ```
set_path(&mut self, mut path: &str)1560     pub fn set_path(&mut self, mut path: &str) {
1561         let after_path = self.take_after_path();
1562         let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1563         let cannot_be_a_base = self.cannot_be_a_base();
1564         let scheme_type = SchemeType::from(self.scheme());
1565         self.serialization.truncate(self.path_start as usize);
1566         self.mutate(|parser| {
1567             if cannot_be_a_base {
1568                 if path.starts_with('/') {
1569                     parser.serialization.push_str("%2F");
1570                     path = &path[1..];
1571                 }
1572                 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
1573             } else {
1574                 let mut has_host = true; // FIXME
1575                 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
1576             }
1577         });
1578         self.restore_after_path(old_after_path_pos, &after_path);
1579     }
1580 
1581     /// Return an object with methods to manipulate this URL’s path segments.
1582     ///
1583     /// Return `Err(())` if this URL is cannot-be-a-base.
1584     #[allow(clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1585     pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1586         if self.cannot_be_a_base() {
1587             Err(())
1588         } else {
1589             Ok(path_segments::new(self))
1590         }
1591     }
1592 
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1593     fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1594         let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1595         let adjust = |index: &mut u32| {
1596             *index -= old_after_path_position;
1597             *index += new_after_path_position;
1598         };
1599         if let Some(ref mut index) = self.query_start {
1600             adjust(index)
1601         }
1602         if let Some(ref mut index) = self.fragment_start {
1603             adjust(index)
1604         }
1605         self.serialization.push_str(after_path)
1606     }
1607 
1608     /// Change this URL’s port number.
1609     ///
1610     /// Note that default port numbers are not reflected in the serialization.
1611     ///
1612     /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1613     /// do nothing and return `Err`.
1614     ///
1615     /// # Examples
1616     ///
1617     /// ```
1618     /// use url::Url;
1619     /// # use std::error::Error;
1620     ///
1621     /// # fn run() -> Result<(), Box<dyn Error>> {
1622     /// let mut url = Url::parse("ssh://example.net:2048/")?;
1623     ///
1624     /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1625     /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1626     ///
1627     /// url.set_port(None).map_err(|_| "cannot be base")?;
1628     /// assert_eq!(url.as_str(), "ssh://example.net/");
1629     /// # Ok(())
1630     /// # }
1631     /// # run().unwrap();
1632     /// ```
1633     ///
1634     /// Known default port numbers are not reflected:
1635     ///
1636     /// ```rust
1637     /// use url::Url;
1638     /// # use std::error::Error;
1639     ///
1640     /// # fn run() -> Result<(), Box<dyn Error>> {
1641     /// let mut url = Url::parse("https://example.org/")?;
1642     ///
1643     /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1644     /// assert!(url.port().is_none());
1645     /// # Ok(())
1646     /// # }
1647     /// # run().unwrap();
1648     /// ```
1649     ///
1650     /// Cannot set port for cannot-be-a-base URLs:
1651     ///
1652     /// ```
1653     /// use url::Url;
1654     /// # use url::ParseError;
1655     ///
1656     /// # fn run() -> Result<(), ParseError> {
1657     /// let mut url = Url::parse("mailto:rms@example.net")?;
1658     ///
1659     /// let result = url.set_port(Some(80));
1660     /// assert!(result.is_err());
1661     ///
1662     /// let result = url.set_port(None);
1663     /// assert!(result.is_err());
1664     /// # Ok(())
1665     /// # }
1666     /// # run().unwrap();
1667     /// ```
1668     #[allow(clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1669     pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1670         // has_host implies !cannot_be_a_base
1671         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1672             return Err(());
1673         }
1674         if port.is_some() && port == parser::default_port(self.scheme()) {
1675             port = None
1676         }
1677         self.set_port_internal(port);
1678         Ok(())
1679     }
1680 
set_port_internal(&mut self, port: Option<u16>)1681     fn set_port_internal(&mut self, port: Option<u16>) {
1682         match (self.port, port) {
1683             (None, None) => {}
1684             (Some(_), None) => {
1685                 self.serialization
1686                     .drain(self.host_end as usize..self.path_start as usize);
1687                 let offset = self.path_start - self.host_end;
1688                 self.path_start = self.host_end;
1689                 if let Some(ref mut index) = self.query_start {
1690                     *index -= offset
1691                 }
1692                 if let Some(ref mut index) = self.fragment_start {
1693                     *index -= offset
1694                 }
1695             }
1696             (Some(old), Some(new)) if old == new => {}
1697             (_, Some(new)) => {
1698                 let path_and_after = self.slice(self.path_start..).to_owned();
1699                 self.serialization.truncate(self.host_end as usize);
1700                 write!(&mut self.serialization, ":{}", new).unwrap();
1701                 let old_path_start = self.path_start;
1702                 let new_path_start = to_u32(self.serialization.len()).unwrap();
1703                 self.path_start = new_path_start;
1704                 let adjust = |index: &mut u32| {
1705                     *index -= old_path_start;
1706                     *index += new_path_start;
1707                 };
1708                 if let Some(ref mut index) = self.query_start {
1709                     adjust(index)
1710                 }
1711                 if let Some(ref mut index) = self.fragment_start {
1712                     adjust(index)
1713                 }
1714                 self.serialization.push_str(&path_and_after);
1715             }
1716         }
1717         self.port = port;
1718     }
1719 
1720     /// Change this URL’s host.
1721     ///
1722     /// Removing the host (calling this with `None`)
1723     /// will also remove any username, password, and port number.
1724     ///
1725     /// # Examples
1726     ///
1727     /// Change host:
1728     ///
1729     /// ```
1730     /// use url::Url;
1731     /// # use url::ParseError;
1732     ///
1733     /// # fn run() -> Result<(), ParseError> {
1734     /// let mut url = Url::parse("https://example.net")?;
1735     /// let result = url.set_host(Some("rust-lang.org"));
1736     /// assert!(result.is_ok());
1737     /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1738     /// # Ok(())
1739     /// # }
1740     /// # run().unwrap();
1741     /// ```
1742     ///
1743     /// Remove host:
1744     ///
1745     /// ```
1746     /// use url::Url;
1747     /// # use url::ParseError;
1748     ///
1749     /// # fn run() -> Result<(), ParseError> {
1750     /// let mut url = Url::parse("foo://example.net")?;
1751     /// let result = url.set_host(None);
1752     /// assert!(result.is_ok());
1753     /// assert_eq!(url.as_str(), "foo:/");
1754     /// # Ok(())
1755     /// # }
1756     /// # run().unwrap();
1757     /// ```
1758     ///
1759     /// Cannot remove host for 'special' schemes (e.g. `http`):
1760     ///
1761     /// ```
1762     /// use url::Url;
1763     /// # use url::ParseError;
1764     ///
1765     /// # fn run() -> Result<(), ParseError> {
1766     /// let mut url = Url::parse("https://example.net")?;
1767     /// let result = url.set_host(None);
1768     /// assert!(result.is_err());
1769     /// assert_eq!(url.as_str(), "https://example.net/");
1770     /// # Ok(())
1771     /// # }
1772     /// # run().unwrap();
1773     /// ```
1774     ///
1775     /// Cannot change or remove host for cannot-be-a-base URLs:
1776     ///
1777     /// ```
1778     /// use url::Url;
1779     /// # use url::ParseError;
1780     ///
1781     /// # fn run() -> Result<(), ParseError> {
1782     /// let mut url = Url::parse("mailto:rms@example.net")?;
1783     ///
1784     /// let result = url.set_host(Some("rust-lang.org"));
1785     /// assert!(result.is_err());
1786     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1787     ///
1788     /// let result = url.set_host(None);
1789     /// assert!(result.is_err());
1790     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1791     /// # Ok(())
1792     /// # }
1793     /// # run().unwrap();
1794     /// ```
1795     ///
1796     /// # Errors
1797     ///
1798     /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1799     /// a [`ParseError`] variant will be returned.
1800     ///
1801     /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1802     pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1803         if self.cannot_be_a_base() {
1804             return Err(ParseError::SetHostOnCannotBeABaseUrl);
1805         }
1806 
1807         let scheme_type = SchemeType::from(self.scheme());
1808 
1809         if let Some(host) = host {
1810             if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
1811                 return Err(ParseError::EmptyHost);
1812             }
1813             let mut host_substr = host;
1814             // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1815             if !host.starts_with('[') || !host.ends_with(']') {
1816                 match host.find(':') {
1817                     Some(0) => {
1818                         // If buffer is the empty string, validation error, return failure.
1819                         return Err(ParseError::InvalidDomainCharacter);
1820                     }
1821                     // Let host be the result of host parsing buffer
1822                     Some(colon_index) => {
1823                         host_substr = &host[..colon_index];
1824                     }
1825                     None => {}
1826                 }
1827             }
1828             if SchemeType::from(self.scheme()).is_special() {
1829                 self.set_host_internal(Host::parse(host_substr)?, None);
1830             } else {
1831                 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1832             }
1833         } else if self.has_host() {
1834             if scheme_type.is_special() && !scheme_type.is_file() {
1835                 return Err(ParseError::EmptyHost);
1836             } else if self.serialization.len() == self.path_start as usize {
1837                 self.serialization.push('/');
1838             }
1839             debug_assert!(self.byte_at(self.scheme_end) == b':');
1840             debug_assert!(self.byte_at(self.path_start) == b'/');
1841 
1842             let new_path_start = if scheme_type.is_file() {
1843                 self.scheme_end + 3
1844             } else {
1845                 self.scheme_end + 1
1846             };
1847 
1848             self.serialization
1849                 .drain(new_path_start as usize..self.path_start as usize);
1850             let offset = self.path_start - new_path_start;
1851             self.path_start = new_path_start;
1852             self.username_end = new_path_start;
1853             self.host_start = new_path_start;
1854             self.host_end = new_path_start;
1855             self.port = None;
1856             if let Some(ref mut index) = self.query_start {
1857                 *index -= offset
1858             }
1859             if let Some(ref mut index) = self.fragment_start {
1860                 *index -= offset
1861             }
1862         }
1863         Ok(())
1864     }
1865 
1866     /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>)1867     fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
1868         let old_suffix_pos = if opt_new_port.is_some() {
1869             self.path_start
1870         } else {
1871             self.host_end
1872         };
1873         let suffix = self.slice(old_suffix_pos..).to_owned();
1874         self.serialization.truncate(self.host_start as usize);
1875         if !self.has_authority() {
1876             debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
1877             debug_assert!(self.username_end == self.host_start);
1878             self.serialization.push('/');
1879             self.serialization.push('/');
1880             self.username_end += 2;
1881             self.host_start += 2;
1882         }
1883         write!(&mut self.serialization, "{}", host).unwrap();
1884         self.host_end = to_u32(self.serialization.len()).unwrap();
1885         self.host = host.into();
1886 
1887         if let Some(new_port) = opt_new_port {
1888             self.port = new_port;
1889             if let Some(port) = new_port {
1890                 write!(&mut self.serialization, ":{}", port).unwrap();
1891             }
1892         }
1893         let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
1894         self.serialization.push_str(&suffix);
1895 
1896         let adjust = |index: &mut u32| {
1897             *index -= old_suffix_pos;
1898             *index += new_suffix_pos;
1899         };
1900         adjust(&mut self.path_start);
1901         if let Some(ref mut index) = self.query_start {
1902             adjust(index)
1903         }
1904         if let Some(ref mut index) = self.fragment_start {
1905             adjust(index)
1906         }
1907     }
1908 
1909     /// Change this URL’s host to the given IP address.
1910     ///
1911     /// If this URL is cannot-be-a-base, do nothing and return `Err`.
1912     ///
1913     /// Compared to `Url::set_host`, this skips the host parser.
1914     ///
1915     /// # Examples
1916     ///
1917     /// ```rust
1918     /// use url::{Url, ParseError};
1919     ///
1920     /// # fn run() -> Result<(), ParseError> {
1921     /// let mut url = Url::parse("http://example.com")?;
1922     /// url.set_ip_host("127.0.0.1".parse().unwrap());
1923     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1924     /// assert_eq!(url.as_str(), "http://127.0.0.1/");
1925     /// # Ok(())
1926     /// # }
1927     /// # run().unwrap();
1928     /// ```
1929     ///
1930     /// Cannot change URL's from mailto(cannot-be-base) to ip:
1931     ///
1932     /// ```rust
1933     /// use url::{Url, ParseError};
1934     ///
1935     /// # fn run() -> Result<(), ParseError> {
1936     /// let mut url = Url::parse("mailto:rms@example.com")?;
1937     /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
1938     ///
1939     /// assert_eq!(url.as_str(), "mailto:rms@example.com");
1940     /// assert!(result.is_err());
1941     /// # Ok(())
1942     /// # }
1943     /// # run().unwrap();
1944     /// ```
1945     ///
1946     #[allow(clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>1947     pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
1948         if self.cannot_be_a_base() {
1949             return Err(());
1950         }
1951 
1952         let address = match address {
1953             IpAddr::V4(address) => Host::Ipv4(address),
1954             IpAddr::V6(address) => Host::Ipv6(address),
1955         };
1956         self.set_host_internal(address, None);
1957         Ok(())
1958     }
1959 
1960     /// Change this URL’s password.
1961     ///
1962     /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
1963     ///
1964     /// # Examples
1965     ///
1966     /// ```rust
1967     /// use url::{Url, ParseError};
1968     ///
1969     /// # fn run() -> Result<(), ParseError> {
1970     /// let mut url = Url::parse("mailto:rmz@example.com")?;
1971     /// let result = url.set_password(Some("secret_password"));
1972     /// assert!(result.is_err());
1973     ///
1974     /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
1975     /// let result = url.set_password(Some("secret_password"));
1976     /// assert_eq!(url.password(), Some("secret_password"));
1977     ///
1978     /// let mut url = Url::parse("ftp://user2:@example.com")?;
1979     /// let result = url.set_password(Some("secret2"));
1980     /// assert!(result.is_ok());
1981     /// assert_eq!(url.password(), Some("secret2"));
1982     /// # Ok(())
1983     /// # }
1984     /// # run().unwrap();
1985     /// ```
1986     #[allow(clippy::result_unit_err)]
set_password(&mut self, password: Option<&str>) -> Result<(), ()>1987     pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
1988         // has_host implies !cannot_be_a_base
1989         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1990             return Err(());
1991         }
1992         if let Some(password) = password {
1993             let host_and_after = self.slice(self.host_start..).to_owned();
1994             self.serialization.truncate(self.username_end as usize);
1995             self.serialization.push(':');
1996             self.serialization
1997                 .extend(utf8_percent_encode(password, USERINFO));
1998             self.serialization.push('@');
1999 
2000             let old_host_start = self.host_start;
2001             let new_host_start = to_u32(self.serialization.len()).unwrap();
2002             let adjust = |index: &mut u32| {
2003                 *index -= old_host_start;
2004                 *index += new_host_start;
2005             };
2006             self.host_start = new_host_start;
2007             adjust(&mut self.host_end);
2008             adjust(&mut self.path_start);
2009             if let Some(ref mut index) = self.query_start {
2010                 adjust(index)
2011             }
2012             if let Some(ref mut index) = self.fragment_start {
2013                 adjust(index)
2014             }
2015 
2016             self.serialization.push_str(&host_and_after);
2017         } else if self.byte_at(self.username_end) == b':' {
2018             // If there is a password to remove
2019             let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2020             debug_assert!(has_username_or_password);
2021             let username_start = self.scheme_end + 3;
2022             let empty_username = username_start == self.username_end;
2023             let start = self.username_end; // Remove the ':'
2024             let end = if empty_username {
2025                 self.host_start // Remove the '@' as well
2026             } else {
2027                 self.host_start - 1 // Keep the '@' to separate the username from the host
2028             };
2029             self.serialization.drain(start as usize..end as usize);
2030             let offset = end - start;
2031             self.host_start -= offset;
2032             self.host_end -= offset;
2033             self.path_start -= offset;
2034             if let Some(ref mut index) = self.query_start {
2035                 *index -= offset
2036             }
2037             if let Some(ref mut index) = self.fragment_start {
2038                 *index -= offset
2039             }
2040         }
2041         Ok(())
2042     }
2043 
2044     /// Change this URL’s username.
2045     ///
2046     /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2047     /// # Examples
2048     ///
2049     /// Cannot setup username from mailto(cannot-be-base)
2050     ///
2051     /// ```rust
2052     /// use url::{Url, ParseError};
2053     ///
2054     /// # fn run() -> Result<(), ParseError> {
2055     /// let mut url = Url::parse("mailto:rmz@example.com")?;
2056     /// let result = url.set_username("user1");
2057     /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2058     /// assert!(result.is_err());
2059     /// # Ok(())
2060     /// # }
2061     /// # run().unwrap();
2062     /// ```
2063     ///
2064     /// Setup username to user1
2065     ///
2066     /// ```rust
2067     /// use url::{Url, ParseError};
2068     ///
2069     /// # fn run() -> Result<(), ParseError> {
2070     /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2071     /// let result = url.set_username("user1");
2072     /// assert!(result.is_ok());
2073     /// assert_eq!(url.username(), "user1");
2074     /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2075     /// # Ok(())
2076     /// # }
2077     /// # run().unwrap();
2078     /// ```
2079     #[allow(clippy::result_unit_err)]
set_username(&mut self, username: &str) -> Result<(), ()>2080     pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2081         // has_host implies !cannot_be_a_base
2082         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2083             return Err(());
2084         }
2085         let username_start = self.scheme_end + 3;
2086         debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2087         if self.slice(username_start..self.username_end) == username {
2088             return Ok(());
2089         }
2090         let after_username = self.slice(self.username_end..).to_owned();
2091         self.serialization.truncate(username_start as usize);
2092         self.serialization
2093             .extend(utf8_percent_encode(username, USERINFO));
2094 
2095         let mut removed_bytes = self.username_end;
2096         self.username_end = to_u32(self.serialization.len()).unwrap();
2097         let mut added_bytes = self.username_end;
2098 
2099         let new_username_is_empty = self.username_end == username_start;
2100         match (new_username_is_empty, after_username.chars().next()) {
2101             (true, Some('@')) => {
2102                 removed_bytes += 1;
2103                 self.serialization.push_str(&after_username[1..]);
2104             }
2105             (false, Some('@')) | (_, Some(':')) | (true, _) => {
2106                 self.serialization.push_str(&after_username);
2107             }
2108             (false, _) => {
2109                 added_bytes += 1;
2110                 self.serialization.push('@');
2111                 self.serialization.push_str(&after_username);
2112             }
2113         }
2114 
2115         let adjust = |index: &mut u32| {
2116             *index -= removed_bytes;
2117             *index += added_bytes;
2118         };
2119         adjust(&mut self.host_start);
2120         adjust(&mut self.host_end);
2121         adjust(&mut self.path_start);
2122         if let Some(ref mut index) = self.query_start {
2123             adjust(index)
2124         }
2125         if let Some(ref mut index) = self.fragment_start {
2126             adjust(index)
2127         }
2128         Ok(())
2129     }
2130 
2131     /// Change this URL’s scheme.
2132     ///
2133     /// Do nothing and return `Err` under the following circumstances:
2134     ///
2135     /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2136     /// * If this URL is cannot-be-a-base and the new scheme is one of
2137     ///   `http`, `https`, `ws`, `wss` or `ftp`
2138     /// * If either the old or new scheme is `http`, `https`, `ws`,
2139     ///   `wss` or `ftp` and the other is not one of these
2140     /// * If the new scheme is `file` and this URL includes credentials
2141     ///   or has a non-null port
2142     /// * If this URL's scheme is `file` and its host is empty or null
2143     ///
2144     /// See also [the URL specification's section on legal scheme state
2145     /// overrides](https://url.spec.whatwg.org/#scheme-state).
2146     ///
2147     /// # Examples
2148     ///
2149     /// Change the URL’s scheme from `https` to `http`:
2150     ///
2151     /// ```
2152     /// use url::Url;
2153     /// # use url::ParseError;
2154     ///
2155     /// # fn run() -> Result<(), ParseError> {
2156     /// let mut url = Url::parse("https://example.net")?;
2157     /// let result = url.set_scheme("http");
2158     /// assert_eq!(url.as_str(), "http://example.net/");
2159     /// assert!(result.is_ok());
2160     /// # Ok(())
2161     /// # }
2162     /// # run().unwrap();
2163     /// ```
2164     /// Change the URL’s scheme from `foo` to `bar`:
2165     ///
2166     /// ```
2167     /// use url::Url;
2168     /// # use url::ParseError;
2169     ///
2170     /// # fn run() -> Result<(), ParseError> {
2171     /// let mut url = Url::parse("foo://example.net")?;
2172     /// let result = url.set_scheme("bar");
2173     /// assert_eq!(url.as_str(), "bar://example.net");
2174     /// assert!(result.is_ok());
2175     /// # Ok(())
2176     /// # }
2177     /// # run().unwrap();
2178     /// ```
2179     ///
2180     /// Cannot change URL’s scheme from `https` to `foõ`:
2181     ///
2182     /// ```
2183     /// use url::Url;
2184     /// # use url::ParseError;
2185     ///
2186     /// # fn run() -> Result<(), ParseError> {
2187     /// let mut url = Url::parse("https://example.net")?;
2188     /// let result = url.set_scheme("foõ");
2189     /// assert_eq!(url.as_str(), "https://example.net/");
2190     /// assert!(result.is_err());
2191     /// # Ok(())
2192     /// # }
2193     /// # run().unwrap();
2194     /// ```
2195     ///
2196     /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2197     ///
2198     /// ```
2199     /// use url::Url;
2200     /// # use url::ParseError;
2201     ///
2202     /// # fn run() -> Result<(), ParseError> {
2203     /// let mut url = Url::parse("mailto:rms@example.net")?;
2204     /// let result = url.set_scheme("https");
2205     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2206     /// assert!(result.is_err());
2207     /// # Ok(())
2208     /// # }
2209     /// # run().unwrap();
2210     /// ```
2211     /// Cannot change the URL’s scheme from `foo` to `https`:
2212     ///
2213     /// ```
2214     /// use url::Url;
2215     /// # use url::ParseError;
2216     ///
2217     /// # fn run() -> Result<(), ParseError> {
2218     /// let mut url = Url::parse("foo://example.net")?;
2219     /// let result = url.set_scheme("https");
2220     /// assert_eq!(url.as_str(), "foo://example.net");
2221     /// assert!(result.is_err());
2222     /// # Ok(())
2223     /// # }
2224     /// # run().unwrap();
2225     /// ```
2226     /// Cannot change the URL’s scheme from `http` to `foo`:
2227     ///
2228     /// ```
2229     /// use url::Url;
2230     /// # use url::ParseError;
2231     ///
2232     /// # fn run() -> Result<(), ParseError> {
2233     /// let mut url = Url::parse("http://example.net")?;
2234     /// let result = url.set_scheme("foo");
2235     /// assert_eq!(url.as_str(), "http://example.net/");
2236     /// assert!(result.is_err());
2237     /// # Ok(())
2238     /// # }
2239     /// # run().unwrap();
2240     /// ```
2241     #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
set_scheme(&mut self, scheme: &str) -> Result<(), ()>2242     pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2243         let mut parser = Parser::for_setter(String::new());
2244         let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
2245         let new_scheme_type = SchemeType::from(&parser.serialization);
2246         let old_scheme_type = SchemeType::from(self.scheme());
2247         // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2248         if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2249             // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2250             (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2251             // If url includes credentials or has a non-null port, and buffer is "file", then return.
2252             // If url’s scheme is "file" and its host is an empty host or null, then return.
2253             (new_scheme_type.is_file() && self.has_authority())
2254         {
2255             return Err(());
2256         }
2257 
2258         if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2259             return Err(());
2260         }
2261         let old_scheme_end = self.scheme_end;
2262         let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2263         let adjust = |index: &mut u32| {
2264             *index -= old_scheme_end;
2265             *index += new_scheme_end;
2266         };
2267 
2268         self.scheme_end = new_scheme_end;
2269         adjust(&mut self.username_end);
2270         adjust(&mut self.host_start);
2271         adjust(&mut self.host_end);
2272         adjust(&mut self.path_start);
2273         if let Some(ref mut index) = self.query_start {
2274             adjust(index)
2275         }
2276         if let Some(ref mut index) = self.fragment_start {
2277             adjust(index)
2278         }
2279 
2280         parser.serialization.push_str(self.slice(old_scheme_end..));
2281         self.serialization = parser.serialization;
2282 
2283         // Update the port so it can be removed
2284         // If it is the scheme's default
2285         // we don't mind it silently failing
2286         // if there was no port in the first place
2287         let previous_port = self.port();
2288         let _ = self.set_port(previous_port);
2289 
2290         Ok(())
2291     }
2292 
2293     /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2294     ///
2295     /// This returns `Err` if the given path is not absolute or,
2296     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2297     ///
2298     /// # Examples
2299     ///
2300     /// On Unix-like platforms:
2301     ///
2302     /// ```
2303     /// # if cfg!(unix) {
2304     /// use url::Url;
2305     ///
2306     /// # fn run() -> Result<(), ()> {
2307     /// let url = Url::from_file_path("/tmp/foo.txt")?;
2308     /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2309     ///
2310     /// let url = Url::from_file_path("../foo.txt");
2311     /// assert!(url.is_err());
2312     ///
2313     /// let url = Url::from_file_path("https://google.com/");
2314     /// assert!(url.is_err());
2315     /// # Ok(())
2316     /// # }
2317     /// # run().unwrap();
2318     /// # }
2319     /// ```
2320     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2321     #[allow(clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2322     pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2323         let mut serialization = "file://".to_owned();
2324         let host_start = serialization.len() as u32;
2325         let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2326         Ok(Url {
2327             serialization,
2328             scheme_end: "file".len() as u32,
2329             username_end: host_start,
2330             host_start,
2331             host_end,
2332             host,
2333             port: None,
2334             path_start: host_end,
2335             query_start: None,
2336             fragment_start: None,
2337         })
2338     }
2339 
2340     /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2341     ///
2342     /// This returns `Err` if the given path is not absolute or,
2343     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2344     ///
2345     /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2346     /// so that the entire path is considered when using this URL as a base URL.
2347     ///
2348     /// For example:
2349     ///
2350     /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2351     ///   as the base URL is `file:///var/www/index.html`
2352     /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2353     ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
2354     ///
2355     /// Note that `std::path` does not consider trailing slashes significant
2356     /// and usually does not include them (e.g. in `Path::parent()`).
2357     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2358     #[allow(clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2359     pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2360         let mut url = Url::from_file_path(path)?;
2361         if !url.serialization.ends_with('/') {
2362             url.serialization.push('/')
2363         }
2364         Ok(url)
2365     }
2366 
2367     /// Serialize with Serde using the internal representation of the `Url` struct.
2368     ///
2369     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2370     /// for speed, compared to the `Deserialize` trait impl.
2371     ///
2372     /// This method is only available if the `serde` Cargo feature is enabled.
2373     #[cfg(feature = "serde")]
2374     #[deny(unused)]
serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,2375     pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2376     where
2377         S: serde::Serializer,
2378     {
2379         use serde::Serialize;
2380         // Destructuring first lets us ensure that adding or removing fields forces this method
2381         // to be updated
2382         let Url {
2383             ref serialization,
2384             ref scheme_end,
2385             ref username_end,
2386             ref host_start,
2387             ref host_end,
2388             ref host,
2389             ref port,
2390             ref path_start,
2391             ref query_start,
2392             ref fragment_start,
2393         } = *self;
2394         (
2395             serialization,
2396             scheme_end,
2397             username_end,
2398             host_start,
2399             host_end,
2400             host,
2401             port,
2402             path_start,
2403             query_start,
2404             fragment_start,
2405         )
2406             .serialize(serializer)
2407     }
2408 
2409     /// Serialize with Serde using the internal representation of the `Url` struct.
2410     ///
2411     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2412     /// for speed, compared to the `Deserialize` trait impl.
2413     ///
2414     /// This method is only available if the `serde` Cargo feature is enabled.
2415     #[cfg(feature = "serde")]
2416     #[deny(unused)]
deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer<'de>,2417     pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2418     where
2419         D: serde::Deserializer<'de>,
2420     {
2421         use serde::de::{Deserialize, Error, Unexpected};
2422         let (
2423             serialization,
2424             scheme_end,
2425             username_end,
2426             host_start,
2427             host_end,
2428             host,
2429             port,
2430             path_start,
2431             query_start,
2432             fragment_start,
2433         ) = Deserialize::deserialize(deserializer)?;
2434         let url = Url {
2435             serialization,
2436             scheme_end,
2437             username_end,
2438             host_start,
2439             host_end,
2440             host,
2441             port,
2442             path_start,
2443             query_start,
2444             fragment_start,
2445         };
2446         if cfg!(debug_assertions) {
2447             url.check_invariants().map_err(|reason| {
2448                 let reason: &str = &reason;
2449                 Error::invalid_value(Unexpected::Other("value"), &reason)
2450             })?
2451         }
2452         Ok(url)
2453     }
2454 
2455     /// Assuming the URL is in the `file` scheme or similar,
2456     /// convert its path to an absolute `std::path::Path`.
2457     ///
2458     /// **Note:** This does not actually check the URL’s `scheme`,
2459     /// and may give nonsensical results for other schemes.
2460     /// It is the user’s responsibility to check the URL’s scheme before calling this.
2461     ///
2462     /// ```
2463     /// # use url::Url;
2464     /// # let url = Url::parse("file:///etc/passwd").unwrap();
2465     /// let path = url.to_file_path();
2466     /// ```
2467     ///
2468     /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2469     /// `file:` URLs may have a non-local host),
2470     /// or if `Path::new_opt()` returns `None`.
2471     /// (That is, if the percent-decoded path contains a NUL byte or,
2472     /// for a Windows path, is not UTF-8.)
2473     #[inline]
2474     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2475     #[allow(clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2476     pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2477         if let Some(segments) = self.path_segments() {
2478             let host = match self.host() {
2479                 None | Some(Host::Domain("localhost")) => None,
2480                 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2481                     Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2482                 }
2483                 _ => return Err(()),
2484             };
2485 
2486             return file_url_segments_to_pathbuf(host, segments);
2487         }
2488         Err(())
2489     }
2490 
2491     // Private helper methods:
2492 
2493     #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2494     fn slice<R>(&self, range: R) -> &str
2495     where
2496         R: RangeArg,
2497     {
2498         range.slice_of(&self.serialization)
2499     }
2500 
2501     #[inline]
byte_at(&self, i: u32) -> u82502     fn byte_at(&self, i: u32) -> u8 {
2503         self.serialization.as_bytes()[i as usize]
2504     }
2505 }
2506 
2507 /// Parse a string as an URL, without a base URL or encoding override.
2508 impl str::FromStr for Url {
2509     type Err = ParseError;
2510 
2511     #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2512     fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2513         Url::parse(input)
2514     }
2515 }
2516 
2517 impl<'a> TryFrom<&'a str> for Url {
2518     type Error = ParseError;
2519 
try_from(s: &'a str) -> Result<Self, Self::Error>2520     fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2521         Url::parse(s)
2522     }
2523 }
2524 
2525 /// Display the serialization of this URL.
2526 impl fmt::Display for Url {
2527     #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2528     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2529         fmt::Display::fmt(&self.serialization, formatter)
2530     }
2531 }
2532 
2533 /// String conversion.
2534 impl From<Url> for String {
from(value: Url) -> String2535     fn from(value: Url) -> String {
2536         value.serialization
2537     }
2538 }
2539 
2540 /// Debug the serialization of this URL.
2541 impl fmt::Debug for Url {
2542     #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2543     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2544         formatter
2545             .debug_struct("Url")
2546             .field("scheme", &self.scheme())
2547             .field("cannot_be_a_base", &self.cannot_be_a_base())
2548             .field("username", &self.username())
2549             .field("password", &self.password())
2550             .field("host", &self.host())
2551             .field("port", &self.port())
2552             .field("path", &self.path())
2553             .field("query", &self.query())
2554             .field("fragment", &self.fragment())
2555             .finish()
2556     }
2557 }
2558 
/// URLs compare like their serialization.
///
/// Marker impl only: the comparison itself is defined by the `PartialEq` impl for `Url`.
impl Eq for Url {}
2561 
2562 /// URLs compare like their serialization.
2563 impl PartialEq for Url {
2564     #[inline]
eq(&self, other: &Self) -> bool2565     fn eq(&self, other: &Self) -> bool {
2566         self.serialization == other.serialization
2567     }
2568 }
2569 
2570 /// URLs compare like their serialization.
2571 impl Ord for Url {
2572     #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2573     fn cmp(&self, other: &Self) -> cmp::Ordering {
2574         self.serialization.cmp(&other.serialization)
2575     }
2576 }
2577 
2578 /// URLs compare like their serialization.
2579 impl PartialOrd for Url {
2580     #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2581     fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2582         self.serialization.partial_cmp(&other.serialization)
2583     }
2584 }
2585 
2586 /// URLs hash like their serialization.
2587 impl hash::Hash for Url {
2588     #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2589     fn hash<H>(&self, state: &mut H)
2590     where
2591         H: hash::Hasher,
2592     {
2593         hash::Hash::hash(&self.serialization, state)
2594     }
2595 }
2596 
2597 /// Return the serialization of this URL.
2598 impl AsRef<str> for Url {
2599     #[inline]
as_ref(&self) -> &str2600     fn as_ref(&self) -> &str {
2601         &self.serialization
2602     }
2603 }
2604 
/// Private abstraction over the `u32`-indexed ranges accepted by `Url::slice`.
trait RangeArg {
    /// Returns the part of `s` selected by this range of byte offsets.
    ///
    /// Panics like ordinary string slicing if the offsets are out of bounds
    /// or do not fall on character boundaries.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2629 
/// Serializes this URL into a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    /// Writes the URL as a single string, taken from `as_str()`.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2642 
/// Deserializes this URL from a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    /// Expects a string and parses it with `Url::parse`; a parse failure is
    /// reported as an "invalid value" error carrying the parser's message.
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that accepts a string and turns it into a `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        // The parse error's `Display` text becomes the "expected" part.
                        let message = err.to_string();
                        Err(Error::invalid_value(Unexpected::Str(s), &message.as_str()))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2677 
2678 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2679 fn path_to_file_url_segments(
2680     path: &Path,
2681     serialization: &mut String,
2682 ) -> Result<(u32, HostInternal), ()> {
2683     #[cfg(any(unix, target_os = "redox"))]
2684     use std::os::unix::prelude::OsStrExt;
2685     #[cfg(target_os = "wasi")]
2686     use std::os::wasi::prelude::OsStrExt;
2687     if !path.is_absolute() {
2688         return Err(());
2689     }
2690     let host_end = to_u32(serialization.len()).unwrap();
2691     let mut empty = true;
2692     // skip the root component
2693     for component in path.components().skip(1) {
2694         empty = false;
2695         serialization.push('/');
2696         serialization.extend(percent_encode(
2697             component.as_os_str().as_bytes(),
2698             PATH_SEGMENT,
2699         ));
2700     }
2701     if empty {
2702         // An URL’s path must not be empty.
2703         serialization.push('/');
2704     }
2705     Ok((host_end, HostInternal::None))
2706 }
2707 
/// Windows build of `path_to_file_url_segments`: forwards its arguments to
/// `path_to_file_url_segments_windows` and returns that function's result unchanged.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2715 
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Appends a Windows-style absolute `path` to `serialization` as the host (for UNC
/// paths) and path part of a URL.
///
/// Returns `(host_end, host_internal)`:
///
/// * for a drive path (`Prefix::Disk` / `Prefix::VerbatimDisk`), `host_end` is the length
///   of `serialization` before anything was appended and the host is `HostInternal::None`;
/// * for a UNC path (`Prefix::UNC` / `Prefix::VerbatimUNC`), the server name is parsed
///   with `Host::parse` and written first, `host_end` is the length right after it, and
///   the host is the parsed one.
///
/// Returns `Err(())` if `path` is not absolute, if its first component is not one of the
/// prefixes above, if the UNC server name is not Unicode or is rejected by `Host::parse`,
/// or if the share name or any later component is not Unicode.
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_segments_windows(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use std::path::{Component, Prefix};
    if !path.is_absolute() {
        return Err(());
    }
    let mut components = path.components();

    // One past the current end: in the drive-letter branch this is where the letter
    // lands, right after the '/' pushed there.
    // NOTE(review): in the UNC branch this points one byte into the host text; it is
    // only read by the drive-letter check at the end of the function.
    let host_start = serialization.len() + 1;
    let host_end;
    let host_internal;

    match components.next() {
        Some(Component::Prefix(ref p)) => match p.kind() {
            Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
                // No host: the path starts immediately, as "/<letter>:".
                host_end = to_u32(serialization.len()).unwrap();
                host_internal = HostInternal::None;
                serialization.push('/');
                serialization.push(letter as char);
                serialization.push(':');
            }
            Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
                // The server becomes the URL host; the share becomes the first path segment.
                let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
                write!(serialization, "{}", host).unwrap();
                host_end = to_u32(serialization.len()).unwrap();
                host_internal = host.into();
                serialization.push('/');
                let share = share.to_str().ok_or(())?;
                serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
            }
            _ => return Err(()),
        },
        _ => return Err(()),
    }

    // Stays true when nothing but `RootDir` follows the prefix.
    let mut path_only_has_prefix = true;
    for component in components {
        // The root separator adds no segment of its own.
        if component == Component::RootDir {
            continue;
        }

        path_only_has_prefix = false;
        // FIXME: somehow work with non-unicode?
        let component = component.as_os_str().to_str().ok_or(())?;

        serialization.push('/');
        serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
    }

    // A windows drive letter must end with a slash.
    if serialization.len() > host_start
        && parser::is_windows_drive_letter(&serialization[host_start..])
        && path_only_has_prefix
    {
        serialization.push('/');
    }

    Ok((host_end, host_internal))
}
2779 
2780 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2781 fn file_url_segments_to_pathbuf(
2782     host: Option<&str>,
2783     segments: str::Split<'_, char>,
2784 ) -> Result<PathBuf, ()> {
2785     use std::ffi::OsStr;
2786     #[cfg(any(unix, target_os = "redox"))]
2787     use std::os::unix::prelude::OsStrExt;
2788     #[cfg(target_os = "wasi")]
2789     use std::os::wasi::prelude::OsStrExt;
2790 
2791     if host.is_some() {
2792         return Err(());
2793     }
2794 
2795     let mut bytes = if cfg!(target_os = "redox") {
2796         b"file:".to_vec()
2797     } else {
2798         Vec::new()
2799     };
2800 
2801     for segment in segments {
2802         bytes.push(b'/');
2803         bytes.extend(percent_decode(segment.as_bytes()));
2804     }
2805 
2806     // A windows drive letter must end with a slash.
2807     if bytes.len() > 2
2808         && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
2809         && matches!(bytes[bytes.len() - 1], b':' | b'|')
2810     {
2811         bytes.push(b'/');
2812     }
2813 
2814     let os_str = OsStr::from_bytes(&bytes);
2815     let path = PathBuf::from(os_str);
2816 
2817     debug_assert!(
2818         path.is_absolute(),
2819         "to_file_path() failed to produce an absolute Path"
2820     );
2821 
2822     Ok(path)
2823 }
2824 
/// Windows build of `file_url_segments_to_pathbuf`: forwards its arguments to
/// `file_url_segments_to_pathbuf_windows` and returns that function's result unchanged.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2832 
2833 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2834 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2835 fn file_url_segments_to_pathbuf_windows(
2836     host: Option<&str>,
2837     mut segments: str::Split<'_, char>,
2838 ) -> Result<PathBuf, ()> {
2839     let mut string = if let Some(host) = host {
2840         r"\\".to_owned() + host
2841     } else {
2842         let first = segments.next().ok_or(())?;
2843 
2844         match first.len() {
2845             2 => {
2846                 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2847                     return Err(());
2848                 }
2849 
2850                 first.to_owned()
2851             }
2852 
2853             4 => {
2854                 if !first.starts_with(parser::ascii_alpha) {
2855                     return Err(());
2856                 }
2857                 let bytes = first.as_bytes();
2858                 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2859                     return Err(());
2860                 }
2861 
2862                 first[0..1].to_owned() + ":"
2863             }
2864 
2865             _ => return Err(()),
2866         }
2867     };
2868 
2869     for segment in segments {
2870         string.push('\\');
2871 
2872         // Currently non-unicode windows paths cannot be represented
2873         match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2874             Ok(s) => string.push_str(&s),
2875             Err(..) => return Err(()),
2876         }
2877     }
2878     let path = PathBuf::from(string);
2879     debug_assert!(
2880         path.is_absolute(),
2881         "to_file_path() failed to produce an absolute Path"
2882     );
2883     Ok(path)
2884 }
2885 
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. Wrapped in `Option` so that `finish` can take it out,
    // after which the `Drop` impl finds `None` and does nothing.
    url: Option<&'a mut Url>,
    // Fragment handed back to `Url::restore_already_parsed_fragment` when the edit ends.
    // NOTE(review): presumably set aside by `query_pairs_mut`; the constructor is not in view.
    fragment: Option<String>,
}
2892 
// `as_mut_string` here exposes the internal serialization of a `Url`,
// which should not be exposed to users.
// We achieve that by not giving users direct access to `UrlQuery`:
// * Its fields are private
//   (and so it cannot be constructed with struct literal syntax outside of this crate),
// * It has no constructor
// * It is only visible (on the type level) to users in the return type of
//   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
// * `Serializer` keeps its target in a private field
// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`.
2903 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String2904     fn as_mut_string(&mut self) -> &mut String {
2905         &mut self.url.as_mut().unwrap().serialization
2906     }
2907 
finish(mut self) -> &'a mut Url2908     fn finish(mut self) -> &'a mut Url {
2909         let url = self.url.take().unwrap();
2910         url.restore_already_parsed_fragment(self.fragment.take());
2911         url
2912     }
2913 
2914     type Finished = &'a mut Url;
2915 }
2916 
2917 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)2918     fn drop(&mut self) {
2919         if let Some(url) = self.url.take() {
2920             url.restore_already_parsed_fragment(self.fragment.take())
2921         }
2922     }
2923 }
2924