• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 /*!
10 
11 rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12 for the [Rust](http://rust-lang.org/) programming language.
13 
14 
15 # URL parsing and data structures
16 
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18 
19 ```
20 use url::{Url, ParseError};
21 
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24 
25 Let’s parse a valid URL and look at its components.
26 
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34 
35 
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44         Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53 
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57 
58 ```
59 use url::Url;
60 # use url::ParseError;
61 
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64 
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75 
76 ## Serde
77 
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79 
80 # Base URL
81 
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83 
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87 
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89 
90 ```
91 use url::{Url, ParseError};
92 
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95 
96 Use the `join` method on an `Url` to use it as a base URL:
97 
98 ```
99 use url::Url;
100 # use url::ParseError;
101 
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110 
111 # Feature: `serde`
112 
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117 
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121 
122 # Feature: `debugger_visualizer`
123 
124 If you enable the `debugger_visualizer` feature, the `url` crate will include
125 a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
126 for [Visual Studio](https://www.visualstudio.com/) that allows you to view
127 [`Url`](struct.Url.html) objects in the debugger.
128 
129 This feature requires Rust 1.71 or later.
130 
131 ```toml
132 url = { version = "2", features = ["debugger_visualizer"] }
133 ```
134 
135 */
136 
137 #![doc(html_root_url = "https://docs.rs/url/2.5.2")]
138 #![cfg_attr(
139     feature = "debugger_visualizer",
140     debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
141 )]
142 
143 pub use form_urlencoded;
144 
145 #[cfg(feature = "serde")]
146 extern crate serde;
147 
148 use crate::host::HostInternal;
149 use crate::parser::{
150     to_u32, Context, Parser, SchemeType, PATH_SEGMENT, SPECIAL_PATH_SEGMENT, USERINFO,
151 };
152 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
153 use std::borrow::Borrow;
154 use std::cmp;
155 use std::fmt::{self, Write};
156 use std::hash;
157 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
158 use std::io;
159 use std::mem;
160 use std::net::IpAddr;
161 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
162 use std::net::{SocketAddr, ToSocketAddrs};
163 use std::ops::{Range, RangeFrom, RangeTo};
164 use std::path::{Path, PathBuf};
165 use std::str;
166 
167 use std::convert::TryFrom;
168 
169 pub use crate::host::Host;
170 pub use crate::origin::{OpaqueOrigin, Origin};
171 pub use crate::parser::{ParseError, SyntaxViolation};
172 pub use crate::path_segments::PathSegmentsMut;
173 pub use crate::slicing::Position;
174 pub use form_urlencoded::EncodingOverride;
175 
176 mod host;
177 mod origin;
178 mod parser;
179 mod path_segments;
180 mod slicing;
181 
182 #[doc(hidden)]
183 pub mod quirks;
184 
/// A parsed URL record.
///
/// The URL is kept as one serialized string; the remaining fields are `u32`
/// offsets into that string that mark where each component begins or ends,
/// plus the parsed host kind and port.
#[derive(Clone)]
pub struct Url {
    /// The full serialization of the URL.
    ///
    /// Syntax in pseudo-BNF:
    ///
    /// ```text
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    /// ```
    serialization: String,

    // Components: every offset below indexes into `serialization`.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    host_start: u32,
    host_end: u32,
    // Parsed kind of the host (none, domain, IPv4 or IPv6); the host text itself
    // is `serialization[host_start..host_end]`.
    host: HostInternal,
    // Port number, `None` when the serialization carries no ":port" part.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
210 
/// Full configuration for the URL parser.
///
/// Values are obtained from `Url::options()` and adjusted through the
/// by-value setter methods before calling `parse`.
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` means no base is supplied.
    base_url: Option<&'a Url>,
    // Character-encoding override for query strings (legacy, HTML only).
    encoding_override: EncodingOverride<'a>,
    // Optional callback handed to the parser for non-fatal syntax violations.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
219 
220 impl<'a> ParseOptions<'a> {
221     /// Change the base URL
222     ///
223     /// See the notes of [`Url::join`] for more details about how this base is considered
224     /// when parsing.
base_url(mut self, new: Option<&'a Url>) -> Self225     pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
226         self.base_url = new;
227         self
228     }
229 
230     /// Override the character encoding of query strings.
231     /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self232     pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
233         self.encoding_override = new;
234         self
235     }
236 
237     /// Call the provided function or closure for a non-fatal `SyntaxViolation`
238     /// when it occurs during parsing. Note that since the provided function is
239     /// `Fn`, the caller might need to utilize _interior mutability_, such as with
240     /// a `RefCell`, to collect the violations.
241     ///
242     /// ## Example
243     /// ```
244     /// use std::cell::RefCell;
245     /// use url::{Url, SyntaxViolation};
246     /// # use url::ParseError;
247     /// # fn run() -> Result<(), url::ParseError> {
248     /// let violations = RefCell::new(Vec::new());
249     /// let url = Url::options()
250     ///     .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
251     ///     .parse("https:////example.com")?;
252     /// assert_eq!(url.as_str(), "https://example.com/");
253     /// assert_eq!(violations.into_inner(),
254     ///            vec!(SyntaxViolation::ExpectedDoubleSlash));
255     /// # Ok(())
256     /// # }
257     /// # run().unwrap();
258     /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self259     pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
260         self.violation_fn = new;
261         self
262     }
263 
264     /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>265     pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
266         Parser {
267             serialization: String::with_capacity(input.len()),
268             base_url: self.base_url,
269             query_encoding_override: self.encoding_override,
270             violation_fn: self.violation_fn,
271             context: Context::UrlParser,
272         }
273         .parse_url(input)
274     }
275 }
276 
277 impl Url {
278     /// Parse an absolute URL from a string.
279     ///
280     /// # Examples
281     ///
282     /// ```rust
283     /// use url::Url;
284     /// # use url::ParseError;
285     ///
286     /// # fn run() -> Result<(), ParseError> {
287     /// let url = Url::parse("https://example.net")?;
288     /// # Ok(())
289     /// # }
290     /// # run().unwrap();
291     /// ```
292     ///
293     /// # Errors
294     ///
295     /// If the function can not parse an absolute URL from the given string,
296     /// a [`ParseError`] variant will be returned.
297     ///
298     /// [`ParseError`]: enum.ParseError.html
299     #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>300     pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
301         Url::options().parse(input)
302     }
303 
304     /// Parse an absolute URL from a string and add params to its query string.
305     ///
306     /// Existing params are not removed.
307     ///
308     /// # Examples
309     ///
310     /// ```rust
311     /// use url::Url;
312     /// # use url::ParseError;
313     ///
314     /// # fn run() -> Result<(), ParseError> {
315     /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
316     ///                                  &[("lang", "rust"), ("browser", "servo")])?;
317     /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
318     /// # Ok(())
319     /// # }
320     /// # run().unwrap();
321     /// ```
322     ///
323     /// # Errors
324     ///
325     /// If the function can not parse an absolute URL from the given string,
326     /// a [`ParseError`] variant will be returned.
327     ///
328     /// [`ParseError`]: enum.ParseError.html
329     #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,330     pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
331     where
332         I: IntoIterator,
333         I::Item: Borrow<(K, V)>,
334         K: AsRef<str>,
335         V: AsRef<str>,
336     {
337         let mut url = Url::options().parse(input);
338 
339         if let Ok(ref mut url) = url {
340             url.query_pairs_mut().extend_pairs(iter);
341         }
342 
343         url
344     }
345 
346     /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
strip_trailing_spaces_from_opaque_path(&mut self)347     fn strip_trailing_spaces_from_opaque_path(&mut self) {
348         if !self.cannot_be_a_base() {
349             return;
350         }
351 
352         if self.fragment_start.is_some() {
353             return;
354         }
355 
356         if self.query_start.is_some() {
357             return;
358         }
359 
360         let trailing_space_count = self
361             .serialization
362             .chars()
363             .rev()
364             .take_while(|c| *c == ' ')
365             .count();
366 
367         let start = self.serialization.len() - trailing_space_count;
368 
369         self.serialization.truncate(start);
370     }
371 
372     /// Parse a string as an URL, with this URL as the base URL.
373     ///
374     /// The inverse of this is [`make_relative`].
375     ///
376     /// # Notes
377     ///
378     /// - A trailing slash is significant.
379     /// Without it, the last path component is considered to be a “file” name
380     /// to be removed to get at the “directory” that is used as the base.
381     /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
382     /// as input replaces everything in the base URL after the scheme.
383     /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
384     ///
385     /// # Examples
386     ///
387     /// ```rust
388     /// use url::Url;
389     /// # use url::ParseError;
390     ///
391     /// // Base without a trailing slash
392     /// # fn run() -> Result<(), ParseError> {
393     /// let base = Url::parse("https://example.net/a/b.html")?;
394     /// let url = base.join("c.png")?;
395     /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
396     ///
397     /// // Base with a trailing slash
398     /// let base = Url::parse("https://example.net/a/b/")?;
399     /// let url = base.join("c.png")?;
400     /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
401     ///
402     /// // Input as scheme relative special URL
403     /// let base = Url::parse("https://alice.com/a")?;
404     /// let url = base.join("//eve.com/b")?;
405     /// assert_eq!(url.as_str(), "https://eve.com/b");
406     ///
407     /// // Input as absolute URL
408     /// let base = Url::parse("https://alice.com/a")?;
409     /// let url = base.join("http://eve.com/b")?;
410     /// assert_eq!(url.as_str(), "http://eve.com/b");  // http instead of https
    ///
412     /// # Ok(())
413     /// # }
414     /// # run().unwrap();
415     /// ```
416     ///
417     /// # Errors
418     ///
419     /// If the function can not parse an URL from the given string
420     /// with this URL as the base URL, a [`ParseError`] variant will be returned.
421     ///
422     /// [`ParseError`]: enum.ParseError.html
423     /// [`make_relative`]: #method.make_relative
424     #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>425     pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
426         Url::options().base_url(Some(self)).parse(input)
427     }
428 
429     /// Creates a relative URL if possible, with this URL as the base URL.
430     ///
431     /// This is the inverse of [`join`].
432     ///
433     /// # Examples
434     ///
435     /// ```rust
436     /// use url::Url;
437     /// # use url::ParseError;
438     ///
439     /// # fn run() -> Result<(), ParseError> {
440     /// let base = Url::parse("https://example.net/a/b.html")?;
441     /// let url = Url::parse("https://example.net/a/c.png")?;
442     /// let relative = base.make_relative(&url);
443     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
444     ///
445     /// let base = Url::parse("https://example.net/a/b/")?;
446     /// let url = Url::parse("https://example.net/a/b/c.png")?;
447     /// let relative = base.make_relative(&url);
448     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
449     ///
450     /// let base = Url::parse("https://example.net/a/b/")?;
451     /// let url = Url::parse("https://example.net/a/d/c.png")?;
452     /// let relative = base.make_relative(&url);
453     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
454     ///
455     /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
456     /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
457     /// let relative = base.make_relative(&url);
458     /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
459     /// # Ok(())
460     /// # }
461     /// # run().unwrap();
462     /// ```
463     ///
464     /// # Errors
465     ///
466     /// If this URL can't be a base for the given URL, `None` is returned.
467     /// This is for example the case if the scheme, host or port are not the same.
468     ///
469     /// [`join`]: #method.join
make_relative(&self, url: &Url) -> Option<String>470     pub fn make_relative(&self, url: &Url) -> Option<String> {
471         if self.cannot_be_a_base() {
472             return None;
473         }
474 
475         // Scheme, host and port need to be the same
476         if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
477             return None;
478         }
479 
480         // We ignore username/password at this point
481 
482         // The path has to be transformed
483         let mut relative = String::new();
484 
485         // Extract the filename of both URIs, these need to be handled separately
486         fn extract_path_filename(s: &str) -> (&str, &str) {
487             let last_slash_idx = s.rfind('/').unwrap_or(0);
488             let (path, filename) = s.split_at(last_slash_idx);
489             if filename.is_empty() {
490                 (path, "")
491             } else {
492                 (path, &filename[1..])
493             }
494         }
495 
496         let (base_path, base_filename) = extract_path_filename(self.path());
497         let (url_path, url_filename) = extract_path_filename(url.path());
498 
499         let mut base_path = base_path.split('/').peekable();
500         let mut url_path = url_path.split('/').peekable();
501 
502         // Skip over the common prefix
503         while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
504             base_path.next();
505             url_path.next();
506         }
507 
508         // Add `..` segments for the remainder of the base path
509         for base_path_segment in base_path {
510             // Skip empty last segments
511             if base_path_segment.is_empty() {
512                 break;
513             }
514 
515             if !relative.is_empty() {
516                 relative.push('/');
517             }
518 
519             relative.push_str("..");
520         }
521 
522         // Append the remainder of the other URI
523         for url_path_segment in url_path {
524             if !relative.is_empty() {
525                 relative.push('/');
526             }
527 
528             relative.push_str(url_path_segment);
529         }
530 
531         // Add the filename if they are not the same
532         if !relative.is_empty() || base_filename != url_filename {
533             // If the URIs filename is empty this means that it was a directory
534             // so we'll have to append a '/'.
535             //
536             // Otherwise append it directly as the new filename.
537             if url_filename.is_empty() {
538                 relative.push('/');
539             } else {
540                 if !relative.is_empty() {
541                     relative.push('/');
542                 }
543                 relative.push_str(url_filename);
544             }
545         }
546 
547         // Query and fragment are only taken from the other URI
548         if let Some(query) = url.query() {
549             relative.push('?');
550             relative.push_str(query);
551         }
552 
553         if let Some(fragment) = url.fragment() {
554             relative.push('#');
555             relative.push_str(fragment);
556         }
557 
558         Some(relative)
559     }
560 
561     /// Return a default `ParseOptions` that can fully configure the URL parser.
562     ///
563     /// # Examples
564     ///
565     /// Get default `ParseOptions`, then change base url
566     ///
567     /// ```rust
568     /// use url::Url;
569     /// # use url::ParseError;
570     /// # fn run() -> Result<(), ParseError> {
571     /// let options = Url::options();
572     /// let api = Url::parse("https://api.example.com")?;
573     /// let base_url = options.base_url(Some(&api));
574     /// let version_url = base_url.parse("version.json")?;
575     /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
576     /// # Ok(())
577     /// # }
578     /// # run().unwrap();
579     /// ```
options<'a>() -> ParseOptions<'a>580     pub fn options<'a>() -> ParseOptions<'a> {
581         ParseOptions {
582             base_url: None,
583             encoding_override: None,
584             violation_fn: None,
585         }
586     }
587 
588     /// Return the serialization of this URL.
589     ///
590     /// This is fast since that serialization is already stored in the `Url` struct.
591     ///
592     /// # Examples
593     ///
594     /// ```rust
595     /// use url::Url;
596     /// # use url::ParseError;
597     ///
598     /// # fn run() -> Result<(), ParseError> {
599     /// let url_str = "https://example.net/";
600     /// let url = Url::parse(url_str)?;
601     /// assert_eq!(url.as_str(), url_str);
602     /// # Ok(())
603     /// # }
604     /// # run().unwrap();
605     /// ```
606     #[inline]
as_str(&self) -> &str607     pub fn as_str(&self) -> &str {
608         &self.serialization
609     }
610 
611     /// Return the serialization of this URL.
612     ///
613     /// This consumes the `Url` and takes ownership of the `String` stored in it.
614     ///
615     /// # Examples
616     ///
617     /// ```rust
618     /// use url::Url;
619     /// # use url::ParseError;
620     ///
621     /// # fn run() -> Result<(), ParseError> {
622     /// let url_str = "https://example.net/";
623     /// let url = Url::parse(url_str)?;
624     /// assert_eq!(String::from(url), url_str);
625     /// # Ok(())
626     /// # }
627     /// # run().unwrap();
628     /// ```
629     #[inline]
630     #[deprecated(since = "2.3.0", note = "use Into<String>")]
into_string(self) -> String631     pub fn into_string(self) -> String {
632         self.into()
633     }
634 
635     /// For internal testing, not part of the public API.
636     ///
637     /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns an `Err` describing
    /// the first one that is not met.
639     /// This is for testing rust-url itself.
640     #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // Local replacements for the standard `assert!` / `assert_eq!` macros:
        // instead of panicking, a failed check makes this function return an
        // `Err` whose message contains the failed expression and the URL.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: non-empty, starts with an ASCII letter, continues with
        // ASCII alphanumerics or '+', '-', '.', and is followed by ':'.
        assert!(self.scheme_end >= 1);
        assert!(self.byte_at(0).is_ascii_alphabetic());
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                // The byte at `username_end` tells which userinfo parts are present.
                match self.byte_at(self.username_end) {
                    // ':' => a password follows, then '@' right before the host.
                    b':' => {
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    // '@' => username only; the host starts right after it.
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // Anything else => no userinfo; `username_end` sits right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host text must agree with the parsed `host` value.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            // Either the path starts right at the end of the host (no port), or
            // ":port" sits in between and must match the `port` field.
            if self.path_start == self.host_end {
                assert_eq!(self.port, None);
            } else {
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                // NOTE: an unparsable port string panics here rather than returning `Err`.
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            // The path is either empty (end of string) or begins at '/', '#' or '?'.
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            // All authority-related offsets collapse to just after the scheme's ':'.
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            if self.path().starts_with("//") {
                // special case when first path segment is empty
                // The serialization carries "/." between the ':' and the path.
                assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
                assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
                assert_eq!(self.path_start, self.scheme_end + 3);
            } else {
                assert_eq!(self.path_start, self.scheme_end + 1);
            }
        }
        // Query and fragment offsets point at their '?' / '#' delimiter, are not
        // before the path, and the fragment comes after the query.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round-trip: re-parsing our own serialization must reproduce every field.
        // NOTE: a failure to re-parse panics here rather than returning `Err`.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
764 
765     /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
766     ///
767     /// Note: this returns an opaque origin for `file:` URLs, which causes
768     /// `url.origin() != url.origin()`.
769     ///
770     /// # Examples
771     ///
772     /// URL with `ftp` scheme:
773     ///
774     /// ```rust
775     /// use url::{Host, Origin, Url};
776     /// # use url::ParseError;
777     ///
778     /// # fn run() -> Result<(), ParseError> {
779     /// let url = Url::parse("ftp://example.com/foo")?;
780     /// assert_eq!(url.origin(),
781     ///            Origin::Tuple("ftp".into(),
782     ///                          Host::Domain("example.com".into()),
783     ///                          21));
784     /// # Ok(())
785     /// # }
786     /// # run().unwrap();
787     /// ```
788     ///
789     /// URL with `blob` scheme:
790     ///
791     /// ```rust
792     /// use url::{Host, Origin, Url};
793     /// # use url::ParseError;
794     ///
795     /// # fn run() -> Result<(), ParseError> {
796     /// let url = Url::parse("blob:https://example.com/foo")?;
797     /// assert_eq!(url.origin(),
798     ///            Origin::Tuple("https".into(),
799     ///                          Host::Domain("example.com".into()),
800     ///                          443));
801     /// # Ok(())
802     /// # }
803     /// # run().unwrap();
804     /// ```
805     ///
806     /// URL with `file` scheme:
807     ///
808     /// ```rust
809     /// use url::{Host, Origin, Url};
810     /// # use url::ParseError;
811     ///
812     /// # fn run() -> Result<(), ParseError> {
813     /// let url = Url::parse("file:///tmp/foo")?;
814     /// assert!(!url.origin().is_tuple());
815     ///
816     /// let other_url = Url::parse("file:///tmp/foo")?;
817     /// assert!(url.origin() != other_url.origin());
818     /// # Ok(())
819     /// # }
820     /// # run().unwrap();
821     /// ```
822     ///
823     /// URL with other scheme:
824     ///
825     /// ```rust
826     /// use url::{Host, Origin, Url};
827     /// # use url::ParseError;
828     ///
829     /// # fn run() -> Result<(), ParseError> {
830     /// let url = Url::parse("foo:bar")?;
831     /// assert!(!url.origin().is_tuple());
832     /// # Ok(())
833     /// # }
834     /// # run().unwrap();
835     /// ```
836     #[inline]
origin(&self) -> Origin837     pub fn origin(&self) -> Origin {
838         origin::url_origin(self)
839     }
840 
841     /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
842     ///
843     /// # Examples
844     ///
845     /// ```
846     /// use url::Url;
847     /// # use url::ParseError;
848     ///
849     /// # fn run() -> Result<(), ParseError> {
850     /// let url = Url::parse("file:///tmp/foo")?;
851     /// assert_eq!(url.scheme(), "file");
852     /// # Ok(())
853     /// # }
854     /// # run().unwrap();
855     /// ```
856     #[inline]
scheme(&self) -> &str857     pub fn scheme(&self) -> &str {
858         self.slice(..self.scheme_end)
859     }
860 
861     /// Return whether the URL is special (has a special scheme)
862     ///
863     /// # Examples
864     ///
865     /// ```
866     /// use url::Url;
867     /// # use url::ParseError;
868     ///
869     /// # fn run() -> Result<(), ParseError> {
870     /// assert!(Url::parse("http:///tmp/foo")?.is_special());
871     /// assert!(Url::parse("file:///tmp/foo")?.is_special());
872     /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
873     /// # Ok(())
874     /// # }
875     /// # run().unwrap();
876     /// ```
is_special(&self) -> bool877     pub fn is_special(&self) -> bool {
878         let scheme_type = SchemeType::from(self.scheme());
879         scheme_type.is_special()
880     }
881 
882     /// Return whether the URL has an 'authority',
883     /// which can contain a username, password, host, and port number.
884     ///
885     /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
886     /// or cannot-be-a-base like `data:text/plain,Stuff`.
887     ///
888     /// See also the `authority` method.
889     ///
890     /// # Examples
891     ///
892     /// ```
893     /// use url::Url;
894     /// # use url::ParseError;
895     ///
896     /// # fn run() -> Result<(), ParseError> {
897     /// let url = Url::parse("ftp://rms@example.com")?;
898     /// assert!(url.has_authority());
899     ///
900     /// let url = Url::parse("unix:/run/foo.socket")?;
901     /// assert!(!url.has_authority());
902     ///
903     /// let url = Url::parse("data:text/plain,Stuff")?;
904     /// assert!(!url.has_authority());
905     /// # Ok(())
906     /// # }
907     /// # run().unwrap();
908     /// ```
909     #[inline]
has_authority(&self) -> bool910     pub fn has_authority(&self) -> bool {
911         debug_assert!(self.byte_at(self.scheme_end) == b':');
912         self.slice(self.scheme_end..).starts_with("://")
913     }
914 
915     /// Return the authority of this URL as an ASCII string.
916     ///
917     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
918     /// of a special URL, or percent encoded for non-special URLs.
919     /// IPv6 addresses are given between `[` and `]` brackets.
920     /// Ports are omitted if they match the well known port of a special URL.
921     ///
922     /// Username and password are percent-encoded.
923     ///
924     /// See also the `has_authority` method.
925     ///
926     /// # Examples
927     ///
928     /// ```
929     /// use url::Url;
930     /// # use url::ParseError;
931     ///
932     /// # fn run() -> Result<(), ParseError> {
933     /// let url = Url::parse("unix:/run/foo.socket")?;
934     /// assert_eq!(url.authority(), "");
935     /// let url = Url::parse("file:///tmp/foo")?;
936     /// assert_eq!(url.authority(), "");
937     /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
938     /// assert_eq!(url.authority(), "user:password@example.com");
939     /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
940     /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
941     /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
942     /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
943     /// # Ok(())
944     /// # }
945     /// # run().unwrap();
946     /// ```
authority(&self) -> &str947     pub fn authority(&self) -> &str {
948         let scheme_separator_len = "://".len() as u32;
949         if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
950             self.slice(self.scheme_end + scheme_separator_len..self.path_start)
951         } else {
952             ""
953         }
954     }
955 
956     /// Return whether this URL is a cannot-be-a-base URL,
957     /// meaning that parsing a relative URL string with this URL as the base will return an error.
958     ///
959     /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
960     /// as is typically the case of `data:` and `mailto:` URLs.
961     ///
962     /// # Examples
963     ///
964     /// ```
965     /// use url::Url;
966     /// # use url::ParseError;
967     ///
968     /// # fn run() -> Result<(), ParseError> {
969     /// let url = Url::parse("ftp://rms@example.com")?;
970     /// assert!(!url.cannot_be_a_base());
971     ///
972     /// let url = Url::parse("unix:/run/foo.socket")?;
973     /// assert!(!url.cannot_be_a_base());
974     ///
975     /// let url = Url::parse("data:text/plain,Stuff")?;
976     /// assert!(url.cannot_be_a_base());
977     /// # Ok(())
978     /// # }
979     /// # run().unwrap();
980     /// ```
981     #[inline]
cannot_be_a_base(&self) -> bool982     pub fn cannot_be_a_base(&self) -> bool {
983         !self.slice(self.scheme_end + 1..).starts_with('/')
984     }
985 
986     /// Return the username for this URL (typically the empty string)
987     /// as a percent-encoded ASCII string.
988     ///
989     /// # Examples
990     ///
991     /// ```
992     /// use url::Url;
993     /// # use url::ParseError;
994     ///
995     /// # fn run() -> Result<(), ParseError> {
996     /// let url = Url::parse("ftp://rms@example.com")?;
997     /// assert_eq!(url.username(), "rms");
998     ///
999     /// let url = Url::parse("ftp://:secret123@example.com")?;
1000     /// assert_eq!(url.username(), "");
1001     ///
1002     /// let url = Url::parse("https://example.com")?;
1003     /// assert_eq!(url.username(), "");
1004     /// # Ok(())
1005     /// # }
1006     /// # run().unwrap();
1007     /// ```
username(&self) -> &str1008     pub fn username(&self) -> &str {
1009         let scheme_separator_len = "://".len() as u32;
1010         if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
1011             self.slice(self.scheme_end + scheme_separator_len..self.username_end)
1012         } else {
1013             ""
1014         }
1015     }
1016 
1017     /// Return the password for this URL, if any, as a percent-encoded ASCII string.
1018     ///
1019     /// # Examples
1020     ///
1021     /// ```
1022     /// use url::Url;
1023     /// # use url::ParseError;
1024     ///
1025     /// # fn run() -> Result<(), ParseError> {
1026     /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1027     /// assert_eq!(url.password(), Some("secret123"));
1028     ///
1029     /// let url = Url::parse("ftp://:secret123@example.com")?;
1030     /// assert_eq!(url.password(), Some("secret123"));
1031     ///
1032     /// let url = Url::parse("ftp://rms@example.com")?;
1033     /// assert_eq!(url.password(), None);
1034     ///
1035     /// let url = Url::parse("https://example.com")?;
1036     /// assert_eq!(url.password(), None);
1037     /// # Ok(())
1038     /// # }
1039     /// # run().unwrap();
1040     /// ```
password(&self) -> Option<&str>1041     pub fn password(&self) -> Option<&str> {
1042         // This ':' is not the one marking a port number since a host can not be empty.
1043         // (Except for file: URLs, which do not have port numbers.)
1044         if self.has_authority()
1045             && self.username_end != self.serialization.len() as u32
1046             && self.byte_at(self.username_end) == b':'
1047         {
1048             debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1049             Some(self.slice(self.username_end + 1..self.host_start - 1))
1050         } else {
1051             None
1052         }
1053     }
1054 
1055     /// Equivalent to `url.host().is_some()`.
1056     ///
1057     /// # Examples
1058     ///
1059     /// ```
1060     /// use url::Url;
1061     /// # use url::ParseError;
1062     ///
1063     /// # fn run() -> Result<(), ParseError> {
1064     /// let url = Url::parse("ftp://rms@example.com")?;
1065     /// assert!(url.has_host());
1066     ///
1067     /// let url = Url::parse("unix:/run/foo.socket")?;
1068     /// assert!(!url.has_host());
1069     ///
1070     /// let url = Url::parse("data:text/plain,Stuff")?;
1071     /// assert!(!url.has_host());
1072     /// # Ok(())
1073     /// # }
1074     /// # run().unwrap();
1075     /// ```
has_host(&self) -> bool1076     pub fn has_host(&self) -> bool {
1077         !matches!(self.host, HostInternal::None)
1078     }
1079 
1080     /// Return the string representation of the host (domain or IP address) for this URL, if any.
1081     ///
1082     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1083     /// of a special URL, or percent encoded for non-special URLs.
1084     /// IPv6 addresses are given between `[` and `]` brackets.
1085     ///
1086     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1087     /// don’t have a host.
1088     ///
1089     /// See also the `host` method.
1090     ///
1091     /// # Examples
1092     ///
1093     /// ```
1094     /// use url::Url;
1095     /// # use url::ParseError;
1096     ///
1097     /// # fn run() -> Result<(), ParseError> {
1098     /// let url = Url::parse("https://127.0.0.1/index.html")?;
1099     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1100     ///
1101     /// let url = Url::parse("ftp://rms@example.com")?;
1102     /// assert_eq!(url.host_str(), Some("example.com"));
1103     ///
1104     /// let url = Url::parse("unix:/run/foo.socket")?;
1105     /// assert_eq!(url.host_str(), None);
1106     ///
1107     /// let url = Url::parse("data:text/plain,Stuff")?;
1108     /// assert_eq!(url.host_str(), None);
1109     /// # Ok(())
1110     /// # }
1111     /// # run().unwrap();
1112     /// ```
host_str(&self) -> Option<&str>1113     pub fn host_str(&self) -> Option<&str> {
1114         if self.has_host() {
1115             Some(self.slice(self.host_start..self.host_end))
1116         } else {
1117             None
1118         }
1119     }
1120 
1121     /// Return the parsed representation of the host for this URL.
1122     /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1123     /// of a special URL, or percent encoded for non-special URLs.
1124     ///
1125     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1126     /// don’t have a host.
1127     ///
1128     /// See also the `host_str` method.
1129     ///
1130     /// # Examples
1131     ///
1132     /// ```
1133     /// use url::Url;
1134     /// # use url::ParseError;
1135     ///
1136     /// # fn run() -> Result<(), ParseError> {
1137     /// let url = Url::parse("https://127.0.0.1/index.html")?;
1138     /// assert!(url.host().is_some());
1139     ///
1140     /// let url = Url::parse("ftp://rms@example.com")?;
1141     /// assert!(url.host().is_some());
1142     ///
1143     /// let url = Url::parse("unix:/run/foo.socket")?;
1144     /// assert!(url.host().is_none());
1145     ///
1146     /// let url = Url::parse("data:text/plain,Stuff")?;
1147     /// assert!(url.host().is_none());
1148     /// # Ok(())
1149     /// # }
1150     /// # run().unwrap();
1151     /// ```
host(&self) -> Option<Host<&str>>1152     pub fn host(&self) -> Option<Host<&str>> {
1153         match self.host {
1154             HostInternal::None => None,
1155             HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1156             HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1157             HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1158         }
1159     }
1160 
1161     /// If this URL has a host and it is a domain name (not an IP address), return it.
1162     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1163     /// of a special URL, or percent encoded for non-special URLs.
1164     ///
1165     /// # Examples
1166     ///
1167     /// ```
1168     /// use url::Url;
1169     /// # use url::ParseError;
1170     ///
1171     /// # fn run() -> Result<(), ParseError> {
1172     /// let url = Url::parse("https://127.0.0.1/")?;
1173     /// assert_eq!(url.domain(), None);
1174     ///
1175     /// let url = Url::parse("mailto:rms@example.net")?;
1176     /// assert_eq!(url.domain(), None);
1177     ///
1178     /// let url = Url::parse("https://example.com/")?;
1179     /// assert_eq!(url.domain(), Some("example.com"));
1180     /// # Ok(())
1181     /// # }
1182     /// # run().unwrap();
1183     /// ```
domain(&self) -> Option<&str>1184     pub fn domain(&self) -> Option<&str> {
1185         match self.host {
1186             HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1187             _ => None,
1188         }
1189     }
1190 
1191     /// Return the port number for this URL, if any.
1192     ///
    /// Note that default port numbers are never reflected by the serialization;
    /// use the `port_or_known_default()` method if you want a default port number returned.
1195     ///
1196     /// # Examples
1197     ///
1198     /// ```
1199     /// use url::Url;
1200     /// # use url::ParseError;
1201     ///
1202     /// # fn run() -> Result<(), ParseError> {
1203     /// let url = Url::parse("https://example.com")?;
1204     /// assert_eq!(url.port(), None);
1205     ///
1206     /// let url = Url::parse("https://example.com:443/")?;
1207     /// assert_eq!(url.port(), None);
1208     ///
1209     /// let url = Url::parse("ssh://example.com:22")?;
1210     /// assert_eq!(url.port(), Some(22));
1211     /// # Ok(())
1212     /// # }
1213     /// # run().unwrap();
1214     /// ```
1215     #[inline]
port(&self) -> Option<u16>1216     pub fn port(&self) -> Option<u16> {
1217         self.port
1218     }
1219 
1220     /// Return the port number for this URL, or the default port number if it is known.
1221     ///
1222     /// This method only knows the default port number
1223     /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1224     ///
1225     /// For URLs in these schemes, this method always returns `Some(_)`.
1226     /// For other schemes, it is the same as `Url::port()`.
1227     ///
1228     /// # Examples
1229     ///
1230     /// ```
1231     /// use url::Url;
1232     /// # use url::ParseError;
1233     ///
1234     /// # fn run() -> Result<(), ParseError> {
1235     /// let url = Url::parse("foo://example.com")?;
1236     /// assert_eq!(url.port_or_known_default(), None);
1237     ///
1238     /// let url = Url::parse("foo://example.com:1456")?;
1239     /// assert_eq!(url.port_or_known_default(), Some(1456));
1240     ///
1241     /// let url = Url::parse("https://example.com")?;
1242     /// assert_eq!(url.port_or_known_default(), Some(443));
1243     /// # Ok(())
1244     /// # }
1245     /// # run().unwrap();
1246     /// ```
1247     #[inline]
port_or_known_default(&self) -> Option<u16>1248     pub fn port_or_known_default(&self) -> Option<u16> {
1249         self.port.or_else(|| parser::default_port(self.scheme()))
1250     }
1251 
1252     /// Resolve a URL’s host and port number to `SocketAddr`.
1253     ///
1254     /// If the URL has the default port number of a scheme that is unknown to this library,
1255     /// `default_port_number` provides an opportunity to provide the actual port number.
1256     /// In non-example code this should be implemented either simply as `|| None`,
1257     /// or by matching on the URL’s `.scheme()`.
1258     ///
1259     /// If the host is a domain, it is resolved using the standard library’s DNS support.
1260     ///
1261     /// # Examples
1262     ///
1263     /// ```no_run
1264     /// let url = url::Url::parse("https://example.net/").unwrap();
1265     /// let addrs = url.socket_addrs(|| None).unwrap();
1266     /// std::net::TcpStream::connect(&*addrs)
1267     /// # ;
1268     /// ```
1269     ///
1270     /// ```
1271     /// /// With application-specific known default port numbers
1272     /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1273     ///     url.socket_addrs(|| match url.scheme() {
1274     ///         "socks5" | "socks5h" => Some(1080),
1275     ///         _ => None,
1276     ///     })
1277     /// }
1278     /// ```
1279     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1280     pub fn socket_addrs(
1281         &self,
1282         default_port_number: impl Fn() -> Option<u16>,
1283     ) -> io::Result<Vec<SocketAddr>> {
1284         // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1285         // causes borrowck issues because the return value borrows `default_port_number`:
1286         //
1287         // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1288         //
1289         // > This RFC proposes that *all* type parameters are considered in scope
1290         // > for `impl Trait` in return position
1291 
1292         fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1293             opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1294         }
1295 
1296         let host = io_result(self.host(), "No host name in the URL")?;
1297         let port = io_result(
1298             self.port_or_known_default().or_else(default_port_number),
1299             "No port number in the URL",
1300         )?;
1301         Ok(match host {
1302             Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1303             Host::Ipv4(ip) => vec![(ip, port).into()],
1304             Host::Ipv6(ip) => vec![(ip, port).into()],
1305         })
1306     }
1307 
1308     /// Return the path for this URL, as a percent-encoded ASCII string.
1309     /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1310     /// For other URLs, this starts with a '/' slash
1311     /// and continues with slash-separated path segments.
1312     ///
1313     /// # Examples
1314     ///
1315     /// ```rust
1316     /// use url::{Url, ParseError};
1317     ///
1318     /// # fn run() -> Result<(), ParseError> {
1319     /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1320     /// assert_eq!(url.path(), "/api/versions");
1321     ///
1322     /// let url = Url::parse("https://example.com")?;
1323     /// assert_eq!(url.path(), "/");
1324     ///
1325     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1326     /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1327     /// # Ok(())
1328     /// # }
1329     /// # run().unwrap();
1330     /// ```
path(&self) -> &str1331     pub fn path(&self) -> &str {
1332         match (self.query_start, self.fragment_start) {
1333             (None, None) => self.slice(self.path_start..),
1334             (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1335                 self.slice(self.path_start..next_component_start)
1336             }
1337         }
1338     }
1339 
1340     /// Unless this URL is cannot-be-a-base,
1341     /// return an iterator of '/' slash-separated path segments,
1342     /// each as a percent-encoded ASCII string.
1343     ///
1344     /// Return `None` for cannot-be-a-base URLs.
1345     ///
1346     /// When `Some` is returned, the iterator always contains at least one string
1347     /// (which may be empty).
1348     ///
1349     /// # Examples
1350     ///
1351     /// ```
1352     /// use url::Url;
1353     /// # use std::error::Error;
1354     ///
1355     /// # fn run() -> Result<(), Box<dyn Error>> {
1356     /// let url = Url::parse("https://example.com/foo/bar")?;
1357     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1358     /// assert_eq!(path_segments.next(), Some("foo"));
1359     /// assert_eq!(path_segments.next(), Some("bar"));
1360     /// assert_eq!(path_segments.next(), None);
1361     ///
1362     /// let url = Url::parse("https://example.com")?;
1363     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1364     /// assert_eq!(path_segments.next(), Some(""));
1365     /// assert_eq!(path_segments.next(), None);
1366     ///
1367     /// let url = Url::parse("data:text/plain,HelloWorld")?;
1368     /// assert!(url.path_segments().is_none());
1369     ///
1370     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1371     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1372     /// assert_eq!(path_segments.next(), Some("countries"));
1373     /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1374     /// # Ok(())
1375     /// # }
1376     /// # run().unwrap();
1377     /// ```
path_segments(&self) -> Option<str::Split<'_, char>>1378     pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1379         let path = self.path();
1380         path.strip_prefix('/').map(|remainder| remainder.split('/'))
1381     }
1382 
1383     /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1384     ///
1385     /// # Examples
1386     ///
1387     /// ```rust
1388     /// use url::Url;
1389     /// # use url::ParseError;
1390     ///
    /// # fn run() -> Result<(), ParseError> {
1392     /// let url = Url::parse("https://example.com/products?page=2")?;
1393     /// let query = url.query();
1394     /// assert_eq!(query, Some("page=2"));
1395     ///
1396     /// let url = Url::parse("https://example.com/products")?;
1397     /// let query = url.query();
1398     /// assert!(query.is_none());
1399     ///
1400     /// let url = Url::parse("https://example.com/?country=español")?;
1401     /// let query = url.query();
1402     /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1403     /// # Ok(())
1404     /// # }
1405     /// # run().unwrap();
1406     /// ```
query(&self) -> Option<&str>1407     pub fn query(&self) -> Option<&str> {
1408         match (self.query_start, self.fragment_start) {
1409             (None, _) => None,
1410             (Some(query_start), None) => {
1411                 debug_assert!(self.byte_at(query_start) == b'?');
1412                 Some(self.slice(query_start + 1..))
1413             }
1414             (Some(query_start), Some(fragment_start)) => {
1415                 debug_assert!(self.byte_at(query_start) == b'?');
1416                 Some(self.slice(query_start + 1..fragment_start))
1417             }
1418         }
1419     }
1420 
1421     /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1422     /// and return an iterator of (key, value) pairs.
1423     ///
1424     /// # Examples
1425     ///
1426     /// ```rust
1427     /// use std::borrow::Cow;
1428     ///
1429     /// use url::Url;
1430     /// # use url::ParseError;
1431     ///
1432     /// # fn run() -> Result<(), ParseError> {
1433     /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1434     /// let mut pairs = url.query_pairs();
1435     ///
1436     /// assert_eq!(pairs.count(), 2);
1437     ///
1438     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1439     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1440     /// # Ok(())
1441     /// # }
1442     /// # run().unwrap();
    /// ```
1445     #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1446     pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1447         form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1448     }
1449 
1450     /// Return this URL’s fragment identifier, if any.
1451     ///
1452     /// A fragment is the part of the URL after the `#` symbol.
1453     /// The fragment is optional and, if present, contains a fragment identifier
1454     /// that identifies a secondary resource, such as a section heading
1455     /// of a document.
1456     ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
1458     /// that is scrolled to on load. Browsers typically will not send the fragment portion
1459     /// of a URL to the server.
1460     ///
1461     /// **Note:** the parser did *not* percent-encode this component,
1462     /// but the input may have been percent-encoded already.
1463     ///
1464     /// # Examples
1465     ///
1466     /// ```rust
1467     /// use url::Url;
1468     /// # use url::ParseError;
1469     ///
1470     /// # fn run() -> Result<(), ParseError> {
1471     /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1472     ///
1473     /// assert_eq!(url.fragment(), Some("row=4"));
1474     ///
1475     /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1476     ///
1477     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1478     /// # Ok(())
1479     /// # }
1480     /// # run().unwrap();
1481     /// ```
fragment(&self) -> Option<&str>1482     pub fn fragment(&self) -> Option<&str> {
1483         self.fragment_start.map(|start| {
1484             debug_assert!(self.byte_at(start) == b'#');
1485             self.slice(start + 1..)
1486         })
1487     }
1488 
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1489     fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1490         let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1491         let result = f(&mut parser);
1492         self.serialization = parser.serialization;
1493         result
1494     }
1495 
1496     /// Change this URL’s fragment identifier.
1497     ///
1498     /// # Examples
1499     ///
1500     /// ```rust
1501     /// use url::Url;
1502     /// # use url::ParseError;
1503     ///
1504     /// # fn run() -> Result<(), ParseError> {
1505     /// let mut url = Url::parse("https://example.com/data.csv")?;
1506     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
1508     /// url.set_fragment(Some("cell=4,1-6,2"));
1509     /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1510     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1511     ///
1512     /// url.set_fragment(None);
1513     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1514     /// assert!(url.fragment().is_none());
1515     /// # Ok(())
1516     /// # }
1517     /// # run().unwrap();
1518     /// ```
set_fragment(&mut self, fragment: Option<&str>)1519     pub fn set_fragment(&mut self, fragment: Option<&str>) {
1520         // Remove any previous fragment
1521         if let Some(start) = self.fragment_start {
1522             debug_assert!(self.byte_at(start) == b'#');
1523             self.serialization.truncate(start as usize);
1524         }
1525         // Write the new one
1526         if let Some(input) = fragment {
1527             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1528             self.serialization.push('#');
1529             self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1530         } else {
1531             self.fragment_start = None;
1532             self.strip_trailing_spaces_from_opaque_path();
1533         }
1534     }
1535 
take_fragment(&mut self) -> Option<String>1536     fn take_fragment(&mut self) -> Option<String> {
1537         self.fragment_start.take().map(|start| {
1538             debug_assert!(self.byte_at(start) == b'#');
1539             let fragment = self.slice(start + 1..).to_owned();
1540             self.serialization.truncate(start as usize);
1541             fragment
1542         })
1543     }
1544 
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1545     fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1546         if let Some(ref fragment) = fragment {
1547             assert!(self.fragment_start.is_none());
1548             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1549             self.serialization.push('#');
1550             self.serialization.push_str(fragment);
1551         }
1552     }
1553 
1554     /// Change this URL’s query string. If `query` is `None`, this URL's
1555     /// query string will be cleared.
1556     ///
1557     /// # Examples
1558     ///
1559     /// ```rust
1560     /// use url::Url;
1561     /// # use url::ParseError;
1562     ///
1563     /// # fn run() -> Result<(), ParseError> {
1564     /// let mut url = Url::parse("https://example.com/products")?;
1565     /// assert_eq!(url.as_str(), "https://example.com/products");
1566     ///
1567     /// url.set_query(Some("page=2"));
1568     /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1569     /// assert_eq!(url.query(), Some("page=2"));
1570     /// # Ok(())
1571     /// # }
1572     /// # run().unwrap();
1573     /// ```
set_query(&mut self, query: Option<&str>)1574     pub fn set_query(&mut self, query: Option<&str>) {
1575         let fragment = self.take_fragment();
1576 
1577         // Remove any previous query
1578         if let Some(start) = self.query_start.take() {
1579             debug_assert!(self.byte_at(start) == b'?');
1580             self.serialization.truncate(start as usize);
1581         }
1582         // Write the new query, if any
1583         if let Some(input) = query {
1584             self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1585             self.serialization.push('?');
1586             let scheme_type = SchemeType::from(self.scheme());
1587             let scheme_end = self.scheme_end;
1588             self.mutate(|parser| {
1589                 let vfn = parser.violation_fn;
1590                 parser.parse_query(
1591                     scheme_type,
1592                     scheme_end,
1593                     parser::Input::new_trim_tab_and_newlines(input, vfn),
1594                 )
1595             });
1596         } else {
1597             self.query_start = None;
1598             if fragment.is_none() {
1599                 self.strip_trailing_spaces_from_opaque_path();
1600             }
1601         }
1602 
1603         self.restore_already_parsed_fragment(fragment);
1604     }
1605 
1606     /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1607     /// in `application/x-www-form-urlencoded` syntax.
1608     ///
1609     /// The return value has a method-chaining API:
1610     ///
1611     /// ```rust
1612     /// # use url::{Url, ParseError};
1613     ///
1614     /// # fn run() -> Result<(), ParseError> {
1615     /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1616     /// assert_eq!(url.query(), Some("lang=fr"));
1617     ///
1618     /// url.query_pairs_mut().append_pair("foo", "bar");
1619     /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1620     /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1621     ///
1622     /// url.query_pairs_mut()
1623     ///     .clear()
1624     ///     .append_pair("foo", "bar & baz")
1625     ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1626     /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1627     /// assert_eq!(url.as_str(),
1628     ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1629     /// # Ok(())
1630     /// # }
1631     /// # run().unwrap();
1632     /// ```
1633     ///
1634     /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1635     /// not `url.set_query(None)`.
1636     ///
1637     /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1638     pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1639         let fragment = self.take_fragment();
1640 
1641         let query_start;
1642         if let Some(start) = self.query_start {
1643             debug_assert!(self.byte_at(start) == b'?');
1644             query_start = start as usize;
1645         } else {
1646             query_start = self.serialization.len();
1647             self.query_start = Some(to_u32(query_start).unwrap());
1648             self.serialization.push('?');
1649         }
1650 
1651         let query = UrlQuery {
1652             url: Some(self),
1653             fragment,
1654         };
1655         form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1656     }
1657 
take_after_path(&mut self) -> String1658     fn take_after_path(&mut self) -> String {
1659         match (self.query_start, self.fragment_start) {
1660             (Some(i), _) | (None, Some(i)) => {
1661                 let after_path = self.slice(i..).to_owned();
1662                 self.serialization.truncate(i as usize);
1663                 after_path
1664             }
1665             (None, None) => String::new(),
1666         }
1667     }
1668 
1669     /// Change this URL’s path.
1670     ///
1671     /// # Examples
1672     ///
1673     /// ```rust
1674     /// use url::Url;
1675     /// # use url::ParseError;
1676     ///
1677     /// # fn run() -> Result<(), ParseError> {
1678     /// let mut url = Url::parse("https://example.com")?;
1679     /// url.set_path("api/comments");
1680     /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1681     /// assert_eq!(url.path(), "/api/comments");
1682     ///
1683     /// let mut url = Url::parse("https://example.com/api")?;
1684     /// url.set_path("data/report.csv");
1685     /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1686     /// assert_eq!(url.path(), "/data/report.csv");
1687     ///
1688     /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1689     /// let mut url = Url::parse("https://example.com")?;
1690     /// url.set_path("api/some comments");
1691     /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1692     /// assert_eq!(url.path(), "/api/some%20comments");
1693     ///
1694     /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1695     /// let mut url = Url::parse("https://example.com")?;
1696     /// url.set_path("api/some%20comments");
1697     /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1698     /// assert_eq!(url.path(), "/api/some%20comments");
1699     ///
1700     /// # Ok(())
1701     /// # }
1702     /// # run().unwrap();
1703     /// ```
set_path(&mut self, mut path: &str)1704     pub fn set_path(&mut self, mut path: &str) {
1705         let after_path = self.take_after_path();
1706         let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1707         let cannot_be_a_base = self.cannot_be_a_base();
1708         let scheme_type = SchemeType::from(self.scheme());
1709         self.serialization.truncate(self.path_start as usize);
1710         self.mutate(|parser| {
1711             if cannot_be_a_base {
1712                 if path.starts_with('/') {
1713                     parser.serialization.push_str("%2F");
1714                     path = &path[1..];
1715                 }
1716                 parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1717             } else {
1718                 let mut has_host = true; // FIXME
1719                 parser.parse_path_start(
1720                     scheme_type,
1721                     &mut has_host,
1722                     parser::Input::new_no_trim(path),
1723                 );
1724             }
1725         });
1726         self.restore_after_path(old_after_path_pos, &after_path);
1727     }
1728 
1729     /// Return an object with methods to manipulate this URL’s path segments.
1730     ///
1731     /// Return `Err(())` if this URL is cannot-be-a-base.
1732     #[allow(clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1733     pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1734         if self.cannot_be_a_base() {
1735             Err(())
1736         } else {
1737             Ok(path_segments::new(self))
1738         }
1739     }
1740 
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1741     fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1742         let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1743         let adjust = |index: &mut u32| {
1744             *index -= old_after_path_position;
1745             *index += new_after_path_position;
1746         };
1747         if let Some(ref mut index) = self.query_start {
1748             adjust(index)
1749         }
1750         if let Some(ref mut index) = self.fragment_start {
1751             adjust(index)
1752         }
1753         self.serialization.push_str(after_path)
1754     }
1755 
1756     /// Change this URL’s port number.
1757     ///
1758     /// Note that default port numbers are not reflected in the serialization.
1759     ///
1760     /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1761     /// do nothing and return `Err`.
1762     ///
1763     /// # Examples
1764     ///
1765     /// ```
1766     /// use url::Url;
1767     /// # use std::error::Error;
1768     ///
1769     /// # fn run() -> Result<(), Box<dyn Error>> {
1770     /// let mut url = Url::parse("ssh://example.net:2048/")?;
1771     ///
1772     /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1773     /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1774     ///
1775     /// url.set_port(None).map_err(|_| "cannot be base")?;
1776     /// assert_eq!(url.as_str(), "ssh://example.net/");
1777     /// # Ok(())
1778     /// # }
1779     /// # run().unwrap();
1780     /// ```
1781     ///
1782     /// Known default port numbers are not reflected:
1783     ///
1784     /// ```rust
1785     /// use url::Url;
1786     /// # use std::error::Error;
1787     ///
1788     /// # fn run() -> Result<(), Box<dyn Error>> {
1789     /// let mut url = Url::parse("https://example.org/")?;
1790     ///
1791     /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1792     /// assert!(url.port().is_none());
1793     /// # Ok(())
1794     /// # }
1795     /// # run().unwrap();
1796     /// ```
1797     ///
1798     /// Cannot set port for cannot-be-a-base URLs:
1799     ///
1800     /// ```
1801     /// use url::Url;
1802     /// # use url::ParseError;
1803     ///
1804     /// # fn run() -> Result<(), ParseError> {
1805     /// let mut url = Url::parse("mailto:rms@example.net")?;
1806     ///
1807     /// let result = url.set_port(Some(80));
1808     /// assert!(result.is_err());
1809     ///
1810     /// let result = url.set_port(None);
1811     /// assert!(result.is_err());
1812     /// # Ok(())
1813     /// # }
1814     /// # run().unwrap();
1815     /// ```
1816     #[allow(clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1817     pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1818         // has_host implies !cannot_be_a_base
1819         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1820             return Err(());
1821         }
1822         if port.is_some() && port == parser::default_port(self.scheme()) {
1823             port = None
1824         }
1825         self.set_port_internal(port);
1826         Ok(())
1827     }
1828 
set_port_internal(&mut self, port: Option<u16>)1829     fn set_port_internal(&mut self, port: Option<u16>) {
1830         match (self.port, port) {
1831             (None, None) => {}
1832             (Some(_), None) => {
1833                 self.serialization
1834                     .drain(self.host_end as usize..self.path_start as usize);
1835                 let offset = self.path_start - self.host_end;
1836                 self.path_start = self.host_end;
1837                 if let Some(ref mut index) = self.query_start {
1838                     *index -= offset
1839                 }
1840                 if let Some(ref mut index) = self.fragment_start {
1841                     *index -= offset
1842                 }
1843             }
1844             (Some(old), Some(new)) if old == new => {}
1845             (_, Some(new)) => {
1846                 let path_and_after = self.slice(self.path_start..).to_owned();
1847                 self.serialization.truncate(self.host_end as usize);
1848                 write!(&mut self.serialization, ":{}", new).unwrap();
1849                 let old_path_start = self.path_start;
1850                 let new_path_start = to_u32(self.serialization.len()).unwrap();
1851                 self.path_start = new_path_start;
1852                 let adjust = |index: &mut u32| {
1853                     *index -= old_path_start;
1854                     *index += new_path_start;
1855                 };
1856                 if let Some(ref mut index) = self.query_start {
1857                     adjust(index)
1858                 }
1859                 if let Some(ref mut index) = self.fragment_start {
1860                     adjust(index)
1861                 }
1862                 self.serialization.push_str(&path_and_after);
1863             }
1864         }
1865         self.port = port;
1866     }
1867 
1868     /// Change this URL’s host.
1869     ///
1870     /// Removing the host (calling this with `None`)
1871     /// will also remove any username, password, and port number.
1872     ///
1873     /// # Examples
1874     ///
1875     /// Change host:
1876     ///
1877     /// ```
1878     /// use url::Url;
1879     /// # use url::ParseError;
1880     ///
1881     /// # fn run() -> Result<(), ParseError> {
1882     /// let mut url = Url::parse("https://example.net")?;
1883     /// let result = url.set_host(Some("rust-lang.org"));
1884     /// assert!(result.is_ok());
1885     /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1886     /// # Ok(())
1887     /// # }
1888     /// # run().unwrap();
1889     /// ```
1890     ///
1891     /// Remove host:
1892     ///
1893     /// ```
1894     /// use url::Url;
1895     /// # use url::ParseError;
1896     ///
1897     /// # fn run() -> Result<(), ParseError> {
1898     /// let mut url = Url::parse("foo://example.net")?;
1899     /// let result = url.set_host(None);
1900     /// assert!(result.is_ok());
1901     /// assert_eq!(url.as_str(), "foo:/");
1902     /// # Ok(())
1903     /// # }
1904     /// # run().unwrap();
1905     /// ```
1906     ///
1907     /// Cannot remove host for 'special' schemes (e.g. `http`):
1908     ///
1909     /// ```
1910     /// use url::Url;
1911     /// # use url::ParseError;
1912     ///
1913     /// # fn run() -> Result<(), ParseError> {
1914     /// let mut url = Url::parse("https://example.net")?;
1915     /// let result = url.set_host(None);
1916     /// assert!(result.is_err());
1917     /// assert_eq!(url.as_str(), "https://example.net/");
1918     /// # Ok(())
1919     /// # }
1920     /// # run().unwrap();
1921     /// ```
1922     ///
1923     /// Cannot change or remove host for cannot-be-a-base URLs:
1924     ///
1925     /// ```
1926     /// use url::Url;
1927     /// # use url::ParseError;
1928     ///
1929     /// # fn run() -> Result<(), ParseError> {
1930     /// let mut url = Url::parse("mailto:rms@example.net")?;
1931     ///
1932     /// let result = url.set_host(Some("rust-lang.org"));
1933     /// assert!(result.is_err());
1934     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1935     ///
1936     /// let result = url.set_host(None);
1937     /// assert!(result.is_err());
1938     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1939     /// # Ok(())
1940     /// # }
1941     /// # run().unwrap();
1942     /// ```
1943     ///
1944     /// # Errors
1945     ///
1946     /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1947     /// a [`ParseError`] variant will be returned.
1948     ///
1949     /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1950     pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1951         if self.cannot_be_a_base() {
1952             return Err(ParseError::SetHostOnCannotBeABaseUrl);
1953         }
1954 
1955         let scheme_type = SchemeType::from(self.scheme());
1956 
1957         if let Some(host) = host {
1958             if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
1959                 return Err(ParseError::EmptyHost);
1960             }
1961             let mut host_substr = host;
1962             // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1963             if !host.starts_with('[') || !host.ends_with(']') {
1964                 match host.find(':') {
1965                     Some(0) => {
1966                         // If buffer is the empty string, validation error, return failure.
1967                         return Err(ParseError::InvalidDomainCharacter);
1968                     }
1969                     // Let host be the result of host parsing buffer
1970                     Some(colon_index) => {
1971                         host_substr = &host[..colon_index];
1972                     }
1973                     None => {}
1974                 }
1975             }
1976             if SchemeType::from(self.scheme()).is_special() {
1977                 self.set_host_internal(Host::parse(host_substr)?, None);
1978             } else {
1979                 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1980             }
1981         } else if self.has_host() {
1982             if scheme_type.is_special() && !scheme_type.is_file() {
1983                 return Err(ParseError::EmptyHost);
1984             } else if self.serialization.len() == self.path_start as usize {
1985                 self.serialization.push('/');
1986             }
1987             debug_assert!(self.byte_at(self.scheme_end) == b':');
1988             debug_assert!(self.byte_at(self.path_start) == b'/');
1989 
1990             let new_path_start = if scheme_type.is_file() {
1991                 self.scheme_end + 3
1992             } else {
1993                 self.scheme_end + 1
1994             };
1995 
1996             self.serialization
1997                 .drain(new_path_start as usize..self.path_start as usize);
1998             let offset = self.path_start - new_path_start;
1999             self.path_start = new_path_start;
2000             self.username_end = new_path_start;
2001             self.host_start = new_path_start;
2002             self.host_end = new_path_start;
2003             self.port = None;
2004             if let Some(ref mut index) = self.query_start {
2005                 *index -= offset
2006             }
2007             if let Some(ref mut index) = self.fragment_start {
2008                 *index -= offset
2009             }
2010         }
2011         Ok(())
2012     }
2013 
2014     /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>)2015     fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2016         let old_suffix_pos = if opt_new_port.is_some() {
2017             self.path_start
2018         } else {
2019             self.host_end
2020         };
2021         let suffix = self.slice(old_suffix_pos..).to_owned();
2022         self.serialization.truncate(self.host_start as usize);
2023         if !self.has_authority() {
2024             debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2025             debug_assert!(self.username_end == self.host_start);
2026             self.serialization.push('/');
2027             self.serialization.push('/');
2028             self.username_end += 2;
2029             self.host_start += 2;
2030         }
2031         write!(&mut self.serialization, "{}", host).unwrap();
2032         self.host_end = to_u32(self.serialization.len()).unwrap();
2033         self.host = host.into();
2034 
2035         if let Some(new_port) = opt_new_port {
2036             self.port = new_port;
2037             if let Some(port) = new_port {
2038                 write!(&mut self.serialization, ":{}", port).unwrap();
2039             }
2040         }
2041         let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2042         self.serialization.push_str(&suffix);
2043 
2044         let adjust = |index: &mut u32| {
2045             *index -= old_suffix_pos;
2046             *index += new_suffix_pos;
2047         };
2048         adjust(&mut self.path_start);
2049         if let Some(ref mut index) = self.query_start {
2050             adjust(index)
2051         }
2052         if let Some(ref mut index) = self.fragment_start {
2053             adjust(index)
2054         }
2055     }
2056 
2057     /// Change this URL’s host to the given IP address.
2058     ///
2059     /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2060     ///
2061     /// Compared to `Url::set_host`, this skips the host parser.
2062     ///
2063     /// # Examples
2064     ///
2065     /// ```rust
2066     /// use url::{Url, ParseError};
2067     ///
2068     /// # fn run() -> Result<(), ParseError> {
2069     /// let mut url = Url::parse("http://example.com")?;
2070     /// url.set_ip_host("127.0.0.1".parse().unwrap());
2071     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2072     /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2073     /// # Ok(())
2074     /// # }
2075     /// # run().unwrap();
2076     /// ```
2077     ///
    /// Cannot change the host of a cannot-be-a-base URL (e.g. `mailto:`) to an IP address:
2079     ///
2080     /// ```rust
2081     /// use url::{Url, ParseError};
2082     ///
2083     /// # fn run() -> Result<(), ParseError> {
2084     /// let mut url = Url::parse("mailto:rms@example.com")?;
2085     /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2086     ///
2087     /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2088     /// assert!(result.is_err());
2089     /// # Ok(())
2090     /// # }
2091     /// # run().unwrap();
2092     /// ```
2093     ///
2094     #[allow(clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>2095     pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2096         if self.cannot_be_a_base() {
2097             return Err(());
2098         }
2099 
2100         let address = match address {
2101             IpAddr::V4(address) => Host::Ipv4(address),
2102             IpAddr::V6(address) => Host::Ipv6(address),
2103         };
2104         self.set_host_internal(address, None);
2105         Ok(())
2106     }
2107 
2108     /// Change this URL’s password.
2109     ///
    /// If this URL is cannot-be-a-base, does not have a host (or has an empty host),
    /// or has the `file` scheme; do nothing and return `Err`.
2111     ///
2112     /// # Examples
2113     ///
2114     /// ```rust
2115     /// use url::{Url, ParseError};
2116     ///
2117     /// # fn run() -> Result<(), ParseError> {
2118     /// let mut url = Url::parse("mailto:rmz@example.com")?;
2119     /// let result = url.set_password(Some("secret_password"));
2120     /// assert!(result.is_err());
2121     ///
2122     /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2123     /// let result = url.set_password(Some("secret_password"));
2124     /// assert_eq!(url.password(), Some("secret_password"));
2125     ///
2126     /// let mut url = Url::parse("ftp://user2:@example.com")?;
2127     /// let result = url.set_password(Some("secret2"));
2128     /// assert!(result.is_ok());
2129     /// assert_eq!(url.password(), Some("secret2"));
2130     /// # Ok(())
2131     /// # }
2132     /// # run().unwrap();
2133     /// ```
2134     #[allow(clippy::result_unit_err)]
set_password(&mut self, password: Option<&str>) -> Result<(), ()>2135     pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2136         // has_host implies !cannot_be_a_base
2137         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2138             return Err(());
2139         }
2140         let password = password.unwrap_or_default();
2141         if !password.is_empty() {
2142             let host_and_after = self.slice(self.host_start..).to_owned();
2143             self.serialization.truncate(self.username_end as usize);
2144             self.serialization.push(':');
2145             self.serialization
2146                 .extend(utf8_percent_encode(password, USERINFO));
2147             self.serialization.push('@');
2148 
2149             let old_host_start = self.host_start;
2150             let new_host_start = to_u32(self.serialization.len()).unwrap();
2151             let adjust = |index: &mut u32| {
2152                 *index -= old_host_start;
2153                 *index += new_host_start;
2154             };
2155             self.host_start = new_host_start;
2156             adjust(&mut self.host_end);
2157             adjust(&mut self.path_start);
2158             if let Some(ref mut index) = self.query_start {
2159                 adjust(index)
2160             }
2161             if let Some(ref mut index) = self.fragment_start {
2162                 adjust(index)
2163             }
2164 
2165             self.serialization.push_str(&host_and_after);
2166         } else if self.byte_at(self.username_end) == b':' {
2167             // If there is a password to remove
2168             let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2169             debug_assert!(has_username_or_password);
2170             let username_start = self.scheme_end + 3;
2171             let empty_username = username_start == self.username_end;
2172             let start = self.username_end; // Remove the ':'
2173             let end = if empty_username {
2174                 self.host_start // Remove the '@' as well
2175             } else {
2176                 self.host_start - 1 // Keep the '@' to separate the username from the host
2177             };
2178             self.serialization.drain(start as usize..end as usize);
2179             let offset = end - start;
2180             self.host_start -= offset;
2181             self.host_end -= offset;
2182             self.path_start -= offset;
2183             if let Some(ref mut index) = self.query_start {
2184                 *index -= offset
2185             }
2186             if let Some(ref mut index) = self.fragment_start {
2187                 *index -= offset
2188             }
2189         }
2190         Ok(())
2191     }
2192 
2193     /// Change this URL’s username.
2194     ///
    /// If this URL is cannot-be-a-base, does not have a host (or has an empty host),
    /// or has the `file` scheme; do nothing and return `Err`.
    ///
2196     /// # Examples
2197     ///
    /// Cannot set the username of a cannot-be-a-base URL (e.g. `mailto:`):
2199     ///
2200     /// ```rust
2201     /// use url::{Url, ParseError};
2202     ///
2203     /// # fn run() -> Result<(), ParseError> {
2204     /// let mut url = Url::parse("mailto:rmz@example.com")?;
2205     /// let result = url.set_username("user1");
2206     /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2207     /// assert!(result.is_err());
2208     /// # Ok(())
2209     /// # }
2210     /// # run().unwrap();
2211     /// ```
2212     ///
    /// Set the username to `user1`:
2214     ///
2215     /// ```rust
2216     /// use url::{Url, ParseError};
2217     ///
2218     /// # fn run() -> Result<(), ParseError> {
2219     /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2220     /// let result = url.set_username("user1");
2221     /// assert!(result.is_ok());
2222     /// assert_eq!(url.username(), "user1");
2223     /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2224     /// # Ok(())
2225     /// # }
2226     /// # run().unwrap();
2227     /// ```
    #[allow(clippy::result_unit_err)]
    pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
        // has_host implies !cannot_be_a_base
        // Also rejected: an empty domain host and the `file` scheme.
        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
            return Err(());
        }
        // The username, if any, starts right after "://".
        let username_start = self.scheme_end + 3;
        debug_assert!(self.slice(self.scheme_end..username_start) == "://");
        // Fast path: the serialized username already equals the requested one.
        // Note this compares against the raw input, before percent-encoding.
        if self.slice(username_start..self.username_end) == username {
            return Ok(());
        }
        // Keep everything after the old username, then write the new
        // (percent-encoded) username in its place.
        let after_username = self.slice(self.username_end..).to_owned();
        self.serialization.truncate(username_start as usize);
        self.serialization
            .extend(utf8_percent_encode(username, USERINFO));

        // Later indices are shifted by `- removed_bytes + added_bytes`:
        // these start out as the old and new ends of the username and are
        // bumped below when an '@' separator is dropped or inserted.
        let mut removed_bytes = self.username_end;
        self.username_end = to_u32(self.serialization.len()).unwrap();
        let mut added_bytes = self.username_end;

        let new_username_is_empty = self.username_end == username_start;
        match (new_username_is_empty, after_username.chars().next()) {
            // Username cleared and no password follows: drop the now-useless '@'.
            (true, Some('@')) => {
                removed_bytes += 1;
                self.serialization.push_str(&after_username[1..]);
            }
            // A separator ('@', or ':' introducing a password) is already there,
            // or the username is empty and none is needed: re-append unchanged.
            (false, Some('@')) | (_, Some(':')) | (true, _) => {
                self.serialization.push_str(&after_username);
            }
            // Non-empty username where no credentials existed before: insert an '@'.
            (false, _) => {
                added_bytes += 1;
                self.serialization.push('@');
                self.serialization.push_str(&after_username);
            }
        }

        // Shift every index located after the username.
        let adjust = |index: &mut u32| {
            *index -= removed_bytes;
            *index += added_bytes;
        };
        adjust(&mut self.host_start);
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start {
            adjust(index)
        }
        if let Some(ref mut index) = self.fragment_start {
            adjust(index)
        }
        Ok(())
    }
2279 
2280     /// Change this URL’s scheme.
2281     ///
2282     /// Do nothing and return `Err` under the following circumstances:
2283     ///
2284     /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2285     /// * If this URL is cannot-be-a-base and the new scheme is one of
2286     ///   `http`, `https`, `ws`, `wss` or `ftp`
2287     /// * If either the old or new scheme is `http`, `https`, `ws`,
2288     ///   `wss` or `ftp` and the other is not one of these
2289     /// * If the new scheme is `file` and this URL includes credentials
2290     ///   or has a non-null port
2291     /// * If this URL's scheme is `file` and its host is empty or null
2292     ///
2293     /// See also [the URL specification's section on legal scheme state
2294     /// overrides](https://url.spec.whatwg.org/#scheme-state).
2295     ///
2296     /// # Examples
2297     ///
2298     /// Change the URL’s scheme from `https` to `http`:
2299     ///
2300     /// ```
2301     /// use url::Url;
2302     /// # use url::ParseError;
2303     ///
2304     /// # fn run() -> Result<(), ParseError> {
2305     /// let mut url = Url::parse("https://example.net")?;
2306     /// let result = url.set_scheme("http");
2307     /// assert_eq!(url.as_str(), "http://example.net/");
2308     /// assert!(result.is_ok());
2309     /// # Ok(())
2310     /// # }
2311     /// # run().unwrap();
2312     /// ```
2313     /// Change the URL’s scheme from `foo` to `bar`:
2314     ///
2315     /// ```
2316     /// use url::Url;
2317     /// # use url::ParseError;
2318     ///
2319     /// # fn run() -> Result<(), ParseError> {
2320     /// let mut url = Url::parse("foo://example.net")?;
2321     /// let result = url.set_scheme("bar");
2322     /// assert_eq!(url.as_str(), "bar://example.net");
2323     /// assert!(result.is_ok());
2324     /// # Ok(())
2325     /// # }
2326     /// # run().unwrap();
2327     /// ```
2328     ///
2329     /// Cannot change URL’s scheme from `https` to `foõ`:
2330     ///
2331     /// ```
2332     /// use url::Url;
2333     /// # use url::ParseError;
2334     ///
2335     /// # fn run() -> Result<(), ParseError> {
2336     /// let mut url = Url::parse("https://example.net")?;
2337     /// let result = url.set_scheme("foõ");
2338     /// assert_eq!(url.as_str(), "https://example.net/");
2339     /// assert!(result.is_err());
2340     /// # Ok(())
2341     /// # }
2342     /// # run().unwrap();
2343     /// ```
2344     ///
2345     /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2346     ///
2347     /// ```
2348     /// use url::Url;
2349     /// # use url::ParseError;
2350     ///
2351     /// # fn run() -> Result<(), ParseError> {
2352     /// let mut url = Url::parse("mailto:rms@example.net")?;
2353     /// let result = url.set_scheme("https");
2354     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2355     /// assert!(result.is_err());
2356     /// # Ok(())
2357     /// # }
2358     /// # run().unwrap();
2359     /// ```
2360     /// Cannot change the URL’s scheme from `foo` to `https`:
2361     ///
2362     /// ```
2363     /// use url::Url;
2364     /// # use url::ParseError;
2365     ///
2366     /// # fn run() -> Result<(), ParseError> {
2367     /// let mut url = Url::parse("foo://example.net")?;
2368     /// let result = url.set_scheme("https");
2369     /// assert_eq!(url.as_str(), "foo://example.net");
2370     /// assert!(result.is_err());
2371     /// # Ok(())
2372     /// # }
2373     /// # run().unwrap();
2374     /// ```
2375     /// Cannot change the URL’s scheme from `http` to `foo`:
2376     ///
2377     /// ```
2378     /// use url::Url;
2379     /// # use url::ParseError;
2380     ///
2381     /// # fn run() -> Result<(), ParseError> {
2382     /// let mut url = Url::parse("http://example.net")?;
2383     /// let result = url.set_scheme("foo");
2384     /// assert_eq!(url.as_str(), "http://example.net/");
2385     /// assert!(result.is_err());
2386     /// # Ok(())
2387     /// # }
2388     /// # run().unwrap();
2389     /// ```
2390     #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
set_scheme(&mut self, scheme: &str) -> Result<(), ()>2391     pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2392         let mut parser = Parser::for_setter(String::new());
2393         let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2394         let new_scheme_type = SchemeType::from(&parser.serialization);
2395         let old_scheme_type = SchemeType::from(self.scheme());
2396         // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2397         if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2398             // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2399             (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2400             // If url includes credentials or has a non-null port, and buffer is "file", then return.
2401             // If url’s scheme is "file" and its host is an empty host or null, then return.
2402             (new_scheme_type.is_file() && self.has_authority())
2403         {
2404             return Err(());
2405         }
2406 
2407         if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2408             return Err(());
2409         }
2410         let old_scheme_end = self.scheme_end;
2411         let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2412         let adjust = |index: &mut u32| {
2413             *index -= old_scheme_end;
2414             *index += new_scheme_end;
2415         };
2416 
2417         self.scheme_end = new_scheme_end;
2418         adjust(&mut self.username_end);
2419         adjust(&mut self.host_start);
2420         adjust(&mut self.host_end);
2421         adjust(&mut self.path_start);
2422         if let Some(ref mut index) = self.query_start {
2423             adjust(index)
2424         }
2425         if let Some(ref mut index) = self.fragment_start {
2426             adjust(index)
2427         }
2428 
2429         parser.serialization.push_str(self.slice(old_scheme_end..));
2430         self.serialization = parser.serialization;
2431 
2432         // Update the port so it can be removed
2433         // If it is the scheme's default
2434         // we don't mind it silently failing
2435         // if there was no port in the first place
2436         let previous_port = self.port();
2437         let _ = self.set_port(previous_port);
2438 
2439         Ok(())
2440     }
2441 
2442     /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2443     ///
2444     /// This returns `Err` if the given path is not absolute or,
2445     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2446     ///
2447     /// # Examples
2448     ///
2449     /// On Unix-like platforms:
2450     ///
2451     /// ```
2452     /// # if cfg!(unix) {
2453     /// use url::Url;
2454     ///
2455     /// # fn run() -> Result<(), ()> {
2456     /// let url = Url::from_file_path("/tmp/foo.txt")?;
2457     /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2458     ///
2459     /// let url = Url::from_file_path("../foo.txt");
2460     /// assert!(url.is_err());
2461     ///
2462     /// let url = Url::from_file_path("https://google.com/");
2463     /// assert!(url.is_err());
2464     /// # Ok(())
2465     /// # }
2466     /// # run().unwrap();
2467     /// # }
2468     /// ```
2469     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2470     #[allow(clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2471     pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2472         let mut serialization = "file://".to_owned();
2473         let host_start = serialization.len() as u32;
2474         let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2475         Ok(Url {
2476             serialization,
2477             scheme_end: "file".len() as u32,
2478             username_end: host_start,
2479             host_start,
2480             host_end,
2481             host,
2482             port: None,
2483             path_start: host_end,
2484             query_start: None,
2485             fragment_start: None,
2486         })
2487     }
2488 
2489     /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2490     ///
2491     /// This returns `Err` if the given path is not absolute or,
2492     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2493     ///
2494     /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2495     /// so that the entire path is considered when using this URL as a base URL.
2496     ///
2497     /// For example:
2498     ///
2499     /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2500     ///   as the base URL is `file:///var/www/index.html`
2501     /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2502     ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
2503     ///
2504     /// Note that `std::path` does not consider trailing slashes significant
2505     /// and usually does not include them (e.g. in `Path::parent()`).
2506     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2507     #[allow(clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2508     pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2509         let mut url = Url::from_file_path(path)?;
2510         if !url.serialization.ends_with('/') {
2511             url.serialization.push('/')
2512         }
2513         Ok(url)
2514     }
2515 
2516     /// Serialize with Serde using the internal representation of the `Url` struct.
2517     ///
2518     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2519     /// for speed, compared to the `Deserialize` trait impl.
2520     ///
2521     /// This method is only available if the `serde` Cargo feature is enabled.
2522     #[cfg(feature = "serde")]
2523     #[deny(unused)]
serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,2524     pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2525     where
2526         S: serde::Serializer,
2527     {
2528         use serde::Serialize;
2529         // Destructuring first lets us ensure that adding or removing fields forces this method
2530         // to be updated
2531         let Url {
2532             ref serialization,
2533             ref scheme_end,
2534             ref username_end,
2535             ref host_start,
2536             ref host_end,
2537             ref host,
2538             ref port,
2539             ref path_start,
2540             ref query_start,
2541             ref fragment_start,
2542         } = *self;
2543         (
2544             serialization,
2545             scheme_end,
2546             username_end,
2547             host_start,
2548             host_end,
2549             host,
2550             port,
2551             path_start,
2552             query_start,
2553             fragment_start,
2554         )
2555             .serialize(serializer)
2556     }
2557 
2558     /// Serialize with Serde using the internal representation of the `Url` struct.
2559     ///
2560     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2561     /// for speed, compared to the `Deserialize` trait impl.
2562     ///
2563     /// This method is only available if the `serde` Cargo feature is enabled.
2564     #[cfg(feature = "serde")]
2565     #[deny(unused)]
deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer<'de>,2566     pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2567     where
2568         D: serde::Deserializer<'de>,
2569     {
2570         use serde::de::{Deserialize, Error, Unexpected};
2571         let (
2572             serialization,
2573             scheme_end,
2574             username_end,
2575             host_start,
2576             host_end,
2577             host,
2578             port,
2579             path_start,
2580             query_start,
2581             fragment_start,
2582         ) = Deserialize::deserialize(deserializer)?;
2583         let url = Url {
2584             serialization,
2585             scheme_end,
2586             username_end,
2587             host_start,
2588             host_end,
2589             host,
2590             port,
2591             path_start,
2592             query_start,
2593             fragment_start,
2594         };
2595         if cfg!(debug_assertions) {
2596             url.check_invariants().map_err(|reason| {
2597                 let reason: &str = &reason;
2598                 Error::invalid_value(Unexpected::Other("value"), &reason)
2599             })?
2600         }
2601         Ok(url)
2602     }
2603 
2604     /// Assuming the URL is in the `file` scheme or similar,
2605     /// convert its path to an absolute `std::path::Path`.
2606     ///
2607     /// **Note:** This does not actually check the URL’s `scheme`,
2608     /// and may give nonsensical results for other schemes.
2609     /// It is the user’s responsibility to check the URL’s scheme before calling this.
2610     ///
2611     /// ```
2612     /// # use url::Url;
2613     /// # let url = Url::parse("file:///etc/passwd").unwrap();
2614     /// let path = url.to_file_path();
2615     /// ```
2616     ///
2617     /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2618     /// `file:` URLs may have a non-local host),
2619     /// or if `Path::new_opt()` returns `None`.
2620     /// (That is, if the percent-decoded path contains a NUL byte or,
2621     /// for a Windows path, is not UTF-8.)
2622     #[inline]
2623     #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2624     #[allow(clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2625     pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2626         if let Some(segments) = self.path_segments() {
2627             let host = match self.host() {
2628                 None | Some(Host::Domain("localhost")) => None,
2629                 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2630                     Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2631                 }
2632                 _ => return Err(()),
2633             };
2634 
2635             return file_url_segments_to_pathbuf(host, segments);
2636         }
2637         Err(())
2638     }
2639 
2640     // Private helper methods:
2641 
2642     #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2643     fn slice<R>(&self, range: R) -> &str
2644     where
2645         R: RangeArg,
2646     {
2647         range.slice_of(&self.serialization)
2648     }
2649 
2650     #[inline]
byte_at(&self, i: u32) -> u82651     fn byte_at(&self, i: u32) -> u8 {
2652         self.serialization.as_bytes()[i as usize]
2653     }
2654 }
2655 
2656 /// Parse a string as an URL, without a base URL or encoding override.
2657 impl str::FromStr for Url {
2658     type Err = ParseError;
2659 
2660     #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2661     fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2662         Url::parse(input)
2663     }
2664 }
2665 
2666 impl<'a> TryFrom<&'a str> for Url {
2667     type Error = ParseError;
2668 
try_from(s: &'a str) -> Result<Self, Self::Error>2669     fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2670         Url::parse(s)
2671     }
2672 }
2673 
2674 /// Display the serialization of this URL.
2675 impl fmt::Display for Url {
2676     #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2677     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2678         fmt::Display::fmt(&self.serialization, formatter)
2679     }
2680 }
2681 
2682 /// String conversion.
2683 impl From<Url> for String {
from(value: Url) -> String2684     fn from(value: Url) -> String {
2685         value.serialization
2686     }
2687 }
2688 
2689 /// Debug the serialization of this URL.
2690 impl fmt::Debug for Url {
2691     #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2692     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2693         formatter
2694             .debug_struct("Url")
2695             .field("scheme", &self.scheme())
2696             .field("cannot_be_a_base", &self.cannot_be_a_base())
2697             .field("username", &self.username())
2698             .field("password", &self.password())
2699             .field("host", &self.host())
2700             .field("port", &self.port())
2701             .field("path", &self.path())
2702             .field("query", &self.query())
2703             .field("fragment", &self.fragment())
2704             .finish()
2705     }
2706 }
2707 
/// URLs compare like their serialization.
///
/// Equality is defined by `PartialEq`, which compares the serialized
/// strings; this marker impl declares it a full equivalence relation.
impl Eq for Url {}
2710 
2711 /// URLs compare like their serialization.
2712 impl PartialEq for Url {
2713     #[inline]
eq(&self, other: &Self) -> bool2714     fn eq(&self, other: &Self) -> bool {
2715         self.serialization == other.serialization
2716     }
2717 }
2718 
2719 /// URLs compare like their serialization.
2720 impl Ord for Url {
2721     #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2722     fn cmp(&self, other: &Self) -> cmp::Ordering {
2723         self.serialization.cmp(&other.serialization)
2724     }
2725 }
2726 
2727 /// URLs compare like their serialization.
2728 impl PartialOrd for Url {
2729     #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2730     fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2731         Some(self.cmp(other))
2732     }
2733 }
2734 
2735 /// URLs hash like their serialization.
2736 impl hash::Hash for Url {
2737     #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2738     fn hash<H>(&self, state: &mut H)
2739     where
2740         H: hash::Hasher,
2741     {
2742         hash::Hash::hash(&self.serialization, state)
2743     }
2744 }
2745 
2746 /// Return the serialization of this URL.
2747 impl AsRef<str> for Url {
2748     #[inline]
as_ref(&self) -> &str2749     fn as_ref(&self) -> &str {
2750         &self.serialization
2751     }
2752 }
2753 
/// Range types with `u32` bounds that can select a sub-slice of a string.
///
/// Bounds are byte offsets. Slicing panics if a bound is out of range or
/// does not fall on a `char` boundary, exactly like indexing a `str` with
/// the equivalent `usize` range.
trait RangeArg {
    /// Return the part of `s` covered by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// Half-open range `start..end`.
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let bounds = (self.start as usize)..(self.end as usize);
        &s[bounds]
    }
}

/// Range `start..`, running to the end of the string.
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let bounds = (self.start as usize)..;
        &s[bounds]
    }
}

/// Range `..end`, starting at the beginning of the string.
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let bounds = ..(self.end as usize);
        &s[bounds]
    }
}
2778 
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string: its serialization.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
2791 
/// Deserializes this URL from a `serde` stream.
///
/// The input is expected to be a string, which is parsed with `Url::parse`;
/// a parse failure is reported as an "invalid value" error carrying the
/// parse error's message.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Url, D::Error> {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that accepts a string and parses it as a URL.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E: Error>(self, s: &str) -> Result<Self::Value, E> {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        // The parse error's `Display` text becomes the "expected" part.
                        let message = err.to_string();
                        Err(Error::invalid_value(Unexpected::Str(s), &message.as_str()))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2826 
2827 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2828 fn path_to_file_url_segments(
2829     path: &Path,
2830     serialization: &mut String,
2831 ) -> Result<(u32, HostInternal), ()> {
2832     #[cfg(any(unix, target_os = "redox"))]
2833     use std::os::unix::prelude::OsStrExt;
2834     #[cfg(target_os = "wasi")]
2835     use std::os::wasi::prelude::OsStrExt;
2836     if !path.is_absolute() {
2837         return Err(());
2838     }
2839     let host_end = to_u32(serialization.len()).unwrap();
2840     let mut empty = true;
2841     // skip the root component
2842     for component in path.components().skip(1) {
2843         empty = false;
2844         serialization.push('/');
2845         serialization.extend(percent_encode(
2846             component.as_os_str().as_bytes(),
2847             SPECIAL_PATH_SEGMENT,
2848         ));
2849     }
2850     if empty {
2851         // An URL’s path must not be empty.
2852         serialization.push('/');
2853     }
2854     Ok((host_end, HostInternal::None))
2855 }
2856 
/// Windows flavour of `path_to_file_url_segments`: forwards unchanged to
/// `path_to_file_url_segments_windows`, which is built on every platform.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2864 
2865 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2866 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2867 fn path_to_file_url_segments_windows(
2868     path: &Path,
2869     serialization: &mut String,
2870 ) -> Result<(u32, HostInternal), ()> {
2871     use std::path::{Component, Prefix};
2872     if !path.is_absolute() {
2873         return Err(());
2874     }
2875     let mut components = path.components();
2876 
2877     let host_start = serialization.len() + 1;
2878     let host_end;
2879     let host_internal;
2880 
2881     match components.next() {
2882         Some(Component::Prefix(ref p)) => match p.kind() {
2883             Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2884                 host_end = to_u32(serialization.len()).unwrap();
2885                 host_internal = HostInternal::None;
2886                 serialization.push('/');
2887                 serialization.push(letter as char);
2888                 serialization.push(':');
2889             }
2890             Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2891                 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2892                 write!(serialization, "{}", host).unwrap();
2893                 host_end = to_u32(serialization.len()).unwrap();
2894                 host_internal = host.into();
2895                 serialization.push('/');
2896                 let share = share.to_str().ok_or(())?;
2897                 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2898             }
2899             _ => return Err(()),
2900         },
2901         _ => return Err(()),
2902     }
2903 
2904     let mut path_only_has_prefix = true;
2905     for component in components {
2906         if component == Component::RootDir {
2907             continue;
2908         }
2909 
2910         path_only_has_prefix = false;
2911         // FIXME: somehow work with non-unicode?
2912         let component = component.as_os_str().to_str().ok_or(())?;
2913 
2914         serialization.push('/');
2915         serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2916     }
2917 
2918     // A windows drive letter must end with a slash.
2919     if serialization.len() > host_start
2920         && parser::is_windows_drive_letter(&serialization[host_start..])
2921         && path_only_has_prefix
2922     {
2923         serialization.push('/');
2924     }
2925 
2926     Ok((host_end, host_internal))
2927 }
2928 
2929 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2930 fn file_url_segments_to_pathbuf(
2931     host: Option<&str>,
2932     segments: str::Split<'_, char>,
2933 ) -> Result<PathBuf, ()> {
2934     use std::ffi::OsStr;
2935     #[cfg(any(unix, target_os = "redox"))]
2936     use std::os::unix::prelude::OsStrExt;
2937     #[cfg(target_os = "wasi")]
2938     use std::os::wasi::prelude::OsStrExt;
2939 
2940     if host.is_some() {
2941         return Err(());
2942     }
2943 
2944     let mut bytes = if cfg!(target_os = "redox") {
2945         b"file:".to_vec()
2946     } else {
2947         Vec::new()
2948     };
2949 
2950     for segment in segments {
2951         bytes.push(b'/');
2952         bytes.extend(percent_decode(segment.as_bytes()));
2953     }
2954 
2955     // A windows drive letter must end with a slash.
2956     if bytes.len() > 2
2957         && bytes[bytes.len() - 2].is_ascii_alphabetic()
2958         && matches!(bytes[bytes.len() - 1], b':' | b'|')
2959     {
2960         bytes.push(b'/');
2961     }
2962 
2963     let os_str = OsStr::from_bytes(&bytes);
2964     let path = PathBuf::from(os_str);
2965 
2966     debug_assert!(
2967         path.is_absolute(),
2968         "to_file_path() failed to produce an absolute Path"
2969     );
2970 
2971     Ok(path)
2972 }
2973 
/// Windows flavour of `file_url_segments_to_pathbuf`: forwards unchanged to
/// `file_url_segments_to_pathbuf_windows`, which is built on every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2981 
2982 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2983 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2984 fn file_url_segments_to_pathbuf_windows(
2985     host: Option<&str>,
2986     mut segments: str::Split<'_, char>,
2987 ) -> Result<PathBuf, ()> {
2988     let mut string = if let Some(host) = host {
2989         r"\\".to_owned() + host
2990     } else {
2991         let first = segments.next().ok_or(())?;
2992 
2993         match first.len() {
2994             2 => {
2995                 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2996                     return Err(());
2997                 }
2998 
2999                 first.to_owned()
3000             }
3001 
3002             4 => {
3003                 if !first.starts_with(parser::ascii_alpha) {
3004                     return Err(());
3005                 }
3006                 let bytes = first.as_bytes();
3007                 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
3008                     return Err(());
3009                 }
3010 
3011                 first[0..1].to_owned() + ":"
3012             }
3013 
3014             _ => return Err(()),
3015         }
3016     };
3017 
3018     for segment in segments {
3019         string.push('\\');
3020 
3021         // Currently non-unicode windows paths cannot be represented
3022         match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
3023             Ok(s) => string.push_str(&s),
3024             Err(..) => return Err(()),
3025         }
3026     }
3027     let path = PathBuf::from(string);
3028     debug_assert!(
3029         path.is_absolute(),
3030         "to_file_path() failed to produce an absolute Path"
3031     );
3032     Ok(path)
3033 }
3034 
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. `Some` until `finish` or `drop` takes it out.
    url: Option<&'a mut Url>,
    // Handed to `Url::restore_already_parsed_fragment` when editing ends.
    fragment: Option<String>,
}
3041 
3042 // `as_mut_string` string here exposes the internal serialization of an `Url`,
3043 // which should not be exposed to users.
3044 // We achieve that by not giving users direct access to `UrlQuery`:
3045 // * Its fields are private
3046 //   (and so can not be constructed with struct literal syntax outside of this crate),
3047 // * It has no constructor
3048 // * It is only visible (on the type level) to users in the return type of
3049 //   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
3050 // * `Serializer` keeps its target in a private field
3051 // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
3052 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String3053     fn as_mut_string(&mut self) -> &mut String {
3054         &mut self.url.as_mut().unwrap().serialization
3055     }
3056 
finish(mut self) -> &'a mut Url3057     fn finish(mut self) -> &'a mut Url {
3058         let url = self.url.take().unwrap();
3059         url.restore_already_parsed_fragment(self.fragment.take());
3060         url
3061     }
3062 
3063     type Finished = &'a mut Url;
3064 }
3065 
3066 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)3067     fn drop(&mut self) {
3068         if let Some(url) = self.url.take() {
3069             url.restore_already_parsed_fragment(self.fragment.take())
3070         }
3071     }
3072 }
3073