1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 /*!
10
11 rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12 for the [Rust](http://rust-lang.org/) programming language.
13
14
15 # URL parsing and data structures
16
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19 ```
20 use url::{Url, ParseError};
21
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24
25 Let’s parse a valid URL and look at its components.
26
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34
35
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57
58 ```
59 use url::Url;
60 # use url::ParseError;
61
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75
76 ## Serde
77
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79
80 # Base URL
81
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89
90 ```
91 use url::{Url, ParseError};
92
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95
96 Use the `join` method on an `Url` to use it as a base URL:
97
98 ```
99 use url::Url;
100 # use url::ParseError;
101
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110
111 # Feature: `serde`
112
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121
122 # Feature: `debugger_visualizer`
123
124 If you enable the `debugger_visualizer` feature, the `url` crate will include
125 a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
126 for [Visual Studio](https://www.visualstudio.com/) that allows you to view
127 [`Url`](struct.Url.html) objects in the debugger.
128
129 This feature requires Rust 1.71 or later.
130
131 ```toml
132 url = { version = "2", features = ["debugger_visualizer"] }
133 ```
134
135 */
136
137 #![doc(html_root_url = "https://docs.rs/url/2.5.2")]
138 #![cfg_attr(
139 feature = "debugger_visualizer",
140 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
141 )]
142
143 pub use form_urlencoded;
144
145 #[cfg(feature = "serde")]
146 extern crate serde;
147
148 use crate::host::HostInternal;
149 use crate::parser::{
150 to_u32, Context, Parser, SchemeType, PATH_SEGMENT, SPECIAL_PATH_SEGMENT, USERINFO,
151 };
152 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
153 use std::borrow::Borrow;
154 use std::cmp;
155 use std::fmt::{self, Write};
156 use std::hash;
157 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
158 use std::io;
159 use std::mem;
160 use std::net::IpAddr;
161 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
162 use std::net::{SocketAddr, ToSocketAddrs};
163 use std::ops::{Range, RangeFrom, RangeTo};
164 use std::path::{Path, PathBuf};
165 use std::str;
166
167 use std::convert::TryFrom;
168
169 pub use crate::host::Host;
170 pub use crate::origin::{OpaqueOrigin, Origin};
171 pub use crate::parser::{ParseError, SyntaxViolation};
172 pub use crate::path_segments::PathSegmentsMut;
173 pub use crate::slicing::Position;
174 pub use form_urlencoded::EncodingOverride;
175
176 mod host;
177 mod origin;
178 mod parser;
179 mod path_segments;
180 mod slicing;
181
182 #[doc(hidden)]
183 pub mod quirks;
184
/// A parsed URL record.
///
/// The URL is stored as one serialized string plus a set of positions
/// recording where each component sits inside that string.
#[derive(Clone)]
pub struct Url {
    /// The complete serialized URL; this is the string `as_str` hands out.
    ///
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    // The `u32` fields below are positions within `serialization`.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // Start of the host text; when userinfo is present this is just after its '@'.
    host_start: u32,
    // End of the host text; a ':' at this position introduces the port.
    host_end: u32,
    // Kind of host; `check_invariants` verifies it agrees with the host text.
    host: HostInternal,
    // Port number present in the serialization, `None` when there is none.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
210
/// Full configuration for the URL parser.
///
/// A value is obtained from `Url::options()`, adjusted through the
/// builder-style methods, and finally consumed by `parse`.
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    /// Base URL handed to the parser, if any.
    base_url: Option<&'a Url>,
    /// Encoding override for query strings; passed to the parser as
    /// `query_encoding_override`.
    encoding_override: EncodingOverride<'a>,
    /// Callback handed to the parser for reporting non-fatal syntax violations, if any.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
219
220 impl<'a> ParseOptions<'a> {
221 /// Change the base URL
222 ///
223 /// See the notes of [`Url::join`] for more details about how this base is considered
224 /// when parsing.
base_url(mut self, new: Option<&'a Url>) -> Self225 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
226 self.base_url = new;
227 self
228 }
229
230 /// Override the character encoding of query strings.
231 /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self232 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
233 self.encoding_override = new;
234 self
235 }
236
237 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
238 /// when it occurs during parsing. Note that since the provided function is
239 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
240 /// a `RefCell`, to collect the violations.
241 ///
242 /// ## Example
243 /// ```
244 /// use std::cell::RefCell;
245 /// use url::{Url, SyntaxViolation};
246 /// # use url::ParseError;
247 /// # fn run() -> Result<(), url::ParseError> {
248 /// let violations = RefCell::new(Vec::new());
249 /// let url = Url::options()
250 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
251 /// .parse("https:////example.com")?;
252 /// assert_eq!(url.as_str(), "https://example.com/");
253 /// assert_eq!(violations.into_inner(),
254 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
255 /// # Ok(())
256 /// # }
257 /// # run().unwrap();
258 /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self259 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
260 self.violation_fn = new;
261 self
262 }
263
264 /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>265 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
266 Parser {
267 serialization: String::with_capacity(input.len()),
268 base_url: self.base_url,
269 query_encoding_override: self.encoding_override,
270 violation_fn: self.violation_fn,
271 context: Context::UrlParser,
272 }
273 .parse_url(input)
274 }
275 }
276
277 impl Url {
278 /// Parse an absolute URL from a string.
279 ///
280 /// # Examples
281 ///
282 /// ```rust
283 /// use url::Url;
284 /// # use url::ParseError;
285 ///
286 /// # fn run() -> Result<(), ParseError> {
287 /// let url = Url::parse("https://example.net")?;
288 /// # Ok(())
289 /// # }
290 /// # run().unwrap();
291 /// ```
292 ///
293 /// # Errors
294 ///
295 /// If the function can not parse an absolute URL from the given string,
296 /// a [`ParseError`] variant will be returned.
297 ///
298 /// [`ParseError`]: enum.ParseError.html
299 #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>300 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
301 Url::options().parse(input)
302 }
303
304 /// Parse an absolute URL from a string and add params to its query string.
305 ///
306 /// Existing params are not removed.
307 ///
308 /// # Examples
309 ///
310 /// ```rust
311 /// use url::Url;
312 /// # use url::ParseError;
313 ///
314 /// # fn run() -> Result<(), ParseError> {
315 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
316 /// &[("lang", "rust"), ("browser", "servo")])?;
317 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
318 /// # Ok(())
319 /// # }
320 /// # run().unwrap();
321 /// ```
322 ///
323 /// # Errors
324 ///
325 /// If the function can not parse an absolute URL from the given string,
326 /// a [`ParseError`] variant will be returned.
327 ///
328 /// [`ParseError`]: enum.ParseError.html
329 #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,330 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
331 where
332 I: IntoIterator,
333 I::Item: Borrow<(K, V)>,
334 K: AsRef<str>,
335 V: AsRef<str>,
336 {
337 let mut url = Url::options().parse(input);
338
339 if let Ok(ref mut url) = url {
340 url.query_pairs_mut().extend_pairs(iter);
341 }
342
343 url
344 }
345
346 /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
strip_trailing_spaces_from_opaque_path(&mut self)347 fn strip_trailing_spaces_from_opaque_path(&mut self) {
348 if !self.cannot_be_a_base() {
349 return;
350 }
351
352 if self.fragment_start.is_some() {
353 return;
354 }
355
356 if self.query_start.is_some() {
357 return;
358 }
359
360 let trailing_space_count = self
361 .serialization
362 .chars()
363 .rev()
364 .take_while(|c| *c == ' ')
365 .count();
366
367 let start = self.serialization.len() - trailing_space_count;
368
369 self.serialization.truncate(start);
370 }
371
372 /// Parse a string as an URL, with this URL as the base URL.
373 ///
374 /// The inverse of this is [`make_relative`].
375 ///
376 /// # Notes
377 ///
378 /// - A trailing slash is significant.
379 /// Without it, the last path component is considered to be a “file” name
380 /// to be removed to get at the “directory” that is used as the base.
381 /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
382 /// as input replaces everything in the base URL after the scheme.
383 /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
384 ///
385 /// # Examples
386 ///
387 /// ```rust
388 /// use url::Url;
389 /// # use url::ParseError;
390 ///
391 /// // Base without a trailing slash
392 /// # fn run() -> Result<(), ParseError> {
393 /// let base = Url::parse("https://example.net/a/b.html")?;
394 /// let url = base.join("c.png")?;
395 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
396 ///
397 /// // Base with a trailing slash
398 /// let base = Url::parse("https://example.net/a/b/")?;
399 /// let url = base.join("c.png")?;
400 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
401 ///
402 /// // Input as scheme relative special URL
403 /// let base = Url::parse("https://alice.com/a")?;
404 /// let url = base.join("//eve.com/b")?;
405 /// assert_eq!(url.as_str(), "https://eve.com/b");
406 ///
407 /// // Input as absolute URL
408 /// let base = Url::parse("https://alice.com/a")?;
409 /// let url = base.join("http://eve.com/b")?;
410 /// assert_eq!(url.as_str(), "http://eve.com/b"); // http instead of https
411
412 /// # Ok(())
413 /// # }
414 /// # run().unwrap();
415 /// ```
416 ///
417 /// # Errors
418 ///
419 /// If the function can not parse an URL from the given string
420 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
421 ///
422 /// [`ParseError`]: enum.ParseError.html
423 /// [`make_relative`]: #method.make_relative
424 #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>425 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
426 Url::options().base_url(Some(self)).parse(input)
427 }
428
429 /// Creates a relative URL if possible, with this URL as the base URL.
430 ///
431 /// This is the inverse of [`join`].
432 ///
433 /// # Examples
434 ///
435 /// ```rust
436 /// use url::Url;
437 /// # use url::ParseError;
438 ///
439 /// # fn run() -> Result<(), ParseError> {
440 /// let base = Url::parse("https://example.net/a/b.html")?;
441 /// let url = Url::parse("https://example.net/a/c.png")?;
442 /// let relative = base.make_relative(&url);
443 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
444 ///
445 /// let base = Url::parse("https://example.net/a/b/")?;
446 /// let url = Url::parse("https://example.net/a/b/c.png")?;
447 /// let relative = base.make_relative(&url);
448 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
449 ///
450 /// let base = Url::parse("https://example.net/a/b/")?;
451 /// let url = Url::parse("https://example.net/a/d/c.png")?;
452 /// let relative = base.make_relative(&url);
453 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
454 ///
455 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
456 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
457 /// let relative = base.make_relative(&url);
458 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
459 /// # Ok(())
460 /// # }
461 /// # run().unwrap();
462 /// ```
463 ///
464 /// # Errors
465 ///
466 /// If this URL can't be a base for the given URL, `None` is returned.
467 /// This is for example the case if the scheme, host or port are not the same.
468 ///
469 /// [`join`]: #method.join
make_relative(&self, url: &Url) -> Option<String>470 pub fn make_relative(&self, url: &Url) -> Option<String> {
471 if self.cannot_be_a_base() {
472 return None;
473 }
474
475 // Scheme, host and port need to be the same
476 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
477 return None;
478 }
479
480 // We ignore username/password at this point
481
482 // The path has to be transformed
483 let mut relative = String::new();
484
485 // Extract the filename of both URIs, these need to be handled separately
486 fn extract_path_filename(s: &str) -> (&str, &str) {
487 let last_slash_idx = s.rfind('/').unwrap_or(0);
488 let (path, filename) = s.split_at(last_slash_idx);
489 if filename.is_empty() {
490 (path, "")
491 } else {
492 (path, &filename[1..])
493 }
494 }
495
496 let (base_path, base_filename) = extract_path_filename(self.path());
497 let (url_path, url_filename) = extract_path_filename(url.path());
498
499 let mut base_path = base_path.split('/').peekable();
500 let mut url_path = url_path.split('/').peekable();
501
502 // Skip over the common prefix
503 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
504 base_path.next();
505 url_path.next();
506 }
507
508 // Add `..` segments for the remainder of the base path
509 for base_path_segment in base_path {
510 // Skip empty last segments
511 if base_path_segment.is_empty() {
512 break;
513 }
514
515 if !relative.is_empty() {
516 relative.push('/');
517 }
518
519 relative.push_str("..");
520 }
521
522 // Append the remainder of the other URI
523 for url_path_segment in url_path {
524 if !relative.is_empty() {
525 relative.push('/');
526 }
527
528 relative.push_str(url_path_segment);
529 }
530
531 // Add the filename if they are not the same
532 if !relative.is_empty() || base_filename != url_filename {
533 // If the URIs filename is empty this means that it was a directory
534 // so we'll have to append a '/'.
535 //
536 // Otherwise append it directly as the new filename.
537 if url_filename.is_empty() {
538 relative.push('/');
539 } else {
540 if !relative.is_empty() {
541 relative.push('/');
542 }
543 relative.push_str(url_filename);
544 }
545 }
546
547 // Query and fragment are only taken from the other URI
548 if let Some(query) = url.query() {
549 relative.push('?');
550 relative.push_str(query);
551 }
552
553 if let Some(fragment) = url.fragment() {
554 relative.push('#');
555 relative.push_str(fragment);
556 }
557
558 Some(relative)
559 }
560
561 /// Return a default `ParseOptions` that can fully configure the URL parser.
562 ///
563 /// # Examples
564 ///
565 /// Get default `ParseOptions`, then change base url
566 ///
567 /// ```rust
568 /// use url::Url;
569 /// # use url::ParseError;
570 /// # fn run() -> Result<(), ParseError> {
571 /// let options = Url::options();
572 /// let api = Url::parse("https://api.example.com")?;
573 /// let base_url = options.base_url(Some(&api));
574 /// let version_url = base_url.parse("version.json")?;
575 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
576 /// # Ok(())
577 /// # }
578 /// # run().unwrap();
579 /// ```
options<'a>() -> ParseOptions<'a>580 pub fn options<'a>() -> ParseOptions<'a> {
581 ParseOptions {
582 base_url: None,
583 encoding_override: None,
584 violation_fn: None,
585 }
586 }
587
588 /// Return the serialization of this URL.
589 ///
590 /// This is fast since that serialization is already stored in the `Url` struct.
591 ///
592 /// # Examples
593 ///
594 /// ```rust
595 /// use url::Url;
596 /// # use url::ParseError;
597 ///
598 /// # fn run() -> Result<(), ParseError> {
599 /// let url_str = "https://example.net/";
600 /// let url = Url::parse(url_str)?;
601 /// assert_eq!(url.as_str(), url_str);
602 /// # Ok(())
603 /// # }
604 /// # run().unwrap();
605 /// ```
606 #[inline]
as_str(&self) -> &str607 pub fn as_str(&self) -> &str {
608 &self.serialization
609 }
610
611 /// Return the serialization of this URL.
612 ///
613 /// This consumes the `Url` and takes ownership of the `String` stored in it.
614 ///
615 /// # Examples
616 ///
617 /// ```rust
618 /// use url::Url;
619 /// # use url::ParseError;
620 ///
621 /// # fn run() -> Result<(), ParseError> {
622 /// let url_str = "https://example.net/";
623 /// let url = Url::parse(url_str)?;
624 /// assert_eq!(String::from(url), url_str);
625 /// # Ok(())
626 /// # }
627 /// # run().unwrap();
628 /// ```
629 #[inline]
630 #[deprecated(since = "2.3.0", note = "use Into<String>")]
into_string(self) -> String631 pub fn into_string(self) -> String {
632 self.into()
633 }
634
/// For internal testing, not part of the public API.
///
/// Methods of the `Url` struct assume a number of invariants.
/// This checks each of these invariants and returns an `Err` describing the
/// first one that is not met, together with the URL's serialization;
/// `Ok(())` means every check passed.
/// This is for testing rust-url itself.
///
/// # Panics
///
/// Two checks use `expect` rather than returning `Err`: parsing the port
/// substring as a `u16`, and re-parsing the URL's own serialization.
#[doc(hidden)]
pub fn check_invariants(&self) -> Result<(), String> {
    // Local `assert!` shadows the standard macro: a failed condition makes the
    // function return `Err` with the stringified condition instead of panicking.
    macro_rules! assert {
        ($x: expr) => {
            if !$x {
                return Err(format!(
                    "!( {} ) for URL {:?}",
                    stringify!($x),
                    self.serialization
                ));
            }
        };
    }

    // Local `assert_eq!` likewise returns `Err`, reporting both values and
    // both source expressions.
    macro_rules! assert_eq {
        ($a: expr, $b: expr) => {
            {
                let a = $a;
                let b = $b;
                if a != b {
                    return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                       a, b, stringify!($a), stringify!($b),
                                       self.serialization))
                }
            }
        }
    }

    // Scheme: non-empty, starts with an ASCII letter, continues with ASCII
    // letters, digits, '+', '-' or '.', and is followed by ':'.
    assert!(self.scheme_end >= 1);
    assert!(self.byte_at(0).is_ascii_alphabetic());
    assert!(self
        .slice(1..self.scheme_end)
        .chars()
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
    assert_eq!(self.byte_at(self.scheme_end), b':');

    if self.slice(self.scheme_end + 1..).starts_with("//") {
        // URL with authority
        if self.username_end != self.serialization.len() as u32 {
            // The byte at `username_end` tells which userinfo shape is present.
            match self.byte_at(self.username_end) {
                // ':' => a password follows, terminated by '@' right before the host.
                b':' => {
                    assert!(self.host_start >= self.username_end + 2);
                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
                }
                // '@' => username only; the host starts right after it.
                b'@' => assert!(self.host_start == self.username_end + 1),
                // Anything else => no userinfo; `username_end` sits right after "://".
                _ => assert_eq!(self.username_end, self.scheme_end + 3),
            }
        }
        assert!(self.host_start >= self.username_end);
        assert!(self.host_end >= self.host_start);
        // The serialized host text must agree with the stored host kind.
        let host_str = self.slice(self.host_start..self.host_end);
        match self.host {
            HostInternal::None => assert_eq!(host_str, ""),
            HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
            HostInternal::Ipv6(address) => {
                let h: Host<String> = Host::Ipv6(address);
                assert_eq!(host_str, h.to_string())
            }
            HostInternal::Domain => {
                if SchemeType::from(self.scheme()).is_special() {
                    assert!(!host_str.is_empty())
                }
            }
        }
        // No text between host and path means no port; otherwise that text
        // must be ':' followed by the decimal form of `self.port`.
        if self.path_start == self.host_end {
            assert_eq!(self.port, None);
        } else {
            assert_eq!(self.byte_at(self.host_end), b':');
            let port_str = self.slice(self.host_end + 1..self.path_start);
            assert_eq!(
                self.port,
                Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
            );
        }
        // The path is either empty (at end of string, or directly followed by
        // '?' / '#') or starts with '/'.
        assert!(
            self.path_start as usize == self.serialization.len()
                || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
        );
    } else {
        // Anarchist URL (no authority)
        assert_eq!(self.username_end, self.scheme_end + 1);
        assert_eq!(self.host_start, self.scheme_end + 1);
        assert_eq!(self.host_end, self.scheme_end + 1);
        assert_eq!(self.host, HostInternal::None);
        assert_eq!(self.port, None);
        if self.path().starts_with("//") {
            // special case when first path segment is empty
            // The serialization carries a "/." marker after the ':' and the
            // path proper starts after that marker.
            assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
            assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
            assert_eq!(self.path_start, self.scheme_end + 3);
        } else {
            assert_eq!(self.path_start, self.scheme_end + 1);
        }
    }
    // Query and fragment markers, when present, point at '?' / '#' at or
    // after the path start, and the fragment comes after the query.
    if let Some(start) = self.query_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'?');
    }
    if let Some(start) = self.fragment_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'#');
    }
    if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
        assert!(fragment_start > query_start);
    }

    // Round-trip: re-parsing our own serialization must yield the same
    // serialization and the same component positions.
    let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
    assert_eq!(&self.serialization, &other.serialization);
    assert_eq!(self.scheme_end, other.scheme_end);
    assert_eq!(self.username_end, other.username_end);
    assert_eq!(self.host_start, other.host_start);
    assert_eq!(self.host_end, other.host_end);
    assert!(
        self.host == other.host ||
            // XXX No host round-trips to empty host.
            // See https://github.com/whatwg/url/issues/79
            (self.host_str(), other.host_str()) == (None, Some(""))
    );
    assert_eq!(self.port, other.port);
    assert_eq!(self.path_start, other.path_start);
    assert_eq!(self.query_start, other.query_start);
    assert_eq!(self.fragment_start, other.fragment_start);
    Ok(())
}
764
765 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
766 ///
767 /// Note: this returns an opaque origin for `file:` URLs, which causes
768 /// `url.origin() != url.origin()`.
769 ///
770 /// # Examples
771 ///
772 /// URL with `ftp` scheme:
773 ///
774 /// ```rust
775 /// use url::{Host, Origin, Url};
776 /// # use url::ParseError;
777 ///
778 /// # fn run() -> Result<(), ParseError> {
779 /// let url = Url::parse("ftp://example.com/foo")?;
780 /// assert_eq!(url.origin(),
781 /// Origin::Tuple("ftp".into(),
782 /// Host::Domain("example.com".into()),
783 /// 21));
784 /// # Ok(())
785 /// # }
786 /// # run().unwrap();
787 /// ```
788 ///
789 /// URL with `blob` scheme:
790 ///
791 /// ```rust
792 /// use url::{Host, Origin, Url};
793 /// # use url::ParseError;
794 ///
795 /// # fn run() -> Result<(), ParseError> {
796 /// let url = Url::parse("blob:https://example.com/foo")?;
797 /// assert_eq!(url.origin(),
798 /// Origin::Tuple("https".into(),
799 /// Host::Domain("example.com".into()),
800 /// 443));
801 /// # Ok(())
802 /// # }
803 /// # run().unwrap();
804 /// ```
805 ///
806 /// URL with `file` scheme:
807 ///
808 /// ```rust
809 /// use url::{Host, Origin, Url};
810 /// # use url::ParseError;
811 ///
812 /// # fn run() -> Result<(), ParseError> {
813 /// let url = Url::parse("file:///tmp/foo")?;
814 /// assert!(!url.origin().is_tuple());
815 ///
816 /// let other_url = Url::parse("file:///tmp/foo")?;
817 /// assert!(url.origin() != other_url.origin());
818 /// # Ok(())
819 /// # }
820 /// # run().unwrap();
821 /// ```
822 ///
823 /// URL with other scheme:
824 ///
825 /// ```rust
826 /// use url::{Host, Origin, Url};
827 /// # use url::ParseError;
828 ///
829 /// # fn run() -> Result<(), ParseError> {
830 /// let url = Url::parse("foo:bar")?;
831 /// assert!(!url.origin().is_tuple());
832 /// # Ok(())
833 /// # }
834 /// # run().unwrap();
835 /// ```
836 #[inline]
origin(&self) -> Origin837 pub fn origin(&self) -> Origin {
838 origin::url_origin(self)
839 }
840
841 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
842 ///
843 /// # Examples
844 ///
845 /// ```
846 /// use url::Url;
847 /// # use url::ParseError;
848 ///
849 /// # fn run() -> Result<(), ParseError> {
850 /// let url = Url::parse("file:///tmp/foo")?;
851 /// assert_eq!(url.scheme(), "file");
852 /// # Ok(())
853 /// # }
854 /// # run().unwrap();
855 /// ```
856 #[inline]
scheme(&self) -> &str857 pub fn scheme(&self) -> &str {
858 self.slice(..self.scheme_end)
859 }
860
861 /// Return whether the URL is special (has a special scheme)
862 ///
863 /// # Examples
864 ///
865 /// ```
866 /// use url::Url;
867 /// # use url::ParseError;
868 ///
869 /// # fn run() -> Result<(), ParseError> {
870 /// assert!(Url::parse("http:///tmp/foo")?.is_special());
871 /// assert!(Url::parse("file:///tmp/foo")?.is_special());
872 /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
873 /// # Ok(())
874 /// # }
875 /// # run().unwrap();
876 /// ```
is_special(&self) -> bool877 pub fn is_special(&self) -> bool {
878 let scheme_type = SchemeType::from(self.scheme());
879 scheme_type.is_special()
880 }
881
882 /// Return whether the URL has an 'authority',
883 /// which can contain a username, password, host, and port number.
884 ///
885 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
886 /// or cannot-be-a-base like `data:text/plain,Stuff`.
887 ///
888 /// See also the `authority` method.
889 ///
890 /// # Examples
891 ///
892 /// ```
893 /// use url::Url;
894 /// # use url::ParseError;
895 ///
896 /// # fn run() -> Result<(), ParseError> {
897 /// let url = Url::parse("ftp://rms@example.com")?;
898 /// assert!(url.has_authority());
899 ///
900 /// let url = Url::parse("unix:/run/foo.socket")?;
901 /// assert!(!url.has_authority());
902 ///
903 /// let url = Url::parse("data:text/plain,Stuff")?;
904 /// assert!(!url.has_authority());
905 /// # Ok(())
906 /// # }
907 /// # run().unwrap();
908 /// ```
909 #[inline]
has_authority(&self) -> bool910 pub fn has_authority(&self) -> bool {
911 debug_assert!(self.byte_at(self.scheme_end) == b':');
912 self.slice(self.scheme_end..).starts_with("://")
913 }
914
915 /// Return the authority of this URL as an ASCII string.
916 ///
917 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
918 /// of a special URL, or percent encoded for non-special URLs.
919 /// IPv6 addresses are given between `[` and `]` brackets.
920 /// Ports are omitted if they match the well known port of a special URL.
921 ///
922 /// Username and password are percent-encoded.
923 ///
924 /// See also the `has_authority` method.
925 ///
926 /// # Examples
927 ///
928 /// ```
929 /// use url::Url;
930 /// # use url::ParseError;
931 ///
932 /// # fn run() -> Result<(), ParseError> {
933 /// let url = Url::parse("unix:/run/foo.socket")?;
934 /// assert_eq!(url.authority(), "");
935 /// let url = Url::parse("file:///tmp/foo")?;
936 /// assert_eq!(url.authority(), "");
937 /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
938 /// assert_eq!(url.authority(), "user:password@example.com");
939 /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
940 /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
941 /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
942 /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
943 /// # Ok(())
944 /// # }
945 /// # run().unwrap();
946 /// ```
authority(&self) -> &str947 pub fn authority(&self) -> &str {
948 let scheme_separator_len = "://".len() as u32;
949 if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
950 self.slice(self.scheme_end + scheme_separator_len..self.path_start)
951 } else {
952 ""
953 }
954 }
955
956 /// Return whether this URL is a cannot-be-a-base URL,
957 /// meaning that parsing a relative URL string with this URL as the base will return an error.
958 ///
959 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
960 /// as is typically the case of `data:` and `mailto:` URLs.
961 ///
962 /// # Examples
963 ///
964 /// ```
965 /// use url::Url;
966 /// # use url::ParseError;
967 ///
968 /// # fn run() -> Result<(), ParseError> {
969 /// let url = Url::parse("ftp://rms@example.com")?;
970 /// assert!(!url.cannot_be_a_base());
971 ///
972 /// let url = Url::parse("unix:/run/foo.socket")?;
973 /// assert!(!url.cannot_be_a_base());
974 ///
975 /// let url = Url::parse("data:text/plain,Stuff")?;
976 /// assert!(url.cannot_be_a_base());
977 /// # Ok(())
978 /// # }
979 /// # run().unwrap();
980 /// ```
981 #[inline]
cannot_be_a_base(&self) -> bool982 pub fn cannot_be_a_base(&self) -> bool {
983 !self.slice(self.scheme_end + 1..).starts_with('/')
984 }
985
986 /// Return the username for this URL (typically the empty string)
987 /// as a percent-encoded ASCII string.
988 ///
989 /// # Examples
990 ///
991 /// ```
992 /// use url::Url;
993 /// # use url::ParseError;
994 ///
995 /// # fn run() -> Result<(), ParseError> {
996 /// let url = Url::parse("ftp://rms@example.com")?;
997 /// assert_eq!(url.username(), "rms");
998 ///
999 /// let url = Url::parse("ftp://:secret123@example.com")?;
1000 /// assert_eq!(url.username(), "");
1001 ///
1002 /// let url = Url::parse("https://example.com")?;
1003 /// assert_eq!(url.username(), "");
1004 /// # Ok(())
1005 /// # }
1006 /// # run().unwrap();
1007 /// ```
username(&self) -> &str1008 pub fn username(&self) -> &str {
1009 let scheme_separator_len = "://".len() as u32;
1010 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
1011 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
1012 } else {
1013 ""
1014 }
1015 }
1016
1017 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
1018 ///
1019 /// # Examples
1020 ///
1021 /// ```
1022 /// use url::Url;
1023 /// # use url::ParseError;
1024 ///
1025 /// # fn run() -> Result<(), ParseError> {
1026 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1027 /// assert_eq!(url.password(), Some("secret123"));
1028 ///
1029 /// let url = Url::parse("ftp://:secret123@example.com")?;
1030 /// assert_eq!(url.password(), Some("secret123"));
1031 ///
1032 /// let url = Url::parse("ftp://rms@example.com")?;
1033 /// assert_eq!(url.password(), None);
1034 ///
1035 /// let url = Url::parse("https://example.com")?;
1036 /// assert_eq!(url.password(), None);
1037 /// # Ok(())
1038 /// # }
1039 /// # run().unwrap();
1040 /// ```
password(&self) -> Option<&str>1041 pub fn password(&self) -> Option<&str> {
1042 // This ':' is not the one marking a port number since a host can not be empty.
1043 // (Except for file: URLs, which do not have port numbers.)
1044 if self.has_authority()
1045 && self.username_end != self.serialization.len() as u32
1046 && self.byte_at(self.username_end) == b':'
1047 {
1048 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1049 Some(self.slice(self.username_end + 1..self.host_start - 1))
1050 } else {
1051 None
1052 }
1053 }
1054
1055 /// Equivalent to `url.host().is_some()`.
1056 ///
1057 /// # Examples
1058 ///
1059 /// ```
1060 /// use url::Url;
1061 /// # use url::ParseError;
1062 ///
1063 /// # fn run() -> Result<(), ParseError> {
1064 /// let url = Url::parse("ftp://rms@example.com")?;
1065 /// assert!(url.has_host());
1066 ///
1067 /// let url = Url::parse("unix:/run/foo.socket")?;
1068 /// assert!(!url.has_host());
1069 ///
1070 /// let url = Url::parse("data:text/plain,Stuff")?;
1071 /// assert!(!url.has_host());
1072 /// # Ok(())
1073 /// # }
1074 /// # run().unwrap();
1075 /// ```
has_host(&self) -> bool1076 pub fn has_host(&self) -> bool {
1077 !matches!(self.host, HostInternal::None)
1078 }
1079
1080 /// Return the string representation of the host (domain or IP address) for this URL, if any.
1081 ///
1082 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1083 /// of a special URL, or percent encoded for non-special URLs.
1084 /// IPv6 addresses are given between `[` and `]` brackets.
1085 ///
1086 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1087 /// don’t have a host.
1088 ///
1089 /// See also the `host` method.
1090 ///
1091 /// # Examples
1092 ///
1093 /// ```
1094 /// use url::Url;
1095 /// # use url::ParseError;
1096 ///
1097 /// # fn run() -> Result<(), ParseError> {
1098 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1099 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1100 ///
1101 /// let url = Url::parse("ftp://rms@example.com")?;
1102 /// assert_eq!(url.host_str(), Some("example.com"));
1103 ///
1104 /// let url = Url::parse("unix:/run/foo.socket")?;
1105 /// assert_eq!(url.host_str(), None);
1106 ///
1107 /// let url = Url::parse("data:text/plain,Stuff")?;
1108 /// assert_eq!(url.host_str(), None);
1109 /// # Ok(())
1110 /// # }
1111 /// # run().unwrap();
1112 /// ```
host_str(&self) -> Option<&str>1113 pub fn host_str(&self) -> Option<&str> {
1114 if self.has_host() {
1115 Some(self.slice(self.host_start..self.host_end))
1116 } else {
1117 None
1118 }
1119 }
1120
1121 /// Return the parsed representation of the host for this URL.
1122 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1123 /// of a special URL, or percent encoded for non-special URLs.
1124 ///
1125 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1126 /// don’t have a host.
1127 ///
1128 /// See also the `host_str` method.
1129 ///
1130 /// # Examples
1131 ///
1132 /// ```
1133 /// use url::Url;
1134 /// # use url::ParseError;
1135 ///
1136 /// # fn run() -> Result<(), ParseError> {
1137 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1138 /// assert!(url.host().is_some());
1139 ///
1140 /// let url = Url::parse("ftp://rms@example.com")?;
1141 /// assert!(url.host().is_some());
1142 ///
1143 /// let url = Url::parse("unix:/run/foo.socket")?;
1144 /// assert!(url.host().is_none());
1145 ///
1146 /// let url = Url::parse("data:text/plain,Stuff")?;
1147 /// assert!(url.host().is_none());
1148 /// # Ok(())
1149 /// # }
1150 /// # run().unwrap();
1151 /// ```
host(&self) -> Option<Host<&str>>1152 pub fn host(&self) -> Option<Host<&str>> {
1153 match self.host {
1154 HostInternal::None => None,
1155 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1156 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1157 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1158 }
1159 }
1160
1161 /// If this URL has a host and it is a domain name (not an IP address), return it.
1162 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1163 /// of a special URL, or percent encoded for non-special URLs.
1164 ///
1165 /// # Examples
1166 ///
1167 /// ```
1168 /// use url::Url;
1169 /// # use url::ParseError;
1170 ///
1171 /// # fn run() -> Result<(), ParseError> {
1172 /// let url = Url::parse("https://127.0.0.1/")?;
1173 /// assert_eq!(url.domain(), None);
1174 ///
1175 /// let url = Url::parse("mailto:rms@example.net")?;
1176 /// assert_eq!(url.domain(), None);
1177 ///
1178 /// let url = Url::parse("https://example.com/")?;
1179 /// assert_eq!(url.domain(), Some("example.com"));
1180 /// # Ok(())
1181 /// # }
1182 /// # run().unwrap();
1183 /// ```
domain(&self) -> Option<&str>1184 pub fn domain(&self) -> Option<&str> {
1185 match self.host {
1186 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1187 _ => None,
1188 }
1189 }
1190
1191 /// Return the port number for this URL, if any.
1192 ///
1193 /// Note that default port numbers are never reflected by the serialization,
1194 /// use the `port_or_known_default()` method if you want a default port number returned.
1195 ///
1196 /// # Examples
1197 ///
1198 /// ```
1199 /// use url::Url;
1200 /// # use url::ParseError;
1201 ///
1202 /// # fn run() -> Result<(), ParseError> {
1203 /// let url = Url::parse("https://example.com")?;
1204 /// assert_eq!(url.port(), None);
1205 ///
1206 /// let url = Url::parse("https://example.com:443/")?;
1207 /// assert_eq!(url.port(), None);
1208 ///
1209 /// let url = Url::parse("ssh://example.com:22")?;
1210 /// assert_eq!(url.port(), Some(22));
1211 /// # Ok(())
1212 /// # }
1213 /// # run().unwrap();
1214 /// ```
1215 #[inline]
pub fn port(&self) -> Option<u16> {
    // Plain copy of the stored field. `set_port` stores `None` when asked for
    // the scheme's known default port, so such a port reads back as `None`.
    self.port
}
1219
1220 /// Return the port number for this URL, or the default port number if it is known.
1221 ///
1222 /// This method only knows the default port number
1223 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1224 ///
1225 /// For URLs in these schemes, this method always returns `Some(_)`.
1226 /// For other schemes, it is the same as `Url::port()`.
1227 ///
1228 /// # Examples
1229 ///
1230 /// ```
1231 /// use url::Url;
1232 /// # use url::ParseError;
1233 ///
1234 /// # fn run() -> Result<(), ParseError> {
1235 /// let url = Url::parse("foo://example.com")?;
1236 /// assert_eq!(url.port_or_known_default(), None);
1237 ///
1238 /// let url = Url::parse("foo://example.com:1456")?;
1239 /// assert_eq!(url.port_or_known_default(), Some(1456));
1240 ///
1241 /// let url = Url::parse("https://example.com")?;
1242 /// assert_eq!(url.port_or_known_default(), Some(443));
1243 /// # Ok(())
1244 /// # }
1245 /// # run().unwrap();
1246 /// ```
1247 #[inline]
port_or_known_default(&self) -> Option<u16>1248 pub fn port_or_known_default(&self) -> Option<u16> {
1249 self.port.or_else(|| parser::default_port(self.scheme()))
1250 }
1251
1252 /// Resolve a URL’s host and port number to `SocketAddr`.
1253 ///
1254 /// If the URL has the default port number of a scheme that is unknown to this library,
1255 /// `default_port_number` provides an opportunity to provide the actual port number.
1256 /// In non-example code this should be implemented either simply as `|| None`,
1257 /// or by matching on the URL’s `.scheme()`.
1258 ///
1259 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1260 ///
1261 /// # Examples
1262 ///
1263 /// ```no_run
1264 /// let url = url::Url::parse("https://example.net/").unwrap();
1265 /// let addrs = url.socket_addrs(|| None).unwrap();
1266 /// std::net::TcpStream::connect(&*addrs)
1267 /// # ;
1268 /// ```
1269 ///
1270 /// ```
1271 /// /// With application-specific known default port numbers
1272 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1273 /// url.socket_addrs(|| match url.scheme() {
1274 /// "socks5" | "socks5h" => Some(1080),
1275 /// _ => None,
1276 /// })
1277 /// }
1278 /// ```
1279 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1280 pub fn socket_addrs(
1281 &self,
1282 default_port_number: impl Fn() -> Option<u16>,
1283 ) -> io::Result<Vec<SocketAddr>> {
1284 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1285 // causes borrowck issues because the return value borrows `default_port_number`:
1286 //
1287 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1288 //
1289 // > This RFC proposes that *all* type parameters are considered in scope
1290 // > for `impl Trait` in return position
1291
1292 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1293 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1294 }
1295
1296 let host = io_result(self.host(), "No host name in the URL")?;
1297 let port = io_result(
1298 self.port_or_known_default().or_else(default_port_number),
1299 "No port number in the URL",
1300 )?;
1301 Ok(match host {
1302 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1303 Host::Ipv4(ip) => vec![(ip, port).into()],
1304 Host::Ipv6(ip) => vec![(ip, port).into()],
1305 })
1306 }
1307
1308 /// Return the path for this URL, as a percent-encoded ASCII string.
1309 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1310 /// For other URLs, this starts with a '/' slash
1311 /// and continues with slash-separated path segments.
1312 ///
1313 /// # Examples
1314 ///
1315 /// ```rust
1316 /// use url::{Url, ParseError};
1317 ///
1318 /// # fn run() -> Result<(), ParseError> {
1319 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1320 /// assert_eq!(url.path(), "/api/versions");
1321 ///
1322 /// let url = Url::parse("https://example.com")?;
1323 /// assert_eq!(url.path(), "/");
1324 ///
1325 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1326 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1327 /// # Ok(())
1328 /// # }
1329 /// # run().unwrap();
1330 /// ```
path(&self) -> &str1331 pub fn path(&self) -> &str {
1332 match (self.query_start, self.fragment_start) {
1333 (None, None) => self.slice(self.path_start..),
1334 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1335 self.slice(self.path_start..next_component_start)
1336 }
1337 }
1338 }
1339
1340 /// Unless this URL is cannot-be-a-base,
1341 /// return an iterator of '/' slash-separated path segments,
1342 /// each as a percent-encoded ASCII string.
1343 ///
1344 /// Return `None` for cannot-be-a-base URLs.
1345 ///
1346 /// When `Some` is returned, the iterator always contains at least one string
1347 /// (which may be empty).
1348 ///
1349 /// # Examples
1350 ///
1351 /// ```
1352 /// use url::Url;
1353 /// # use std::error::Error;
1354 ///
1355 /// # fn run() -> Result<(), Box<dyn Error>> {
1356 /// let url = Url::parse("https://example.com/foo/bar")?;
1357 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1358 /// assert_eq!(path_segments.next(), Some("foo"));
1359 /// assert_eq!(path_segments.next(), Some("bar"));
1360 /// assert_eq!(path_segments.next(), None);
1361 ///
1362 /// let url = Url::parse("https://example.com")?;
1363 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1364 /// assert_eq!(path_segments.next(), Some(""));
1365 /// assert_eq!(path_segments.next(), None);
1366 ///
1367 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1368 /// assert!(url.path_segments().is_none());
1369 ///
1370 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1371 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1372 /// assert_eq!(path_segments.next(), Some("countries"));
1373 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1374 /// # Ok(())
1375 /// # }
1376 /// # run().unwrap();
1377 /// ```
path_segments(&self) -> Option<str::Split<'_, char>>1378 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1379 let path = self.path();
1380 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1381 }
1382
1383 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1384 ///
1385 /// # Examples
1386 ///
1387 /// ```rust
1388 /// use url::Url;
1389 /// # use url::ParseError;
1390 ///
/// # fn run() -> Result<(), ParseError> {
1392 /// let url = Url::parse("https://example.com/products?page=2")?;
1393 /// let query = url.query();
1394 /// assert_eq!(query, Some("page=2"));
1395 ///
1396 /// let url = Url::parse("https://example.com/products")?;
1397 /// let query = url.query();
1398 /// assert!(query.is_none());
1399 ///
1400 /// let url = Url::parse("https://example.com/?country=español")?;
1401 /// let query = url.query();
1402 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1403 /// # Ok(())
1404 /// # }
1405 /// # run().unwrap();
1406 /// ```
query(&self) -> Option<&str>1407 pub fn query(&self) -> Option<&str> {
1408 match (self.query_start, self.fragment_start) {
1409 (None, _) => None,
1410 (Some(query_start), None) => {
1411 debug_assert!(self.byte_at(query_start) == b'?');
1412 Some(self.slice(query_start + 1..))
1413 }
1414 (Some(query_start), Some(fragment_start)) => {
1415 debug_assert!(self.byte_at(query_start) == b'?');
1416 Some(self.slice(query_start + 1..fragment_start))
1417 }
1418 }
1419 }
1420
1421 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1422 /// and return an iterator of (key, value) pairs.
1423 ///
1424 /// # Examples
1425 ///
1426 /// ```rust
1427 /// use std::borrow::Cow;
1428 ///
1429 /// use url::Url;
1430 /// # use url::ParseError;
1431 ///
1432 /// # fn run() -> Result<(), ParseError> {
1433 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1434 /// let mut pairs = url.query_pairs();
1435 ///
1436 /// assert_eq!(pairs.count(), 2);
1437 ///
1438 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1439 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1440 /// # Ok(())
1441 /// # }
1442 /// # run().unwrap();
1443 /// ```
1444
1445 #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1446 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1447 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1448 }
1449
1450 /// Return this URL’s fragment identifier, if any.
1451 ///
1452 /// A fragment is the part of the URL after the `#` symbol.
1453 /// The fragment is optional and, if present, contains a fragment identifier
1454 /// that identifies a secondary resource, such as a section heading
1455 /// of a document.
1456 ///
/// In HTML, the fragment identifier is usually the id attribute of an element
1458 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1459 /// of a URL to the server.
1460 ///
1461 /// **Note:** the parser did *not* percent-encode this component,
1462 /// but the input may have been percent-encoded already.
1463 ///
1464 /// # Examples
1465 ///
1466 /// ```rust
1467 /// use url::Url;
1468 /// # use url::ParseError;
1469 ///
1470 /// # fn run() -> Result<(), ParseError> {
1471 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1472 ///
1473 /// assert_eq!(url.fragment(), Some("row=4"));
1474 ///
1475 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1476 ///
1477 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1478 /// # Ok(())
1479 /// # }
1480 /// # run().unwrap();
1481 /// ```
fragment(&self) -> Option<&str>1482 pub fn fragment(&self) -> Option<&str> {
1483 self.fragment_start.map(|start| {
1484 debug_assert!(self.byte_at(start) == b'#');
1485 self.slice(start + 1..)
1486 })
1487 }
1488
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1489 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1490 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1491 let result = f(&mut parser);
1492 self.serialization = parser.serialization;
1493 result
1494 }
1495
1496 /// Change this URL’s fragment identifier.
1497 ///
1498 /// # Examples
1499 ///
1500 /// ```rust
1501 /// use url::Url;
1502 /// # use url::ParseError;
1503 ///
1504 /// # fn run() -> Result<(), ParseError> {
1505 /// let mut url = Url::parse("https://example.com/data.csv")?;
1506 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
///
1508 /// url.set_fragment(Some("cell=4,1-6,2"));
1509 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1510 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1511 ///
1512 /// url.set_fragment(None);
1513 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1514 /// assert!(url.fragment().is_none());
1515 /// # Ok(())
1516 /// # }
1517 /// # run().unwrap();
1518 /// ```
set_fragment(&mut self, fragment: Option<&str>)1519 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1520 // Remove any previous fragment
1521 if let Some(start) = self.fragment_start {
1522 debug_assert!(self.byte_at(start) == b'#');
1523 self.serialization.truncate(start as usize);
1524 }
1525 // Write the new one
1526 if let Some(input) = fragment {
1527 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1528 self.serialization.push('#');
1529 self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1530 } else {
1531 self.fragment_start = None;
1532 self.strip_trailing_spaces_from_opaque_path();
1533 }
1534 }
1535
take_fragment(&mut self) -> Option<String>1536 fn take_fragment(&mut self) -> Option<String> {
1537 self.fragment_start.take().map(|start| {
1538 debug_assert!(self.byte_at(start) == b'#');
1539 let fragment = self.slice(start + 1..).to_owned();
1540 self.serialization.truncate(start as usize);
1541 fragment
1542 })
1543 }
1544
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1545 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1546 if let Some(ref fragment) = fragment {
1547 assert!(self.fragment_start.is_none());
1548 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1549 self.serialization.push('#');
1550 self.serialization.push_str(fragment);
1551 }
1552 }
1553
1554 /// Change this URL’s query string. If `query` is `None`, this URL's
1555 /// query string will be cleared.
1556 ///
1557 /// # Examples
1558 ///
1559 /// ```rust
1560 /// use url::Url;
1561 /// # use url::ParseError;
1562 ///
1563 /// # fn run() -> Result<(), ParseError> {
1564 /// let mut url = Url::parse("https://example.com/products")?;
1565 /// assert_eq!(url.as_str(), "https://example.com/products");
1566 ///
1567 /// url.set_query(Some("page=2"));
1568 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1569 /// assert_eq!(url.query(), Some("page=2"));
1570 /// # Ok(())
1571 /// # }
1572 /// # run().unwrap();
1573 /// ```
set_query(&mut self, query: Option<&str>)1574 pub fn set_query(&mut self, query: Option<&str>) {
1575 let fragment = self.take_fragment();
1576
1577 // Remove any previous query
1578 if let Some(start) = self.query_start.take() {
1579 debug_assert!(self.byte_at(start) == b'?');
1580 self.serialization.truncate(start as usize);
1581 }
1582 // Write the new query, if any
1583 if let Some(input) = query {
1584 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1585 self.serialization.push('?');
1586 let scheme_type = SchemeType::from(self.scheme());
1587 let scheme_end = self.scheme_end;
1588 self.mutate(|parser| {
1589 let vfn = parser.violation_fn;
1590 parser.parse_query(
1591 scheme_type,
1592 scheme_end,
1593 parser::Input::new_trim_tab_and_newlines(input, vfn),
1594 )
1595 });
1596 } else {
1597 self.query_start = None;
1598 if fragment.is_none() {
1599 self.strip_trailing_spaces_from_opaque_path();
1600 }
1601 }
1602
1603 self.restore_already_parsed_fragment(fragment);
1604 }
1605
1606 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1607 /// in `application/x-www-form-urlencoded` syntax.
1608 ///
1609 /// The return value has a method-chaining API:
1610 ///
1611 /// ```rust
1612 /// # use url::{Url, ParseError};
1613 ///
1614 /// # fn run() -> Result<(), ParseError> {
1615 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1616 /// assert_eq!(url.query(), Some("lang=fr"));
1617 ///
1618 /// url.query_pairs_mut().append_pair("foo", "bar");
1619 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1620 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1621 ///
1622 /// url.query_pairs_mut()
1623 /// .clear()
1624 /// .append_pair("foo", "bar & baz")
1625 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1626 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1627 /// assert_eq!(url.as_str(),
1628 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1629 /// # Ok(())
1630 /// # }
1631 /// # run().unwrap();
1632 /// ```
1633 ///
1634 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1635 /// not `url.set_query(None)`.
1636 ///
1637 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1638 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1639 let fragment = self.take_fragment();
1640
1641 let query_start;
1642 if let Some(start) = self.query_start {
1643 debug_assert!(self.byte_at(start) == b'?');
1644 query_start = start as usize;
1645 } else {
1646 query_start = self.serialization.len();
1647 self.query_start = Some(to_u32(query_start).unwrap());
1648 self.serialization.push('?');
1649 }
1650
1651 let query = UrlQuery {
1652 url: Some(self),
1653 fragment,
1654 };
1655 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1656 }
1657
take_after_path(&mut self) -> String1658 fn take_after_path(&mut self) -> String {
1659 match (self.query_start, self.fragment_start) {
1660 (Some(i), _) | (None, Some(i)) => {
1661 let after_path = self.slice(i..).to_owned();
1662 self.serialization.truncate(i as usize);
1663 after_path
1664 }
1665 (None, None) => String::new(),
1666 }
1667 }
1668
1669 /// Change this URL’s path.
1670 ///
1671 /// # Examples
1672 ///
1673 /// ```rust
1674 /// use url::Url;
1675 /// # use url::ParseError;
1676 ///
1677 /// # fn run() -> Result<(), ParseError> {
1678 /// let mut url = Url::parse("https://example.com")?;
1679 /// url.set_path("api/comments");
1680 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1681 /// assert_eq!(url.path(), "/api/comments");
1682 ///
1683 /// let mut url = Url::parse("https://example.com/api")?;
1684 /// url.set_path("data/report.csv");
1685 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1686 /// assert_eq!(url.path(), "/data/report.csv");
1687 ///
1688 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1689 /// let mut url = Url::parse("https://example.com")?;
1690 /// url.set_path("api/some comments");
1691 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1692 /// assert_eq!(url.path(), "/api/some%20comments");
1693 ///
1694 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1695 /// let mut url = Url::parse("https://example.com")?;
1696 /// url.set_path("api/some%20comments");
1697 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1698 /// assert_eq!(url.path(), "/api/some%20comments");
1699 ///
1700 /// # Ok(())
1701 /// # }
1702 /// # run().unwrap();
1703 /// ```
set_path(&mut self, mut path: &str)1704 pub fn set_path(&mut self, mut path: &str) {
1705 let after_path = self.take_after_path();
1706 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1707 let cannot_be_a_base = self.cannot_be_a_base();
1708 let scheme_type = SchemeType::from(self.scheme());
1709 self.serialization.truncate(self.path_start as usize);
1710 self.mutate(|parser| {
1711 if cannot_be_a_base {
1712 if path.starts_with('/') {
1713 parser.serialization.push_str("%2F");
1714 path = &path[1..];
1715 }
1716 parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1717 } else {
1718 let mut has_host = true; // FIXME
1719 parser.parse_path_start(
1720 scheme_type,
1721 &mut has_host,
1722 parser::Input::new_no_trim(path),
1723 );
1724 }
1725 });
1726 self.restore_after_path(old_after_path_pos, &after_path);
1727 }
1728
1729 /// Return an object with methods to manipulate this URL’s path segments.
1730 ///
1731 /// Return `Err(())` if this URL is cannot-be-a-base.
1732 #[allow(clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1733 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1734 if self.cannot_be_a_base() {
1735 Err(())
1736 } else {
1737 Ok(path_segments::new(self))
1738 }
1739 }
1740
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1741 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1742 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1743 let adjust = |index: &mut u32| {
1744 *index -= old_after_path_position;
1745 *index += new_after_path_position;
1746 };
1747 if let Some(ref mut index) = self.query_start {
1748 adjust(index)
1749 }
1750 if let Some(ref mut index) = self.fragment_start {
1751 adjust(index)
1752 }
1753 self.serialization.push_str(after_path)
1754 }
1755
1756 /// Change this URL’s port number.
1757 ///
1758 /// Note that default port numbers are not reflected in the serialization.
1759 ///
1760 /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1761 /// do nothing and return `Err`.
1762 ///
1763 /// # Examples
1764 ///
1765 /// ```
1766 /// use url::Url;
1767 /// # use std::error::Error;
1768 ///
1769 /// # fn run() -> Result<(), Box<dyn Error>> {
1770 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1771 ///
1772 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1773 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1774 ///
1775 /// url.set_port(None).map_err(|_| "cannot be base")?;
1776 /// assert_eq!(url.as_str(), "ssh://example.net/");
1777 /// # Ok(())
1778 /// # }
1779 /// # run().unwrap();
1780 /// ```
1781 ///
1782 /// Known default port numbers are not reflected:
1783 ///
1784 /// ```rust
1785 /// use url::Url;
1786 /// # use std::error::Error;
1787 ///
1788 /// # fn run() -> Result<(), Box<dyn Error>> {
1789 /// let mut url = Url::parse("https://example.org/")?;
1790 ///
1791 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1792 /// assert!(url.port().is_none());
1793 /// # Ok(())
1794 /// # }
1795 /// # run().unwrap();
1796 /// ```
1797 ///
1798 /// Cannot set port for cannot-be-a-base URLs:
1799 ///
1800 /// ```
1801 /// use url::Url;
1802 /// # use url::ParseError;
1803 ///
1804 /// # fn run() -> Result<(), ParseError> {
1805 /// let mut url = Url::parse("mailto:rms@example.net")?;
1806 ///
1807 /// let result = url.set_port(Some(80));
1808 /// assert!(result.is_err());
1809 ///
1810 /// let result = url.set_port(None);
1811 /// assert!(result.is_err());
1812 /// # Ok(())
1813 /// # }
1814 /// # run().unwrap();
1815 /// ```
1816 #[allow(clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1817 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1818 // has_host implies !cannot_be_a_base
1819 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1820 return Err(());
1821 }
1822 if port.is_some() && port == parser::default_port(self.scheme()) {
1823 port = None
1824 }
1825 self.set_port_internal(port);
1826 Ok(())
1827 }
1828
set_port_internal(&mut self, port: Option<u16>)1829 fn set_port_internal(&mut self, port: Option<u16>) {
1830 match (self.port, port) {
1831 (None, None) => {}
1832 (Some(_), None) => {
1833 self.serialization
1834 .drain(self.host_end as usize..self.path_start as usize);
1835 let offset = self.path_start - self.host_end;
1836 self.path_start = self.host_end;
1837 if let Some(ref mut index) = self.query_start {
1838 *index -= offset
1839 }
1840 if let Some(ref mut index) = self.fragment_start {
1841 *index -= offset
1842 }
1843 }
1844 (Some(old), Some(new)) if old == new => {}
1845 (_, Some(new)) => {
1846 let path_and_after = self.slice(self.path_start..).to_owned();
1847 self.serialization.truncate(self.host_end as usize);
1848 write!(&mut self.serialization, ":{}", new).unwrap();
1849 let old_path_start = self.path_start;
1850 let new_path_start = to_u32(self.serialization.len()).unwrap();
1851 self.path_start = new_path_start;
1852 let adjust = |index: &mut u32| {
1853 *index -= old_path_start;
1854 *index += new_path_start;
1855 };
1856 if let Some(ref mut index) = self.query_start {
1857 adjust(index)
1858 }
1859 if let Some(ref mut index) = self.fragment_start {
1860 adjust(index)
1861 }
1862 self.serialization.push_str(&path_and_after);
1863 }
1864 }
1865 self.port = port;
1866 }
1867
1868 /// Change this URL’s host.
1869 ///
1870 /// Removing the host (calling this with `None`)
1871 /// will also remove any username, password, and port number.
1872 ///
1873 /// # Examples
1874 ///
1875 /// Change host:
1876 ///
1877 /// ```
1878 /// use url::Url;
1879 /// # use url::ParseError;
1880 ///
1881 /// # fn run() -> Result<(), ParseError> {
1882 /// let mut url = Url::parse("https://example.net")?;
1883 /// let result = url.set_host(Some("rust-lang.org"));
1884 /// assert!(result.is_ok());
1885 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1886 /// # Ok(())
1887 /// # }
1888 /// # run().unwrap();
1889 /// ```
1890 ///
1891 /// Remove host:
1892 ///
1893 /// ```
1894 /// use url::Url;
1895 /// # use url::ParseError;
1896 ///
1897 /// # fn run() -> Result<(), ParseError> {
1898 /// let mut url = Url::parse("foo://example.net")?;
1899 /// let result = url.set_host(None);
1900 /// assert!(result.is_ok());
1901 /// assert_eq!(url.as_str(), "foo:/");
1902 /// # Ok(())
1903 /// # }
1904 /// # run().unwrap();
1905 /// ```
1906 ///
1907 /// Cannot remove host for 'special' schemes (e.g. `http`):
1908 ///
1909 /// ```
1910 /// use url::Url;
1911 /// # use url::ParseError;
1912 ///
1913 /// # fn run() -> Result<(), ParseError> {
1914 /// let mut url = Url::parse("https://example.net")?;
1915 /// let result = url.set_host(None);
1916 /// assert!(result.is_err());
1917 /// assert_eq!(url.as_str(), "https://example.net/");
1918 /// # Ok(())
1919 /// # }
1920 /// # run().unwrap();
1921 /// ```
1922 ///
1923 /// Cannot change or remove host for cannot-be-a-base URLs:
1924 ///
1925 /// ```
1926 /// use url::Url;
1927 /// # use url::ParseError;
1928 ///
1929 /// # fn run() -> Result<(), ParseError> {
1930 /// let mut url = Url::parse("mailto:rms@example.net")?;
1931 ///
1932 /// let result = url.set_host(Some("rust-lang.org"));
1933 /// assert!(result.is_err());
1934 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1935 ///
1936 /// let result = url.set_host(None);
1937 /// assert!(result.is_err());
1938 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1939 /// # Ok(())
1940 /// # }
1941 /// # run().unwrap();
1942 /// ```
1943 ///
1944 /// # Errors
1945 ///
1946 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1947 /// a [`ParseError`] variant will be returned.
1948 ///
1949 /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1950 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1951 if self.cannot_be_a_base() {
1952 return Err(ParseError::SetHostOnCannotBeABaseUrl);
1953 }
1954
1955 let scheme_type = SchemeType::from(self.scheme());
1956
1957 if let Some(host) = host {
1958 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
1959 return Err(ParseError::EmptyHost);
1960 }
1961 let mut host_substr = host;
1962 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1963 if !host.starts_with('[') || !host.ends_with(']') {
1964 match host.find(':') {
1965 Some(0) => {
1966 // If buffer is the empty string, validation error, return failure.
1967 return Err(ParseError::InvalidDomainCharacter);
1968 }
1969 // Let host be the result of host parsing buffer
1970 Some(colon_index) => {
1971 host_substr = &host[..colon_index];
1972 }
1973 None => {}
1974 }
1975 }
1976 if SchemeType::from(self.scheme()).is_special() {
1977 self.set_host_internal(Host::parse(host_substr)?, None);
1978 } else {
1979 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1980 }
1981 } else if self.has_host() {
1982 if scheme_type.is_special() && !scheme_type.is_file() {
1983 return Err(ParseError::EmptyHost);
1984 } else if self.serialization.len() == self.path_start as usize {
1985 self.serialization.push('/');
1986 }
1987 debug_assert!(self.byte_at(self.scheme_end) == b':');
1988 debug_assert!(self.byte_at(self.path_start) == b'/');
1989
1990 let new_path_start = if scheme_type.is_file() {
1991 self.scheme_end + 3
1992 } else {
1993 self.scheme_end + 1
1994 };
1995
1996 self.serialization
1997 .drain(new_path_start as usize..self.path_start as usize);
1998 let offset = self.path_start - new_path_start;
1999 self.path_start = new_path_start;
2000 self.username_end = new_path_start;
2001 self.host_start = new_path_start;
2002 self.host_end = new_path_start;
2003 self.port = None;
2004 if let Some(ref mut index) = self.query_start {
2005 *index -= offset
2006 }
2007 if let Some(ref mut index) = self.fragment_start {
2008 *index -= offset
2009 }
2010 }
2011 Ok(())
2012 }
2013
2014 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>)2015 fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2016 let old_suffix_pos = if opt_new_port.is_some() {
2017 self.path_start
2018 } else {
2019 self.host_end
2020 };
2021 let suffix = self.slice(old_suffix_pos..).to_owned();
2022 self.serialization.truncate(self.host_start as usize);
2023 if !self.has_authority() {
2024 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2025 debug_assert!(self.username_end == self.host_start);
2026 self.serialization.push('/');
2027 self.serialization.push('/');
2028 self.username_end += 2;
2029 self.host_start += 2;
2030 }
2031 write!(&mut self.serialization, "{}", host).unwrap();
2032 self.host_end = to_u32(self.serialization.len()).unwrap();
2033 self.host = host.into();
2034
2035 if let Some(new_port) = opt_new_port {
2036 self.port = new_port;
2037 if let Some(port) = new_port {
2038 write!(&mut self.serialization, ":{}", port).unwrap();
2039 }
2040 }
2041 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2042 self.serialization.push_str(&suffix);
2043
2044 let adjust = |index: &mut u32| {
2045 *index -= old_suffix_pos;
2046 *index += new_suffix_pos;
2047 };
2048 adjust(&mut self.path_start);
2049 if let Some(ref mut index) = self.query_start {
2050 adjust(index)
2051 }
2052 if let Some(ref mut index) = self.fragment_start {
2053 adjust(index)
2054 }
2055 }
2056
2057 /// Change this URL’s host to the given IP address.
2058 ///
2059 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2060 ///
2061 /// Compared to `Url::set_host`, this skips the host parser.
2062 ///
2063 /// # Examples
2064 ///
2065 /// ```rust
2066 /// use url::{Url, ParseError};
2067 ///
2068 /// # fn run() -> Result<(), ParseError> {
2069 /// let mut url = Url::parse("http://example.com")?;
2070 /// url.set_ip_host("127.0.0.1".parse().unwrap());
2071 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2072 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2073 /// # Ok(())
2074 /// # }
2075 /// # run().unwrap();
2076 /// ```
2077 ///
/// Cannot change the host of a cannot-be-a-base URL (e.g. `mailto:`) to an IP address:
2079 ///
2080 /// ```rust
2081 /// use url::{Url, ParseError};
2082 ///
2083 /// # fn run() -> Result<(), ParseError> {
2084 /// let mut url = Url::parse("mailto:rms@example.com")?;
2085 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2086 ///
2087 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2088 /// assert!(result.is_err());
2089 /// # Ok(())
2090 /// # }
2091 /// # run().unwrap();
2092 /// ```
2093 ///
2094 #[allow(clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>2095 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2096 if self.cannot_be_a_base() {
2097 return Err(());
2098 }
2099
2100 let address = match address {
2101 IpAddr::V4(address) => Host::Ipv4(address),
2102 IpAddr::V6(address) => Host::Ipv6(address),
2103 };
2104 self.set_host_internal(address, None);
2105 Ok(())
2106 }
2107
2108 /// Change this URL’s password.
2109 ///
2110 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2111 ///
2112 /// # Examples
2113 ///
2114 /// ```rust
2115 /// use url::{Url, ParseError};
2116 ///
2117 /// # fn run() -> Result<(), ParseError> {
2118 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2119 /// let result = url.set_password(Some("secret_password"));
2120 /// assert!(result.is_err());
2121 ///
2122 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2123 /// let result = url.set_password(Some("secret_password"));
2124 /// assert_eq!(url.password(), Some("secret_password"));
2125 ///
2126 /// let mut url = Url::parse("ftp://user2:@example.com")?;
2127 /// let result = url.set_password(Some("secret2"));
2128 /// assert!(result.is_ok());
2129 /// assert_eq!(url.password(), Some("secret2"));
2130 /// # Ok(())
2131 /// # }
2132 /// # run().unwrap();
2133 /// ```
2134 #[allow(clippy::result_unit_err)]
set_password(&mut self, password: Option<&str>) -> Result<(), ()>2135 pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2136 // has_host implies !cannot_be_a_base
2137 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2138 return Err(());
2139 }
2140 let password = password.unwrap_or_default();
2141 if !password.is_empty() {
2142 let host_and_after = self.slice(self.host_start..).to_owned();
2143 self.serialization.truncate(self.username_end as usize);
2144 self.serialization.push(':');
2145 self.serialization
2146 .extend(utf8_percent_encode(password, USERINFO));
2147 self.serialization.push('@');
2148
2149 let old_host_start = self.host_start;
2150 let new_host_start = to_u32(self.serialization.len()).unwrap();
2151 let adjust = |index: &mut u32| {
2152 *index -= old_host_start;
2153 *index += new_host_start;
2154 };
2155 self.host_start = new_host_start;
2156 adjust(&mut self.host_end);
2157 adjust(&mut self.path_start);
2158 if let Some(ref mut index) = self.query_start {
2159 adjust(index)
2160 }
2161 if let Some(ref mut index) = self.fragment_start {
2162 adjust(index)
2163 }
2164
2165 self.serialization.push_str(&host_and_after);
2166 } else if self.byte_at(self.username_end) == b':' {
2167 // If there is a password to remove
2168 let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2169 debug_assert!(has_username_or_password);
2170 let username_start = self.scheme_end + 3;
2171 let empty_username = username_start == self.username_end;
2172 let start = self.username_end; // Remove the ':'
2173 let end = if empty_username {
2174 self.host_start // Remove the '@' as well
2175 } else {
2176 self.host_start - 1 // Keep the '@' to separate the username from the host
2177 };
2178 self.serialization.drain(start as usize..end as usize);
2179 let offset = end - start;
2180 self.host_start -= offset;
2181 self.host_end -= offset;
2182 self.path_start -= offset;
2183 if let Some(ref mut index) = self.query_start {
2184 *index -= offset
2185 }
2186 if let Some(ref mut index) = self.fragment_start {
2187 *index -= offset
2188 }
2189 }
2190 Ok(())
2191 }
2192
2193 /// Change this URL’s username.
2194 ///
2195 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2196 /// # Examples
2197 ///
/// Cannot set the username of a cannot-be-a-base URL (e.g. `mailto:`):
2199 ///
2200 /// ```rust
2201 /// use url::{Url, ParseError};
2202 ///
2203 /// # fn run() -> Result<(), ParseError> {
2204 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2205 /// let result = url.set_username("user1");
2206 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2207 /// assert!(result.is_err());
2208 /// # Ok(())
2209 /// # }
2210 /// # run().unwrap();
2211 /// ```
2212 ///
/// Set the username to `user1`:
2214 ///
2215 /// ```rust
2216 /// use url::{Url, ParseError};
2217 ///
2218 /// # fn run() -> Result<(), ParseError> {
2219 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2220 /// let result = url.set_username("user1");
2221 /// assert!(result.is_ok());
2222 /// assert_eq!(url.username(), "user1");
2223 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2224 /// # Ok(())
2225 /// # }
2226 /// # run().unwrap();
2227 /// ```
2228 #[allow(clippy::result_unit_err)]
set_username(&mut self, username: &str) -> Result<(), ()>2229 pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2230 // has_host implies !cannot_be_a_base
2231 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2232 return Err(());
2233 }
2234 let username_start = self.scheme_end + 3;
2235 debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2236 if self.slice(username_start..self.username_end) == username {
2237 return Ok(());
2238 }
2239 let after_username = self.slice(self.username_end..).to_owned();
2240 self.serialization.truncate(username_start as usize);
2241 self.serialization
2242 .extend(utf8_percent_encode(username, USERINFO));
2243
2244 let mut removed_bytes = self.username_end;
2245 self.username_end = to_u32(self.serialization.len()).unwrap();
2246 let mut added_bytes = self.username_end;
2247
2248 let new_username_is_empty = self.username_end == username_start;
2249 match (new_username_is_empty, after_username.chars().next()) {
2250 (true, Some('@')) => {
2251 removed_bytes += 1;
2252 self.serialization.push_str(&after_username[1..]);
2253 }
2254 (false, Some('@')) | (_, Some(':')) | (true, _) => {
2255 self.serialization.push_str(&after_username);
2256 }
2257 (false, _) => {
2258 added_bytes += 1;
2259 self.serialization.push('@');
2260 self.serialization.push_str(&after_username);
2261 }
2262 }
2263
2264 let adjust = |index: &mut u32| {
2265 *index -= removed_bytes;
2266 *index += added_bytes;
2267 };
2268 adjust(&mut self.host_start);
2269 adjust(&mut self.host_end);
2270 adjust(&mut self.path_start);
2271 if let Some(ref mut index) = self.query_start {
2272 adjust(index)
2273 }
2274 if let Some(ref mut index) = self.fragment_start {
2275 adjust(index)
2276 }
2277 Ok(())
2278 }
2279
2280 /// Change this URL’s scheme.
2281 ///
2282 /// Do nothing and return `Err` under the following circumstances:
2283 ///
2284 /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2285 /// * If this URL is cannot-be-a-base and the new scheme is one of
2286 /// `http`, `https`, `ws`, `wss` or `ftp`
2287 /// * If either the old or new scheme is `http`, `https`, `ws`,
2288 /// `wss` or `ftp` and the other is not one of these
2289 /// * If the new scheme is `file` and this URL includes credentials
2290 /// or has a non-null port
2291 /// * If this URL's scheme is `file` and its host is empty or null
2292 ///
2293 /// See also [the URL specification's section on legal scheme state
2294 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2295 ///
2296 /// # Examples
2297 ///
2298 /// Change the URL’s scheme from `https` to `http`:
2299 ///
2300 /// ```
2301 /// use url::Url;
2302 /// # use url::ParseError;
2303 ///
2304 /// # fn run() -> Result<(), ParseError> {
2305 /// let mut url = Url::parse("https://example.net")?;
2306 /// let result = url.set_scheme("http");
2307 /// assert_eq!(url.as_str(), "http://example.net/");
2308 /// assert!(result.is_ok());
2309 /// # Ok(())
2310 /// # }
2311 /// # run().unwrap();
2312 /// ```
2313 /// Change the URL’s scheme from `foo` to `bar`:
2314 ///
2315 /// ```
2316 /// use url::Url;
2317 /// # use url::ParseError;
2318 ///
2319 /// # fn run() -> Result<(), ParseError> {
2320 /// let mut url = Url::parse("foo://example.net")?;
2321 /// let result = url.set_scheme("bar");
2322 /// assert_eq!(url.as_str(), "bar://example.net");
2323 /// assert!(result.is_ok());
2324 /// # Ok(())
2325 /// # }
2326 /// # run().unwrap();
2327 /// ```
2328 ///
2329 /// Cannot change URL’s scheme from `https` to `foõ`:
2330 ///
2331 /// ```
2332 /// use url::Url;
2333 /// # use url::ParseError;
2334 ///
2335 /// # fn run() -> Result<(), ParseError> {
2336 /// let mut url = Url::parse("https://example.net")?;
2337 /// let result = url.set_scheme("foõ");
2338 /// assert_eq!(url.as_str(), "https://example.net/");
2339 /// assert!(result.is_err());
2340 /// # Ok(())
2341 /// # }
2342 /// # run().unwrap();
2343 /// ```
2344 ///
2345 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2346 ///
2347 /// ```
2348 /// use url::Url;
2349 /// # use url::ParseError;
2350 ///
2351 /// # fn run() -> Result<(), ParseError> {
2352 /// let mut url = Url::parse("mailto:rms@example.net")?;
2353 /// let result = url.set_scheme("https");
2354 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2355 /// assert!(result.is_err());
2356 /// # Ok(())
2357 /// # }
2358 /// # run().unwrap();
2359 /// ```
2360 /// Cannot change the URL’s scheme from `foo` to `https`:
2361 ///
2362 /// ```
2363 /// use url::Url;
2364 /// # use url::ParseError;
2365 ///
2366 /// # fn run() -> Result<(), ParseError> {
2367 /// let mut url = Url::parse("foo://example.net")?;
2368 /// let result = url.set_scheme("https");
2369 /// assert_eq!(url.as_str(), "foo://example.net");
2370 /// assert!(result.is_err());
2371 /// # Ok(())
2372 /// # }
2373 /// # run().unwrap();
2374 /// ```
2375 /// Cannot change the URL’s scheme from `http` to `foo`:
2376 ///
2377 /// ```
2378 /// use url::Url;
2379 /// # use url::ParseError;
2380 ///
2381 /// # fn run() -> Result<(), ParseError> {
2382 /// let mut url = Url::parse("http://example.net")?;
2383 /// let result = url.set_scheme("foo");
2384 /// assert_eq!(url.as_str(), "http://example.net/");
2385 /// assert!(result.is_err());
2386 /// # Ok(())
2387 /// # }
2388 /// # run().unwrap();
2389 /// ```
2390 #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
set_scheme(&mut self, scheme: &str) -> Result<(), ()>2391 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2392 let mut parser = Parser::for_setter(String::new());
2393 let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2394 let new_scheme_type = SchemeType::from(&parser.serialization);
2395 let old_scheme_type = SchemeType::from(self.scheme());
2396 // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2397 if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2398 // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2399 (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2400 // If url includes credentials or has a non-null port, and buffer is "file", then return.
2401 // If url’s scheme is "file" and its host is an empty host or null, then return.
2402 (new_scheme_type.is_file() && self.has_authority())
2403 {
2404 return Err(());
2405 }
2406
2407 if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2408 return Err(());
2409 }
2410 let old_scheme_end = self.scheme_end;
2411 let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2412 let adjust = |index: &mut u32| {
2413 *index -= old_scheme_end;
2414 *index += new_scheme_end;
2415 };
2416
2417 self.scheme_end = new_scheme_end;
2418 adjust(&mut self.username_end);
2419 adjust(&mut self.host_start);
2420 adjust(&mut self.host_end);
2421 adjust(&mut self.path_start);
2422 if let Some(ref mut index) = self.query_start {
2423 adjust(index)
2424 }
2425 if let Some(ref mut index) = self.fragment_start {
2426 adjust(index)
2427 }
2428
2429 parser.serialization.push_str(self.slice(old_scheme_end..));
2430 self.serialization = parser.serialization;
2431
2432 // Update the port so it can be removed
2433 // If it is the scheme's default
2434 // we don't mind it silently failing
2435 // if there was no port in the first place
2436 let previous_port = self.port();
2437 let _ = self.set_port(previous_port);
2438
2439 Ok(())
2440 }
2441
2442 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2443 ///
2444 /// This returns `Err` if the given path is not absolute or,
2445 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2446 ///
2447 /// # Examples
2448 ///
2449 /// On Unix-like platforms:
2450 ///
2451 /// ```
2452 /// # if cfg!(unix) {
2453 /// use url::Url;
2454 ///
2455 /// # fn run() -> Result<(), ()> {
2456 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2457 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2458 ///
2459 /// let url = Url::from_file_path("../foo.txt");
2460 /// assert!(url.is_err());
2461 ///
2462 /// let url = Url::from_file_path("https://google.com/");
2463 /// assert!(url.is_err());
2464 /// # Ok(())
2465 /// # }
2466 /// # run().unwrap();
2467 /// # }
2468 /// ```
2469 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2470 #[allow(clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2471 pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2472 let mut serialization = "file://".to_owned();
2473 let host_start = serialization.len() as u32;
2474 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2475 Ok(Url {
2476 serialization,
2477 scheme_end: "file".len() as u32,
2478 username_end: host_start,
2479 host_start,
2480 host_end,
2481 host,
2482 port: None,
2483 path_start: host_end,
2484 query_start: None,
2485 fragment_start: None,
2486 })
2487 }
2488
2489 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2490 ///
2491 /// This returns `Err` if the given path is not absolute or,
2492 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2493 ///
/// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
/// so that the entire path is considered when using this URL as a base URL.
2496 ///
2497 /// For example:
2498 ///
2499 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2500 /// as the base URL is `file:///var/www/index.html`
2501 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2502 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2503 ///
2504 /// Note that `std::path` does not consider trailing slashes significant
2505 /// and usually does not include them (e.g. in `Path::parent()`).
2506 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2507 #[allow(clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2508 pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2509 let mut url = Url::from_file_path(path)?;
2510 if !url.serialization.ends_with('/') {
2511 url.serialization.push('/')
2512 }
2513 Ok(url)
2514 }
2515
2516 /// Serialize with Serde using the internal representation of the `Url` struct.
2517 ///
2518 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2519 /// for speed, compared to the `Deserialize` trait impl.
2520 ///
2521 /// This method is only available if the `serde` Cargo feature is enabled.
2522 #[cfg(feature = "serde")]
2523 #[deny(unused)]
serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,2524 pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2525 where
2526 S: serde::Serializer,
2527 {
2528 use serde::Serialize;
2529 // Destructuring first lets us ensure that adding or removing fields forces this method
2530 // to be updated
2531 let Url {
2532 ref serialization,
2533 ref scheme_end,
2534 ref username_end,
2535 ref host_start,
2536 ref host_end,
2537 ref host,
2538 ref port,
2539 ref path_start,
2540 ref query_start,
2541 ref fragment_start,
2542 } = *self;
2543 (
2544 serialization,
2545 scheme_end,
2546 username_end,
2547 host_start,
2548 host_end,
2549 host,
2550 port,
2551 path_start,
2552 query_start,
2553 fragment_start,
2554 )
2555 .serialize(serializer)
2556 }
2557
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This method sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
2562 ///
2563 /// This method is only available if the `serde` Cargo feature is enabled.
2564 #[cfg(feature = "serde")]
2565 #[deny(unused)]
deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer<'de>,2566 pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2567 where
2568 D: serde::Deserializer<'de>,
2569 {
2570 use serde::de::{Deserialize, Error, Unexpected};
2571 let (
2572 serialization,
2573 scheme_end,
2574 username_end,
2575 host_start,
2576 host_end,
2577 host,
2578 port,
2579 path_start,
2580 query_start,
2581 fragment_start,
2582 ) = Deserialize::deserialize(deserializer)?;
2583 let url = Url {
2584 serialization,
2585 scheme_end,
2586 username_end,
2587 host_start,
2588 host_end,
2589 host,
2590 port,
2591 path_start,
2592 query_start,
2593 fragment_start,
2594 };
2595 if cfg!(debug_assertions) {
2596 url.check_invariants().map_err(|reason| {
2597 let reason: &str = &reason;
2598 Error::invalid_value(Unexpected::Other("value"), &reason)
2599 })?
2600 }
2601 Ok(url)
2602 }
2603
2604 /// Assuming the URL is in the `file` scheme or similar,
2605 /// convert its path to an absolute `std::path::Path`.
2606 ///
2607 /// **Note:** This does not actually check the URL’s `scheme`,
2608 /// and may give nonsensical results for other schemes.
2609 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2610 ///
2611 /// ```
2612 /// # use url::Url;
2613 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2614 /// let path = url.to_file_path();
2615 /// ```
2616 ///
2617 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2618 /// `file:` URLs may have a non-local host),
2619 /// or if `Path::new_opt()` returns `None`.
2620 /// (That is, if the percent-decoded path contains a NUL byte or,
2621 /// for a Windows path, is not UTF-8.)
2622 #[inline]
2623 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2624 #[allow(clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2625 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2626 if let Some(segments) = self.path_segments() {
2627 let host = match self.host() {
2628 None | Some(Host::Domain("localhost")) => None,
2629 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2630 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2631 }
2632 _ => return Err(()),
2633 };
2634
2635 return file_url_segments_to_pathbuf(host, segments);
2636 }
2637 Err(())
2638 }
2639
2640 // Private helper methods:
2641
2642 #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2643 fn slice<R>(&self, range: R) -> &str
2644 where
2645 R: RangeArg,
2646 {
2647 range.slice_of(&self.serialization)
2648 }
2649
2650 #[inline]
byte_at(&self, i: u32) -> u82651 fn byte_at(&self, i: u32) -> u8 {
2652 self.serialization.as_bytes()[i as usize]
2653 }
2654 }
2655
2656 /// Parse a string as an URL, without a base URL or encoding override.
2657 impl str::FromStr for Url {
2658 type Err = ParseError;
2659
2660 #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2661 fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2662 Url::parse(input)
2663 }
2664 }
2665
2666 impl<'a> TryFrom<&'a str> for Url {
2667 type Error = ParseError;
2668
try_from(s: &'a str) -> Result<Self, Self::Error>2669 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2670 Url::parse(s)
2671 }
2672 }
2673
2674 /// Display the serialization of this URL.
2675 impl fmt::Display for Url {
2676 #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2677 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2678 fmt::Display::fmt(&self.serialization, formatter)
2679 }
2680 }
2681
2682 /// String conversion.
2683 impl From<Url> for String {
from(value: Url) -> String2684 fn from(value: Url) -> String {
2685 value.serialization
2686 }
2687 }
2688
2689 /// Debug the serialization of this URL.
2690 impl fmt::Debug for Url {
2691 #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2692 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2693 formatter
2694 .debug_struct("Url")
2695 .field("scheme", &self.scheme())
2696 .field("cannot_be_a_base", &self.cannot_be_a_base())
2697 .field("username", &self.username())
2698 .field("password", &self.password())
2699 .field("host", &self.host())
2700 .field("port", &self.port())
2701 .field("path", &self.path())
2702 .field("query", &self.query())
2703 .field("fragment", &self.fragment())
2704 .finish()
2705 }
2706 }
2707
2708 /// URLs compare like their serialization.
2709 impl Eq for Url {}
2710
2711 /// URLs compare like their serialization.
2712 impl PartialEq for Url {
2713 #[inline]
eq(&self, other: &Self) -> bool2714 fn eq(&self, other: &Self) -> bool {
2715 self.serialization == other.serialization
2716 }
2717 }
2718
2719 /// URLs compare like their serialization.
2720 impl Ord for Url {
2721 #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2722 fn cmp(&self, other: &Self) -> cmp::Ordering {
2723 self.serialization.cmp(&other.serialization)
2724 }
2725 }
2726
2727 /// URLs compare like their serialization.
2728 impl PartialOrd for Url {
2729 #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2730 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2731 Some(self.cmp(other))
2732 }
2733 }
2734
2735 /// URLs hash like their serialization.
2736 impl hash::Hash for Url {
2737 #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2738 fn hash<H>(&self, state: &mut H)
2739 where
2740 H: hash::Hasher,
2741 {
2742 hash::Hash::hash(&self.serialization, state)
2743 }
2744 }
2745
2746 /// Return the serialization of this URL.
2747 impl AsRef<str> for Url {
2748 #[inline]
as_ref(&self) -> &str2749 fn as_ref(&self) -> &str {
2750 &self.serialization
2751 }
2752 }
2753
/// A range type that can select a sub-slice of a string.
///
/// The result borrows from the string, not from the range.
trait RangeArg {
    /// Return the part of `s` selected by this range.
    fn slice_of<'s>(&self, s: &'s str) -> &'s str;
}
2757
2758 impl RangeArg for Range<u32> {
2759 #[inline]
slice_of<'a>(&self, s: &'a str) -> &'a str2760 fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2761 &s[self.start as usize..self.end as usize]
2762 }
2763 }
2764
2765 impl RangeArg for RangeFrom<u32> {
2766 #[inline]
slice_of<'a>(&self, s: &'a str) -> &'a str2767 fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2768 &s[self.start as usize..]
2769 }
2770 }
2771
2772 impl RangeArg for RangeTo<u32> {
2773 #[inline]
slice_of<'a>(&self, s: &'a str) -> &'a str2774 fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2775 &s[..self.end as usize]
2776 }
2777 }
2778
/// Serializes this URL into a `serde` stream as a single string.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2791
/// Deserializes this URL from a `serde` stream.
///
/// The input must be a string; it is parsed with `Url::parse`, and a parse
/// failure is reported as an invalid-value error carrying the parser's message.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that turns a visited string into a `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        // Report the offending input together with the parser's message.
                        let message = err.to_string();
                        Err(E::invalid_value(Unexpected::Str(s), &message.as_str()))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2826
2827 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2828 fn path_to_file_url_segments(
2829 path: &Path,
2830 serialization: &mut String,
2831 ) -> Result<(u32, HostInternal), ()> {
2832 #[cfg(any(unix, target_os = "redox"))]
2833 use std::os::unix::prelude::OsStrExt;
2834 #[cfg(target_os = "wasi")]
2835 use std::os::wasi::prelude::OsStrExt;
2836 if !path.is_absolute() {
2837 return Err(());
2838 }
2839 let host_end = to_u32(serialization.len()).unwrap();
2840 let mut empty = true;
2841 // skip the root component
2842 for component in path.components().skip(1) {
2843 empty = false;
2844 serialization.push('/');
2845 serialization.extend(percent_encode(
2846 component.as_os_str().as_bytes(),
2847 SPECIAL_PATH_SEGMENT,
2848 ));
2849 }
2850 if empty {
2851 // An URL’s path must not be empty.
2852 serialization.push('/');
2853 }
2854 Ok((host_end, HostInternal::None))
2855 }
2856
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards `path` and `serialization` unchanged to
/// `path_to_file_url_segments_windows` and returns its result.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2864
2865 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2866 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2867 fn path_to_file_url_segments_windows(
2868 path: &Path,
2869 serialization: &mut String,
2870 ) -> Result<(u32, HostInternal), ()> {
2871 use std::path::{Component, Prefix};
2872 if !path.is_absolute() {
2873 return Err(());
2874 }
2875 let mut components = path.components();
2876
2877 let host_start = serialization.len() + 1;
2878 let host_end;
2879 let host_internal;
2880
2881 match components.next() {
2882 Some(Component::Prefix(ref p)) => match p.kind() {
2883 Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2884 host_end = to_u32(serialization.len()).unwrap();
2885 host_internal = HostInternal::None;
2886 serialization.push('/');
2887 serialization.push(letter as char);
2888 serialization.push(':');
2889 }
2890 Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2891 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2892 write!(serialization, "{}", host).unwrap();
2893 host_end = to_u32(serialization.len()).unwrap();
2894 host_internal = host.into();
2895 serialization.push('/');
2896 let share = share.to_str().ok_or(())?;
2897 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2898 }
2899 _ => return Err(()),
2900 },
2901 _ => return Err(()),
2902 }
2903
2904 let mut path_only_has_prefix = true;
2905 for component in components {
2906 if component == Component::RootDir {
2907 continue;
2908 }
2909
2910 path_only_has_prefix = false;
2911 // FIXME: somehow work with non-unicode?
2912 let component = component.as_os_str().to_str().ok_or(())?;
2913
2914 serialization.push('/');
2915 serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2916 }
2917
2918 // A windows drive letter must end with a slash.
2919 if serialization.len() > host_start
2920 && parser::is_windows_drive_letter(&serialization[host_start..])
2921 && path_only_has_prefix
2922 {
2923 serialization.push('/');
2924 }
2925
2926 Ok((host_end, host_internal))
2927 }
2928
2929 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2930 fn file_url_segments_to_pathbuf(
2931 host: Option<&str>,
2932 segments: str::Split<'_, char>,
2933 ) -> Result<PathBuf, ()> {
2934 use std::ffi::OsStr;
2935 #[cfg(any(unix, target_os = "redox"))]
2936 use std::os::unix::prelude::OsStrExt;
2937 #[cfg(target_os = "wasi")]
2938 use std::os::wasi::prelude::OsStrExt;
2939
2940 if host.is_some() {
2941 return Err(());
2942 }
2943
2944 let mut bytes = if cfg!(target_os = "redox") {
2945 b"file:".to_vec()
2946 } else {
2947 Vec::new()
2948 };
2949
2950 for segment in segments {
2951 bytes.push(b'/');
2952 bytes.extend(percent_decode(segment.as_bytes()));
2953 }
2954
2955 // A windows drive letter must end with a slash.
2956 if bytes.len() > 2
2957 && bytes[bytes.len() - 2].is_ascii_alphabetic()
2958 && matches!(bytes[bytes.len() - 1], b':' | b'|')
2959 {
2960 bytes.push(b'/');
2961 }
2962
2963 let os_str = OsStr::from_bytes(&bytes);
2964 let path = PathBuf::from(os_str);
2965
2966 debug_assert!(
2967 path.is_absolute(),
2968 "to_file_path() failed to produce an absolute Path"
2969 );
2970
2971 Ok(path)
2972 }
2973
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards `host` and `segments` unchanged to
/// `file_url_segments_to_pathbuf_windows` and returns its result.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2981
2982 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2983 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2984 fn file_url_segments_to_pathbuf_windows(
2985 host: Option<&str>,
2986 mut segments: str::Split<'_, char>,
2987 ) -> Result<PathBuf, ()> {
2988 let mut string = if let Some(host) = host {
2989 r"\\".to_owned() + host
2990 } else {
2991 let first = segments.next().ok_or(())?;
2992
2993 match first.len() {
2994 2 => {
2995 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2996 return Err(());
2997 }
2998
2999 first.to_owned()
3000 }
3001
3002 4 => {
3003 if !first.starts_with(parser::ascii_alpha) {
3004 return Err(());
3005 }
3006 let bytes = first.as_bytes();
3007 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
3008 return Err(());
3009 }
3010
3011 first[0..1].to_owned() + ":"
3012 }
3013
3014 _ => return Err(()),
3015 }
3016 };
3017
3018 for segment in segments {
3019 string.push('\\');
3020
3021 // Currently non-unicode windows paths cannot be represented
3022 match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
3023 Ok(s) => string.push_str(&s),
3024 Err(..) => return Err(()),
3025 }
3026 }
3027 let path = PathBuf::from(string);
3028 debug_assert!(
3029 path.is_absolute(),
3030 "to_file_path() failed to produce an absolute Path"
3031 );
3032 Ok(path)
3033 }
3034
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. `finish` and `Drop` both `take()` it, so it is
    // `None` once the borrow has been handed back.
    url: Option<&'a mut Url>,
    // Passed to `Url::restore_already_parsed_fragment` when editing ends.
    fragment: Option<String>,
}
3041
3042 // `as_mut_string` string here exposes the internal serialization of an `Url`,
3043 // which should not be exposed to users.
3044 // We achieve that by not giving users direct access to `UrlQuery`:
3045 // * Its fields are private
3046 // (and so can not be constructed with struct literal syntax outside of this crate),
3047 // * It has no constructor
3048 // * It is only visible (on the type level) to users in the return type of
3049 // `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
3050 // * `Serializer` keeps its target in a private field
3051 // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
3052 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String3053 fn as_mut_string(&mut self) -> &mut String {
3054 &mut self.url.as_mut().unwrap().serialization
3055 }
3056
finish(mut self) -> &'a mut Url3057 fn finish(mut self) -> &'a mut Url {
3058 let url = self.url.take().unwrap();
3059 url.restore_already_parsed_fragment(self.fragment.take());
3060 url
3061 }
3062
3063 type Finished = &'a mut Url;
3064 }
3065
3066 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)3067 fn drop(&mut self) {
3068 if let Some(url) = self.url.take() {
3069 url.restore_already_parsed_fragment(self.fragment.take())
3070 }
3071 }
3072 }
3073