1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 /*!
10
11 rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12 for the [Rust](http://rust-lang.org/) programming language.
13
14
15 # URL parsing and data structures
16
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19 ```
20 use url::{Url, ParseError};
21
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24
25 Let’s parse a valid URL and look at its components.
26
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34
35
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57
58 ```
59 use url::Url;
60 # use url::ParseError;
61
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75
76 ## Serde
77
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79
80 # Base URL
81
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89
90 ```
91 use url::{Url, ParseError};
92
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95
96 Use the `join` method on an `Url` to use it as a base URL:
97
98 ```
99 use url::Url;
100 # use url::ParseError;
101
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110
111 # Feature: `serde`
112
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121
122 */
123
124 #![doc(html_root_url = "https://docs.rs/url/2.3.1")]
125 #![cfg_attr(
126 feature = "debugger_visualizer",
127 feature(debugger_visualizer),
128 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
129 )]
130
131 pub use form_urlencoded;
132
133 #[cfg(feature = "serde")]
134 extern crate serde;
135
136 use crate::host::HostInternal;
137 use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
138 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
139 use std::borrow::Borrow;
140 use std::cmp;
141 use std::fmt::{self, Write};
142 use std::hash;
143 use std::io;
144 use std::mem;
145 use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
146 use std::ops::{Range, RangeFrom, RangeTo};
147 use std::path::{Path, PathBuf};
148 use std::str;
149
150 use std::convert::TryFrom;
151
152 pub use crate::host::Host;
153 pub use crate::origin::{OpaqueOrigin, Origin};
154 pub use crate::parser::{ParseError, SyntaxViolation};
155 pub use crate::path_segments::PathSegmentsMut;
156 pub use crate::slicing::Position;
157 pub use form_urlencoded::EncodingOverride;
158
159 mod host;
160 mod origin;
161 mod parser;
162 mod path_segments;
163 mod slicing;
164
165 #[doc(hidden)]
166 pub mod quirks;
167
/// A parsed URL record.
///
/// The URL is stored as a single serialized `String`; the remaining fields
/// are `u32` offsets into that string (plus the parsed host kind and port)
/// that the accessor methods use to slice out individual components.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    // `scheme()` returns `serialization[..scheme_end]`.
    scheme_end: u32,   // Before ':'
    // `username()` returns the text between "://" and this offset.
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // `serialization[host_start..host_end]` is the host string (see `host_str()`).
    host_start: u32,
    host_end: u32,
    // Kind of host (none, domain, IPv4 or IPv6); domain text lives in `serialization`.
    host: HostInternal,
    // Explicit port, if one is serialized between `host_end` and `path_start`.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
193
/// Full configuration for the URL parser.
///
/// Obtained from `Url::options()`, adjusted with the builder-style methods
/// below, and consumed by `ParseOptions::parse`.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; set through `base_url()`.
    base_url: Option<&'a Url>,
    // Query-string encoding override; set through `encoding_override()`.
    encoding_override: EncodingOverride<'a>,
    // Callback handed to the parser for syntax violations; set through
    // `syntax_violation_callback()`.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
201
202 impl<'a> ParseOptions<'a> {
203 /// Change the base URL
base_url(mut self, new: Option<&'a Url>) -> Self204 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
205 self.base_url = new;
206 self
207 }
208
209 /// Override the character encoding of query strings.
210 /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self211 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
212 self.encoding_override = new;
213 self
214 }
215
216 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
217 /// when it occurs during parsing. Note that since the provided function is
218 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
219 /// a `RefCell`, to collect the violations.
220 ///
221 /// ## Example
222 /// ```
223 /// use std::cell::RefCell;
224 /// use url::{Url, SyntaxViolation};
225 /// # use url::ParseError;
226 /// # fn run() -> Result<(), url::ParseError> {
227 /// let violations = RefCell::new(Vec::new());
228 /// let url = Url::options()
229 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
230 /// .parse("https:////example.com")?;
231 /// assert_eq!(url.as_str(), "https://example.com/");
232 /// assert_eq!(violations.into_inner(),
233 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
234 /// # Ok(())
235 /// # }
236 /// # run().unwrap();
237 /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self238 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
239 self.violation_fn = new;
240 self
241 }
242
243 /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>244 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
245 Parser {
246 serialization: String::with_capacity(input.len()),
247 base_url: self.base_url,
248 query_encoding_override: self.encoding_override,
249 violation_fn: self.violation_fn,
250 context: Context::UrlParser,
251 }
252 .parse_url(input)
253 }
254 }
255
256 impl Url {
257 /// Parse an absolute URL from a string.
258 ///
259 /// # Examples
260 ///
261 /// ```rust
262 /// use url::Url;
263 /// # use url::ParseError;
264 ///
265 /// # fn run() -> Result<(), ParseError> {
266 /// let url = Url::parse("https://example.net")?;
267 /// # Ok(())
268 /// # }
269 /// # run().unwrap();
270 /// ```
271 ///
272 /// # Errors
273 ///
274 /// If the function can not parse an absolute URL from the given string,
275 /// a [`ParseError`] variant will be returned.
276 ///
277 /// [`ParseError`]: enum.ParseError.html
278 #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>279 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
280 Url::options().parse(input)
281 }
282
283 /// Parse an absolute URL from a string and add params to its query string.
284 ///
285 /// Existing params are not removed.
286 ///
287 /// # Examples
288 ///
289 /// ```rust
290 /// use url::Url;
291 /// # use url::ParseError;
292 ///
293 /// # fn run() -> Result<(), ParseError> {
294 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
295 /// &[("lang", "rust"), ("browser", "servo")])?;
296 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
297 /// # Ok(())
298 /// # }
299 /// # run().unwrap();
300 /// ```
301 ///
302 /// # Errors
303 ///
304 /// If the function can not parse an absolute URL from the given string,
305 /// a [`ParseError`] variant will be returned.
306 ///
307 /// [`ParseError`]: enum.ParseError.html
308 #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,309 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
310 where
311 I: IntoIterator,
312 I::Item: Borrow<(K, V)>,
313 K: AsRef<str>,
314 V: AsRef<str>,
315 {
316 let mut url = Url::options().parse(input);
317
318 if let Ok(ref mut url) = url {
319 url.query_pairs_mut().extend_pairs(iter);
320 }
321
322 url
323 }
324
325 /// Parse a string as an URL, with this URL as the base URL.
326 ///
327 /// The inverse of this is [`make_relative`].
328 ///
329 /// Note: a trailing slash is significant.
330 /// Without it, the last path component is considered to be a “file” name
331 /// to be removed to get at the “directory” that is used as the base:
332 ///
333 /// # Examples
334 ///
335 /// ```rust
336 /// use url::Url;
337 /// # use url::ParseError;
338 ///
339 /// # fn run() -> Result<(), ParseError> {
340 /// let base = Url::parse("https://example.net/a/b.html")?;
341 /// let url = base.join("c.png")?;
342 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
343 ///
344 /// let base = Url::parse("https://example.net/a/b/")?;
345 /// let url = base.join("c.png")?;
346 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
347 /// # Ok(())
348 /// # }
349 /// # run().unwrap();
350 /// ```
351 ///
352 /// # Errors
353 ///
354 /// If the function can not parse an URL from the given string
355 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
356 ///
357 /// [`ParseError`]: enum.ParseError.html
358 /// [`make_relative`]: #method.make_relative
359 #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>360 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
361 Url::options().base_url(Some(self)).parse(input)
362 }
363
364 /// Creates a relative URL if possible, with this URL as the base URL.
365 ///
366 /// This is the inverse of [`join`].
367 ///
368 /// # Examples
369 ///
370 /// ```rust
371 /// use url::Url;
372 /// # use url::ParseError;
373 ///
374 /// # fn run() -> Result<(), ParseError> {
375 /// let base = Url::parse("https://example.net/a/b.html")?;
376 /// let url = Url::parse("https://example.net/a/c.png")?;
377 /// let relative = base.make_relative(&url);
378 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
379 ///
380 /// let base = Url::parse("https://example.net/a/b/")?;
381 /// let url = Url::parse("https://example.net/a/b/c.png")?;
382 /// let relative = base.make_relative(&url);
383 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
384 ///
385 /// let base = Url::parse("https://example.net/a/b/")?;
386 /// let url = Url::parse("https://example.net/a/d/c.png")?;
387 /// let relative = base.make_relative(&url);
388 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
389 ///
390 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
391 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
392 /// let relative = base.make_relative(&url);
393 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
394 /// # Ok(())
395 /// # }
396 /// # run().unwrap();
397 /// ```
398 ///
399 /// # Errors
400 ///
401 /// If this URL can't be a base for the given URL, `None` is returned.
402 /// This is for example the case if the scheme, host or port are not the same.
403 ///
404 /// [`join`]: #method.join
make_relative(&self, url: &Url) -> Option<String>405 pub fn make_relative(&self, url: &Url) -> Option<String> {
406 if self.cannot_be_a_base() {
407 return None;
408 }
409
410 // Scheme, host and port need to be the same
411 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
412 return None;
413 }
414
415 // We ignore username/password at this point
416
417 // The path has to be transformed
418 let mut relative = String::new();
419
420 // Extract the filename of both URIs, these need to be handled separately
421 fn extract_path_filename(s: &str) -> (&str, &str) {
422 let last_slash_idx = s.rfind('/').unwrap_or(0);
423 let (path, filename) = s.split_at(last_slash_idx);
424 if filename.is_empty() {
425 (path, "")
426 } else {
427 (path, &filename[1..])
428 }
429 }
430
431 let (base_path, base_filename) = extract_path_filename(self.path());
432 let (url_path, url_filename) = extract_path_filename(url.path());
433
434 let mut base_path = base_path.split('/').peekable();
435 let mut url_path = url_path.split('/').peekable();
436
437 // Skip over the common prefix
438 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
439 base_path.next();
440 url_path.next();
441 }
442
443 // Add `..` segments for the remainder of the base path
444 for base_path_segment in base_path {
445 // Skip empty last segments
446 if base_path_segment.is_empty() {
447 break;
448 }
449
450 if !relative.is_empty() {
451 relative.push('/');
452 }
453
454 relative.push_str("..");
455 }
456
457 // Append the remainder of the other URI
458 for url_path_segment in url_path {
459 if !relative.is_empty() {
460 relative.push('/');
461 }
462
463 relative.push_str(url_path_segment);
464 }
465
466 // Add the filename if they are not the same
467 if !relative.is_empty() || base_filename != url_filename {
468 // If the URIs filename is empty this means that it was a directory
469 // so we'll have to append a '/'.
470 //
471 // Otherwise append it directly as the new filename.
472 if url_filename.is_empty() {
473 relative.push('/');
474 } else {
475 if !relative.is_empty() {
476 relative.push('/');
477 }
478 relative.push_str(url_filename);
479 }
480 }
481
482 // Query and fragment are only taken from the other URI
483 if let Some(query) = url.query() {
484 relative.push('?');
485 relative.push_str(query);
486 }
487
488 if let Some(fragment) = url.fragment() {
489 relative.push('#');
490 relative.push_str(fragment);
491 }
492
493 Some(relative)
494 }
495
496 /// Return a default `ParseOptions` that can fully configure the URL parser.
497 ///
498 /// # Examples
499 ///
500 /// Get default `ParseOptions`, then change base url
501 ///
502 /// ```rust
503 /// use url::Url;
504 /// # use url::ParseError;
505 /// # fn run() -> Result<(), ParseError> {
506 /// let options = Url::options();
507 /// let api = Url::parse("https://api.example.com")?;
508 /// let base_url = options.base_url(Some(&api));
509 /// let version_url = base_url.parse("version.json")?;
510 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
511 /// # Ok(())
512 /// # }
513 /// # run().unwrap();
514 /// ```
options<'a>() -> ParseOptions<'a>515 pub fn options<'a>() -> ParseOptions<'a> {
516 ParseOptions {
517 base_url: None,
518 encoding_override: None,
519 violation_fn: None,
520 }
521 }
522
523 /// Return the serialization of this URL.
524 ///
525 /// This is fast since that serialization is already stored in the `Url` struct.
526 ///
527 /// # Examples
528 ///
529 /// ```rust
530 /// use url::Url;
531 /// # use url::ParseError;
532 ///
533 /// # fn run() -> Result<(), ParseError> {
534 /// let url_str = "https://example.net/";
535 /// let url = Url::parse(url_str)?;
536 /// assert_eq!(url.as_str(), url_str);
537 /// # Ok(())
538 /// # }
539 /// # run().unwrap();
540 /// ```
541 #[inline]
as_str(&self) -> &str542 pub fn as_str(&self) -> &str {
543 &self.serialization
544 }
545
546 /// Return the serialization of this URL.
547 ///
548 /// This consumes the `Url` and takes ownership of the `String` stored in it.
549 ///
550 /// # Examples
551 ///
552 /// ```rust
553 /// use url::Url;
554 /// # use url::ParseError;
555 ///
556 /// # fn run() -> Result<(), ParseError> {
557 /// let url_str = "https://example.net/";
558 /// let url = Url::parse(url_str)?;
559 /// assert_eq!(String::from(url), url_str);
560 /// # Ok(())
561 /// # }
562 /// # run().unwrap();
563 /// ```
564 #[inline]
565 #[deprecated(since = "2.3.0", note = "use Into<String>")]
into_string(self) -> String566 pub fn into_string(self) -> String {
567 self.into()
568 }
569
/// For internal testing, not part of the public API.
///
/// Methods of the `Url` struct assume a number of invariants.
/// This checks each of these invariants and returns an `Err` describing
/// the first one that is not met (the failed expression plus the URL's
/// serialization). It can still panic through the `expect` calls below
/// if the port substring or the serialization itself fails to parse.
/// This is for testing rust-url itself.
#[doc(hidden)]
pub fn check_invariants(&self) -> Result<(), String> {
    // Local `assert!` / `assert_eq!` that shadow the standard macros inside
    // this function: instead of panicking they return an `Err` from
    // `check_invariants`.
    macro_rules! assert {
        ($x: expr) => {
            if !$x {
                return Err(format!(
                    "!( {} ) for URL {:?}",
                    stringify!($x),
                    self.serialization
                ));
            }
        };
    }

    macro_rules! assert_eq {
        ($a: expr, $b: expr) => {
            {
                let a = $a;
                let b = $b;
                if a != b {
                    return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                        a, b, stringify!($a), stringify!($b),
                        self.serialization))
                }
            }
        }
    }

    // Scheme: one ASCII letter, then letters / digits / '+' / '-' / '.',
    // terminated by ':' at `scheme_end`.
    assert!(self.scheme_end >= 1);
    assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
    assert!(self
        .slice(1..self.scheme_end)
        .chars()
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
    assert_eq!(self.byte_at(self.scheme_end), b':');

    if self.slice(self.scheme_end + 1..).starts_with("//") {
        // URL with authority
        if self.username_end != self.serialization.len() as u32 {
            // The byte at `username_end` tells which userinfo shape is present.
            match self.byte_at(self.username_end) {
                // "user:password@": the host starts after the '@'.
                b':' => {
                    assert!(self.host_start >= self.username_end + 2);
                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
                }
                // "user@": the host starts right after the '@'.
                b'@' => assert!(self.host_start == self.username_end + 1),
                // No userinfo: `username_end` sits right after "://".
                _ => assert_eq!(self.username_end, self.scheme_end + 3),
            }
        }
        assert!(self.host_start >= self.username_end);
        assert!(self.host_end >= self.host_start);
        // The serialized host text must agree with the parsed `host` field.
        let host_str = self.slice(self.host_start..self.host_end);
        match self.host {
            HostInternal::None => assert_eq!(host_str, ""),
            HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
            HostInternal::Ipv6(address) => {
                let h: Host<String> = Host::Ipv6(address);
                assert_eq!(host_str, h.to_string())
            }
            HostInternal::Domain => {
                if SchemeType::from(self.scheme()).is_special() {
                    assert!(!host_str.is_empty())
                }
            }
        }
        // Anything between the host and the path must be ":" followed by the port.
        if self.path_start == self.host_end {
            assert_eq!(self.port, None);
        } else {
            assert_eq!(self.byte_at(self.host_end), b':');
            let port_str = self.slice(self.host_end + 1..self.path_start);
            assert_eq!(
                self.port,
                Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
            );
        }
        assert!(
            self.path_start as usize == self.serialization.len()
                || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
        );
    } else {
        // Anarchist URL (no authority)
        assert_eq!(self.username_end, self.scheme_end + 1);
        assert_eq!(self.host_start, self.scheme_end + 1);
        assert_eq!(self.host_end, self.scheme_end + 1);
        assert_eq!(self.host, HostInternal::None);
        assert_eq!(self.port, None);
        assert_eq!(self.path_start, self.scheme_end + 1);
    }
    // Query and fragment offsets point at their '?' / '#' delimiter, and the
    // fragment comes after the query.
    if let Some(start) = self.query_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'?');
    }
    if let Some(start) = self.fragment_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'#');
    }
    if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
        assert!(fragment_start > query_start);
    }

    // Round trip: re-parsing the serialization must reproduce every field.
    let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
    assert_eq!(&self.serialization, &other.serialization);
    assert_eq!(self.scheme_end, other.scheme_end);
    assert_eq!(self.username_end, other.username_end);
    assert_eq!(self.host_start, other.host_start);
    assert_eq!(self.host_end, other.host_end);
    assert!(
        self.host == other.host ||
            // XXX No host round-trips to empty host.
            // See https://github.com/whatwg/url/issues/79
            (self.host_str(), other.host_str()) == (None, Some(""))
    );
    assert_eq!(self.port, other.port);
    assert_eq!(self.path_start, other.path_start);
    assert_eq!(self.query_start, other.query_start);
    assert_eq!(self.fragment_start, other.fragment_start);
    Ok(())
}
692
693 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
694 ///
695 /// Note: this returns an opaque origin for `file:` URLs, which causes
696 /// `url.origin() != url.origin()`.
697 ///
698 /// # Examples
699 ///
700 /// URL with `ftp` scheme:
701 ///
702 /// ```rust
703 /// use url::{Host, Origin, Url};
704 /// # use url::ParseError;
705 ///
706 /// # fn run() -> Result<(), ParseError> {
707 /// let url = Url::parse("ftp://example.com/foo")?;
708 /// assert_eq!(url.origin(),
709 /// Origin::Tuple("ftp".into(),
710 /// Host::Domain("example.com".into()),
711 /// 21));
712 /// # Ok(())
713 /// # }
714 /// # run().unwrap();
715 /// ```
716 ///
717 /// URL with `blob` scheme:
718 ///
719 /// ```rust
720 /// use url::{Host, Origin, Url};
721 /// # use url::ParseError;
722 ///
723 /// # fn run() -> Result<(), ParseError> {
724 /// let url = Url::parse("blob:https://example.com/foo")?;
725 /// assert_eq!(url.origin(),
726 /// Origin::Tuple("https".into(),
727 /// Host::Domain("example.com".into()),
728 /// 443));
729 /// # Ok(())
730 /// # }
731 /// # run().unwrap();
732 /// ```
733 ///
734 /// URL with `file` scheme:
735 ///
736 /// ```rust
737 /// use url::{Host, Origin, Url};
738 /// # use url::ParseError;
739 ///
740 /// # fn run() -> Result<(), ParseError> {
741 /// let url = Url::parse("file:///tmp/foo")?;
742 /// assert!(!url.origin().is_tuple());
743 ///
744 /// let other_url = Url::parse("file:///tmp/foo")?;
745 /// assert!(url.origin() != other_url.origin());
746 /// # Ok(())
747 /// # }
748 /// # run().unwrap();
749 /// ```
750 ///
751 /// URL with other scheme:
752 ///
753 /// ```rust
754 /// use url::{Host, Origin, Url};
755 /// # use url::ParseError;
756 ///
757 /// # fn run() -> Result<(), ParseError> {
758 /// let url = Url::parse("foo:bar")?;
759 /// assert!(!url.origin().is_tuple());
760 /// # Ok(())
761 /// # }
762 /// # run().unwrap();
763 /// ```
764 #[inline]
origin(&self) -> Origin765 pub fn origin(&self) -> Origin {
766 origin::url_origin(self)
767 }
768
769 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
770 ///
771 /// # Examples
772 ///
773 /// ```
774 /// use url::Url;
775 /// # use url::ParseError;
776 ///
777 /// # fn run() -> Result<(), ParseError> {
778 /// let url = Url::parse("file:///tmp/foo")?;
779 /// assert_eq!(url.scheme(), "file");
780 /// # Ok(())
781 /// # }
782 /// # run().unwrap();
783 /// ```
784 #[inline]
scheme(&self) -> &str785 pub fn scheme(&self) -> &str {
786 self.slice(..self.scheme_end)
787 }
788
789 /// Return whether the URL has an 'authority',
790 /// which can contain a username, password, host, and port number.
791 ///
792 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
793 /// or cannot-be-a-base like `data:text/plain,Stuff`.
794 ///
795 /// # Examples
796 ///
797 /// ```
798 /// use url::Url;
799 /// # use url::ParseError;
800 ///
801 /// # fn run() -> Result<(), ParseError> {
802 /// let url = Url::parse("ftp://rms@example.com")?;
803 /// assert!(url.has_authority());
804 ///
805 /// let url = Url::parse("unix:/run/foo.socket")?;
806 /// assert!(!url.has_authority());
807 ///
808 /// let url = Url::parse("data:text/plain,Stuff")?;
809 /// assert!(!url.has_authority());
810 /// # Ok(())
811 /// # }
812 /// # run().unwrap();
813 /// ```
814 #[inline]
has_authority(&self) -> bool815 pub fn has_authority(&self) -> bool {
816 debug_assert!(self.byte_at(self.scheme_end) == b':');
817 self.slice(self.scheme_end..).starts_with("://")
818 }
819
820 /// Return whether this URL is a cannot-be-a-base URL,
821 /// meaning that parsing a relative URL string with this URL as the base will return an error.
822 ///
823 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
824 /// as is typically the case of `data:` and `mailto:` URLs.
825 ///
826 /// # Examples
827 ///
828 /// ```
829 /// use url::Url;
830 /// # use url::ParseError;
831 ///
832 /// # fn run() -> Result<(), ParseError> {
833 /// let url = Url::parse("ftp://rms@example.com")?;
834 /// assert!(!url.cannot_be_a_base());
835 ///
836 /// let url = Url::parse("unix:/run/foo.socket")?;
837 /// assert!(!url.cannot_be_a_base());
838 ///
839 /// let url = Url::parse("data:text/plain,Stuff")?;
840 /// assert!(url.cannot_be_a_base());
841 /// # Ok(())
842 /// # }
843 /// # run().unwrap();
844 /// ```
845 #[inline]
cannot_be_a_base(&self) -> bool846 pub fn cannot_be_a_base(&self) -> bool {
847 !self.slice(self.scheme_end + 1..).starts_with('/')
848 }
849
850 /// Return the username for this URL (typically the empty string)
851 /// as a percent-encoded ASCII string.
852 ///
853 /// # Examples
854 ///
855 /// ```
856 /// use url::Url;
857 /// # use url::ParseError;
858 ///
859 /// # fn run() -> Result<(), ParseError> {
860 /// let url = Url::parse("ftp://rms@example.com")?;
861 /// assert_eq!(url.username(), "rms");
862 ///
863 /// let url = Url::parse("ftp://:secret123@example.com")?;
864 /// assert_eq!(url.username(), "");
865 ///
866 /// let url = Url::parse("https://example.com")?;
867 /// assert_eq!(url.username(), "");
868 /// # Ok(())
869 /// # }
870 /// # run().unwrap();
871 /// ```
username(&self) -> &str872 pub fn username(&self) -> &str {
873 let scheme_separator_len = "://".len() as u32;
874 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
875 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
876 } else {
877 ""
878 }
879 }
880
881 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
882 ///
883 /// # Examples
884 ///
885 /// ```
886 /// use url::Url;
887 /// # use url::ParseError;
888 ///
889 /// # fn run() -> Result<(), ParseError> {
890 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
891 /// assert_eq!(url.password(), Some("secret123"));
892 ///
893 /// let url = Url::parse("ftp://:secret123@example.com")?;
894 /// assert_eq!(url.password(), Some("secret123"));
895 ///
896 /// let url = Url::parse("ftp://rms@example.com")?;
897 /// assert_eq!(url.password(), None);
898 ///
899 /// let url = Url::parse("https://example.com")?;
900 /// assert_eq!(url.password(), None);
901 /// # Ok(())
902 /// # }
903 /// # run().unwrap();
904 /// ```
password(&self) -> Option<&str>905 pub fn password(&self) -> Option<&str> {
906 // This ':' is not the one marking a port number since a host can not be empty.
907 // (Except for file: URLs, which do not have port numbers.)
908 if self.has_authority()
909 && self.username_end != self.serialization.len() as u32
910 && self.byte_at(self.username_end) == b':'
911 {
912 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
913 Some(self.slice(self.username_end + 1..self.host_start - 1))
914 } else {
915 None
916 }
917 }
918
919 /// Equivalent to `url.host().is_some()`.
920 ///
921 /// # Examples
922 ///
923 /// ```
924 /// use url::Url;
925 /// # use url::ParseError;
926 ///
927 /// # fn run() -> Result<(), ParseError> {
928 /// let url = Url::parse("ftp://rms@example.com")?;
929 /// assert!(url.has_host());
930 ///
931 /// let url = Url::parse("unix:/run/foo.socket")?;
932 /// assert!(!url.has_host());
933 ///
934 /// let url = Url::parse("data:text/plain,Stuff")?;
935 /// assert!(!url.has_host());
936 /// # Ok(())
937 /// # }
938 /// # run().unwrap();
939 /// ```
has_host(&self) -> bool940 pub fn has_host(&self) -> bool {
941 !matches!(self.host, HostInternal::None)
942 }
943
944 /// Return the string representation of the host (domain or IP address) for this URL, if any.
945 ///
946 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
947 /// of a special URL, or percent encoded for non-special URLs.
948 /// IPv6 addresses are given between `[` and `]` brackets.
949 ///
950 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
951 /// don’t have a host.
952 ///
953 /// See also the `host` method.
954 ///
955 /// # Examples
956 ///
957 /// ```
958 /// use url::Url;
959 /// # use url::ParseError;
960 ///
961 /// # fn run() -> Result<(), ParseError> {
962 /// let url = Url::parse("https://127.0.0.1/index.html")?;
963 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
964 ///
965 /// let url = Url::parse("ftp://rms@example.com")?;
966 /// assert_eq!(url.host_str(), Some("example.com"));
967 ///
968 /// let url = Url::parse("unix:/run/foo.socket")?;
969 /// assert_eq!(url.host_str(), None);
970 ///
971 /// let url = Url::parse("data:text/plain,Stuff")?;
972 /// assert_eq!(url.host_str(), None);
973 /// # Ok(())
974 /// # }
975 /// # run().unwrap();
976 /// ```
host_str(&self) -> Option<&str>977 pub fn host_str(&self) -> Option<&str> {
978 if self.has_host() {
979 Some(self.slice(self.host_start..self.host_end))
980 } else {
981 None
982 }
983 }
984
985 /// Return the parsed representation of the host for this URL.
986 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
987 /// of a special URL, or percent encoded for non-special URLs.
988 ///
989 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
990 /// don’t have a host.
991 ///
992 /// See also the `host_str` method.
993 ///
994 /// # Examples
995 ///
996 /// ```
997 /// use url::Url;
998 /// # use url::ParseError;
999 ///
1000 /// # fn run() -> Result<(), ParseError> {
1001 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1002 /// assert!(url.host().is_some());
1003 ///
1004 /// let url = Url::parse("ftp://rms@example.com")?;
1005 /// assert!(url.host().is_some());
1006 ///
1007 /// let url = Url::parse("unix:/run/foo.socket")?;
1008 /// assert!(url.host().is_none());
1009 ///
1010 /// let url = Url::parse("data:text/plain,Stuff")?;
1011 /// assert!(url.host().is_none());
1012 /// # Ok(())
1013 /// # }
1014 /// # run().unwrap();
1015 /// ```
host(&self) -> Option<Host<&str>>1016 pub fn host(&self) -> Option<Host<&str>> {
1017 match self.host {
1018 HostInternal::None => None,
1019 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1020 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1021 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1022 }
1023 }
1024
1025 /// If this URL has a host and it is a domain name (not an IP address), return it.
1026 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1027 /// of a special URL, or percent encoded for non-special URLs.
1028 ///
1029 /// # Examples
1030 ///
1031 /// ```
1032 /// use url::Url;
1033 /// # use url::ParseError;
1034 ///
1035 /// # fn run() -> Result<(), ParseError> {
1036 /// let url = Url::parse("https://127.0.0.1/")?;
1037 /// assert_eq!(url.domain(), None);
1038 ///
1039 /// let url = Url::parse("mailto:rms@example.net")?;
1040 /// assert_eq!(url.domain(), None);
1041 ///
1042 /// let url = Url::parse("https://example.com/")?;
1043 /// assert_eq!(url.domain(), Some("example.com"));
1044 /// # Ok(())
1045 /// # }
1046 /// # run().unwrap();
1047 /// ```
domain(&self) -> Option<&str>1048 pub fn domain(&self) -> Option<&str> {
1049 match self.host {
1050 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1051 _ => None,
1052 }
1053 }
1054
1055 /// Return the port number for this URL, if any.
1056 ///
1057 /// Note that default port numbers are never reflected by the serialization,
1058 /// use the `port_or_known_default()` method if you want a default port number returned.
1059 ///
1060 /// # Examples
1061 ///
1062 /// ```
1063 /// use url::Url;
1064 /// # use url::ParseError;
1065 ///
1066 /// # fn run() -> Result<(), ParseError> {
1067 /// let url = Url::parse("https://example.com")?;
1068 /// assert_eq!(url.port(), None);
1069 ///
1070 /// let url = Url::parse("https://example.com:443/")?;
1071 /// assert_eq!(url.port(), None);
1072 ///
1073 /// let url = Url::parse("ssh://example.com:22")?;
1074 /// assert_eq!(url.port(), Some(22));
1075 /// # Ok(())
1076 /// # }
1077 /// # run().unwrap();
1078 /// ```
1079 #[inline]
pub fn port(&self) -> Option<u16> {
    // Plain accessor: returns a copy of the stored `port` field, with no
    // fallback to a scheme default (see `port_or_known_default` for that).
    self.port
}
1083
1084 /// Return the port number for this URL, or the default port number if it is known.
1085 ///
1086 /// This method only knows the default port number
1087 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1088 ///
1089 /// For URLs in these schemes, this method always returns `Some(_)`.
1090 /// For other schemes, it is the same as `Url::port()`.
1091 ///
1092 /// # Examples
1093 ///
1094 /// ```
1095 /// use url::Url;
1096 /// # use url::ParseError;
1097 ///
1098 /// # fn run() -> Result<(), ParseError> {
1099 /// let url = Url::parse("foo://example.com")?;
1100 /// assert_eq!(url.port_or_known_default(), None);
1101 ///
1102 /// let url = Url::parse("foo://example.com:1456")?;
1103 /// assert_eq!(url.port_or_known_default(), Some(1456));
1104 ///
1105 /// let url = Url::parse("https://example.com")?;
1106 /// assert_eq!(url.port_or_known_default(), Some(443));
1107 /// # Ok(())
1108 /// # }
1109 /// # run().unwrap();
1110 /// ```
1111 #[inline]
port_or_known_default(&self) -> Option<u16>1112 pub fn port_or_known_default(&self) -> Option<u16> {
1113 self.port.or_else(|| parser::default_port(self.scheme()))
1114 }
1115
1116 /// Resolve a URL’s host and port number to `SocketAddr`.
1117 ///
1118 /// If the URL has the default port number of a scheme that is unknown to this library,
1119 /// `default_port_number` provides an opportunity to provide the actual port number.
1120 /// In non-example code this should be implemented either simply as `|| None`,
1121 /// or by matching on the URL’s `.scheme()`.
1122 ///
1123 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1124 ///
1125 /// # Examples
1126 ///
1127 /// ```no_run
1128 /// let url = url::Url::parse("https://example.net/").unwrap();
1129 /// let addrs = url.socket_addrs(|| None).unwrap();
1130 /// std::net::TcpStream::connect(&*addrs)
1131 /// # ;
1132 /// ```
1133 ///
1134 /// ```
1135 /// /// With application-specific known default port numbers
1136 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1137 /// url.socket_addrs(|| match url.scheme() {
1138 /// "socks5" | "socks5h" => Some(1080),
1139 /// _ => None,
1140 /// })
1141 /// }
1142 /// ```
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1143 pub fn socket_addrs(
1144 &self,
1145 default_port_number: impl Fn() -> Option<u16>,
1146 ) -> io::Result<Vec<SocketAddr>> {
1147 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1148 // causes borrowck issues because the return value borrows `default_port_number`:
1149 //
1150 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1151 //
1152 // > This RFC proposes that *all* type parameters are considered in scope
1153 // > for `impl Trait` in return position
1154
1155 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1156 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1157 }
1158
1159 let host = io_result(self.host(), "No host name in the URL")?;
1160 let port = io_result(
1161 self.port_or_known_default().or_else(default_port_number),
1162 "No port number in the URL",
1163 )?;
1164 Ok(match host {
1165 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1166 Host::Ipv4(ip) => vec![(ip, port).into()],
1167 Host::Ipv6(ip) => vec![(ip, port).into()],
1168 })
1169 }
1170
1171 /// Return the path for this URL, as a percent-encoded ASCII string.
1172 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1173 /// For other URLs, this starts with a '/' slash
1174 /// and continues with slash-separated path segments.
1175 ///
1176 /// # Examples
1177 ///
1178 /// ```rust
1179 /// use url::{Url, ParseError};
1180 ///
1181 /// # fn run() -> Result<(), ParseError> {
1182 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1183 /// assert_eq!(url.path(), "/api/versions");
1184 ///
1185 /// let url = Url::parse("https://example.com")?;
1186 /// assert_eq!(url.path(), "/");
1187 ///
1188 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1189 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1190 /// # Ok(())
1191 /// # }
1192 /// # run().unwrap();
1193 /// ```
path(&self) -> &str1194 pub fn path(&self) -> &str {
1195 match (self.query_start, self.fragment_start) {
1196 (None, None) => self.slice(self.path_start..),
1197 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1198 self.slice(self.path_start..next_component_start)
1199 }
1200 }
1201 }
1202
1203 /// Unless this URL is cannot-be-a-base,
1204 /// return an iterator of '/' slash-separated path segments,
1205 /// each as a percent-encoded ASCII string.
1206 ///
1207 /// Return `None` for cannot-be-a-base URLs.
1208 ///
1209 /// When `Some` is returned, the iterator always contains at least one string
1210 /// (which may be empty).
1211 ///
1212 /// # Examples
1213 ///
1214 /// ```
1215 /// use url::Url;
1216 /// # use std::error::Error;
1217 ///
1218 /// # fn run() -> Result<(), Box<dyn Error>> {
1219 /// let url = Url::parse("https://example.com/foo/bar")?;
1220 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1221 /// assert_eq!(path_segments.next(), Some("foo"));
1222 /// assert_eq!(path_segments.next(), Some("bar"));
1223 /// assert_eq!(path_segments.next(), None);
1224 ///
1225 /// let url = Url::parse("https://example.com")?;
1226 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1227 /// assert_eq!(path_segments.next(), Some(""));
1228 /// assert_eq!(path_segments.next(), None);
1229 ///
1230 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1231 /// assert!(url.path_segments().is_none());
1232 ///
1233 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1234 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1235 /// assert_eq!(path_segments.next(), Some("countries"));
1236 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1237 /// # Ok(())
1238 /// # }
1239 /// # run().unwrap();
1240 /// ```
path_segments(&self) -> Option<str::Split<'_, char>>1241 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1242 let path = self.path();
1243 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1244 }
1245
1246 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1247 ///
1248 /// # Examples
1249 ///
1250 /// ```rust
1251 /// use url::Url;
1252 /// # use url::ParseError;
1253 ///
/// # fn run() -> Result<(), ParseError> {
1255 /// let url = Url::parse("https://example.com/products?page=2")?;
1256 /// let query = url.query();
1257 /// assert_eq!(query, Some("page=2"));
1258 ///
1259 /// let url = Url::parse("https://example.com/products")?;
1260 /// let query = url.query();
1261 /// assert!(query.is_none());
1262 ///
1263 /// let url = Url::parse("https://example.com/?country=español")?;
1264 /// let query = url.query();
1265 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1266 /// # Ok(())
1267 /// # }
1268 /// # run().unwrap();
1269 /// ```
query(&self) -> Option<&str>1270 pub fn query(&self) -> Option<&str> {
1271 match (self.query_start, self.fragment_start) {
1272 (None, _) => None,
1273 (Some(query_start), None) => {
1274 debug_assert!(self.byte_at(query_start) == b'?');
1275 Some(self.slice(query_start + 1..))
1276 }
1277 (Some(query_start), Some(fragment_start)) => {
1278 debug_assert!(self.byte_at(query_start) == b'?');
1279 Some(self.slice(query_start + 1..fragment_start))
1280 }
1281 }
1282 }
1283
1284 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1285 /// and return an iterator of (key, value) pairs.
1286 ///
1287 /// # Examples
1288 ///
1289 /// ```rust
1290 /// use std::borrow::Cow;
1291 ///
1292 /// use url::Url;
1293 /// # use url::ParseError;
1294 ///
1295 /// # fn run() -> Result<(), ParseError> {
1296 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1297 /// let mut pairs = url.query_pairs();
1298 ///
1299 /// assert_eq!(pairs.count(), 2);
1300 ///
1301 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1302 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1303 /// # Ok(())
1304 /// # }
1305 /// # run().unwrap();
/// ```
1308 #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1309 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1310 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1311 }
1312
1313 /// Return this URL’s fragment identifier, if any.
1314 ///
1315 /// A fragment is the part of the URL after the `#` symbol.
1316 /// The fragment is optional and, if present, contains a fragment identifier
1317 /// that identifies a secondary resource, such as a section heading
1318 /// of a document.
1319 ///
/// In HTML, the fragment identifier is usually the `id` attribute of an element
1321 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1322 /// of a URL to the server.
1323 ///
1324 /// **Note:** the parser did *not* percent-encode this component,
1325 /// but the input may have been percent-encoded already.
1326 ///
1327 /// # Examples
1328 ///
1329 /// ```rust
1330 /// use url::Url;
1331 /// # use url::ParseError;
1332 ///
1333 /// # fn run() -> Result<(), ParseError> {
1334 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1335 ///
1336 /// assert_eq!(url.fragment(), Some("row=4"));
1337 ///
1338 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1339 ///
1340 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1341 /// # Ok(())
1342 /// # }
1343 /// # run().unwrap();
1344 /// ```
fragment(&self) -> Option<&str>1345 pub fn fragment(&self) -> Option<&str> {
1346 self.fragment_start.map(|start| {
1347 debug_assert!(self.byte_at(start) == b'#');
1348 self.slice(start + 1..)
1349 })
1350 }
1351
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1352 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1353 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1354 let result = f(&mut parser);
1355 self.serialization = parser.serialization;
1356 result
1357 }
1358
1359 /// Change this URL’s fragment identifier.
1360 ///
1361 /// # Examples
1362 ///
1363 /// ```rust
1364 /// use url::Url;
1365 /// # use url::ParseError;
1366 ///
1367 /// # fn run() -> Result<(), ParseError> {
1368 /// let mut url = Url::parse("https://example.com/data.csv")?;
1369 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
///
1371 /// url.set_fragment(Some("cell=4,1-6,2"));
1372 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1373 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1374 ///
1375 /// url.set_fragment(None);
1376 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1377 /// assert!(url.fragment().is_none());
1378 /// # Ok(())
1379 /// # }
1380 /// # run().unwrap();
1381 /// ```
set_fragment(&mut self, fragment: Option<&str>)1382 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1383 // Remove any previous fragment
1384 if let Some(start) = self.fragment_start {
1385 debug_assert!(self.byte_at(start) == b'#');
1386 self.serialization.truncate(start as usize);
1387 }
1388 // Write the new one
1389 if let Some(input) = fragment {
1390 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1391 self.serialization.push('#');
1392 self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
1393 } else {
1394 self.fragment_start = None
1395 }
1396 }
1397
take_fragment(&mut self) -> Option<String>1398 fn take_fragment(&mut self) -> Option<String> {
1399 self.fragment_start.take().map(|start| {
1400 debug_assert!(self.byte_at(start) == b'#');
1401 let fragment = self.slice(start + 1..).to_owned();
1402 self.serialization.truncate(start as usize);
1403 fragment
1404 })
1405 }
1406
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1407 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1408 if let Some(ref fragment) = fragment {
1409 assert!(self.fragment_start.is_none());
1410 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1411 self.serialization.push('#');
1412 self.serialization.push_str(fragment);
1413 }
1414 }
1415
1416 /// Change this URL’s query string.
1417 ///
1418 /// # Examples
1419 ///
1420 /// ```rust
1421 /// use url::Url;
1422 /// # use url::ParseError;
1423 ///
1424 /// # fn run() -> Result<(), ParseError> {
1425 /// let mut url = Url::parse("https://example.com/products")?;
1426 /// assert_eq!(url.as_str(), "https://example.com/products");
1427 ///
1428 /// url.set_query(Some("page=2"));
1429 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1430 /// assert_eq!(url.query(), Some("page=2"));
1431 /// # Ok(())
1432 /// # }
1433 /// # run().unwrap();
1434 /// ```
set_query(&mut self, query: Option<&str>)1435 pub fn set_query(&mut self, query: Option<&str>) {
1436 let fragment = self.take_fragment();
1437
1438 // Remove any previous query
1439 if let Some(start) = self.query_start.take() {
1440 debug_assert!(self.byte_at(start) == b'?');
1441 self.serialization.truncate(start as usize);
1442 }
1443 // Write the new query, if any
1444 if let Some(input) = query {
1445 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1446 self.serialization.push('?');
1447 let scheme_type = SchemeType::from(self.scheme());
1448 let scheme_end = self.scheme_end;
1449 self.mutate(|parser| {
1450 let vfn = parser.violation_fn;
1451 parser.parse_query(
1452 scheme_type,
1453 scheme_end,
1454 parser::Input::trim_tab_and_newlines(input, vfn),
1455 )
1456 });
1457 }
1458
1459 self.restore_already_parsed_fragment(fragment);
1460 }
1461
1462 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1463 /// in `application/x-www-form-urlencoded` syntax.
1464 ///
1465 /// The return value has a method-chaining API:
1466 ///
1467 /// ```rust
1468 /// # use url::{Url, ParseError};
1469 ///
1470 /// # fn run() -> Result<(), ParseError> {
1471 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1472 /// assert_eq!(url.query(), Some("lang=fr"));
1473 ///
1474 /// url.query_pairs_mut().append_pair("foo", "bar");
1475 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1476 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1477 ///
1478 /// url.query_pairs_mut()
1479 /// .clear()
1480 /// .append_pair("foo", "bar & baz")
1481 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1482 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1483 /// assert_eq!(url.as_str(),
1484 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1485 /// # Ok(())
1486 /// # }
1487 /// # run().unwrap();
1488 /// ```
1489 ///
1490 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1491 /// not `url.set_query(None)`.
1492 ///
1493 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1494 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1495 let fragment = self.take_fragment();
1496
1497 let query_start;
1498 if let Some(start) = self.query_start {
1499 debug_assert!(self.byte_at(start) == b'?');
1500 query_start = start as usize;
1501 } else {
1502 query_start = self.serialization.len();
1503 self.query_start = Some(to_u32(query_start).unwrap());
1504 self.serialization.push('?');
1505 }
1506
1507 let query = UrlQuery {
1508 url: Some(self),
1509 fragment,
1510 };
1511 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1512 }
1513
take_after_path(&mut self) -> String1514 fn take_after_path(&mut self) -> String {
1515 match (self.query_start, self.fragment_start) {
1516 (Some(i), _) | (None, Some(i)) => {
1517 let after_path = self.slice(i..).to_owned();
1518 self.serialization.truncate(i as usize);
1519 after_path
1520 }
1521 (None, None) => String::new(),
1522 }
1523 }
1524
1525 /// Change this URL’s path.
1526 ///
1527 /// # Examples
1528 ///
1529 /// ```rust
1530 /// use url::Url;
1531 /// # use url::ParseError;
1532 ///
1533 /// # fn run() -> Result<(), ParseError> {
1534 /// let mut url = Url::parse("https://example.com")?;
1535 /// url.set_path("api/comments");
1536 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1537 /// assert_eq!(url.path(), "/api/comments");
1538 ///
1539 /// let mut url = Url::parse("https://example.com/api")?;
1540 /// url.set_path("data/report.csv");
1541 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1542 /// assert_eq!(url.path(), "/data/report.csv");
1543 ///
1544 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1545 /// let mut url = Url::parse("https://example.com")?;
1546 /// url.set_path("api/some comments");
1547 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1548 /// assert_eq!(url.path(), "/api/some%20comments");
1549 ///
1550 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1551 /// let mut url = Url::parse("https://example.com")?;
1552 /// url.set_path("api/some%20comments");
1553 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1554 /// assert_eq!(url.path(), "/api/some%20comments");
1555 ///
1556 /// # Ok(())
1557 /// # }
1558 /// # run().unwrap();
1559 /// ```
set_path(&mut self, mut path: &str)1560 pub fn set_path(&mut self, mut path: &str) {
1561 let after_path = self.take_after_path();
1562 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1563 let cannot_be_a_base = self.cannot_be_a_base();
1564 let scheme_type = SchemeType::from(self.scheme());
1565 self.serialization.truncate(self.path_start as usize);
1566 self.mutate(|parser| {
1567 if cannot_be_a_base {
1568 if path.starts_with('/') {
1569 parser.serialization.push_str("%2F");
1570 path = &path[1..];
1571 }
1572 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
1573 } else {
1574 let mut has_host = true; // FIXME
1575 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
1576 }
1577 });
1578 self.restore_after_path(old_after_path_pos, &after_path);
1579 }
1580
1581 /// Return an object with methods to manipulate this URL’s path segments.
1582 ///
1583 /// Return `Err(())` if this URL is cannot-be-a-base.
1584 #[allow(clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1585 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1586 if self.cannot_be_a_base() {
1587 Err(())
1588 } else {
1589 Ok(path_segments::new(self))
1590 }
1591 }
1592
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1593 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1594 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1595 let adjust = |index: &mut u32| {
1596 *index -= old_after_path_position;
1597 *index += new_after_path_position;
1598 };
1599 if let Some(ref mut index) = self.query_start {
1600 adjust(index)
1601 }
1602 if let Some(ref mut index) = self.fragment_start {
1603 adjust(index)
1604 }
1605 self.serialization.push_str(after_path)
1606 }
1607
1608 /// Change this URL’s port number.
1609 ///
1610 /// Note that default port numbers are not reflected in the serialization.
1611 ///
/// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme,
1613 /// do nothing and return `Err`.
1614 ///
1615 /// # Examples
1616 ///
1617 /// ```
1618 /// use url::Url;
1619 /// # use std::error::Error;
1620 ///
1621 /// # fn run() -> Result<(), Box<dyn Error>> {
1622 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1623 ///
1624 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1625 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1626 ///
1627 /// url.set_port(None).map_err(|_| "cannot be base")?;
1628 /// assert_eq!(url.as_str(), "ssh://example.net/");
1629 /// # Ok(())
1630 /// # }
1631 /// # run().unwrap();
1632 /// ```
1633 ///
1634 /// Known default port numbers are not reflected:
1635 ///
1636 /// ```rust
1637 /// use url::Url;
1638 /// # use std::error::Error;
1639 ///
1640 /// # fn run() -> Result<(), Box<dyn Error>> {
1641 /// let mut url = Url::parse("https://example.org/")?;
1642 ///
1643 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1644 /// assert!(url.port().is_none());
1645 /// # Ok(())
1646 /// # }
1647 /// # run().unwrap();
1648 /// ```
1649 ///
1650 /// Cannot set port for cannot-be-a-base URLs:
1651 ///
1652 /// ```
1653 /// use url::Url;
1654 /// # use url::ParseError;
1655 ///
1656 /// # fn run() -> Result<(), ParseError> {
1657 /// let mut url = Url::parse("mailto:rms@example.net")?;
1658 ///
1659 /// let result = url.set_port(Some(80));
1660 /// assert!(result.is_err());
1661 ///
1662 /// let result = url.set_port(None);
1663 /// assert!(result.is_err());
1664 /// # Ok(())
1665 /// # }
1666 /// # run().unwrap();
1667 /// ```
1668 #[allow(clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1669 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1670 // has_host implies !cannot_be_a_base
1671 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1672 return Err(());
1673 }
1674 if port.is_some() && port == parser::default_port(self.scheme()) {
1675 port = None
1676 }
1677 self.set_port_internal(port);
1678 Ok(())
1679 }
1680
fn set_port_internal(&mut self, port: Option<u16>) {
    // Rewrites the port text in `serialization` when the port actually changes,
    // shifts the offsets stored after it, and finally stores `port` in `self.port`.
    // No validation or default-port normalization happens here; `set_port`
    // does both before delegating to this function.
    match (self.port, port) {
        // No port before and none requested: nothing to rewrite.
        (None, None) => {}
        // Removing a port: cut the text between the end of the host and the
        // start of the path (the last arm below writes `:<port>` there).
        (Some(_), None) => {
            self.serialization
                .drain(self.host_end as usize..self.path_start as usize);
            // Everything after the removed text moves left by its length.
            let offset = self.path_start - self.host_end;
            self.path_start = self.host_end;
            if let Some(ref mut index) = self.query_start {
                *index -= offset
            }
            if let Some(ref mut index) = self.fragment_start {
                *index -= offset
            }
        }
        // Same port as before: the serialization is already correct.
        (Some(old), Some(new)) if old == new => {}
        // Adding a port, or replacing it with a different one.
        (_, Some(new)) => {
            // Save the path and everything after it, cut the serialization at
            // the end of the host, write the new `:<port>`, then re-append.
            let path_and_after = self.slice(self.path_start..).to_owned();
            self.serialization.truncate(self.host_end as usize);
            write!(&mut self.serialization, ":{}", new).unwrap();
            let old_path_start = self.path_start;
            let new_path_start = to_u32(self.serialization.len()).unwrap();
            self.path_start = new_path_start;
            // Re-base an offset from the old path start onto the new one
            // (subtract first, then add).
            let adjust = |index: &mut u32| {
                *index -= old_path_start;
                *index += new_path_start;
            };
            if let Some(ref mut index) = self.query_start {
                adjust(index)
            }
            if let Some(ref mut index) = self.fragment_start {
                adjust(index)
            }
            self.serialization.push_str(&path_and_after);
        }
    }
    // Record the new value in every case, including the no-op arms.
    self.port = port;
}
1719
1720 /// Change this URL’s host.
1721 ///
1722 /// Removing the host (calling this with `None`)
1723 /// will also remove any username, password, and port number.
1724 ///
1725 /// # Examples
1726 ///
1727 /// Change host:
1728 ///
1729 /// ```
1730 /// use url::Url;
1731 /// # use url::ParseError;
1732 ///
1733 /// # fn run() -> Result<(), ParseError> {
1734 /// let mut url = Url::parse("https://example.net")?;
1735 /// let result = url.set_host(Some("rust-lang.org"));
1736 /// assert!(result.is_ok());
1737 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1738 /// # Ok(())
1739 /// # }
1740 /// # run().unwrap();
1741 /// ```
1742 ///
1743 /// Remove host:
1744 ///
1745 /// ```
1746 /// use url::Url;
1747 /// # use url::ParseError;
1748 ///
1749 /// # fn run() -> Result<(), ParseError> {
1750 /// let mut url = Url::parse("foo://example.net")?;
1751 /// let result = url.set_host(None);
1752 /// assert!(result.is_ok());
1753 /// assert_eq!(url.as_str(), "foo:/");
1754 /// # Ok(())
1755 /// # }
1756 /// # run().unwrap();
1757 /// ```
1758 ///
1759 /// Cannot remove host for 'special' schemes (e.g. `http`):
1760 ///
1761 /// ```
1762 /// use url::Url;
1763 /// # use url::ParseError;
1764 ///
1765 /// # fn run() -> Result<(), ParseError> {
1766 /// let mut url = Url::parse("https://example.net")?;
1767 /// let result = url.set_host(None);
1768 /// assert!(result.is_err());
1769 /// assert_eq!(url.as_str(), "https://example.net/");
1770 /// # Ok(())
1771 /// # }
1772 /// # run().unwrap();
1773 /// ```
1774 ///
1775 /// Cannot change or remove host for cannot-be-a-base URLs:
1776 ///
1777 /// ```
1778 /// use url::Url;
1779 /// # use url::ParseError;
1780 ///
1781 /// # fn run() -> Result<(), ParseError> {
1782 /// let mut url = Url::parse("mailto:rms@example.net")?;
1783 ///
1784 /// let result = url.set_host(Some("rust-lang.org"));
1785 /// assert!(result.is_err());
1786 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1787 ///
1788 /// let result = url.set_host(None);
1789 /// assert!(result.is_err());
1790 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1791 /// # Ok(())
1792 /// # }
1793 /// # run().unwrap();
1794 /// ```
1795 ///
1796 /// # Errors
1797 ///
1798 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1799 /// a [`ParseError`] variant will be returned.
1800 ///
1801 /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1802 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1803 if self.cannot_be_a_base() {
1804 return Err(ParseError::SetHostOnCannotBeABaseUrl);
1805 }
1806
1807 let scheme_type = SchemeType::from(self.scheme());
1808
1809 if let Some(host) = host {
1810 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
1811 return Err(ParseError::EmptyHost);
1812 }
1813 let mut host_substr = host;
1814 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1815 if !host.starts_with('[') || !host.ends_with(']') {
1816 match host.find(':') {
1817 Some(0) => {
1818 // If buffer is the empty string, validation error, return failure.
1819 return Err(ParseError::InvalidDomainCharacter);
1820 }
1821 // Let host be the result of host parsing buffer
1822 Some(colon_index) => {
1823 host_substr = &host[..colon_index];
1824 }
1825 None => {}
1826 }
1827 }
1828 if SchemeType::from(self.scheme()).is_special() {
1829 self.set_host_internal(Host::parse(host_substr)?, None);
1830 } else {
1831 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1832 }
1833 } else if self.has_host() {
1834 if scheme_type.is_special() && !scheme_type.is_file() {
1835 return Err(ParseError::EmptyHost);
1836 } else if self.serialization.len() == self.path_start as usize {
1837 self.serialization.push('/');
1838 }
1839 debug_assert!(self.byte_at(self.scheme_end) == b':');
1840 debug_assert!(self.byte_at(self.path_start) == b'/');
1841
1842 let new_path_start = if scheme_type.is_file() {
1843 self.scheme_end + 3
1844 } else {
1845 self.scheme_end + 1
1846 };
1847
1848 self.serialization
1849 .drain(new_path_start as usize..self.path_start as usize);
1850 let offset = self.path_start - new_path_start;
1851 self.path_start = new_path_start;
1852 self.username_end = new_path_start;
1853 self.host_start = new_path_start;
1854 self.host_end = new_path_start;
1855 self.port = None;
1856 if let Some(ref mut index) = self.query_start {
1857 *index -= offset
1858 }
1859 if let Some(ref mut index) = self.fragment_start {
1860 *index -= offset
1861 }
1862 }
1863 Ok(())
1864 }
1865
1866 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
    // Replaces the host text in `serialization` (and the port text too when
    // `opt_new_port` is `Some`), then shifts the offsets stored after it.

    // The suffix is the part of the serialization that is kept verbatim.
    // When the port is being replaced it starts at the path, so the old port
    // text is dropped; otherwise it starts right after the old host, so any
    // existing port text is carried along unchanged.
    let old_suffix_pos = if opt_new_port.is_some() {
        self.path_start
    } else {
        self.host_end
    };
    let suffix = self.slice(old_suffix_pos..).to_owned();
    self.serialization.truncate(self.host_start as usize);
    if !self.has_authority() {
        // No authority yet: only ":" sits between the scheme and the host
        // position. Insert "//" and move the authority offsets past it.
        debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
        debug_assert!(self.username_end == self.host_start);
        self.serialization.push('/');
        self.serialization.push('/');
        self.username_end += 2;
        self.host_start += 2;
    }
    // Write the new host via its `Display` output and record where it ends.
    write!(&mut self.serialization, "{}", host).unwrap();
    self.host_end = to_u32(self.serialization.len()).unwrap();
    self.host = host.into();

    if let Some(new_port) = opt_new_port {
        // `Some(None)` clears the port; `Some(Some(p))` writes ":p".
        self.port = new_port;
        if let Some(port) = new_port {
            write!(&mut self.serialization, ":{}", port).unwrap();
        }
    }
    let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
    self.serialization.push_str(&suffix);

    // Re-base an offset from the old suffix position onto the new one
    // (subtract first, then add).
    let adjust = |index: &mut u32| {
        *index -= old_suffix_pos;
        *index += new_suffix_pos;
    };
    adjust(&mut self.path_start);
    if let Some(ref mut index) = self.query_start {
        adjust(index)
    }
    if let Some(ref mut index) = self.fragment_start {
        adjust(index)
    }
}
1908
1909 /// Change this URL’s host to the given IP address.
1910 ///
1911 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
1912 ///
1913 /// Compared to `Url::set_host`, this skips the host parser.
1914 ///
1915 /// # Examples
1916 ///
1917 /// ```rust
1918 /// use url::{Url, ParseError};
1919 ///
1920 /// # fn run() -> Result<(), ParseError> {
1921 /// let mut url = Url::parse("http://example.com")?;
1922 /// url.set_ip_host("127.0.0.1".parse().unwrap());
1923 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1924 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
1925 /// # Ok(())
1926 /// # }
1927 /// # run().unwrap();
1928 /// ```
1929 ///
1930 /// Cannot change URL's from mailto(cannot-be-base) to ip:
1931 ///
1932 /// ```rust
1933 /// use url::{Url, ParseError};
1934 ///
1935 /// # fn run() -> Result<(), ParseError> {
1936 /// let mut url = Url::parse("mailto:rms@example.com")?;
1937 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
1938 ///
1939 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
1940 /// assert!(result.is_err());
1941 /// # Ok(())
1942 /// # }
1943 /// # run().unwrap();
1944 /// ```
1945 ///
1946 #[allow(clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>1947 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
1948 if self.cannot_be_a_base() {
1949 return Err(());
1950 }
1951
1952 let address = match address {
1953 IpAddr::V4(address) => Host::Ipv4(address),
1954 IpAddr::V6(address) => Host::Ipv6(address),
1955 };
1956 self.set_host_internal(address, None);
1957 Ok(())
1958 }
1959
/// Change this URL’s password.
///
/// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
/// `Err` is also returned when the host is the empty domain or the scheme is `file`.
///
/// `Some(password)` stores the password percent-encoded with the `USERINFO` set,
/// replacing any existing one. `None` removes an existing password, if there is one.
///
/// # Examples
///
/// ```rust
/// use url::{Url, ParseError};
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("mailto:rmz@example.com")?;
/// let result = url.set_password(Some("secret_password"));
/// assert!(result.is_err());
///
/// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
/// let result = url.set_password(Some("secret_password"));
/// assert_eq!(url.password(), Some("secret_password"));
///
/// let mut url = Url::parse("ftp://user2:@example.com")?;
/// let result = url.set_password(Some("secret2"));
/// assert!(result.is_ok());
/// assert_eq!(url.password(), Some("secret2"));
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
#[allow(clippy::result_unit_err)]
pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
    // has_host implies !cannot_be_a_base
    if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
        return Err(());
    }
    if let Some(password) = password {
        // Keep everything from the host onwards so it can be re-appended afterwards.
        let host_and_after = self.slice(self.host_start..).to_owned();
        // Cut back to the end of the username, discarding any old `:password@`.
        self.serialization.truncate(self.username_end as usize);
        self.serialization.push(':');
        self.serialization
            .extend(utf8_percent_encode(password, USERINFO));
        self.serialization.push('@');

        let old_host_start = self.host_start;
        let new_host_start = to_u32(self.serialization.len()).unwrap();
        // Move an offset by the distance the host moved.
        let adjust = |index: &mut u32| {
            *index -= old_host_start;
            *index += new_host_start;
        };
        self.host_start = new_host_start;
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start {
            adjust(index)
        }
        if let Some(ref mut index) = self.fragment_start {
            adjust(index)
        }

        self.serialization.push_str(&host_and_after);
    } else if self.byte_at(self.username_end) == b':' {
        // If there is a password to remove
        let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
        debug_assert!(has_username_or_password);
        // NOTE(review): assumes the scheme is followed by `://` (3 bytes) — confirm.
        let username_start = self.scheme_end + 3;
        let empty_username = username_start == self.username_end;
        let start = self.username_end; // Remove the ':'
        let end = if empty_username {
            self.host_start // Remove the '@' as well
        } else {
            self.host_start - 1 // Keep the '@' to separate the username from the host
        };
        self.serialization.drain(start as usize..end as usize);
        // Everything after the removed range moves left by its length.
        let offset = end - start;
        self.host_start -= offset;
        self.host_end -= offset;
        self.path_start -= offset;
        if let Some(ref mut index) = self.query_start {
            *index -= offset
        }
        if let Some(ref mut index) = self.fragment_start {
            *index -= offset
        }
    }
    Ok(())
}
2043
/// Change this URL’s username.
///
/// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
/// `Err` is also returned when the host is the empty domain or the scheme is `file`.
///
/// The new username is stored percent-encoded with the `USERINFO` set.
/// Setting the username to its current serialized value is a no-op that returns `Ok`.
///
/// # Examples
///
/// Cannot setup username from mailto(cannot-be-base)
///
/// ```rust
/// use url::{Url, ParseError};
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("mailto:rmz@example.com")?;
/// let result = url.set_username("user1");
/// assert_eq!(url.as_str(), "mailto:rmz@example.com");
/// assert!(result.is_err());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// Setup username to user1
///
/// ```rust
/// use url::{Url, ParseError};
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("ftp://:secre1@example.com/")?;
/// let result = url.set_username("user1");
/// assert!(result.is_ok());
/// assert_eq!(url.username(), "user1");
/// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
#[allow(clippy::result_unit_err)]
pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
    // has_host implies !cannot_be_a_base
    if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
        return Err(());
    }
    // The username begins right after the `://` that follows the scheme.
    let username_start = self.scheme_end + 3;
    debug_assert!(self.slice(self.scheme_end..username_start) == "://");
    if self.slice(username_start..self.username_end) == username {
        return Ok(());
    }
    // Save everything after the old username, then write the new one in its place.
    let after_username = self.slice(self.username_end..).to_owned();
    self.serialization.truncate(username_start as usize);
    self.serialization
        .extend(utf8_percent_encode(username, USERINFO));

    // `removed_bytes` starts as the old end-of-username offset and `added_bytes`
    // as the new one; both are bumped below when an `@` is dropped or inserted.
    let mut removed_bytes = self.username_end;
    self.username_end = to_u32(self.serialization.len()).unwrap();
    let mut added_bytes = self.username_end;

    let new_username_is_empty = self.username_end == username_start;
    match (new_username_is_empty, after_username.chars().next()) {
        // Username cleared and no password follows: drop the now-useless `@`.
        (true, Some('@')) => {
            removed_bytes += 1;
            self.serialization.push_str(&after_username[1..]);
        }
        // A separator (`@` or `:`) is already in place, or none is needed.
        (false, Some('@')) | (_, Some(':')) | (true, _) => {
            self.serialization.push_str(&after_username);
        }
        // Non-empty username with no separator after it: insert an `@`.
        (false, _) => {
            added_bytes += 1;
            self.serialization.push('@');
            self.serialization.push_str(&after_username);
        }
    }

    // Move every later offset by the net change in length.
    let adjust = |index: &mut u32| {
        *index -= removed_bytes;
        *index += added_bytes;
    };
    adjust(&mut self.host_start);
    adjust(&mut self.host_end);
    adjust(&mut self.path_start);
    if let Some(ref mut index) = self.query_start {
        adjust(index)
    }
    if let Some(ref mut index) = self.fragment_start {
        adjust(index)
    }
    Ok(())
}
2130
/// Change this URL’s scheme.
///
/// Do nothing and return `Err` under the following circumstances:
///
/// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
/// * If this URL is cannot-be-a-base and the new scheme is one of
///   `http`, `https`, `ws`, `wss` or `ftp`
/// * If either the old or new scheme is `http`, `https`, `ws`,
///   `wss` or `ftp` and the other is not one of these
/// * If the new scheme is `file` and this URL includes credentials
///   or has a non-null port
/// * If this URL's scheme is `file` and its host is empty or null
///
/// See also [the URL specification's section on legal scheme state
/// overrides](https://url.spec.whatwg.org/#scheme-state).
///
/// # Examples
///
/// Change the URL’s scheme from `https` to `http`:
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("https://example.net")?;
/// let result = url.set_scheme("http");
/// assert_eq!(url.as_str(), "http://example.net/");
/// assert!(result.is_ok());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
/// Change the URL’s scheme from `foo` to `bar`:
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("foo://example.net")?;
/// let result = url.set_scheme("bar");
/// assert_eq!(url.as_str(), "bar://example.net");
/// assert!(result.is_ok());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// Cannot change URL’s scheme from `https` to `foõ`:
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("https://example.net")?;
/// let result = url.set_scheme("foõ");
/// assert_eq!(url.as_str(), "https://example.net/");
/// assert!(result.is_err());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("mailto:rms@example.net")?;
/// let result = url.set_scheme("https");
/// assert_eq!(url.as_str(), "mailto:rms@example.net");
/// assert!(result.is_err());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
/// Cannot change the URL’s scheme from `foo` to `https`:
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("foo://example.net")?;
/// let result = url.set_scheme("https");
/// assert_eq!(url.as_str(), "foo://example.net");
/// assert!(result.is_err());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
/// Cannot change the URL’s scheme from `http` to `foo`:
///
/// ```
/// use url::Url;
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let mut url = Url::parse("http://example.net")?;
/// let result = url.set_scheme("foo");
/// assert_eq!(url.as_str(), "http://example.net/");
/// assert!(result.is_err());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
#[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
    // Parse the candidate scheme into a scratch buffer (`parser.serialization`);
    // in the spec wording quoted below, "buffer" is this new scheme.
    let mut parser = Parser::for_setter(String::new());
    let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
    let new_scheme_type = SchemeType::from(&parser.serialization);
    let old_scheme_type = SchemeType::from(self.scheme());
    // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
    if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
        // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
        (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
        // If url includes credentials or has a non-null port, and buffer is "file", then return.
        // If url’s scheme is "file" and its host is an empty host or null, then return.
        // NOTE(review): the check below is `has_authority()`, which may be broader than
        // the two spec conditions quoted above — confirm.
        (new_scheme_type.is_file() && self.has_authority())
    {
        return Err(());
    }

    // Reject trailing input after the scheme, and special schemes on host-less URLs.
    if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
        return Err(());
    }
    let old_scheme_end = self.scheme_end;
    let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
    // Move an offset by the difference in scheme length.
    let adjust = |index: &mut u32| {
        *index -= old_scheme_end;
        *index += new_scheme_end;
    };

    self.scheme_end = new_scheme_end;
    adjust(&mut self.username_end);
    adjust(&mut self.host_start);
    adjust(&mut self.host_end);
    adjust(&mut self.path_start);
    if let Some(ref mut index) = self.query_start {
        adjust(index)
    }
    if let Some(ref mut index) = self.fragment_start {
        adjust(index)
    }

    // Append everything after the old scheme to the new scheme, then swap buffers.
    parser.serialization.push_str(self.slice(old_scheme_end..));
    self.serialization = parser.serialization;

    // Update the port so it can be removed
    // If it is the scheme's default
    // we don't mind it silently failing
    // if there was no port in the first place
    let previous_port = self.port();
    let _ = self.set_port(previous_port);

    Ok(())
}
2292
2293 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2294 ///
2295 /// This returns `Err` if the given path is not absolute or,
2296 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2297 ///
2298 /// # Examples
2299 ///
2300 /// On Unix-like platforms:
2301 ///
2302 /// ```
2303 /// # if cfg!(unix) {
2304 /// use url::Url;
2305 ///
2306 /// # fn run() -> Result<(), ()> {
2307 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2308 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2309 ///
2310 /// let url = Url::from_file_path("../foo.txt");
2311 /// assert!(url.is_err());
2312 ///
2313 /// let url = Url::from_file_path("https://google.com/");
2314 /// assert!(url.is_err());
2315 /// # Ok(())
2316 /// # }
2317 /// # run().unwrap();
2318 /// # }
2319 /// ```
2320 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2321 #[allow(clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2322 pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2323 let mut serialization = "file://".to_owned();
2324 let host_start = serialization.len() as u32;
2325 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2326 Ok(Url {
2327 serialization,
2328 scheme_end: "file".len() as u32,
2329 username_end: host_start,
2330 host_start,
2331 host_end,
2332 host,
2333 port: None,
2334 path_start: host_end,
2335 query_start: None,
2336 fragment_start: None,
2337 })
2338 }
2339
2340 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2341 ///
2342 /// This returns `Err` if the given path is not absolute or,
2343 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2344 ///
2345 /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2346 /// so that the entire path is considered when using this URL as a base URL.
2347 ///
2348 /// For example:
2349 ///
2350 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2351 /// as the base URL is `file:///var/www/index.html`
2352 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2353 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2354 ///
2355 /// Note that `std::path` does not consider trailing slashes significant
2356 /// and usually does not include them (e.g. in `Path::parent()`).
2357 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2358 #[allow(clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2359 pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2360 let mut url = Url::from_file_path(path)?;
2361 if !url.serialization.ends_with('/') {
2362 url.serialization.push('/')
2363 }
2364 Ok(url)
2365 }
2366
/// Serialize with Serde using the internal representation of the `Url` struct,
/// i.e. the serialization string together with all of its component offsets.
///
/// The corresponding `deserialize_internal` method sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // The pattern is deliberately exhaustive (no `..`): adding or removing a field
    // of `Url` then forces this method to be updated.
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2408
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some
/// invariant-checking for speed, compared to the `Deserialize` trait impl:
/// the fields are taken as they come, and `check_invariants` is only run
/// when debug assertions are enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    // Field order must match the tuple written by `serialize_internal`.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    // Validation only happens in debug builds.
    if cfg!(debug_assertions) {
        url.check_invariants().map_err(|reason| {
            let reason: &str = &reason;
            Error::invalid_value(Unexpected::Other("value"), &reason)
        })?
    }
    Ok(url)
}
2454
2455 /// Assuming the URL is in the `file` scheme or similar,
2456 /// convert its path to an absolute `std::path::Path`.
2457 ///
2458 /// **Note:** This does not actually check the URL’s `scheme`,
2459 /// and may give nonsensical results for other schemes.
2460 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2461 ///
2462 /// ```
2463 /// # use url::Url;
2464 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2465 /// let path = url.to_file_path();
2466 /// ```
2467 ///
2468 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2469 /// `file:` URLs may have a non-local host),
2470 /// or if `Path::new_opt()` returns `None`.
2471 /// (That is, if the percent-decoded path contains a NUL byte or,
2472 /// for a Windows path, is not UTF-8.)
2473 #[inline]
2474 #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
2475 #[allow(clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2476 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2477 if let Some(segments) = self.path_segments() {
2478 let host = match self.host() {
2479 None | Some(Host::Domain("localhost")) => None,
2480 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2481 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2482 }
2483 _ => return Err(()),
2484 };
2485
2486 return file_url_segments_to_pathbuf(host, segments);
2487 }
2488 Err(())
2489 }
2490
2491 // Private helper methods:
2492
2493 #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2494 fn slice<R>(&self, range: R) -> &str
2495 where
2496 R: RangeArg,
2497 {
2498 range.slice_of(&self.serialization)
2499 }
2500
2501 #[inline]
byte_at(&self, i: u32) -> u82502 fn byte_at(&self, i: u32) -> u8 {
2503 self.serialization.as_bytes()[i as usize]
2504 }
2505 }
2506
2507 /// Parse a string as an URL, without a base URL or encoding override.
2508 impl str::FromStr for Url {
2509 type Err = ParseError;
2510
2511 #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2512 fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2513 Url::parse(input)
2514 }
2515 }
2516
2517 impl<'a> TryFrom<&'a str> for Url {
2518 type Error = ParseError;
2519
try_from(s: &'a str) -> Result<Self, Self::Error>2520 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2521 Url::parse(s)
2522 }
2523 }
2524
2525 /// Display the serialization of this URL.
2526 impl fmt::Display for Url {
2527 #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2528 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2529 fmt::Display::fmt(&self.serialization, formatter)
2530 }
2531 }
2532
2533 /// String conversion.
2534 impl From<Url> for String {
from(value: Url) -> String2535 fn from(value: Url) -> String {
2536 value.serialization
2537 }
2538 }
2539
2540 /// Debug the serialization of this URL.
2541 impl fmt::Debug for Url {
2542 #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2543 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2544 formatter
2545 .debug_struct("Url")
2546 .field("scheme", &self.scheme())
2547 .field("cannot_be_a_base", &self.cannot_be_a_base())
2548 .field("username", &self.username())
2549 .field("password", &self.password())
2550 .field("host", &self.host())
2551 .field("port", &self.port())
2552 .field("path", &self.path())
2553 .field("query", &self.query())
2554 .field("fragment", &self.fragment())
2555 .finish()
2556 }
2557 }
2558
/// URLs compare like their serialization.
///
/// Marker impl only: the comparison itself lives in the `PartialEq` impl.
impl Eq for Url {}
2561
2562 /// URLs compare like their serialization.
2563 impl PartialEq for Url {
2564 #[inline]
eq(&self, other: &Self) -> bool2565 fn eq(&self, other: &Self) -> bool {
2566 self.serialization == other.serialization
2567 }
2568 }
2569
2570 /// URLs compare like their serialization.
2571 impl Ord for Url {
2572 #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2573 fn cmp(&self, other: &Self) -> cmp::Ordering {
2574 self.serialization.cmp(&other.serialization)
2575 }
2576 }
2577
2578 /// URLs compare like their serialization.
2579 impl PartialOrd for Url {
2580 #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2581 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2582 self.serialization.partial_cmp(&other.serialization)
2583 }
2584 }
2585
2586 /// URLs hash like their serialization.
2587 impl hash::Hash for Url {
2588 #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2589 fn hash<H>(&self, state: &mut H)
2590 where
2591 H: hash::Hasher,
2592 {
2593 hash::Hash::hash(&self.serialization, state)
2594 }
2595 }
2596
2597 /// Return the serialization of this URL.
2598 impl AsRef<str> for Url {
2599 #[inline]
as_ref(&self) -> &str2600 fn as_ref(&self) -> &str {
2601 &self.serialization
2602 }
2603 }
2604
/// A range of `u32` byte offsets that can be used to slice a `&str`.
///
/// Implemented for `a..b`, `a..` and `..b` so callers can slice with the
/// `u32` offsets directly instead of casting to `usize` at every call site.
trait RangeArg {
    /// Return the sub-slice of `s` selected by this range.
    ///
    /// Panics under the same conditions as regular string slicing.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2629
/// Serializes this URL into a `serde` stream, as a plain string.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.as_ref();
        serializer.serialize_str(text)
    }
}
2642
/// Deserializes this URL from a `serde` stream by parsing a string.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that accepts a string and runs it through `Url::parse`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        // Report the parse error text as the "expected" description.
                        let message = err.to_string();
                        Err(E::invalid_value(Unexpected::Str(s), &message.as_str()))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2677
2678 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2679 fn path_to_file_url_segments(
2680 path: &Path,
2681 serialization: &mut String,
2682 ) -> Result<(u32, HostInternal), ()> {
2683 #[cfg(any(unix, target_os = "redox"))]
2684 use std::os::unix::prelude::OsStrExt;
2685 #[cfg(target_os = "wasi")]
2686 use std::os::wasi::prelude::OsStrExt;
2687 if !path.is_absolute() {
2688 return Err(());
2689 }
2690 let host_end = to_u32(serialization.len()).unwrap();
2691 let mut empty = true;
2692 // skip the root component
2693 for component in path.components().skip(1) {
2694 empty = false;
2695 serialization.push('/');
2696 serialization.extend(percent_encode(
2697 component.as_os_str().as_bytes(),
2698 PATH_SEGMENT,
2699 ));
2700 }
2701 if empty {
2702 // An URL’s path must not be empty.
2703 serialization.push('/');
2704 }
2705 Ok((host_end, HostInternal::None))
2706 }
2707
/// Windows variant of `path_to_file_url_segments`: forwards to
/// `path_to_file_url_segments_windows`, which is compiled on every platform.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2715
2716 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2717 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2718 fn path_to_file_url_segments_windows(
2719 path: &Path,
2720 serialization: &mut String,
2721 ) -> Result<(u32, HostInternal), ()> {
2722 use std::path::{Component, Prefix};
2723 if !path.is_absolute() {
2724 return Err(());
2725 }
2726 let mut components = path.components();
2727
2728 let host_start = serialization.len() + 1;
2729 let host_end;
2730 let host_internal;
2731
2732 match components.next() {
2733 Some(Component::Prefix(ref p)) => match p.kind() {
2734 Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2735 host_end = to_u32(serialization.len()).unwrap();
2736 host_internal = HostInternal::None;
2737 serialization.push('/');
2738 serialization.push(letter as char);
2739 serialization.push(':');
2740 }
2741 Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2742 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2743 write!(serialization, "{}", host).unwrap();
2744 host_end = to_u32(serialization.len()).unwrap();
2745 host_internal = host.into();
2746 serialization.push('/');
2747 let share = share.to_str().ok_or(())?;
2748 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2749 }
2750 _ => return Err(()),
2751 },
2752 _ => return Err(()),
2753 }
2754
2755 let mut path_only_has_prefix = true;
2756 for component in components {
2757 if component == Component::RootDir {
2758 continue;
2759 }
2760
2761 path_only_has_prefix = false;
2762 // FIXME: somehow work with non-unicode?
2763 let component = component.as_os_str().to_str().ok_or(())?;
2764
2765 serialization.push('/');
2766 serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2767 }
2768
2769 // A windows drive letter must end with a slash.
2770 if serialization.len() > host_start
2771 && parser::is_windows_drive_letter(&serialization[host_start..])
2772 && path_only_has_prefix
2773 {
2774 serialization.push('/');
2775 }
2776
2777 Ok((host_end, host_internal))
2778 }
2779
2780 #[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2781 fn file_url_segments_to_pathbuf(
2782 host: Option<&str>,
2783 segments: str::Split<'_, char>,
2784 ) -> Result<PathBuf, ()> {
2785 use std::ffi::OsStr;
2786 #[cfg(any(unix, target_os = "redox"))]
2787 use std::os::unix::prelude::OsStrExt;
2788 #[cfg(target_os = "wasi")]
2789 use std::os::wasi::prelude::OsStrExt;
2790
2791 if host.is_some() {
2792 return Err(());
2793 }
2794
2795 let mut bytes = if cfg!(target_os = "redox") {
2796 b"file:".to_vec()
2797 } else {
2798 Vec::new()
2799 };
2800
2801 for segment in segments {
2802 bytes.push(b'/');
2803 bytes.extend(percent_decode(segment.as_bytes()));
2804 }
2805
2806 // A windows drive letter must end with a slash.
2807 if bytes.len() > 2
2808 && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
2809 && matches!(bytes[bytes.len() - 1], b':' | b'|')
2810 {
2811 bytes.push(b'/');
2812 }
2813
2814 let os_str = OsStr::from_bytes(&bytes);
2815 let path = PathBuf::from(os_str);
2816
2817 debug_assert!(
2818 path.is_absolute(),
2819 "to_file_path() failed to produce an absolute Path"
2820 );
2821
2822 Ok(path)
2823 }
2824
/// Windows variant of `file_url_segments_to_pathbuf`: forwards to
/// `file_url_segments_to_pathbuf_windows`, which is compiled on every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2832
2833 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2834 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2835 fn file_url_segments_to_pathbuf_windows(
2836 host: Option<&str>,
2837 mut segments: str::Split<'_, char>,
2838 ) -> Result<PathBuf, ()> {
2839 let mut string = if let Some(host) = host {
2840 r"\\".to_owned() + host
2841 } else {
2842 let first = segments.next().ok_or(())?;
2843
2844 match first.len() {
2845 2 => {
2846 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2847 return Err(());
2848 }
2849
2850 first.to_owned()
2851 }
2852
2853 4 => {
2854 if !first.starts_with(parser::ascii_alpha) {
2855 return Err(());
2856 }
2857 let bytes = first.as_bytes();
2858 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2859 return Err(());
2860 }
2861
2862 first[0..1].to_owned() + ":"
2863 }
2864
2865 _ => return Err(()),
2866 }
2867 };
2868
2869 for segment in segments {
2870 string.push('\\');
2871
2872 // Currently non-unicode windows paths cannot be represented
2873 match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2874 Ok(s) => string.push_str(&s),
2875 Err(..) => return Err(()),
2876 }
2877 }
2878 let path = PathBuf::from(string);
2879 debug_assert!(
2880 path.is_absolute(),
2881 "to_file_path() failed to produce an absolute Path"
2882 );
2883 Ok(path)
2884 }
2885
2886 /// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
2887 #[derive(Debug)]
2888 pub struct UrlQuery<'a> {
2889 url: Option<&'a mut Url>,
2890 fragment: Option<String>,
2891 }
2892
2893 // `as_mut_string` string here exposes the internal serialization of an `Url`,
2894 // which should not be exposed to users.
2895 // We achieve that by not giving users direct access to `UrlQuery`:
2896 // * Its fields are private
2897 // (and so can not be constructed with struct literal syntax outside of this crate),
2898 // * It has no constructor
2899 // * It is only visible (on the type level) to users in the return type of
2900 // `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
2901 // * `Serializer` keeps its target in a private field
2902 // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
2903 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String2904 fn as_mut_string(&mut self) -> &mut String {
2905 &mut self.url.as_mut().unwrap().serialization
2906 }
2907
finish(mut self) -> &'a mut Url2908 fn finish(mut self) -> &'a mut Url {
2909 let url = self.url.take().unwrap();
2910 url.restore_already_parsed_fragment(self.fragment.take());
2911 url
2912 }
2913
2914 type Finished = &'a mut Url;
2915 }
2916
2917 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)2918 fn drop(&mut self) {
2919 if let Some(url) = self.url.take() {
2920 url.restore_already_parsed_fragment(self.fragment.take())
2921 }
2922 }
2923 }
2924