1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 /*!
10
rust-url is an implementation of the [URL Standard](https://url.spec.whatwg.org/)
for the [Rust](https://www.rust-lang.org/) programming language.
13
14
15 # URL parsing and data structures
16
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19 ```
20 use url::{Url, ParseError};
21
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24
25 Let’s parse a valid URL and look at its components.
26
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34
35
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57
58 ```
59 use url::Url;
60 # use url::ParseError;
61
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75
76 ## Serde
77
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79
80 # Base URL
81
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89
90 ```
91 use url::{Url, ParseError};
92
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95
96 Use the `join` method on an `Url` to use it as a base URL:
97
98 ```
99 use url::Url;
100 # use url::ParseError;
101
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110
111 # Feature: `serde`
112
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121 */
122
123 #![doc(html_root_url = "https://docs.rs/url/2.2.2")]
124
125 #[macro_use]
126 extern crate matches;
127 pub use form_urlencoded;
128
129 #[cfg(feature = "serde")]
130 extern crate serde;
131
132 use crate::host::HostInternal;
133 use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
134 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
135 use std::borrow::Borrow;
136 use std::cmp;
137 use std::fmt::{self, Write};
138 use std::hash;
139 use std::io;
140 use std::mem;
141 use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
142 use std::ops::{Range, RangeFrom, RangeTo};
143 use std::path::{Path, PathBuf};
144 use std::str;
145
146 use std::convert::TryFrom;
147
148 pub use crate::host::Host;
149 pub use crate::origin::{OpaqueOrigin, Origin};
150 pub use crate::parser::{ParseError, SyntaxViolation};
151 pub use crate::path_segments::PathSegmentsMut;
152 pub use crate::slicing::Position;
153 pub use form_urlencoded::EncodingOverride;
154
155 mod host;
156 mod origin;
157 mod parser;
158 mod path_segments;
159 mod slicing;
160
161 #[doc(hidden)]
162 pub mod quirks;
163
164 /// A parsed URL record.
///
/// The URL is stored once, as its serialized string; the numeric fields are
/// `u32` offsets into that string marking where each component begins or ends.
/// Accessors such as `scheme`, `username` and `host_str` return sub-slices of
/// the serialization delimited by these offsets.
165 #[derive(Clone)]
166 pub struct Url {
167 /// Syntax in pseudo-BNF:
168 ///
169 /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
170 /// non-hierarchical = non-hierarchical-path
171 /// non-hierarchical-path = /* Does not start with "/" */
172 /// hierarchical = authority? hierarchical-path
173 /// authority = "//" userinfo? host [ ":" port ]?
174 /// userinfo = username [ ":" password ]? "@"
175 /// hierarchical-path = [ "/" path-segment ]+
176 serialization: String,
177
178 // Components
// `scheme_end` is the offset of the ':' that terminates the scheme.
179 scheme_end: u32, // Before ':'
180 username_end: u32, // Before ':' (if a password is given) or '@' (if not)
// `host_start..host_end` delimits the host text returned by `host_str`.
181 host_start: u32,
182 host_end: u32,
// Kind of host that was parsed; `HostInternal::None` means the URL has no host.
183 host: HostInternal,
// Explicit port; `check_invariants` requires `None` when `path_start == host_end`.
184 port: Option<u16>,
185 path_start: u32, // Before initial '/', if any
186 query_start: Option<u32>, // Before '?', unlike Position::QueryStart
187 fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
188 }
189
190 /// Full configuration for the URL parser.
191 #[derive(Copy, Clone)]
192 pub struct ParseOptions<'a> {
193 base_url: Option<&'a Url>,
194 encoding_override: EncodingOverride<'a>,
195 violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
196 }
197
198 impl<'a> ParseOptions<'a> {
199 /// Change the base URL
base_url(mut self, new: Option<&'a Url>) -> Self200 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
201 self.base_url = new;
202 self
203 }
204
205 /// Override the character encoding of query strings.
206 /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self207 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
208 self.encoding_override = new;
209 self
210 }
211
212 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
213 /// when it occurs during parsing. Note that since the provided function is
214 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
215 /// a `RefCell`, to collect the violations.
216 ///
217 /// ## Example
218 /// ```
219 /// use std::cell::RefCell;
220 /// use url::{Url, SyntaxViolation};
221 /// # use url::ParseError;
222 /// # fn run() -> Result<(), url::ParseError> {
223 /// let violations = RefCell::new(Vec::new());
224 /// let url = Url::options()
225 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
226 /// .parse("https:////example.com")?;
227 /// assert_eq!(url.as_str(), "https://example.com/");
228 /// assert_eq!(violations.into_inner(),
229 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
230 /// # Ok(())
231 /// # }
232 /// # run().unwrap();
233 /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self234 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
235 self.violation_fn = new;
236 self
237 }
238
239 /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>240 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
241 Parser {
242 serialization: String::with_capacity(input.len()),
243 base_url: self.base_url,
244 query_encoding_override: self.encoding_override,
245 violation_fn: self.violation_fn,
246 context: Context::UrlParser,
247 }
248 .parse_url(input)
249 }
250 }
251
252 impl Url {
253 /// Parse an absolute URL from a string.
254 ///
255 /// # Examples
256 ///
257 /// ```rust
258 /// use url::Url;
259 /// # use url::ParseError;
260 ///
261 /// # fn run() -> Result<(), ParseError> {
262 /// let url = Url::parse("https://example.net")?;
263 /// # Ok(())
264 /// # }
265 /// # run().unwrap();
266 /// ```
267 ///
268 /// # Errors
269 ///
270 /// If the function can not parse an absolute URL from the given string,
271 /// a [`ParseError`] variant will be returned.
272 ///
273 /// [`ParseError`]: enum.ParseError.html
274 #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>275 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
276 Url::options().parse(input)
277 }
278
279 /// Parse an absolute URL from a string and add params to its query string.
280 ///
281 /// Existing params are not removed.
282 ///
283 /// # Examples
284 ///
285 /// ```rust
286 /// use url::Url;
287 /// # use url::ParseError;
288 ///
289 /// # fn run() -> Result<(), ParseError> {
290 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
291 /// &[("lang", "rust"), ("browser", "servo")])?;
292 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
293 /// # Ok(())
294 /// # }
295 /// # run().unwrap();
296 /// ```
297 ///
298 /// # Errors
299 ///
300 /// If the function can not parse an absolute URL from the given string,
301 /// a [`ParseError`] variant will be returned.
302 ///
303 /// [`ParseError`]: enum.ParseError.html
304 #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,305 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
306 where
307 I: IntoIterator,
308 I::Item: Borrow<(K, V)>,
309 K: AsRef<str>,
310 V: AsRef<str>,
311 {
312 let mut url = Url::options().parse(input);
313
314 if let Ok(ref mut url) = url {
315 url.query_pairs_mut().extend_pairs(iter);
316 }
317
318 url
319 }
320
321 /// Parse a string as an URL, with this URL as the base URL.
322 ///
323 /// The inverse of this is [`make_relative`].
324 ///
325 /// Note: a trailing slash is significant.
326 /// Without it, the last path component is considered to be a “file” name
327 /// to be removed to get at the “directory” that is used as the base:
328 ///
329 /// # Examples
330 ///
331 /// ```rust
332 /// use url::Url;
333 /// # use url::ParseError;
334 ///
335 /// # fn run() -> Result<(), ParseError> {
336 /// let base = Url::parse("https://example.net/a/b.html")?;
337 /// let url = base.join("c.png")?;
338 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
339 ///
340 /// let base = Url::parse("https://example.net/a/b/")?;
341 /// let url = base.join("c.png")?;
342 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
343 /// # Ok(())
344 /// # }
345 /// # run().unwrap();
346 /// ```
347 ///
348 /// # Errors
349 ///
350 /// If the function can not parse an URL from the given string
351 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
352 ///
353 /// [`ParseError`]: enum.ParseError.html
354 /// [`make_relative`]: #method.make_relative
355 #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>356 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
357 Url::options().base_url(Some(self)).parse(input)
358 }
359
360 /// Creates a relative URL if possible, with this URL as the base URL.
361 ///
362 /// This is the inverse of [`join`].
363 ///
364 /// # Examples
365 ///
366 /// ```rust
367 /// use url::Url;
368 /// # use url::ParseError;
369 ///
370 /// # fn run() -> Result<(), ParseError> {
371 /// let base = Url::parse("https://example.net/a/b.html")?;
372 /// let url = Url::parse("https://example.net/a/c.png")?;
373 /// let relative = base.make_relative(&url);
374 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
375 ///
376 /// let base = Url::parse("https://example.net/a/b/")?;
377 /// let url = Url::parse("https://example.net/a/b/c.png")?;
378 /// let relative = base.make_relative(&url);
379 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
380 ///
381 /// let base = Url::parse("https://example.net/a/b/")?;
382 /// let url = Url::parse("https://example.net/a/d/c.png")?;
383 /// let relative = base.make_relative(&url);
384 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
385 ///
386 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
387 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
388 /// let relative = base.make_relative(&url);
389 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
390 /// # Ok(())
391 /// # }
392 /// # run().unwrap();
393 /// ```
394 ///
395 /// # Errors
396 ///
397 /// If this URL can't be a base for the given URL, `None` is returned.
398 /// This is for example the case if the scheme, host or port are not the same.
399 ///
400 /// [`join`]: #method.join
make_relative(&self, url: &Url) -> Option<String>401 pub fn make_relative(&self, url: &Url) -> Option<String> {
402 if self.cannot_be_a_base() {
403 return None;
404 }
405
406 // Scheme, host and port need to be the same
407 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
408 return None;
409 }
410
411 // We ignore username/password at this point
412
413 // The path has to be transformed
414 let mut relative = String::new();
415
416 // Extract the filename of both URIs, these need to be handled separately
417 fn extract_path_filename(s: &str) -> (&str, &str) {
418 let last_slash_idx = s.rfind('/').unwrap_or(0);
419 let (path, filename) = s.split_at(last_slash_idx);
420 if filename.is_empty() {
421 (path, "")
422 } else {
423 (path, &filename[1..])
424 }
425 }
426
427 let (base_path, base_filename) = extract_path_filename(self.path());
428 let (url_path, url_filename) = extract_path_filename(url.path());
429
430 let mut base_path = base_path.split('/').peekable();
431 let mut url_path = url_path.split('/').peekable();
432
433 // Skip over the common prefix
434 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
435 base_path.next();
436 url_path.next();
437 }
438
439 // Add `..` segments for the remainder of the base path
440 for base_path_segment in base_path {
441 // Skip empty last segments
442 if base_path_segment.is_empty() {
443 break;
444 }
445
446 if !relative.is_empty() {
447 relative.push('/');
448 }
449
450 relative.push_str("..");
451 }
452
453 // Append the remainder of the other URI
454 for url_path_segment in url_path {
455 if !relative.is_empty() {
456 relative.push('/');
457 }
458
459 relative.push_str(url_path_segment);
460 }
461
462 // Add the filename if they are not the same
463 if base_filename != url_filename {
464 // If the URIs filename is empty this means that it was a directory
465 // so we'll have to append a '/'.
466 //
467 // Otherwise append it directly as the new filename.
468 if url_filename.is_empty() {
469 relative.push('/');
470 } else {
471 if !relative.is_empty() {
472 relative.push('/');
473 }
474 relative.push_str(url_filename);
475 }
476 }
477
478 // Query and fragment are only taken from the other URI
479 if let Some(query) = url.query() {
480 relative.push('?');
481 relative.push_str(query);
482 }
483
484 if let Some(fragment) = url.fragment() {
485 relative.push('#');
486 relative.push_str(fragment);
487 }
488
489 Some(relative)
490 }
491
492 /// Return a default `ParseOptions` that can fully configure the URL parser.
493 ///
494 /// # Examples
495 ///
496 /// Get default `ParseOptions`, then change base url
497 ///
498 /// ```rust
499 /// use url::Url;
500 /// # use url::ParseError;
501 /// # fn run() -> Result<(), ParseError> {
502 /// let options = Url::options();
503 /// let api = Url::parse("https://api.example.com")?;
504 /// let base_url = options.base_url(Some(&api));
505 /// let version_url = base_url.parse("version.json")?;
506 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
507 /// # Ok(())
508 /// # }
509 /// # run().unwrap();
510 /// ```
options<'a>() -> ParseOptions<'a>511 pub fn options<'a>() -> ParseOptions<'a> {
512 ParseOptions {
513 base_url: None,
514 encoding_override: None,
515 violation_fn: None,
516 }
517 }
518
519 /// Return the serialization of this URL.
520 ///
521 /// This is fast since that serialization is already stored in the `Url` struct.
522 ///
523 /// # Examples
524 ///
525 /// ```rust
526 /// use url::Url;
527 /// # use url::ParseError;
528 ///
529 /// # fn run() -> Result<(), ParseError> {
530 /// let url_str = "https://example.net/";
531 /// let url = Url::parse(url_str)?;
532 /// assert_eq!(url.as_str(), url_str);
533 /// # Ok(())
534 /// # }
535 /// # run().unwrap();
536 /// ```
537 #[inline]
as_str(&self) -> &str538 pub fn as_str(&self) -> &str {
539 &self.serialization
540 }
541
542 /// Return the serialization of this URL.
543 ///
544 /// This consumes the `Url` and takes ownership of the `String` stored in it.
545 ///
546 /// # Examples
547 ///
548 /// ```rust
549 /// use url::Url;
550 /// # use url::ParseError;
551 ///
552 /// # fn run() -> Result<(), ParseError> {
553 /// let url_str = "https://example.net/";
554 /// let url = Url::parse(url_str)?;
555 /// assert_eq!(String::from(url), url_str);
556 /// # Ok(())
557 /// # }
558 /// # run().unwrap();
559 /// ```
560 #[inline]
561 #[deprecated(since = "2.3.0", note = "use Into<String>")]
into_string(self) -> String562 pub fn into_string(self) -> String {
563 self.into()
564 }
565
566 /// For internal testing, not part of the public API.
567 ///
568 /// Methods of the `Url` struct assume a number of invariants.
569 /// This checks each of these invariants and returns an `Err` describing the first one that is not met.
/// It still panics (via `expect`) if the stored port text is not a valid `u16`
/// or if the serialization cannot be parsed again.
570 /// This is for testing rust-url itself.
571 #[doc(hidden)]
check_invariants(&self) -> Result<(), String>572 pub fn check_invariants(&self) -> Result<(), String> {
// Local replacements for `assert!` / `assert_eq!`: instead of panicking they
// return `Err(String)` from this function. The message contains the
// stringified expression(s) and the serialization of the URL being checked.
573 macro_rules! assert {
574 ($x: expr) => {
575 if !$x {
576 return Err(format!(
577 "!( {} ) for URL {:?}",
578 stringify!($x),
579 self.serialization
580 ));
581 }
582 };
583 }
584
585 macro_rules! assert_eq {
586 ($a: expr, $b: expr) => {
587 {
588 let a = $a;
589 let b = $b;
590 if a != b {
591 return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
592 a, b, stringify!($a), stringify!($b),
593 self.serialization))
594 }
595 }
596 }
597 }
598
// Scheme: non-empty, starts with an ASCII letter, continues with ASCII
// letters, digits, '+', '-' or '.', and is terminated by ':' at `scheme_end`.
599 assert!(self.scheme_end >= 1);
600 assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
601 assert!(self
602 .slice(1..self.scheme_end)
603 .chars()
604 .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
605 assert_eq!(self.byte_at(self.scheme_end), b':');
606
607 if self.slice(self.scheme_end + 1..).starts_with("//") {
608 // URL with authority
// The byte at `username_end` tells which userinfo shape is present:
// ':' = a password follows, '@' = username only, anything else = no userinfo.
// The length guard avoids reading past the end of the serialization.
609 if self.username_end != self.serialization.len() as u32 {
610 match self.byte_at(self.username_end) {
611 b':' => {
612 assert!(self.host_start >= self.username_end + 2);
613 assert_eq!(self.byte_at(self.host_start - 1), b'@');
614 }
615 b'@' => assert!(self.host_start == self.username_end + 1),
616 _ => assert_eq!(self.username_end, self.scheme_end + 3),
617 }
618 }
619 assert!(self.host_start >= self.username_end);
620 assert!(self.host_end >= self.host_start);
// The serialized host text must agree with the parsed `host` value.
621 let host_str = self.slice(self.host_start..self.host_end);
622 match self.host {
623 HostInternal::None => assert_eq!(host_str, ""),
624 HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
625 HostInternal::Ipv6(address) => {
626 let h: Host<String> = Host::Ipv6(address);
627 assert_eq!(host_str, h.to_string())
628 }
629 HostInternal::Domain => {
630 if SchemeType::from(self.scheme()).is_special() {
631 assert!(!host_str.is_empty())
632 }
633 }
634 }
// Port: absent when the path starts right after the host; otherwise the text
// between ':' and the path must parse to the stored `port`.
635 if self.path_start == self.host_end {
636 assert_eq!(self.port, None);
637 } else {
638 assert_eq!(self.byte_at(self.host_end), b':');
639 let port_str = self.slice(self.host_end + 1..self.path_start);
640 assert_eq!(
641 self.port,
// NOTE: this `expect` panics rather than returning `Err`.
642 Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
643 );
644 }
// After the authority comes either the end of the string or '/', '#' or '?'.
645 assert!(
646 self.path_start as usize == self.serialization.len()
647 || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
648 );
649 } else {
650 // Anarchist URL (no authority)
// All authority offsets collapse onto the position right after the ':'.
651 assert_eq!(self.username_end, self.scheme_end + 1);
652 assert_eq!(self.host_start, self.scheme_end + 1);
653 assert_eq!(self.host_end, self.scheme_end + 1);
654 assert_eq!(self.host, HostInternal::None);
655 assert_eq!(self.port, None);
656 assert_eq!(self.path_start, self.scheme_end + 1);
657 }
// Query and fragment offsets point at their '?' / '#' delimiters, lie at or
// after the path start, and the fragment comes after the query.
658 if let Some(start) = self.query_start {
659 assert!(start >= self.path_start);
660 assert_eq!(self.byte_at(start), b'?');
661 }
662 if let Some(start) = self.fragment_start {
663 assert!(start >= self.path_start);
664 assert_eq!(self.byte_at(start), b'#');
665 }
666 if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
667 assert!(fragment_start > query_start);
668 }
669
// Round trip: parsing the serialization again must reproduce every field.
// NOTE: this `expect` panics rather than returning `Err`.
670 let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
671 assert_eq!(&self.serialization, &other.serialization);
672 assert_eq!(self.scheme_end, other.scheme_end);
673 assert_eq!(self.username_end, other.username_end);
674 assert_eq!(self.host_start, other.host_start);
675 assert_eq!(self.host_end, other.host_end);
676 assert!(
677 self.host == other.host ||
678 // XXX No host round-trips to empty host.
679 // See https://github.com/whatwg/url/issues/79
680 (self.host_str(), other.host_str()) == (None, Some(""))
681 );
682 assert_eq!(self.port, other.port);
683 assert_eq!(self.path_start, other.path_start);
684 assert_eq!(self.query_start, other.query_start);
685 assert_eq!(self.fragment_start, other.fragment_start);
686 Ok(())
687 }
688
689 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
690 ///
691 /// Note: this returns an opaque origin for `file:` URLs, which causes
692 /// `url.origin() != url.origin()`.
693 ///
694 /// # Examples
695 ///
696 /// URL with `ftp` scheme:
697 ///
698 /// ```rust
699 /// use url::{Host, Origin, Url};
700 /// # use url::ParseError;
701 ///
702 /// # fn run() -> Result<(), ParseError> {
703 /// let url = Url::parse("ftp://example.com/foo")?;
704 /// assert_eq!(url.origin(),
705 /// Origin::Tuple("ftp".into(),
706 /// Host::Domain("example.com".into()),
707 /// 21));
708 /// # Ok(())
709 /// # }
710 /// # run().unwrap();
711 /// ```
712 ///
713 /// URL with `blob` scheme:
714 ///
715 /// ```rust
716 /// use url::{Host, Origin, Url};
717 /// # use url::ParseError;
718 ///
719 /// # fn run() -> Result<(), ParseError> {
720 /// let url = Url::parse("blob:https://example.com/foo")?;
721 /// assert_eq!(url.origin(),
722 /// Origin::Tuple("https".into(),
723 /// Host::Domain("example.com".into()),
724 /// 443));
725 /// # Ok(())
726 /// # }
727 /// # run().unwrap();
728 /// ```
729 ///
730 /// URL with `file` scheme:
731 ///
732 /// ```rust
733 /// use url::{Host, Origin, Url};
734 /// # use url::ParseError;
735 ///
736 /// # fn run() -> Result<(), ParseError> {
737 /// let url = Url::parse("file:///tmp/foo")?;
738 /// assert!(!url.origin().is_tuple());
739 ///
740 /// let other_url = Url::parse("file:///tmp/foo")?;
741 /// assert!(url.origin() != other_url.origin());
742 /// # Ok(())
743 /// # }
744 /// # run().unwrap();
745 /// ```
746 ///
747 /// URL with other scheme:
748 ///
749 /// ```rust
750 /// use url::{Host, Origin, Url};
751 /// # use url::ParseError;
752 ///
753 /// # fn run() -> Result<(), ParseError> {
754 /// let url = Url::parse("foo:bar")?;
755 /// assert!(!url.origin().is_tuple());
756 /// # Ok(())
757 /// # }
758 /// # run().unwrap();
759 /// ```
760 #[inline]
origin(&self) -> Origin761 pub fn origin(&self) -> Origin {
762 origin::url_origin(self)
763 }
764
765 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
766 ///
767 /// # Examples
768 ///
769 /// ```
770 /// use url::Url;
771 /// # use url::ParseError;
772 ///
773 /// # fn run() -> Result<(), ParseError> {
774 /// let url = Url::parse("file:///tmp/foo")?;
775 /// assert_eq!(url.scheme(), "file");
776 /// # Ok(())
777 /// # }
778 /// # run().unwrap();
779 /// ```
780 #[inline]
scheme(&self) -> &str781 pub fn scheme(&self) -> &str {
782 self.slice(..self.scheme_end)
783 }
784
785 /// Return whether the URL has an 'authority',
786 /// which can contain a username, password, host, and port number.
787 ///
788 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
789 /// or cannot-be-a-base like `data:text/plain,Stuff`.
790 ///
791 /// # Examples
792 ///
793 /// ```
794 /// use url::Url;
795 /// # use url::ParseError;
796 ///
797 /// # fn run() -> Result<(), ParseError> {
798 /// let url = Url::parse("ftp://rms@example.com")?;
799 /// assert!(url.has_authority());
800 ///
801 /// let url = Url::parse("unix:/run/foo.socket")?;
802 /// assert!(!url.has_authority());
803 ///
804 /// let url = Url::parse("data:text/plain,Stuff")?;
805 /// assert!(!url.has_authority());
806 /// # Ok(())
807 /// # }
808 /// # run().unwrap();
809 /// ```
810 #[inline]
has_authority(&self) -> bool811 pub fn has_authority(&self) -> bool {
812 debug_assert!(self.byte_at(self.scheme_end) == b':');
813 self.slice(self.scheme_end..).starts_with("://")
814 }
815
816 /// Return whether this URL is a cannot-be-a-base URL,
817 /// meaning that parsing a relative URL string with this URL as the base will return an error.
818 ///
819 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
820 /// as is typically the case of `data:` and `mailto:` URLs.
821 ///
822 /// # Examples
823 ///
824 /// ```
825 /// use url::Url;
826 /// # use url::ParseError;
827 ///
828 /// # fn run() -> Result<(), ParseError> {
829 /// let url = Url::parse("ftp://rms@example.com")?;
830 /// assert!(!url.cannot_be_a_base());
831 ///
832 /// let url = Url::parse("unix:/run/foo.socket")?;
833 /// assert!(!url.cannot_be_a_base());
834 ///
835 /// let url = Url::parse("data:text/plain,Stuff")?;
836 /// assert!(url.cannot_be_a_base());
837 /// # Ok(())
838 /// # }
839 /// # run().unwrap();
840 /// ```
841 #[inline]
cannot_be_a_base(&self) -> bool842 pub fn cannot_be_a_base(&self) -> bool {
843 !self.slice(self.scheme_end + 1..).starts_with('/')
844 }
845
846 /// Return the username for this URL (typically the empty string)
847 /// as a percent-encoded ASCII string.
848 ///
849 /// # Examples
850 ///
851 /// ```
852 /// use url::Url;
853 /// # use url::ParseError;
854 ///
855 /// # fn run() -> Result<(), ParseError> {
856 /// let url = Url::parse("ftp://rms@example.com")?;
857 /// assert_eq!(url.username(), "rms");
858 ///
859 /// let url = Url::parse("ftp://:secret123@example.com")?;
860 /// assert_eq!(url.username(), "");
861 ///
862 /// let url = Url::parse("https://example.com")?;
863 /// assert_eq!(url.username(), "");
864 /// # Ok(())
865 /// # }
866 /// # run().unwrap();
867 /// ```
username(&self) -> &str868 pub fn username(&self) -> &str {
869 let scheme_separator_len = "://".len() as u32;
870 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
871 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
872 } else {
873 ""
874 }
875 }
876
877 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
878 ///
879 /// # Examples
880 ///
881 /// ```
882 /// use url::Url;
883 /// # use url::ParseError;
884 ///
885 /// # fn run() -> Result<(), ParseError> {
886 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
887 /// assert_eq!(url.password(), Some("secret123"));
888 ///
889 /// let url = Url::parse("ftp://:secret123@example.com")?;
890 /// assert_eq!(url.password(), Some("secret123"));
891 ///
892 /// let url = Url::parse("ftp://rms@example.com")?;
893 /// assert_eq!(url.password(), None);
894 ///
895 /// let url = Url::parse("https://example.com")?;
896 /// assert_eq!(url.password(), None);
897 /// # Ok(())
898 /// # }
899 /// # run().unwrap();
900 /// ```
password(&self) -> Option<&str>901 pub fn password(&self) -> Option<&str> {
902 // This ':' is not the one marking a port number since a host can not be empty.
903 // (Except for file: URLs, which do not have port numbers.)
904 if self.has_authority()
905 && self.username_end != self.serialization.len() as u32
906 && self.byte_at(self.username_end) == b':'
907 {
908 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
909 Some(self.slice(self.username_end + 1..self.host_start - 1))
910 } else {
911 None
912 }
913 }
914
915 /// Equivalent to `url.host().is_some()`.
916 ///
917 /// # Examples
918 ///
919 /// ```
920 /// use url::Url;
921 /// # use url::ParseError;
922 ///
923 /// # fn run() -> Result<(), ParseError> {
924 /// let url = Url::parse("ftp://rms@example.com")?;
925 /// assert!(url.has_host());
926 ///
927 /// let url = Url::parse("unix:/run/foo.socket")?;
928 /// assert!(!url.has_host());
929 ///
930 /// let url = Url::parse("data:text/plain,Stuff")?;
931 /// assert!(!url.has_host());
932 /// # Ok(())
933 /// # }
934 /// # run().unwrap();
935 /// ```
has_host(&self) -> bool936 pub fn has_host(&self) -> bool {
937 !matches!(self.host, HostInternal::None)
938 }
939
940 /// Return the string representation of the host (domain or IP address) for this URL, if any.
941 ///
942 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
943 /// of a special URL, or percent encoded for non-special URLs.
944 /// IPv6 addresses are given between `[` and `]` brackets.
945 ///
946 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
947 /// don’t have a host.
948 ///
949 /// See also the `host` method.
950 ///
951 /// # Examples
952 ///
953 /// ```
954 /// use url::Url;
955 /// # use url::ParseError;
956 ///
957 /// # fn run() -> Result<(), ParseError> {
958 /// let url = Url::parse("https://127.0.0.1/index.html")?;
959 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
960 ///
961 /// let url = Url::parse("ftp://rms@example.com")?;
962 /// assert_eq!(url.host_str(), Some("example.com"));
963 ///
964 /// let url = Url::parse("unix:/run/foo.socket")?;
965 /// assert_eq!(url.host_str(), None);
966 ///
967 /// let url = Url::parse("data:text/plain,Stuff")?;
968 /// assert_eq!(url.host_str(), None);
969 /// # Ok(())
970 /// # }
971 /// # run().unwrap();
972 /// ```
host_str(&self) -> Option<&str>973 pub fn host_str(&self) -> Option<&str> {
974 if self.has_host() {
975 Some(self.slice(self.host_start..self.host_end))
976 } else {
977 None
978 }
979 }
980
981 /// Return the parsed representation of the host for this URL.
982 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
983 /// of a special URL, or percent encoded for non-special URLs.
984 ///
985 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
986 /// don’t have a host.
987 ///
988 /// See also the `host_str` method.
989 ///
990 /// # Examples
991 ///
992 /// ```
993 /// use url::Url;
994 /// # use url::ParseError;
995 ///
996 /// # fn run() -> Result<(), ParseError> {
997 /// let url = Url::parse("https://127.0.0.1/index.html")?;
998 /// assert!(url.host().is_some());
999 ///
1000 /// let url = Url::parse("ftp://rms@example.com")?;
1001 /// assert!(url.host().is_some());
1002 ///
1003 /// let url = Url::parse("unix:/run/foo.socket")?;
1004 /// assert!(url.host().is_none());
1005 ///
1006 /// let url = Url::parse("data:text/plain,Stuff")?;
1007 /// assert!(url.host().is_none());
1008 /// # Ok(())
1009 /// # }
1010 /// # run().unwrap();
1011 /// ```
host(&self) -> Option<Host<&str>>1012 pub fn host(&self) -> Option<Host<&str>> {
1013 match self.host {
1014 HostInternal::None => None,
1015 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1016 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1017 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1018 }
1019 }
1020
1021 /// If this URL has a host and it is a domain name (not an IP address), return it.
1022 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1023 /// of a special URL, or percent encoded for non-special URLs.
1024 ///
1025 /// # Examples
1026 ///
1027 /// ```
1028 /// use url::Url;
1029 /// # use url::ParseError;
1030 ///
1031 /// # fn run() -> Result<(), ParseError> {
1032 /// let url = Url::parse("https://127.0.0.1/")?;
1033 /// assert_eq!(url.domain(), None);
1034 ///
1035 /// let url = Url::parse("mailto:rms@example.net")?;
1036 /// assert_eq!(url.domain(), None);
1037 ///
1038 /// let url = Url::parse("https://example.com/")?;
1039 /// assert_eq!(url.domain(), Some("example.com"));
1040 /// # Ok(())
1041 /// # }
1042 /// # run().unwrap();
1043 /// ```
domain(&self) -> Option<&str>1044 pub fn domain(&self) -> Option<&str> {
1045 match self.host {
1046 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1047 _ => None,
1048 }
1049 }
1050
1051 /// Return the port number for this URL, if any.
1052 ///
1053 /// Note that default port numbers are never reflected by the serialization,
1054 /// use the `port_or_known_default()` method if you want a default port number returned.
1055 ///
1056 /// # Examples
1057 ///
1058 /// ```
1059 /// use url::Url;
1060 /// # use url::ParseError;
1061 ///
1062 /// # fn run() -> Result<(), ParseError> {
1063 /// let url = Url::parse("https://example.com")?;
1064 /// assert_eq!(url.port(), None);
1065 ///
1066 /// let url = Url::parse("https://example.com:443/")?;
1067 /// assert_eq!(url.port(), None);
1068 ///
1069 /// let url = Url::parse("ssh://example.com:22")?;
1070 /// assert_eq!(url.port(), Some(22));
1071 /// # Ok(())
1072 /// # }
1073 /// # run().unwrap();
1074 /// ```
1075 #[inline]
port(&self) -> Option<u16>1076 pub fn port(&self) -> Option<u16> {
1077 self.port
1078 }
1079
1080 /// Return the port number for this URL, or the default port number if it is known.
1081 ///
1082 /// This method only knows the default port number
1083 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1084 ///
1085 /// For URLs in these schemes, this method always returns `Some(_)`.
1086 /// For other schemes, it is the same as `Url::port()`.
1087 ///
1088 /// # Examples
1089 ///
1090 /// ```
1091 /// use url::Url;
1092 /// # use url::ParseError;
1093 ///
1094 /// # fn run() -> Result<(), ParseError> {
1095 /// let url = Url::parse("foo://example.com")?;
1096 /// assert_eq!(url.port_or_known_default(), None);
1097 ///
1098 /// let url = Url::parse("foo://example.com:1456")?;
1099 /// assert_eq!(url.port_or_known_default(), Some(1456));
1100 ///
1101 /// let url = Url::parse("https://example.com")?;
1102 /// assert_eq!(url.port_or_known_default(), Some(443));
1103 /// # Ok(())
1104 /// # }
1105 /// # run().unwrap();
1106 /// ```
1107 #[inline]
port_or_known_default(&self) -> Option<u16>1108 pub fn port_or_known_default(&self) -> Option<u16> {
1109 self.port.or_else(|| parser::default_port(self.scheme()))
1110 }
1111
1112 /// Resolve a URL’s host and port number to `SocketAddr`.
1113 ///
1114 /// If the URL has the default port number of a scheme that is unknown to this library,
1115 /// `default_port_number` provides an opportunity to provide the actual port number.
1116 /// In non-example code this should be implemented either simply as `|| None`,
1117 /// or by matching on the URL’s `.scheme()`.
1118 ///
1119 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1120 ///
1121 /// # Examples
1122 ///
1123 /// ```no_run
1124 /// let url = url::Url::parse("https://example.net/").unwrap();
1125 /// let addrs = url.socket_addrs(|| None).unwrap();
1126 /// std::net::TcpStream::connect(&*addrs)
1127 /// # ;
1128 /// ```
1129 ///
1130 /// ```
1131 /// /// With application-specific known default port numbers
1132 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1133 /// url.socket_addrs(|| match url.scheme() {
1134 /// "socks5" | "socks5h" => Some(1080),
1135 /// _ => None,
1136 /// })
1137 /// }
1138 /// ```
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1139 pub fn socket_addrs(
1140 &self,
1141 default_port_number: impl Fn() -> Option<u16>,
1142 ) -> io::Result<Vec<SocketAddr>> {
1143 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1144 // causes borrowck issues because the return value borrows `default_port_number`:
1145 //
1146 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1147 //
1148 // > This RFC proposes that *all* type parameters are considered in scope
1149 // > for `impl Trait` in return position
1150
1151 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1152 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1153 }
1154
1155 let host = io_result(self.host(), "No host name in the URL")?;
1156 let port = io_result(
1157 self.port_or_known_default().or_else(default_port_number),
1158 "No port number in the URL",
1159 )?;
1160 Ok(match host {
1161 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1162 Host::Ipv4(ip) => vec![(ip, port).into()],
1163 Host::Ipv6(ip) => vec![(ip, port).into()],
1164 })
1165 }
1166
1167 /// Return the path for this URL, as a percent-encoded ASCII string.
1168 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1169 /// For other URLs, this starts with a '/' slash
1170 /// and continues with slash-separated path segments.
1171 ///
1172 /// # Examples
1173 ///
1174 /// ```rust
1175 /// use url::{Url, ParseError};
1176 ///
1177 /// # fn run() -> Result<(), ParseError> {
1178 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1179 /// assert_eq!(url.path(), "/api/versions");
1180 ///
1181 /// let url = Url::parse("https://example.com")?;
1182 /// assert_eq!(url.path(), "/");
1183 ///
1184 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1185 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1186 /// # Ok(())
1187 /// # }
1188 /// # run().unwrap();
1189 /// ```
path(&self) -> &str1190 pub fn path(&self) -> &str {
1191 match (self.query_start, self.fragment_start) {
1192 (None, None) => self.slice(self.path_start..),
1193 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1194 self.slice(self.path_start..next_component_start)
1195 }
1196 }
1197 }
1198
1199 /// Unless this URL is cannot-be-a-base,
1200 /// return an iterator of '/' slash-separated path segments,
1201 /// each as a percent-encoded ASCII string.
1202 ///
1203 /// Return `None` for cannot-be-a-base URLs.
1204 ///
1205 /// When `Some` is returned, the iterator always contains at least one string
1206 /// (which may be empty).
1207 ///
1208 /// # Examples
1209 ///
1210 /// ```
1211 /// use url::Url;
1212 /// # use std::error::Error;
1213 ///
1214 /// # fn run() -> Result<(), Box<dyn Error>> {
1215 /// let url = Url::parse("https://example.com/foo/bar")?;
1216 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1217 /// assert_eq!(path_segments.next(), Some("foo"));
1218 /// assert_eq!(path_segments.next(), Some("bar"));
1219 /// assert_eq!(path_segments.next(), None);
1220 ///
1221 /// let url = Url::parse("https://example.com")?;
1222 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1223 /// assert_eq!(path_segments.next(), Some(""));
1224 /// assert_eq!(path_segments.next(), None);
1225 ///
1226 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1227 /// assert!(url.path_segments().is_none());
1228 ///
1229 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1230 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1231 /// assert_eq!(path_segments.next(), Some("countries"));
1232 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1233 /// # Ok(())
1234 /// # }
1235 /// # run().unwrap();
1236 /// ```
1237 #[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36
path_segments(&self) -> Option<str::Split<'_, char>>1238 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1239 let path = self.path();
1240 if path.starts_with('/') {
1241 Some(path[1..].split('/'))
1242 } else {
1243 None
1244 }
1245 }
1246
1247 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1248 ///
1249 /// # Examples
1250 ///
1251 /// ```rust
1252 /// use url::Url;
1253 /// # use url::ParseError;
1254 ///
1255 /// fn run() -> Result<(), ParseError> {
1256 /// let url = Url::parse("https://example.com/products?page=2")?;
1257 /// let query = url.query();
1258 /// assert_eq!(query, Some("page=2"));
1259 ///
1260 /// let url = Url::parse("https://example.com/products")?;
1261 /// let query = url.query();
1262 /// assert!(query.is_none());
1263 ///
1264 /// let url = Url::parse("https://example.com/?country=español")?;
1265 /// let query = url.query();
1266 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1267 /// # Ok(())
1268 /// # }
1269 /// # run().unwrap();
1270 /// ```
query(&self) -> Option<&str>1271 pub fn query(&self) -> Option<&str> {
1272 match (self.query_start, self.fragment_start) {
1273 (None, _) => None,
1274 (Some(query_start), None) => {
1275 debug_assert!(self.byte_at(query_start) == b'?');
1276 Some(self.slice(query_start + 1..))
1277 }
1278 (Some(query_start), Some(fragment_start)) => {
1279 debug_assert!(self.byte_at(query_start) == b'?');
1280 Some(self.slice(query_start + 1..fragment_start))
1281 }
1282 }
1283 }
1284
1285 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1286 /// and return an iterator of (key, value) pairs.
1287 ///
1288 /// # Examples
1289 ///
1290 /// ```rust
1291 /// use std::borrow::Cow;
1292 ///
1293 /// use url::Url;
1294 /// # use url::ParseError;
1295 ///
1296 /// # fn run() -> Result<(), ParseError> {
1297 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1298 /// let mut pairs = url.query_pairs();
1299 ///
1300 /// assert_eq!(pairs.count(), 2);
1301 ///
1302 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1303 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1304 /// # Ok(())
1305 /// # }
1306 /// # run().unwrap();
1307 ///
1308
1309 #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1310 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1311 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1312 }
1313
1314 /// Return this URL’s fragment identifier, if any.
1315 ///
1316 /// A fragment is the part of the URL after the `#` symbol.
1317 /// The fragment is optional and, if present, contains a fragment identifier
1318 /// that identifies a secondary resource, such as a section heading
1319 /// of a document.
1320 ///
1321 /// In HTML, the fragment identifier is usually the id attribute of a an element
1322 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1323 /// of a URL to the server.
1324 ///
1325 /// **Note:** the parser did *not* percent-encode this component,
1326 /// but the input may have been percent-encoded already.
1327 ///
1328 /// # Examples
1329 ///
1330 /// ```rust
1331 /// use url::Url;
1332 /// # use url::ParseError;
1333 ///
1334 /// # fn run() -> Result<(), ParseError> {
1335 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1336 ///
1337 /// assert_eq!(url.fragment(), Some("row=4"));
1338 ///
1339 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1340 ///
1341 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1342 /// # Ok(())
1343 /// # }
1344 /// # run().unwrap();
1345 /// ```
fragment(&self) -> Option<&str>1346 pub fn fragment(&self) -> Option<&str> {
1347 self.fragment_start.map(|start| {
1348 debug_assert!(self.byte_at(start) == b'#');
1349 self.slice(start + 1..)
1350 })
1351 }
1352
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1353 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1354 let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new()));
1355 let result = f(&mut parser);
1356 self.serialization = parser.serialization;
1357 result
1358 }
1359
1360 /// Change this URL’s fragment identifier.
1361 ///
1362 /// # Examples
1363 ///
1364 /// ```rust
1365 /// use url::Url;
1366 /// # use url::ParseError;
1367 ///
1368 /// # fn run() -> Result<(), ParseError> {
1369 /// let mut url = Url::parse("https://example.com/data.csv")?;
1370 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1371
1372 /// url.set_fragment(Some("cell=4,1-6,2"));
1373 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1374 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1375 ///
1376 /// url.set_fragment(None);
1377 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1378 /// assert!(url.fragment().is_none());
1379 /// # Ok(())
1380 /// # }
1381 /// # run().unwrap();
1382 /// ```
set_fragment(&mut self, fragment: Option<&str>)1383 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1384 // Remove any previous fragment
1385 if let Some(start) = self.fragment_start {
1386 debug_assert!(self.byte_at(start) == b'#');
1387 self.serialization.truncate(start as usize);
1388 }
1389 // Write the new one
1390 if let Some(input) = fragment {
1391 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1392 self.serialization.push('#');
1393 self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
1394 } else {
1395 self.fragment_start = None
1396 }
1397 }
1398
take_fragment(&mut self) -> Option<String>1399 fn take_fragment(&mut self) -> Option<String> {
1400 self.fragment_start.take().map(|start| {
1401 debug_assert!(self.byte_at(start) == b'#');
1402 let fragment = self.slice(start + 1..).to_owned();
1403 self.serialization.truncate(start as usize);
1404 fragment
1405 })
1406 }
1407
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1408 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1409 if let Some(ref fragment) = fragment {
1410 assert!(self.fragment_start.is_none());
1411 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1412 self.serialization.push('#');
1413 self.serialization.push_str(fragment);
1414 }
1415 }
1416
1417 /// Change this URL’s query string.
1418 ///
1419 /// # Examples
1420 ///
1421 /// ```rust
1422 /// use url::Url;
1423 /// # use url::ParseError;
1424 ///
1425 /// # fn run() -> Result<(), ParseError> {
1426 /// let mut url = Url::parse("https://example.com/products")?;
1427 /// assert_eq!(url.as_str(), "https://example.com/products");
1428 ///
1429 /// url.set_query(Some("page=2"));
1430 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1431 /// assert_eq!(url.query(), Some("page=2"));
1432 /// # Ok(())
1433 /// # }
1434 /// # run().unwrap();
1435 /// ```
set_query(&mut self, query: Option<&str>)1436 pub fn set_query(&mut self, query: Option<&str>) {
1437 let fragment = self.take_fragment();
1438
1439 // Remove any previous query
1440 if let Some(start) = self.query_start.take() {
1441 debug_assert!(self.byte_at(start) == b'?');
1442 self.serialization.truncate(start as usize);
1443 }
1444 // Write the new query, if any
1445 if let Some(input) = query {
1446 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1447 self.serialization.push('?');
1448 let scheme_type = SchemeType::from(self.scheme());
1449 let scheme_end = self.scheme_end;
1450 self.mutate(|parser| {
1451 let vfn = parser.violation_fn;
1452 parser.parse_query(
1453 scheme_type,
1454 scheme_end,
1455 parser::Input::trim_tab_and_newlines(input, vfn),
1456 )
1457 });
1458 }
1459
1460 self.restore_already_parsed_fragment(fragment);
1461 }
1462
1463 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1464 /// in `application/x-www-form-urlencoded` syntax.
1465 ///
1466 /// The return value has a method-chaining API:
1467 ///
1468 /// ```rust
1469 /// # use url::{Url, ParseError};
1470 ///
1471 /// # fn run() -> Result<(), ParseError> {
1472 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1473 /// assert_eq!(url.query(), Some("lang=fr"));
1474 ///
1475 /// url.query_pairs_mut().append_pair("foo", "bar");
1476 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1477 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1478 ///
1479 /// url.query_pairs_mut()
1480 /// .clear()
1481 /// .append_pair("foo", "bar & baz")
1482 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1483 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1484 /// assert_eq!(url.as_str(),
1485 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1486 /// # Ok(())
1487 /// # }
1488 /// # run().unwrap();
1489 /// ```
1490 ///
1491 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1492 /// not `url.set_query(None)`.
1493 ///
1494 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1495 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1496 let fragment = self.take_fragment();
1497
1498 let query_start;
1499 if let Some(start) = self.query_start {
1500 debug_assert!(self.byte_at(start) == b'?');
1501 query_start = start as usize;
1502 } else {
1503 query_start = self.serialization.len();
1504 self.query_start = Some(to_u32(query_start).unwrap());
1505 self.serialization.push('?');
1506 }
1507
1508 let query = UrlQuery {
1509 url: Some(self),
1510 fragment,
1511 };
1512 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1513 }
1514
take_after_path(&mut self) -> String1515 fn take_after_path(&mut self) -> String {
1516 match (self.query_start, self.fragment_start) {
1517 (Some(i), _) | (None, Some(i)) => {
1518 let after_path = self.slice(i..).to_owned();
1519 self.serialization.truncate(i as usize);
1520 after_path
1521 }
1522 (None, None) => String::new(),
1523 }
1524 }
1525
1526 /// Change this URL’s path.
1527 ///
1528 /// # Examples
1529 ///
1530 /// ```rust
1531 /// use url::Url;
1532 /// # use url::ParseError;
1533 ///
1534 /// # fn run() -> Result<(), ParseError> {
1535 /// let mut url = Url::parse("https://example.com")?;
1536 /// url.set_path("api/comments");
1537 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1538 /// assert_eq!(url.path(), "/api/comments");
1539 ///
1540 /// let mut url = Url::parse("https://example.com/api")?;
1541 /// url.set_path("data/report.csv");
1542 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1543 /// assert_eq!(url.path(), "/data/report.csv");
1544 /// # Ok(())
1545 /// # }
1546 /// # run().unwrap();
1547 /// ```
set_path(&mut self, mut path: &str)1548 pub fn set_path(&mut self, mut path: &str) {
1549 let after_path = self.take_after_path();
1550 let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1551 let cannot_be_a_base = self.cannot_be_a_base();
1552 let scheme_type = SchemeType::from(self.scheme());
1553 self.serialization.truncate(self.path_start as usize);
1554 self.mutate(|parser| {
1555 if cannot_be_a_base {
1556 if path.starts_with('/') {
1557 parser.serialization.push_str("%2F");
1558 path = &path[1..];
1559 }
1560 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
1561 } else {
1562 let mut has_host = true; // FIXME
1563 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
1564 }
1565 });
1566 self.restore_after_path(old_after_path_pos, &after_path);
1567 }
1568
1569 /// Return an object with methods to manipulate this URL’s path segments.
1570 ///
1571 /// Return `Err(())` if this URL is cannot-be-a-base.
1572 #[allow(clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1573 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1574 if self.cannot_be_a_base() {
1575 Err(())
1576 } else {
1577 Ok(path_segments::new(self))
1578 }
1579 }
1580
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1581 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1582 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1583 let adjust = |index: &mut u32| {
1584 *index -= old_after_path_position;
1585 *index += new_after_path_position;
1586 };
1587 if let Some(ref mut index) = self.query_start {
1588 adjust(index)
1589 }
1590 if let Some(ref mut index) = self.fragment_start {
1591 adjust(index)
1592 }
1593 self.serialization.push_str(after_path)
1594 }
1595
1596 /// Change this URL’s port number.
1597 ///
1598 /// Note that default port numbers are not reflected in the serialization.
1599 ///
1600 /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1601 /// do nothing and return `Err`.
1602 ///
1603 /// # Examples
1604 ///
1605 /// ```
1606 /// use url::Url;
1607 /// # use std::error::Error;
1608 ///
1609 /// # fn run() -> Result<(), Box<dyn Error>> {
1610 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1611 ///
1612 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1613 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1614 ///
1615 /// url.set_port(None).map_err(|_| "cannot be base")?;
1616 /// assert_eq!(url.as_str(), "ssh://example.net/");
1617 /// # Ok(())
1618 /// # }
1619 /// # run().unwrap();
1620 /// ```
1621 ///
1622 /// Known default port numbers are not reflected:
1623 ///
1624 /// ```rust
1625 /// use url::Url;
1626 /// # use std::error::Error;
1627 ///
1628 /// # fn run() -> Result<(), Box<dyn Error>> {
1629 /// let mut url = Url::parse("https://example.org/")?;
1630 ///
1631 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1632 /// assert!(url.port().is_none());
1633 /// # Ok(())
1634 /// # }
1635 /// # run().unwrap();
1636 /// ```
1637 ///
1638 /// Cannot set port for cannot-be-a-base URLs:
1639 ///
1640 /// ```
1641 /// use url::Url;
1642 /// # use url::ParseError;
1643 ///
1644 /// # fn run() -> Result<(), ParseError> {
1645 /// let mut url = Url::parse("mailto:rms@example.net")?;
1646 ///
1647 /// let result = url.set_port(Some(80));
1648 /// assert!(result.is_err());
1649 ///
1650 /// let result = url.set_port(None);
1651 /// assert!(result.is_err());
1652 /// # Ok(())
1653 /// # }
1654 /// # run().unwrap();
1655 /// ```
1656 #[allow(clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1657 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1658 // has_host implies !cannot_be_a_base
1659 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1660 return Err(());
1661 }
1662 if port.is_some() && port == parser::default_port(self.scheme()) {
1663 port = None
1664 }
1665 self.set_port_internal(port);
1666 Ok(())
1667 }
1668
set_port_internal(&mut self, port: Option<u16>)1669 fn set_port_internal(&mut self, port: Option<u16>) {
1670 match (self.port, port) {
1671 (None, None) => {}
1672 (Some(_), None) => {
1673 self.serialization
1674 .drain(self.host_end as usize..self.path_start as usize);
1675 let offset = self.path_start - self.host_end;
1676 self.path_start = self.host_end;
1677 if let Some(ref mut index) = self.query_start {
1678 *index -= offset
1679 }
1680 if let Some(ref mut index) = self.fragment_start {
1681 *index -= offset
1682 }
1683 }
1684 (Some(old), Some(new)) if old == new => {}
1685 (_, Some(new)) => {
1686 let path_and_after = self.slice(self.path_start..).to_owned();
1687 self.serialization.truncate(self.host_end as usize);
1688 write!(&mut self.serialization, ":{}", new).unwrap();
1689 let old_path_start = self.path_start;
1690 let new_path_start = to_u32(self.serialization.len()).unwrap();
1691 self.path_start = new_path_start;
1692 let adjust = |index: &mut u32| {
1693 *index -= old_path_start;
1694 *index += new_path_start;
1695 };
1696 if let Some(ref mut index) = self.query_start {
1697 adjust(index)
1698 }
1699 if let Some(ref mut index) = self.fragment_start {
1700 adjust(index)
1701 }
1702 self.serialization.push_str(&path_and_after);
1703 }
1704 }
1705 self.port = port;
1706 }
1707
1708 /// Change this URL’s host.
1709 ///
1710 /// Removing the host (calling this with `None`)
1711 /// will also remove any username, password, and port number.
1712 ///
1713 /// # Examples
1714 ///
1715 /// Change host:
1716 ///
1717 /// ```
1718 /// use url::Url;
1719 /// # use url::ParseError;
1720 ///
1721 /// # fn run() -> Result<(), ParseError> {
1722 /// let mut url = Url::parse("https://example.net")?;
1723 /// let result = url.set_host(Some("rust-lang.org"));
1724 /// assert!(result.is_ok());
1725 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1726 /// # Ok(())
1727 /// # }
1728 /// # run().unwrap();
1729 /// ```
1730 ///
1731 /// Remove host:
1732 ///
1733 /// ```
1734 /// use url::Url;
1735 /// # use url::ParseError;
1736 ///
1737 /// # fn run() -> Result<(), ParseError> {
1738 /// let mut url = Url::parse("foo://example.net")?;
1739 /// let result = url.set_host(None);
1740 /// assert!(result.is_ok());
1741 /// assert_eq!(url.as_str(), "foo:/");
1742 /// # Ok(())
1743 /// # }
1744 /// # run().unwrap();
1745 /// ```
1746 ///
1747 /// Cannot remove host for 'special' schemes (e.g. `http`):
1748 ///
1749 /// ```
1750 /// use url::Url;
1751 /// # use url::ParseError;
1752 ///
1753 /// # fn run() -> Result<(), ParseError> {
1754 /// let mut url = Url::parse("https://example.net")?;
1755 /// let result = url.set_host(None);
1756 /// assert!(result.is_err());
1757 /// assert_eq!(url.as_str(), "https://example.net/");
1758 /// # Ok(())
1759 /// # }
1760 /// # run().unwrap();
1761 /// ```
1762 ///
1763 /// Cannot change or remove host for cannot-be-a-base URLs:
1764 ///
1765 /// ```
1766 /// use url::Url;
1767 /// # use url::ParseError;
1768 ///
1769 /// # fn run() -> Result<(), ParseError> {
1770 /// let mut url = Url::parse("mailto:rms@example.net")?;
1771 ///
1772 /// let result = url.set_host(Some("rust-lang.org"));
1773 /// assert!(result.is_err());
1774 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1775 ///
1776 /// let result = url.set_host(None);
1777 /// assert!(result.is_err());
1778 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1779 /// # Ok(())
1780 /// # }
1781 /// # run().unwrap();
1782 /// ```
1783 ///
1784 /// # Errors
1785 ///
1786 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1787 /// a [`ParseError`] variant will be returned.
1788 ///
1789 /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1790 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1791 if self.cannot_be_a_base() {
1792 return Err(ParseError::SetHostOnCannotBeABaseUrl);
1793 }
1794
1795 if let Some(host) = host {
1796 if host.is_empty() && SchemeType::from(self.scheme()).is_special() {
1797 return Err(ParseError::EmptyHost);
1798 }
1799 let mut host_substr = host;
1800 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1801 if !host.starts_with('[') || !host.ends_with(']') {
1802 match host.find(':') {
1803 Some(0) => {
1804 // If buffer is the empty string, validation error, return failure.
1805 return Err(ParseError::InvalidDomainCharacter);
1806 }
1807 // Let host be the result of host parsing buffer
1808 Some(colon_index) => {
1809 host_substr = &host[..colon_index];
1810 }
1811 None => {}
1812 }
1813 }
1814 if SchemeType::from(self.scheme()).is_special() {
1815 self.set_host_internal(Host::parse(host_substr)?, None);
1816 } else {
1817 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1818 }
1819 } else if self.has_host() {
1820 let scheme_type = SchemeType::from(self.scheme());
1821 if scheme_type.is_special() {
1822 return Err(ParseError::EmptyHost);
1823 } else if self.serialization.len() == self.path_start as usize {
1824 self.serialization.push('/');
1825 }
1826 debug_assert!(self.byte_at(self.scheme_end) == b':');
1827 debug_assert!(self.byte_at(self.path_start) == b'/');
1828 let new_path_start = self.scheme_end + 1;
1829 self.serialization
1830 .drain(new_path_start as usize..self.path_start as usize);
1831 let offset = self.path_start - new_path_start;
1832 self.path_start = new_path_start;
1833 self.username_end = new_path_start;
1834 self.host_start = new_path_start;
1835 self.host_end = new_path_start;
1836 self.port = None;
1837 if let Some(ref mut index) = self.query_start {
1838 *index -= offset
1839 }
1840 if let Some(ref mut index) = self.fragment_start {
1841 *index -= offset
1842 }
1843 }
1844 Ok(())
1845 }
1846
1847 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>)1848 fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
1849 let old_suffix_pos = if opt_new_port.is_some() {
1850 self.path_start
1851 } else {
1852 self.host_end
1853 };
1854 let suffix = self.slice(old_suffix_pos..).to_owned();
1855 self.serialization.truncate(self.host_start as usize);
1856 if !self.has_authority() {
1857 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
1858 debug_assert!(self.username_end == self.host_start);
1859 self.serialization.push('/');
1860 self.serialization.push('/');
1861 self.username_end += 2;
1862 self.host_start += 2;
1863 }
1864 write!(&mut self.serialization, "{}", host).unwrap();
1865 self.host_end = to_u32(self.serialization.len()).unwrap();
1866 self.host = host.into();
1867
1868 if let Some(new_port) = opt_new_port {
1869 self.port = new_port;
1870 if let Some(port) = new_port {
1871 write!(&mut self.serialization, ":{}", port).unwrap();
1872 }
1873 }
1874 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
1875 self.serialization.push_str(&suffix);
1876
1877 let adjust = |index: &mut u32| {
1878 *index -= old_suffix_pos;
1879 *index += new_suffix_pos;
1880 };
1881 adjust(&mut self.path_start);
1882 if let Some(ref mut index) = self.query_start {
1883 adjust(index)
1884 }
1885 if let Some(ref mut index) = self.fragment_start {
1886 adjust(index)
1887 }
1888 }
1889
1890 /// Change this URL’s host to the given IP address.
1891 ///
1892 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
1893 ///
1894 /// Compared to `Url::set_host`, this skips the host parser.
1895 ///
1896 /// # Examples
1897 ///
1898 /// ```rust
1899 /// use url::{Url, ParseError};
1900 ///
1901 /// # fn run() -> Result<(), ParseError> {
1902 /// let mut url = Url::parse("http://example.com")?;
1903 /// url.set_ip_host("127.0.0.1".parse().unwrap());
1904 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1905 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
1906 /// # Ok(())
1907 /// # }
1908 /// # run().unwrap();
1909 /// ```
1910 ///
1911 /// Cannot change URL's from mailto(cannot-be-base) to ip:
1912 ///
1913 /// ```rust
1914 /// use url::{Url, ParseError};
1915 ///
1916 /// # fn run() -> Result<(), ParseError> {
1917 /// let mut url = Url::parse("mailto:rms@example.com")?;
1918 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
1919 ///
1920 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
1921 /// assert!(result.is_err());
1922 /// # Ok(())
1923 /// # }
1924 /// # run().unwrap();
1925 /// ```
1926 ///
1927 #[allow(clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>1928 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
1929 if self.cannot_be_a_base() {
1930 return Err(());
1931 }
1932
1933 let address = match address {
1934 IpAddr::V4(address) => Host::Ipv4(address),
1935 IpAddr::V6(address) => Host::Ipv6(address),
1936 };
1937 self.set_host_internal(address, None);
1938 Ok(())
1939 }
1940
1941 /// Change this URL’s password.
1942 ///
1943 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
1944 ///
1945 /// # Examples
1946 ///
1947 /// ```rust
1948 /// use url::{Url, ParseError};
1949 ///
1950 /// # fn run() -> Result<(), ParseError> {
1951 /// let mut url = Url::parse("mailto:rmz@example.com")?;
1952 /// let result = url.set_password(Some("secret_password"));
1953 /// assert!(result.is_err());
1954 ///
1955 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
1956 /// let result = url.set_password(Some("secret_password"));
1957 /// assert_eq!(url.password(), Some("secret_password"));
1958 ///
1959 /// let mut url = Url::parse("ftp://user2:@example.com")?;
1960 /// let result = url.set_password(Some("secret2"));
1961 /// assert!(result.is_ok());
1962 /// assert_eq!(url.password(), Some("secret2"));
1963 /// # Ok(())
1964 /// # }
1965 /// # run().unwrap();
1966 /// ```
1967 #[allow(clippy::result_unit_err)]
set_password(&mut self, password: Option<&str>) -> Result<(), ()>1968 pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
1969 // has_host implies !cannot_be_a_base
1970 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1971 return Err(());
1972 }
1973 if let Some(password) = password {
1974 let host_and_after = self.slice(self.host_start..).to_owned();
1975 self.serialization.truncate(self.username_end as usize);
1976 self.serialization.push(':');
1977 self.serialization
1978 .extend(utf8_percent_encode(password, USERINFO));
1979 self.serialization.push('@');
1980
1981 let old_host_start = self.host_start;
1982 let new_host_start = to_u32(self.serialization.len()).unwrap();
1983 let adjust = |index: &mut u32| {
1984 *index -= old_host_start;
1985 *index += new_host_start;
1986 };
1987 self.host_start = new_host_start;
1988 adjust(&mut self.host_end);
1989 adjust(&mut self.path_start);
1990 if let Some(ref mut index) = self.query_start {
1991 adjust(index)
1992 }
1993 if let Some(ref mut index) = self.fragment_start {
1994 adjust(index)
1995 }
1996
1997 self.serialization.push_str(&host_and_after);
1998 } else if self.byte_at(self.username_end) == b':' {
1999 // If there is a password to remove
2000 let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2001 debug_assert!(has_username_or_password);
2002 let username_start = self.scheme_end + 3;
2003 let empty_username = username_start == self.username_end;
2004 let start = self.username_end; // Remove the ':'
2005 let end = if empty_username {
2006 self.host_start // Remove the '@' as well
2007 } else {
2008 self.host_start - 1 // Keep the '@' to separate the username from the host
2009 };
2010 self.serialization.drain(start as usize..end as usize);
2011 let offset = end - start;
2012 self.host_start -= offset;
2013 self.host_end -= offset;
2014 self.path_start -= offset;
2015 if let Some(ref mut index) = self.query_start {
2016 *index -= offset
2017 }
2018 if let Some(ref mut index) = self.fragment_start {
2019 *index -= offset
2020 }
2021 }
2022 Ok(())
2023 }
2024
2025 /// Change this URL’s username.
2026 ///
2027 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2028 /// # Examples
2029 ///
2030 /// Cannot setup username from mailto(cannot-be-base)
2031 ///
2032 /// ```rust
2033 /// use url::{Url, ParseError};
2034 ///
2035 /// # fn run() -> Result<(), ParseError> {
2036 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2037 /// let result = url.set_username("user1");
2038 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2039 /// assert!(result.is_err());
2040 /// # Ok(())
2041 /// # }
2042 /// # run().unwrap();
2043 /// ```
2044 ///
2045 /// Setup username to user1
2046 ///
2047 /// ```rust
2048 /// use url::{Url, ParseError};
2049 ///
2050 /// # fn run() -> Result<(), ParseError> {
2051 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2052 /// let result = url.set_username("user1");
2053 /// assert!(result.is_ok());
2054 /// assert_eq!(url.username(), "user1");
2055 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2056 /// # Ok(())
2057 /// # }
2058 /// # run().unwrap();
2059 /// ```
2060 #[allow(clippy::result_unit_err)]
set_username(&mut self, username: &str) -> Result<(), ()>2061 pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2062 // has_host implies !cannot_be_a_base
2063 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2064 return Err(());
2065 }
2066 let username_start = self.scheme_end + 3;
2067 debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2068 if self.slice(username_start..self.username_end) == username {
2069 return Ok(());
2070 }
2071 let after_username = self.slice(self.username_end..).to_owned();
2072 self.serialization.truncate(username_start as usize);
2073 self.serialization
2074 .extend(utf8_percent_encode(username, USERINFO));
2075
2076 let mut removed_bytes = self.username_end;
2077 self.username_end = to_u32(self.serialization.len()).unwrap();
2078 let mut added_bytes = self.username_end;
2079
2080 let new_username_is_empty = self.username_end == username_start;
2081 match (new_username_is_empty, after_username.chars().next()) {
2082 (true, Some('@')) => {
2083 removed_bytes += 1;
2084 self.serialization.push_str(&after_username[1..]);
2085 }
2086 (false, Some('@')) | (_, Some(':')) | (true, _) => {
2087 self.serialization.push_str(&after_username);
2088 }
2089 (false, _) => {
2090 added_bytes += 1;
2091 self.serialization.push('@');
2092 self.serialization.push_str(&after_username);
2093 }
2094 }
2095
2096 let adjust = |index: &mut u32| {
2097 *index -= removed_bytes;
2098 *index += added_bytes;
2099 };
2100 adjust(&mut self.host_start);
2101 adjust(&mut self.host_end);
2102 adjust(&mut self.path_start);
2103 if let Some(ref mut index) = self.query_start {
2104 adjust(index)
2105 }
2106 if let Some(ref mut index) = self.fragment_start {
2107 adjust(index)
2108 }
2109 Ok(())
2110 }
2111
2112 /// Change this URL’s scheme.
2113 ///
2114 /// Do nothing and return `Err` under the following circumstances:
2115 ///
2116 /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2117 /// * If this URL is cannot-be-a-base and the new scheme is one of
2118 /// `http`, `https`, `ws`, `wss` or `ftp`
2119 /// * If either the old or new scheme is `http`, `https`, `ws`,
2120 /// `wss` or `ftp` and the other is not one of these
2121 /// * If the new scheme is `file` and this URL includes credentials
2122 /// or has a non-null port
2123 /// * If this URL's scheme is `file` and its host is empty or null
2124 ///
2125 /// See also [the URL specification's section on legal scheme state
2126 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2127 ///
2128 /// # Examples
2129 ///
2130 /// Change the URL’s scheme from `https` to `foo`:
2131 ///
2132 /// ```
2133 /// use url::Url;
2134 /// # use url::ParseError;
2135 ///
2136 /// # fn run() -> Result<(), ParseError> {
2137 /// let mut url = Url::parse("https://example.net")?;
2138 /// let result = url.set_scheme("http");
2139 /// assert_eq!(url.as_str(), "http://example.net/");
2140 /// assert!(result.is_ok());
2141 /// # Ok(())
2142 /// # }
2143 /// # run().unwrap();
2144 /// ```
2145 /// Change the URL’s scheme from `foo` to `bar`:
2146 ///
2147 /// ```
2148 /// use url::Url;
2149 /// # use url::ParseError;
2150 ///
2151 /// # fn run() -> Result<(), ParseError> {
2152 /// let mut url = Url::parse("foo://example.net")?;
2153 /// let result = url.set_scheme("bar");
2154 /// assert_eq!(url.as_str(), "bar://example.net");
2155 /// assert!(result.is_ok());
2156 /// # Ok(())
2157 /// # }
2158 /// # run().unwrap();
2159 /// ```
2160 ///
2161 /// Cannot change URL’s scheme from `https` to `foõ`:
2162 ///
2163 /// ```
2164 /// use url::Url;
2165 /// # use url::ParseError;
2166 ///
2167 /// # fn run() -> Result<(), ParseError> {
2168 /// let mut url = Url::parse("https://example.net")?;
2169 /// let result = url.set_scheme("foõ");
2170 /// assert_eq!(url.as_str(), "https://example.net/");
2171 /// assert!(result.is_err());
2172 /// # Ok(())
2173 /// # }
2174 /// # run().unwrap();
2175 /// ```
2176 ///
2177 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2178 ///
2179 /// ```
2180 /// use url::Url;
2181 /// # use url::ParseError;
2182 ///
2183 /// # fn run() -> Result<(), ParseError> {
2184 /// let mut url = Url::parse("mailto:rms@example.net")?;
2185 /// let result = url.set_scheme("https");
2186 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2187 /// assert!(result.is_err());
2188 /// # Ok(())
2189 /// # }
2190 /// # run().unwrap();
2191 /// ```
2192 /// Cannot change the URL’s scheme from `foo` to `https`:
2193 ///
2194 /// ```
2195 /// use url::Url;
2196 /// # use url::ParseError;
2197 ///
2198 /// # fn run() -> Result<(), ParseError> {
2199 /// let mut url = Url::parse("foo://example.net")?;
2200 /// let result = url.set_scheme("https");
2201 /// assert_eq!(url.as_str(), "foo://example.net");
2202 /// assert!(result.is_err());
2203 /// # Ok(())
2204 /// # }
2205 /// # run().unwrap();
2206 /// ```
2207 /// Cannot change the URL’s scheme from `http` to `foo`:
2208 ///
2209 /// ```
2210 /// use url::Url;
2211 /// # use url::ParseError;
2212 ///
2213 /// # fn run() -> Result<(), ParseError> {
2214 /// let mut url = Url::parse("http://example.net")?;
2215 /// let result = url.set_scheme("foo");
2216 /// assert_eq!(url.as_str(), "http://example.net/");
2217 /// assert!(result.is_err());
2218 /// # Ok(())
2219 /// # }
2220 /// # run().unwrap();
2221 /// ```
2222 #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
set_scheme(&mut self, scheme: &str) -> Result<(), ()>2223 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2224 let mut parser = Parser::for_setter(String::new());
2225 let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
2226 let new_scheme_type = SchemeType::from(&parser.serialization);
2227 let old_scheme_type = SchemeType::from(self.scheme());
2228 // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2229 if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2230 // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2231 (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2232 // If url includes credentials or has a non-null port, and buffer is "file", then return.
2233 // If url’s scheme is "file" and its host is an empty host or null, then return.
2234 (new_scheme_type.is_file() && self.has_authority())
2235 {
2236 return Err(());
2237 }
2238
2239 if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2240 return Err(());
2241 }
2242 let old_scheme_end = self.scheme_end;
2243 let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2244 let adjust = |index: &mut u32| {
2245 *index -= old_scheme_end;
2246 *index += new_scheme_end;
2247 };
2248
2249 self.scheme_end = new_scheme_end;
2250 adjust(&mut self.username_end);
2251 adjust(&mut self.host_start);
2252 adjust(&mut self.host_end);
2253 adjust(&mut self.path_start);
2254 if let Some(ref mut index) = self.query_start {
2255 adjust(index)
2256 }
2257 if let Some(ref mut index) = self.fragment_start {
2258 adjust(index)
2259 }
2260
2261 parser.serialization.push_str(self.slice(old_scheme_end..));
2262 self.serialization = parser.serialization;
2263
2264 // Update the port so it can be removed
2265 // If it is the scheme's default
2266 // we don't mind it silently failing
2267 // if there was no port in the first place
2268 let previous_port = self.port();
2269 let _ = self.set_port(previous_port);
2270
2271 Ok(())
2272 }
2273
2274 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2275 ///
2276 /// This returns `Err` if the given path is not absolute or,
2277 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2278 ///
2279 /// # Examples
2280 ///
2281 /// On Unix-like platforms:
2282 ///
2283 /// ```
2284 /// # if cfg!(unix) {
2285 /// use url::Url;
2286 ///
2287 /// # fn run() -> Result<(), ()> {
2288 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2289 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2290 ///
2291 /// let url = Url::from_file_path("../foo.txt");
2292 /// assert!(url.is_err());
2293 ///
2294 /// let url = Url::from_file_path("https://google.com/");
2295 /// assert!(url.is_err());
2296 /// # Ok(())
2297 /// # }
2298 /// # run().unwrap();
2299 /// # }
2300 /// ```
2301 #[cfg(any(unix, windows, target_os = "redox"))]
2302 #[allow(clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2303 pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2304 let mut serialization = "file://".to_owned();
2305 let host_start = serialization.len() as u32;
2306 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2307 Ok(Url {
2308 serialization,
2309 scheme_end: "file".len() as u32,
2310 username_end: host_start,
2311 host_start,
2312 host_end,
2313 host,
2314 port: None,
2315 path_start: host_end,
2316 query_start: None,
2317 fragment_start: None,
2318 })
2319 }
2320
2321 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2322 ///
2323 /// This returns `Err` if the given path is not absolute or,
2324 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2325 ///
2326 /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2327 /// so that the entire path is considered when using this URL as a base URL.
2328 ///
2329 /// For example:
2330 ///
2331 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2332 /// as the base URL is `file:///var/www/index.html`
2333 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2334 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2335 ///
2336 /// Note that `std::path` does not consider trailing slashes significant
2337 /// and usually does not include them (e.g. in `Path::parent()`).
2338 #[cfg(any(unix, windows, target_os = "redox"))]
2339 #[allow(clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2340 pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2341 let mut url = Url::from_file_path(path)?;
2342 if !url.serialization.ends_with('/') {
2343 url.serialization.push('/')
2344 }
2345 Ok(url)
2346 }
2347
/// Serialize with Serde using the internal representation of the `Url` struct.
///
/// The serialization and every stored offset are written out as one tuple.
/// The corresponding `deserialize_internal` method sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Exhaustive destructuring (no `..`) means that adding or removing a field
    // of `Url` stops this method from compiling until it is updated.
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2389
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`. It sacrifices some
/// invariant-checking for speed, compared to the `Deserialize` trait impl:
/// the invariants are only checked when debug assertions are enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    // The field order must match the tuple written by `serialize_internal`.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    if cfg!(debug_assertions) {
        if let Err(reason) = url.check_invariants() {
            let expected: &str = &reason;
            return Err(Error::invalid_value(Unexpected::Other("value"), &expected));
        }
    }
    Ok(url)
}
2435
2436 /// Assuming the URL is in the `file` scheme or similar,
2437 /// convert its path to an absolute `std::path::Path`.
2438 ///
2439 /// **Note:** This does not actually check the URL’s `scheme`,
2440 /// and may give nonsensical results for other schemes.
2441 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2442 ///
2443 /// ```
2444 /// # use url::Url;
2445 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2446 /// let path = url.to_file_path();
2447 /// ```
2448 ///
2449 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2450 /// `file:` URLs may have a non-local host),
2451 /// or if `Path::new_opt()` returns `None`.
2452 /// (That is, if the percent-decoded path contains a NUL byte or,
2453 /// for a Windows path, is not UTF-8.)
2454 #[inline]
2455 #[cfg(any(unix, windows, target_os = "redox"))]
2456 #[allow(clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2457 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2458 if let Some(segments) = self.path_segments() {
2459 let host = match self.host() {
2460 None | Some(Host::Domain("localhost")) => None,
2461 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2462 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2463 }
2464 _ => return Err(()),
2465 };
2466
2467 return file_url_segments_to_pathbuf(host, segments);
2468 }
2469 Err(())
2470 }
2471
2472 // Private helper methods:
2473
2474 #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2475 fn slice<R>(&self, range: R) -> &str
2476 where
2477 R: RangeArg,
2478 {
2479 range.slice_of(&self.serialization)
2480 }
2481
2482 #[inline]
byte_at(&self, i: u32) -> u82483 fn byte_at(&self, i: u32) -> u8 {
2484 self.serialization.as_bytes()[i as usize]
2485 }
2486 }
2487
2488 /// Parse a string as an URL, without a base URL or encoding override.
2489 impl str::FromStr for Url {
2490 type Err = ParseError;
2491
2492 #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2493 fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2494 Url::parse(input)
2495 }
2496 }
2497
2498 impl<'a> TryFrom<&'a str> for Url {
2499 type Error = ParseError;
2500
try_from(s: &'a str) -> Result<Self, Self::Error>2501 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2502 Url::parse(s)
2503 }
2504 }
2505
2506 /// Display the serialization of this URL.
2507 impl fmt::Display for Url {
2508 #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2509 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2510 fmt::Display::fmt(&self.serialization, formatter)
2511 }
2512 }
2513
2514 /// String converstion.
2515 impl From<Url> for String {
from(value: Url) -> String2516 fn from(value: Url) -> String {
2517 value.serialization
2518 }
2519 }
2520
2521 /// Debug the serialization of this URL.
2522 impl fmt::Debug for Url {
2523 #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2524 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2525 formatter
2526 .debug_struct("Url")
2527 .field("scheme", &self.scheme())
2528 .field("cannot_be_a_base", &self.cannot_be_a_base())
2529 .field("username", &self.username())
2530 .field("password", &self.password())
2531 .field("host", &self.host())
2532 .field("port", &self.port())
2533 .field("path", &self.path())
2534 .field("query", &self.query())
2535 .field("fragment", &self.fragment())
2536 .finish()
2537 }
2538 }
2539
/// URLs compare like their serialization.
///
/// Marker impl: equality itself is defined by the `PartialEq` impl, which
/// compares the serialized strings.
impl Eq for Url {}
2542
2543 /// URLs compare like their serialization.
2544 impl PartialEq for Url {
2545 #[inline]
eq(&self, other: &Self) -> bool2546 fn eq(&self, other: &Self) -> bool {
2547 self.serialization == other.serialization
2548 }
2549 }
2550
2551 /// URLs compare like their serialization.
2552 impl Ord for Url {
2553 #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2554 fn cmp(&self, other: &Self) -> cmp::Ordering {
2555 self.serialization.cmp(&other.serialization)
2556 }
2557 }
2558
2559 /// URLs compare like their serialization.
2560 impl PartialOrd for Url {
2561 #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2562 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2563 self.serialization.partial_cmp(&other.serialization)
2564 }
2565 }
2566
2567 /// URLs hash like their serialization.
2568 impl hash::Hash for Url {
2569 #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2570 fn hash<H>(&self, state: &mut H)
2571 where
2572 H: hash::Hasher,
2573 {
2574 hash::Hash::hash(&self.serialization, state)
2575 }
2576 }
2577
2578 /// Return the serialization of this URL.
2579 impl AsRef<str> for Url {
2580 #[inline]
as_ref(&self) -> &str2581 fn as_ref(&self) -> &str {
2582 &self.serialization
2583 }
2584 }
2585
/// A range of `u32` byte offsets that can be used to slice a `&str`.
///
/// Implemented for `a..b`, `a..` and `..b`. Slicing panics exactly like
/// ordinary string indexing when the offsets are out of bounds or do not
/// fall on character boundaries.
trait RangeArg {
    /// Return the sub-slice of `s` covered by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2610
/// Serializes this URL into a `serde` stream, as the string form of the URL.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
2623
/// Deserializes this URL from a `serde` stream.
///
/// The input must be a string; it is run through `Url::parse`, and a parse
/// failure is reported as an invalid-value error carrying the parser's message.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        /// Visitor that accepts a string and parses it as an URL.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => {
                        let message = err.to_string();
                        Err(Error::invalid_value(
                            Unexpected::Str(s),
                            &message.as_str(),
                        ))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2658
2659 #[cfg(any(unix, target_os = "redox"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2660 fn path_to_file_url_segments(
2661 path: &Path,
2662 serialization: &mut String,
2663 ) -> Result<(u32, HostInternal), ()> {
2664 use std::os::unix::prelude::OsStrExt;
2665 if !path.is_absolute() {
2666 return Err(());
2667 }
2668 let host_end = to_u32(serialization.len()).unwrap();
2669 let mut empty = true;
2670 // skip the root component
2671 for component in path.components().skip(1) {
2672 empty = false;
2673 serialization.push('/');
2674 serialization.extend(percent_encode(
2675 component.as_os_str().as_bytes(),
2676 PATH_SEGMENT,
2677 ));
2678 }
2679 if empty {
2680 // An URL’s path must not be empty.
2681 serialization.push('/');
2682 }
2683 Ok((host_end, HostInternal::None))
2684 }
2685
/// Windows variant of `path_to_file_url_segments`.
///
/// Forwards to `path_to_file_url_segments_windows`, which is compiled on
/// every platform.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2693
2694 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2695 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2696 fn path_to_file_url_segments_windows(
2697 path: &Path,
2698 serialization: &mut String,
2699 ) -> Result<(u32, HostInternal), ()> {
2700 use std::path::{Component, Prefix};
2701 if !path.is_absolute() {
2702 return Err(());
2703 }
2704 let mut components = path.components();
2705
2706 let host_start = serialization.len() + 1;
2707 let host_end;
2708 let host_internal;
2709 match components.next() {
2710 Some(Component::Prefix(ref p)) => match p.kind() {
2711 Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2712 host_end = to_u32(serialization.len()).unwrap();
2713 host_internal = HostInternal::None;
2714 serialization.push('/');
2715 serialization.push(letter as char);
2716 serialization.push(':');
2717 }
2718 Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2719 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2720 write!(serialization, "{}", host).unwrap();
2721 host_end = to_u32(serialization.len()).unwrap();
2722 host_internal = host.into();
2723 serialization.push('/');
2724 let share = share.to_str().ok_or(())?;
2725 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2726 }
2727 _ => return Err(()),
2728 },
2729
2730 _ => return Err(()),
2731 }
2732
2733 let mut path_only_has_prefix = true;
2734 for component in components {
2735 if component == Component::RootDir {
2736 continue;
2737 }
2738 path_only_has_prefix = false;
2739 // FIXME: somehow work with non-unicode?
2740 let component = component.as_os_str().to_str().ok_or(())?;
2741 serialization.push('/');
2742 serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2743 }
2744 // A windows drive letter must end with a slash.
2745 if serialization.len() > host_start
2746 && parser::is_windows_drive_letter(&serialization[host_start..])
2747 && path_only_has_prefix
2748 {
2749 serialization.push('/');
2750 }
2751 Ok((host_end, host_internal))
2752 }
2753
2754 #[cfg(any(unix, target_os = "redox"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2755 fn file_url_segments_to_pathbuf(
2756 host: Option<&str>,
2757 segments: str::Split<'_, char>,
2758 ) -> Result<PathBuf, ()> {
2759 use std::ffi::OsStr;
2760 use std::os::unix::prelude::OsStrExt;
2761
2762 if host.is_some() {
2763 return Err(());
2764 }
2765
2766 let mut bytes = if cfg!(target_os = "redox") {
2767 b"file:".to_vec()
2768 } else {
2769 Vec::new()
2770 };
2771 for segment in segments {
2772 bytes.push(b'/');
2773 bytes.extend(percent_decode(segment.as_bytes()));
2774 }
2775 // A windows drive letter must end with a slash.
2776 if bytes.len() > 2
2777 && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
2778 && matches!(bytes[bytes.len() - 1], b':' | b'|')
2779 {
2780 bytes.push(b'/');
2781 }
2782 let os_str = OsStr::from_bytes(&bytes);
2783 let path = PathBuf::from(os_str);
2784 debug_assert!(
2785 path.is_absolute(),
2786 "to_file_path() failed to produce an absolute Path"
2787 );
2788 Ok(path)
2789 }
2790
/// Windows variant of `file_url_segments_to_pathbuf`.
///
/// Forwards to `file_url_segments_to_pathbuf_windows`, which is compiled on
/// every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2798
2799 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2800 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2801 fn file_url_segments_to_pathbuf_windows(
2802 host: Option<&str>,
2803 mut segments: str::Split<'_, char>,
2804 ) -> Result<PathBuf, ()> {
2805 let mut string = if let Some(host) = host {
2806 r"\\".to_owned() + host
2807 } else {
2808 let first = segments.next().ok_or(())?;
2809
2810 match first.len() {
2811 2 => {
2812 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2813 return Err(());
2814 }
2815
2816 first.to_owned()
2817 }
2818
2819 4 => {
2820 if !first.starts_with(parser::ascii_alpha) {
2821 return Err(());
2822 }
2823 let bytes = first.as_bytes();
2824 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2825 return Err(());
2826 }
2827
2828 first[0..1].to_owned() + ":"
2829 }
2830
2831 _ => return Err(()),
2832 }
2833 };
2834
2835 for segment in segments {
2836 string.push('\\');
2837
2838 // Currently non-unicode windows paths cannot be represented
2839 match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2840 Ok(s) => string.push_str(&s),
2841 Err(..) => return Err(()),
2842 }
2843 }
2844 let path = PathBuf::from(string);
2845 debug_assert!(
2846 path.is_absolute(),
2847 "to_file_path() failed to produce an absolute Path"
2848 );
2849 Ok(path)
2850 }
2851
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited; taken out (left as `None`) by `finish` or on drop.
    url: Option<&'a mut Url>,
    // Fragment handed back to the URL via `restore_already_parsed_fragment`
    // when editing ends.
    fragment: Option<String>,
}
2858
2859 // `as_mut_string` string here exposes the internal serialization of an `Url`,
2860 // which should not be exposed to users.
2861 // We achieve that by not giving users direct access to `UrlQuery`:
2862 // * Its fields are private
2863 // (and so can not be constructed with struct literal syntax outside of this crate),
2864 // * It has no constructor
2865 // * It is only visible (on the type level) to users in the return type of
2866 // `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
2867 // * `Serializer` keeps its target in a private field
2868 // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
2869 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String2870 fn as_mut_string(&mut self) -> &mut String {
2871 &mut self.url.as_mut().unwrap().serialization
2872 }
2873
finish(mut self) -> &'a mut Url2874 fn finish(mut self) -> &'a mut Url {
2875 let url = self.url.take().unwrap();
2876 url.restore_already_parsed_fragment(self.fragment.take());
2877 url
2878 }
2879
2880 type Finished = &'a mut Url;
2881 }
2882
2883 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)2884 fn drop(&mut self) {
2885 if let Some(url) = self.url.take() {
2886 url.restore_already_parsed_fragment(self.fragment.take())
2887 }
2888 }
2889 }
2890