• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! This Rust crate implements IDNA
10 //! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
11 //!
12 //! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
13 //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
14 //! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
15 //!
16 //! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
17 //!
18 //! > Initially, domain names were restricted to ASCII characters.
19 //! > A system was introduced in 2003 for internationalized domain names (IDN).
20 //! > This system is called Internationalizing Domain Names for Applications,
21 //! > or IDNA2003 for short.
22 //! > This mechanism supports IDNs by means of a client software transformation
23 //! > into a format known as Punycode.
24 //! > A revision of IDNA was approved in 2010 (IDNA2008).
25 //! > This revision has a number of incompatibilities with IDNA2003.
26 //! >
27 //! > The incompatibilities force implementers of client software,
28 //! > such as browsers and emailers,
29 //! > to face difficult choices during the transition period
30 //! > as registries shift from IDNA2003 to IDNA2008.
31 //! > This document specifies a mechanism
32 //! > that minimizes the impact of this transition for client software,
33 //! > allowing client software to access domains that are valid under either system.
34 
35 #[cfg(test)]
36 #[macro_use]
37 extern crate assert_matches;
38 
39 pub mod punycode;
40 mod uts46;
41 
42 pub use crate::uts46::{Config, Errors, Idna};
43 
44 /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
45 ///
46 /// Return the ASCII representation a domain name,
47 /// normalizing characters (upper-case to lower-case and other kinds of equivalence)
48 /// and using Punycode as necessary.
49 ///
50 /// This process may fail.
domain_to_ascii(domain: &str) -> Result<String, uts46::Errors>51 pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
52     Config::default().to_ascii(domain)
53 }
54 
55 /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm,
56 /// with the `beStrict` flag set.
domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors>57 pub fn domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors> {
58     Config::default()
59         .use_std3_ascii_rules(true)
60         .verify_dns_length(true)
61         .to_ascii(domain)
62 }
63 
64 /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
65 ///
66 /// Return the Unicode representation of a domain name,
67 /// normalizing characters (upper-case to lower-case and other kinds of equivalence)
68 /// and decoding Punycode as necessary.
69 ///
70 /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
71 /// but always returns a string for the mapped domain.
domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>)72 pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
73     Config::default().to_unicode(domain)
74 }
75