1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 //! Language Identifier and Locale contains a set of subtags 6 //! which represent different fields of the structure. 7 //! 8 //! * [`Language`] is the only mandatory field, which when empty, 9 //! takes the value `und`. 10 //! * [`Script`] is an optional field representing the written script used by the locale. 11 //! * [`Region`] is the region used by the locale. 12 //! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the 13 //! variant adjustments used by the locale. 14 //! 15 //! Subtags can be used in isolation, and all basic operations such as parsing, syntax normalization 16 //! and serialization are supported on each individual subtag, but most commonly 17 //! they are used to construct a [`LanguageIdentifier`] instance. 18 //! 19 //! [`Variants`] is a special structure which contains a list of [`Variant`] subtags. 20 //! It is wrapped around to allow for sorting and deduplication of variants, which 21 //! is one of the required steps of language identifier and locale syntax normalization. 22 //! 23 //! # Examples 24 //! 25 //! ``` 26 //! use icu::locale::subtags::{Language, Region, Script, Variant}; 27 //! 28 //! let language: Language = 29 //! "en".parse().expect("Failed to parse a language subtag."); 30 //! let script: Script = 31 //! "arab".parse().expect("Failed to parse a script subtag."); 32 //! let region: Region = 33 //! "cn".parse().expect("Failed to parse a region subtag."); 34 //! let variant: Variant = 35 //! "MacOS".parse().expect("Failed to parse a variant subtag."); 36 //! 37 //! assert_eq!(language.as_str(), "en"); 38 //! assert_eq!(script.as_str(), "Arab"); 39 //! assert_eq!(region.as_str(), "CN"); 40 //! assert_eq!(variant.as_str(), "macos"); 41 //! ``` 42 //! 43 //! `Notice`: The subtags are normalized on parsing. That means 44 //! that all operations work on a normalized version of the subtag 45 //! and serialization is very cheap. 46 //! 47 //! [`LanguageIdentifier`]: super::LanguageIdentifier 48 mod language; 49 mod region; 50 mod script; 51 mod variant; 52 mod variants; 53 54 #[doc(inline)] 55 pub use language::{language, Language}; 56 #[doc(inline)] 57 pub use region::{region, Region}; 58 #[doc(inline)] 59 pub use script::{script, Script}; 60 #[doc(inline)] 61 pub use variant::{variant, Variant}; 62 pub use variants::Variants; 63 64 impl_tinystr_subtag!( 65 /// A generic subtag. 66 /// 67 /// The subtag has to be an ASCII alphanumerical string no shorter than 68 /// two characters and no longer than eight. 69 /// 70 /// # Examples 71 /// 72 /// ``` 73 /// use icu::locale::subtags::Subtag; 74 /// 75 /// let subtag1: Subtag = "Foo".parse() 76 /// .expect("Failed to parse a Subtag."); 77 /// 78 /// assert_eq!(subtag1.as_str(), "foo"); 79 /// ``` 80 Subtag, 81 subtags, 82 subtag, 83 subtags_subtag, 84 2..=8, 85 s, 86 s.is_ascii_alphanumeric(), 87 s.to_ascii_lowercase(), 88 s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), 89 InvalidSubtag, 90 ["foo12"], 91 ["f", "toolooong"], 92 ); 93 94 #[allow(clippy::len_without_is_empty)] 95 impl Subtag { 96 #[allow(dead_code)] valid_key(v: &[u8]) -> bool97 pub(crate) const fn valid_key(v: &[u8]) -> bool { 98 2 <= v.len() && v.len() <= 8 99 } 100 101 /// Returns the length of `self`. 102 /// 103 /// # Examples 104 /// 105 /// ``` 106 /// use icu::locale::subtags::subtag; 107 /// let s = subtag!("foo"); 108 /// assert_eq!(s.len(), 3); 109 /// ``` len(&self) -> usize110 pub fn len(&self) -> usize { 111 self.0.len() 112 } 113 114 #[doc(hidden)] from_tinystr_unvalidated(input: tinystr::TinyAsciiStr<8>) -> Self115 pub fn from_tinystr_unvalidated(input: tinystr::TinyAsciiStr<8>) -> Self { 116 Self(input) 117 } 118 119 #[doc(hidden)] as_tinystr(&self) -> tinystr::TinyAsciiStr<8>120 pub fn as_tinystr(&self) -> tinystr::TinyAsciiStr<8> { 121 self.0 122 } 123 124 #[allow(dead_code)] to_ascii_lowercase(self) -> Self125 pub(crate) fn to_ascii_lowercase(self) -> Self { 126 Self(self.0.to_ascii_lowercase()) 127 } 128 } 129 130 impl<const N: usize> TryFrom<tinystr::TinyAsciiStr<N>> for Subtag { 131 type Error = crate::parser::errors::ParseError; 132 try_from(value: tinystr::TinyAsciiStr<N>) -> Result<Self, Self::Error>133 fn try_from(value: tinystr::TinyAsciiStr<N>) -> Result<Self, Self::Error> { 134 Self::try_from_str(&value) 135 } 136 } 137 138 impl PartialEq<str> for Subtag { eq(&self, other: &str) -> bool139 fn eq(&self, other: &str) -> bool { 140 self.0 == other 141 } 142 } 143 144 #[cfg(test)] 145 mod tests { 146 use super::*; 147 use tinystr::tinystr; 148 149 #[test] test_subtag()150 fn test_subtag() { 151 let subtag = subtag!("foo"); 152 assert_eq!(subtag.as_str(), "foo"); 153 } 154 155 #[test] test_subtag_from_tinystr()156 fn test_subtag_from_tinystr() { 157 let subtag = Subtag::try_from(tinystr!(3, "foo")); 158 assert!(subtag.is_ok()); 159 160 let subtag = Subtag::try_from(tinystr!(1, "f")); 161 assert!(subtag.is_err()); 162 } 163 } 164