1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 //! Transform Extensions provide information on content transformations in a given locale. 6 //! 7 //! The main struct for this extension is [`Transform`] which contains [`Fields`] and an 8 //! optional [`LanguageIdentifier`]. 9 //! 10 //! [`LanguageIdentifier`]: super::super::LanguageIdentifier 11 //! 12 //! # Examples 13 //! 14 //! ``` 15 //! use icu::locale::extensions::transform::{Fields, Key, Transform, Value}; 16 //! use icu::locale::{LanguageIdentifier, Locale}; 17 //! 18 //! let mut loc: Locale = 19 //! "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed."); 20 //! 21 //! let lang: LanguageIdentifier = 22 //! "es-AR".parse().expect("Parsing LanguageIdentifier failed."); 23 //! 24 //! let key: Key = "h0".parse().expect("Parsing key failed."); 25 //! let value: Value = "hybrid".parse().expect("Parsing value failed."); 26 //! 27 //! assert_eq!(loc.extensions.transform.lang, Some(lang)); 28 //! assert!(loc.extensions.transform.fields.contains_key(&key)); 29 //! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); 30 //! 31 //! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid"); 32 //! ``` 33 mod fields; 34 mod key; 35 mod value; 36 37 use core::cmp::Ordering; 38 #[cfg(feature = "alloc")] 39 use core::str::FromStr; 40 41 pub use fields::Fields; 42 #[doc(inline)] 43 pub use key::{key, Key}; 44 pub use value::Value; 45 46 #[cfg(feature = "alloc")] 47 use super::ExtensionType; 48 #[cfg(feature = "alloc")] 49 use crate::parser::SubtagIterator; 50 #[cfg(feature = "alloc")] 51 use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode}; 52 #[cfg(feature = "alloc")] 53 use crate::shortvec::ShortBoxSlice; 54 use crate::subtags; 55 #[cfg(feature = "alloc")] 56 use crate::subtags::Language; 57 use crate::LanguageIdentifier; 58 #[cfg(feature = "alloc")] 59 use litemap::LiteMap; 60 61 pub(crate) const TRANSFORM_EXT_CHAR: char = 't'; 62 pub(crate) const TRANSFORM_EXT_STR: &str = "t"; 63 64 /// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale 65 /// Identifier`] specification. 66 /// 67 /// Transform extension carries information about source language or script of 68 /// transformed content, including content that has been transliterated, transcribed, 69 /// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details). 70 /// 71 /// # Examples 72 /// 73 /// ``` 74 /// use icu::locale::extensions::transform::{Key, Value}; 75 /// use icu::locale::{LanguageIdentifier, Locale}; 76 /// 77 /// let mut loc: Locale = 78 /// "de-t-en-us-h0-hybrid".parse().expect("Parsing failed."); 79 /// 80 /// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed."); 81 /// 82 /// assert_eq!(loc.extensions.transform.lang, Some(en_us)); 83 /// let key: Key = "h0".parse().expect("Parsing key failed."); 84 /// let value: Value = "hybrid".parse().expect("Parsing value failed."); 85 /// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); 86 /// ``` 87 /// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension 88 /// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt 89 /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier 90 #[derive(Clone, PartialEq, Eq, Debug, Default, Hash)] 91 #[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure 92 pub struct Transform { 93 /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present. 94 pub lang: Option<LanguageIdentifier>, 95 /// The key-value pairs present in this locale extension, with each extension key subtag 96 /// associated to its provided value subtag. 97 pub fields: Fields, 98 } 99 100 impl Transform { 101 /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`. 102 /// 103 /// # Examples 104 /// 105 /// ``` 106 /// use icu::locale::extensions::transform::Transform; 107 /// 108 /// assert_eq!(Transform::new(), Transform::default()); 109 /// ``` 110 #[inline] new() -> Self111 pub const fn new() -> Self { 112 Self { 113 lang: None, 114 fields: Fields::new(), 115 } 116 } 117 118 /// A constructor which takes a str slice, parses it and 119 /// produces a well-formed [`Transform`]. 120 #[inline] 121 #[cfg(feature = "alloc")] try_from_str(s: &str) -> Result<Self, ParseError>122 pub fn try_from_str(s: &str) -> Result<Self, ParseError> { 123 Self::try_from_utf8(s.as_bytes()) 124 } 125 126 /// See [`Self::try_from_str`] 127 #[cfg(feature = "alloc")] try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError>128 pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> { 129 let mut iter = SubtagIterator::new(code_units); 130 131 let ext = iter.next().ok_or(ParseError::InvalidExtension)?; 132 if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? { 133 return Self::try_from_iter(&mut iter); 134 } 135 136 Err(ParseError::InvalidExtension) 137 } 138 139 /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`. 140 /// 141 /// # Examples 142 /// 143 /// ``` 144 /// use icu::locale::Locale; 145 /// 146 /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed."); 147 /// 148 /// assert!(!loc.extensions.transform.is_empty()); 149 /// ``` is_empty(&self) -> bool150 pub fn is_empty(&self) -> bool { 151 self.lang.is_none() && self.fields.is_empty() 152 } 153 154 /// Clears the transform extension, effectively removing it from the locale. 155 /// 156 /// # Examples 157 /// 158 /// ``` 159 /// use icu::locale::Locale; 160 /// 161 /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap(); 162 /// loc.extensions.transform.clear(); 163 /// assert_eq!(loc, "en-US".parse().unwrap()); 164 /// ``` clear(&mut self)165 pub fn clear(&mut self) { 166 self.lang = None; 167 self.fields.clear(); 168 } 169 170 #[allow(clippy::type_complexity)] as_tuple( &self, ) -> ( Option<( subtags::Language, Option<subtags::Script>, Option<subtags::Region>, &subtags::Variants, )>, &Fields, )171 pub(crate) fn as_tuple( 172 &self, 173 ) -> ( 174 Option<( 175 subtags::Language, 176 Option<subtags::Script>, 177 Option<subtags::Region>, 178 &subtags::Variants, 179 )>, 180 &Fields, 181 ) { 182 (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields) 183 } 184 185 /// Returns an ordering suitable for use in [`BTreeSet`]. 186 /// 187 /// The ordering may or may not be equivalent to string ordering, and it 188 /// may or may not be stable across ICU4X releases. 189 /// 190 /// [`BTreeSet`]: alloc::collections::BTreeSet total_cmp(&self, other: &Self) -> Ordering191 pub fn total_cmp(&self, other: &Self) -> Ordering { 192 self.as_tuple().cmp(&other.as_tuple()) 193 } 194 195 #[cfg(feature = "alloc")] try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError>196 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> { 197 let mut tlang = None; 198 let mut tfields = LiteMap::new(); 199 200 if let Some(subtag) = iter.peek() { 201 if Language::try_from_utf8(subtag).is_ok() { 202 tlang = Some(parse_language_identifier_from_iter( 203 iter, 204 ParserMode::Partial, 205 )?); 206 } 207 } 208 209 let mut current_tkey = None; 210 let mut current_tvalue = ShortBoxSlice::new(); 211 let mut has_current_tvalue = false; 212 213 while let Some(subtag) = iter.peek() { 214 if let Some(tkey) = current_tkey { 215 if let Ok(val) = Value::parse_subtag(subtag) { 216 has_current_tvalue = true; 217 if let Some(val) = val { 218 current_tvalue.push(val); 219 } 220 } else { 221 if !has_current_tvalue { 222 return Err(ParseError::InvalidExtension); 223 } 224 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue)); 225 current_tkey = None; 226 current_tvalue = ShortBoxSlice::new(); 227 has_current_tvalue = false; 228 continue; 229 } 230 } else if let Ok(tkey) = Key::try_from_utf8(subtag) { 231 current_tkey = Some(tkey); 232 } else { 233 break; 234 } 235 236 iter.next(); 237 } 238 239 if let Some(tkey) = current_tkey { 240 if !has_current_tvalue { 241 return Err(ParseError::InvalidExtension); 242 } 243 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue)); 244 } 245 246 if tlang.is_none() && tfields.is_empty() { 247 Err(ParseError::InvalidExtension) 248 } else { 249 Ok(Self { 250 lang: tlang, 251 fields: tfields.into(), 252 }) 253 } 254 } 255 for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>,256 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E> 257 where 258 F: FnMut(&str) -> Result<(), E>, 259 { 260 if self.is_empty() { 261 return Ok(()); 262 } 263 if with_ext { 264 f(TRANSFORM_EXT_STR)?; 265 } 266 if let Some(lang) = &self.lang { 267 lang.for_each_subtag_str_lowercased(f)?; 268 } 269 self.fields.for_each_subtag_str(f) 270 } 271 } 272 273 #[cfg(feature = "alloc")] 274 impl FromStr for Transform { 275 type Err = ParseError; 276 277 #[inline] from_str(s: &str) -> Result<Self, Self::Err>278 fn from_str(s: &str) -> Result<Self, Self::Err> { 279 Self::try_from_str(s) 280 } 281 } 282 283 writeable::impl_display_with_writeable!(Transform); 284 285 impl writeable::Writeable for Transform { write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result286 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { 287 if self.is_empty() { 288 return Ok(()); 289 } 290 sink.write_char(TRANSFORM_EXT_CHAR)?; 291 if let Some(lang) = &self.lang { 292 sink.write_char('-')?; 293 lang.write_lowercased_to(sink)?; 294 } 295 if !self.fields.is_empty() { 296 sink.write_char('-')?; 297 writeable::Writeable::write_to(&self.fields, sink)?; 298 } 299 Ok(()) 300 } 301 writeable_length_hint(&self) -> writeable::LengthHint302 fn writeable_length_hint(&self) -> writeable::LengthHint { 303 if self.is_empty() { 304 return writeable::LengthHint::exact(0); 305 } 306 let mut result = writeable::LengthHint::exact(1); 307 if let Some(lang) = &self.lang { 308 result += writeable::Writeable::writeable_length_hint(lang) + 1; 309 } 310 if !self.fields.is_empty() { 311 result += writeable::Writeable::writeable_length_hint(&self.fields) + 1; 312 } 313 result 314 } 315 } 316 317 #[cfg(test)] 318 mod tests { 319 use super::*; 320 321 #[test] test_transform_extension_fromstr()322 fn test_transform_extension_fromstr() { 323 let te: Transform = "t-en-us-h0-hybrid" 324 .parse() 325 .expect("Failed to parse Transform"); 326 assert_eq!(te.to_string(), "t-en-us-h0-hybrid"); 327 328 let te: Result<Transform, _> = "t".parse(); 329 assert!(te.is_err()); 330 } 331 } 332