• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Transform Extensions provide information on content transformations in a given locale.
//!
//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
//! optional [`LanguageIdentifier`].
//!
//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
//!
//! # Examples
//!
//! ```
//! use icu::locale::extensions::transform::{Fields, Key, Transform, Value};
//! use icu::locale::{LanguageIdentifier, Locale};
//!
//! let mut loc: Locale =
//!     "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
//!
//! let lang: LanguageIdentifier =
//!     "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
//!
//! let key: Key = "h0".parse().expect("Parsing key failed.");
//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
//!
//! assert_eq!(loc.extensions.transform.lang, Some(lang));
//! assert!(loc.extensions.transform.fields.contains_key(&key));
//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
//!
//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
//! ```
33 mod fields;
34 mod key;
35 mod value;
36 
37 use core::cmp::Ordering;
38 #[cfg(feature = "alloc")]
39 use core::str::FromStr;
40 
41 pub use fields::Fields;
42 #[doc(inline)]
43 pub use key::{key, Key};
44 pub use value::Value;
45 
46 #[cfg(feature = "alloc")]
47 use super::ExtensionType;
48 #[cfg(feature = "alloc")]
49 use crate::parser::SubtagIterator;
50 #[cfg(feature = "alloc")]
51 use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode};
52 #[cfg(feature = "alloc")]
53 use crate::shortvec::ShortBoxSlice;
54 use crate::subtags;
55 #[cfg(feature = "alloc")]
56 use crate::subtags::Language;
57 use crate::LanguageIdentifier;
58 #[cfg(feature = "alloc")]
59 use litemap::LiteMap;
60 
/// Singleton subtag that introduces the transform extension, as a string slice.
///
/// Emitted as the first subtag by `Transform::for_each_subtag_str` when the
/// caller asks for the extension marker.
pub(crate) const TRANSFORM_EXT_STR: &str = "t";
/// The same singleton as a single `char`.
///
/// Written as the first character by the `Writeable` implementation of `Transform`.
pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
63 
64 /// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
65 /// Identifier`] specification.
66 ///
67 /// Transform extension carries information about source language or script of
68 /// transformed content, including content that has been transliterated, transcribed,
69 /// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
70 ///
71 /// # Examples
72 ///
73 /// ```
74 /// use icu::locale::extensions::transform::{Key, Value};
75 /// use icu::locale::{LanguageIdentifier, Locale};
76 ///
77 /// let mut loc: Locale =
78 ///     "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
79 ///
80 /// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
81 ///
82 /// assert_eq!(loc.extensions.transform.lang, Some(en_us));
83 /// let key: Key = "h0".parse().expect("Parsing key failed.");
84 /// let value: Value = "hybrid".parse().expect("Parsing value failed.");
85 /// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
86 /// ```
87 /// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
88 /// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
89 /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
90 #[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
91 #[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
92 pub struct Transform {
93     /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
94     pub lang: Option<LanguageIdentifier>,
95     /// The key-value pairs present in this locale extension, with each extension key subtag
96     /// associated to its provided value subtag.
97     pub fields: Fields,
98 }
99 
100 impl Transform {
101     /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
102     ///
103     /// # Examples
104     ///
105     /// ```
106     /// use icu::locale::extensions::transform::Transform;
107     ///
108     /// assert_eq!(Transform::new(), Transform::default());
109     /// ```
110     #[inline]
new() -> Self111     pub const fn new() -> Self {
112         Self {
113             lang: None,
114             fields: Fields::new(),
115         }
116     }
117 
118     /// A constructor which takes a str slice, parses it and
119     /// produces a well-formed [`Transform`].
120     #[inline]
121     #[cfg(feature = "alloc")]
try_from_str(s: &str) -> Result<Self, ParseError>122     pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
123         Self::try_from_utf8(s.as_bytes())
124     }
125 
126     /// See [`Self::try_from_str`]
127     #[cfg(feature = "alloc")]
try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError>128     pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
129         let mut iter = SubtagIterator::new(code_units);
130 
131         let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
132         if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? {
133             return Self::try_from_iter(&mut iter);
134         }
135 
136         Err(ParseError::InvalidExtension)
137     }
138 
139     /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
140     ///
141     /// # Examples
142     ///
143     /// ```
144     /// use icu::locale::Locale;
145     ///
146     /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
147     ///
148     /// assert!(!loc.extensions.transform.is_empty());
149     /// ```
is_empty(&self) -> bool150     pub fn is_empty(&self) -> bool {
151         self.lang.is_none() && self.fields.is_empty()
152     }
153 
154     /// Clears the transform extension, effectively removing it from the locale.
155     ///
156     /// # Examples
157     ///
158     /// ```
159     /// use icu::locale::Locale;
160     ///
161     /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
162     /// loc.extensions.transform.clear();
163     /// assert_eq!(loc, "en-US".parse().unwrap());
164     /// ```
clear(&mut self)165     pub fn clear(&mut self) {
166         self.lang = None;
167         self.fields.clear();
168     }
169 
170     #[allow(clippy::type_complexity)]
as_tuple( &self, ) -> ( Option<( subtags::Language, Option<subtags::Script>, Option<subtags::Region>, &subtags::Variants, )>, &Fields, )171     pub(crate) fn as_tuple(
172         &self,
173     ) -> (
174         Option<(
175             subtags::Language,
176             Option<subtags::Script>,
177             Option<subtags::Region>,
178             &subtags::Variants,
179         )>,
180         &Fields,
181     ) {
182         (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
183     }
184 
185     /// Returns an ordering suitable for use in [`BTreeSet`].
186     ///
187     /// The ordering may or may not be equivalent to string ordering, and it
188     /// may or may not be stable across ICU4X releases.
189     ///
190     /// [`BTreeSet`]: alloc::collections::BTreeSet
total_cmp(&self, other: &Self) -> Ordering191     pub fn total_cmp(&self, other: &Self) -> Ordering {
192         self.as_tuple().cmp(&other.as_tuple())
193     }
194 
195     #[cfg(feature = "alloc")]
try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError>196     pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
197         let mut tlang = None;
198         let mut tfields = LiteMap::new();
199 
200         if let Some(subtag) = iter.peek() {
201             if Language::try_from_utf8(subtag).is_ok() {
202                 tlang = Some(parse_language_identifier_from_iter(
203                     iter,
204                     ParserMode::Partial,
205                 )?);
206             }
207         }
208 
209         let mut current_tkey = None;
210         let mut current_tvalue = ShortBoxSlice::new();
211         let mut has_current_tvalue = false;
212 
213         while let Some(subtag) = iter.peek() {
214             if let Some(tkey) = current_tkey {
215                 if let Ok(val) = Value::parse_subtag(subtag) {
216                     has_current_tvalue = true;
217                     if let Some(val) = val {
218                         current_tvalue.push(val);
219                     }
220                 } else {
221                     if !has_current_tvalue {
222                         return Err(ParseError::InvalidExtension);
223                     }
224                     tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
225                     current_tkey = None;
226                     current_tvalue = ShortBoxSlice::new();
227                     has_current_tvalue = false;
228                     continue;
229                 }
230             } else if let Ok(tkey) = Key::try_from_utf8(subtag) {
231                 current_tkey = Some(tkey);
232             } else {
233                 break;
234             }
235 
236             iter.next();
237         }
238 
239         if let Some(tkey) = current_tkey {
240             if !has_current_tvalue {
241                 return Err(ParseError::InvalidExtension);
242             }
243             tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
244         }
245 
246         if tlang.is_none() && tfields.is_empty() {
247             Err(ParseError::InvalidExtension)
248         } else {
249             Ok(Self {
250                 lang: tlang,
251                 fields: tfields.into(),
252             })
253         }
254     }
255 
for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>,256     pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
257     where
258         F: FnMut(&str) -> Result<(), E>,
259     {
260         if self.is_empty() {
261             return Ok(());
262         }
263         if with_ext {
264             f(TRANSFORM_EXT_STR)?;
265         }
266         if let Some(lang) = &self.lang {
267             lang.for_each_subtag_str_lowercased(f)?;
268         }
269         self.fields.for_each_subtag_str(f)
270     }
271 }
272 
/// Enables `"t-en-us-h0-hybrid".parse::<Transform>()`.
///
/// Parsing follows the same rules as [`Transform::try_from_str`].
#[cfg(feature = "alloc")]
impl FromStr for Transform {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_utf8(s.as_bytes())
    }
}
282 
283 writeable::impl_display_with_writeable!(Transform);
284 
285 impl writeable::Writeable for Transform {
write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result286     fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
287         if self.is_empty() {
288             return Ok(());
289         }
290         sink.write_char(TRANSFORM_EXT_CHAR)?;
291         if let Some(lang) = &self.lang {
292             sink.write_char('-')?;
293             lang.write_lowercased_to(sink)?;
294         }
295         if !self.fields.is_empty() {
296             sink.write_char('-')?;
297             writeable::Writeable::write_to(&self.fields, sink)?;
298         }
299         Ok(())
300     }
301 
writeable_length_hint(&self) -> writeable::LengthHint302     fn writeable_length_hint(&self) -> writeable::LengthHint {
303         if self.is_empty() {
304             return writeable::LengthHint::exact(0);
305         }
306         let mut result = writeable::LengthHint::exact(1);
307         if let Some(lang) = &self.lang {
308             result += writeable::Writeable::writeable_length_hint(lang) + 1;
309         }
310         if !self.fields.is_empty() {
311             result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
312         }
313         result
314     }
315 }
316 
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed extension round-trips through parsing and formatting,
    /// while a bare singleton without any content is rejected.
    #[test]
    fn test_transform_extension_fromstr() {
        let parsed = "t-en-us-h0-hybrid"
            .parse::<Transform>()
            .expect("Failed to parse Transform");
        assert_eq!(parsed.to_string(), "t-en-us-h0-hybrid");

        let bare_singleton = "t".parse::<Transform>();
        assert!(bare_singleton.is_err());
    }
}
332