• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::extensions::unicode as unicode_ext;
6 use crate::subtags::{Language, Region, Script, Subtag, Variant};
7 #[cfg(feature = "alloc")]
8 use crate::ParseError;
9 use crate::{LanguageIdentifier, Locale};
10 use core::cmp::Ordering;
11 use core::default::Default;
12 use core::fmt;
13 use core::hash::Hash;
14 #[cfg(feature = "alloc")]
15 use core::str::FromStr;
16 
17 /// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
18 ///
19 /// [`DataLocale`] contains less functionality than [`Locale`] but more than
20 /// [`LanguageIdentifier`] for better size and performance while still meeting
21 /// the needs of the ICU4X data pipeline.
22 ///
23 /// You can create a [`DataLocale`] from a borrowed [`Locale`], which is more
24 /// efficient than cloning the [`Locale`], but less efficient than converting an owned
25 /// [`Locale`]:
26 ///
27 /// ```
28 /// use icu_locale_core::locale;
29 /// use icu_provider::DataLocale;
30 ///
31 /// let locale1 = locale!("en-u-ca-buddhist");
32 /// let data_locale = DataLocale::from(&locale1);
33 /// ```
34 ///
35 /// [`DataLocale`] only supports `-u-sd` keywords, to reflect the current state of CLDR data
36 /// lookup and fallback. This may change in the future.
37 ///
38 /// ```
39 /// use icu_locale_core::{locale, Locale};
40 /// use icu_provider::DataLocale;
41 ///
42 /// let locale = "hi-IN-t-en-h0-hybrid-u-attr-ca-buddhist-sd-inas"
43 ///     .parse::<Locale>()
44 ///     .unwrap();
45 ///
46 /// assert_eq!(
47 ///     DataLocale::from(locale),
48 ///     DataLocale::from(locale!("hi-IN-u-sd-inas"))
49 /// );
50 /// ```
51 #[derive(Clone, Copy, Default, PartialEq, Hash, Eq)]
52 #[non_exhaustive]
53 pub struct DataLocale {
54     /// Language subtag
55     pub language: Language,
56     /// Script subtag
57     pub script: Option<Script>,
58     /// Region subtag
59     pub region: Option<Region>,
60     /// Variant subtag
61     pub variant: Option<Variant>,
62     /// Subivision (-u-sd-) subtag
63     pub subdivision: Option<Subtag>,
64 }
65 
66 impl DataLocale {
67     /// `const` version of `Default::default`
default() -> Self68     pub const fn default() -> Self {
69         DataLocale {
70             language: Language::UND,
71             script: None,
72             region: None,
73             variant: None,
74             subdivision: None,
75         }
76     }
77 }
78 
79 impl Default for &DataLocale {
default() -> Self80     fn default() -> Self {
81         static DEFAULT: DataLocale = DataLocale::default();
82         &DEFAULT
83     }
84 }
85 
86 impl fmt::Debug for DataLocale {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result87     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
88         write!(f, "DataLocale{{{self}}}")
89     }
90 }
91 
92 impl_writeable_for_each_subtag_str_no_test!(DataLocale, selff, selff.script.is_none() && selff.region.is_none() && selff.variant.is_none() && selff.subdivision.is_none() => selff.language.write_to_string());
93 
94 impl From<LanguageIdentifier> for DataLocale {
from(langid: LanguageIdentifier) -> Self95     fn from(langid: LanguageIdentifier) -> Self {
96         Self::from(&langid)
97     }
98 }
99 
100 impl From<Locale> for DataLocale {
from(locale: Locale) -> Self101     fn from(locale: Locale) -> Self {
102         Self::from(&locale)
103     }
104 }
105 
106 impl From<&LanguageIdentifier> for DataLocale {
from(langid: &LanguageIdentifier) -> Self107     fn from(langid: &LanguageIdentifier) -> Self {
108         Self {
109             language: langid.language,
110             script: langid.script,
111             region: langid.region,
112             variant: langid.variants.iter().copied().next(),
113             subdivision: None,
114         }
115     }
116 }
117 
118 impl From<&Locale> for DataLocale {
from(locale: &Locale) -> Self119     fn from(locale: &Locale) -> Self {
120         let mut r = Self::from(&locale.id);
121 
122         r.subdivision = locale
123             .extensions
124             .unicode
125             .keywords
126             .get(&unicode_ext::key!("sd"))
127             .and_then(|v| v.as_single_subtag().copied());
128         r
129     }
130 }
131 
#[cfg(feature = "alloc")]
impl FromStr for DataLocale {
    type Err = ParseError;

    /// Parses a [`DataLocale`]; equivalent to calling [`DataLocale::try_from_str`].
    #[inline]
    fn from_str(s: &str) -> Result<Self, ParseError> {
        DataLocale::try_from_str(s)
    }
}
140 
141 impl DataLocale {
142     #[inline]
143     /// Parses a [`DataLocale`].
144     #[cfg(feature = "alloc")]
try_from_str(s: &str) -> Result<Self, ParseError>145     pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
146         Self::try_from_utf8(s.as_bytes())
147     }
148 
149     /// Parses a [`DataLocale`] from a UTF-8 byte slice.
150     #[cfg(feature = "alloc")]
try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError>151     pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
152         let locale = Locale::try_from_utf8(code_units)?;
153         if locale.id.variants.len() > 1
154             || !locale.extensions.transform.is_empty()
155             || !locale.extensions.private.is_empty()
156             || !locale.extensions.other.is_empty()
157             || !locale.extensions.unicode.attributes.is_empty()
158         {
159             return Err(ParseError::InvalidExtension);
160         }
161 
162         let unicode_extensions_count = locale.extensions.unicode.keywords.iter().count();
163 
164         if unicode_extensions_count != 0
165             && (unicode_extensions_count != 1
166                 || !locale
167                     .extensions
168                     .unicode
169                     .keywords
170                     .contains_key(&unicode_ext::key!("sd")))
171         {
172             return Err(ParseError::InvalidExtension);
173         }
174 
175         Ok(locale.into())
176     }
177 
for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>,178     pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
179     where
180         F: FnMut(&str) -> Result<(), E>,
181     {
182         f(self.language.as_str())?;
183         if let Some(ref script) = self.script {
184             f(script.as_str())?;
185         }
186         if let Some(ref region) = self.region {
187             f(region.as_str())?;
188         }
189         if let Some(ref single_variant) = self.variant {
190             f(single_variant.as_str())?;
191         }
192         if let Some(ref subdivision) = self.subdivision {
193             f("u")?;
194             f("sd")?;
195             f(subdivision.as_str())?;
196         }
197         Ok(())
198     }
199 
as_tuple( &self, ) -> ( Language, Option<Script>, Option<Region>, Option<Variant>, Option<Subtag>, )200     fn as_tuple(
201         &self,
202     ) -> (
203         Language,
204         Option<Script>,
205         Option<Region>,
206         Option<Variant>,
207         Option<Subtag>,
208     ) {
209         (
210             self.language,
211             self.script,
212             self.region,
213             self.variant,
214             self.subdivision,
215         )
216     }
217 
218     /// Returns an ordering suitable for use in [`BTreeSet`].
219     ///
220     /// [`BTreeSet`]: alloc::collections::BTreeSet
total_cmp(&self, other: &Self) -> Ordering221     pub fn total_cmp(&self, other: &Self) -> Ordering {
222         self.as_tuple().cmp(&other.as_tuple())
223     }
224 
225     /// Compare this [`DataLocale`] with BCP-47 bytes.
226     ///
227     /// The return value is equivalent to what would happen if you first converted this
228     /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
229     ///
230     /// This function is case-sensitive and results in a *total order*, so it is appropriate for
231     /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
232     ///
233     /// # Examples
234     ///
235     /// ```
236     /// use core::cmp::Ordering;
237     /// use icu_provider::DataLocale;
238     ///
239     /// let bcp47_strings: &[&str] = &[
240     ///     "ca",
241     ///     "ca-ES",
242     ///     "ca-ES-u-sd-esct",
243     ///     "ca-ES-valencia",
244     ///     "cat",
245     ///     "pl-Latn-PL",
246     ///     "und",
247     ///     "und-fonipa",
248     ///     "zh",
249     /// ];
250     ///
251     /// for ab in bcp47_strings.windows(2) {
252     ///     let a = ab[0];
253     ///     let b = ab[1];
254     ///     assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
255     ///     let a_loc: DataLocale = a.parse().unwrap();
256     ///     assert_eq!(
257     ///         a_loc.strict_cmp(a.as_bytes()),
258     ///         Ordering::Equal,
259     ///         "strict_cmp: {} == {}",
260     ///         a_loc,
261     ///         a
262     ///     );
263     ///     assert_eq!(
264     ///         a_loc.strict_cmp(b.as_bytes()),
265     ///         Ordering::Less,
266     ///         "strict_cmp: {} < {}",
267     ///         a_loc,
268     ///         b
269     ///     );
270     ///     let b_loc: DataLocale = b.parse().unwrap();
271     ///     assert_eq!(
272     ///         b_loc.strict_cmp(b.as_bytes()),
273     ///         Ordering::Equal,
274     ///         "strict_cmp: {} == {}",
275     ///         b_loc,
276     ///         b
277     ///     );
278     ///     assert_eq!(
279     ///         b_loc.strict_cmp(a.as_bytes()),
280     ///         Ordering::Greater,
281     ///         "strict_cmp: {} > {}",
282     ///         b_loc,
283     ///         a
284     ///     );
285     /// }
286     /// ```
287     ///
288     /// Comparison against invalid strings:
289     ///
290     /// ```
291     /// use icu_provider::DataLocale;
292     ///
293     /// let invalid_strings: &[&str] = &[
294     ///     // Less than "ca-ES"
295     ///     "CA",
296     ///     "ar-x-gbp-FOO",
297     ///     // Greater than "ca-AR"
298     ///     "ca_ES",
299     ///     "ca-ES-x-gbp-FOO",
300     /// ];
301     ///
302     /// let data_locale = "ca-ES".parse::<DataLocale>().unwrap();
303     ///
304     /// for s in invalid_strings.iter() {
305     ///     let expected_ordering = "ca-AR".cmp(s);
306     ///     let actual_ordering = data_locale.strict_cmp(s.as_bytes());
307     ///     assert_eq!(expected_ordering, actual_ordering, "{}", s);
308     /// }
309     /// ```
strict_cmp(&self, other: &[u8]) -> Ordering310     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
311         writeable::cmp_utf8(self, other)
312     }
313 
314     /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
315     ///
316     /// # Examples
317     ///
318     /// ```
319     /// use icu_provider::DataLocale;
320     ///
321     /// assert!("und".parse::<DataLocale>().unwrap().is_default());
322     /// assert!(!"de-u-sd-denw".parse::<DataLocale>().unwrap().is_default());
323     /// assert!(!"und-ES".parse::<DataLocale>().unwrap().is_default());
324     /// ```
is_default(&self) -> bool325     pub fn is_default(&self) -> bool {
326         self.language.is_default()
327             && self.script.is_none()
328             && self.region.is_none()
329             && self.variant.is_none()
330             && self.subdivision.is_none()
331     }
332 
333     /// Converts this `DataLocale` into a [`Locale`].
into_locale(self) -> Locale334     pub fn into_locale(self) -> Locale {
335         Locale {
336             id: LanguageIdentifier {
337                 language: self.language,
338                 script: self.script,
339                 region: self.region,
340                 variants: self
341                     .variant
342                     .map(crate::subtags::Variants::from_variant)
343                     .unwrap_or_default(),
344             },
345             extensions: {
346                 let mut extensions = crate::extensions::Extensions::default();
347                 if let Some(sd) = self.subdivision {
348                     extensions.unicode = unicode_ext::Unicode {
349                         keywords: unicode_ext::Keywords::new_single(
350                             unicode_ext::key!("sd"),
351                             unicode_ext::Value::from_subtag(Some(sd)),
352                         ),
353                         ..Default::default()
354                     }
355                 }
356                 extensions
357             },
358         }
359     }
360 }
361 
/// Checks that parsing a BCP-47 string into a [`DataLocale`] and writing it
/// back out yields the expected string.
///
/// Gated on `alloc` because `str::parse::<DataLocale>()` relies on the
/// `FromStr` impl, which only exists when that feature is enabled.
#[test]
#[cfg(feature = "alloc")]
fn test_data_locale_to_string() {
    struct TestCase {
        locale: &'static str,
        expected: &'static str,
    }

    for cas in [
        TestCase {
            locale: "und",
            expected: "und",
        },
        TestCase {
            locale: "und-u-sd-sdd",
            expected: "und-u-sd-sdd",
        },
        TestCase {
            locale: "en-ZA-u-sd-zaa",
            expected: "en-ZA-u-sd-zaa",
        },
    ] {
        let locale = cas.locale.parse::<DataLocale>().unwrap();
        writeable::assert_writeable_eq!(locale, cas.expected);
    }
}
387 
/// Checks which inputs [`DataLocale`] accepts, and that every accepted input
/// writes back out to exactly the same string.
///
/// Gated on `alloc` because both the `FromStr` import and the `FromStr` impl
/// for [`DataLocale`] only exist when that feature is enabled.
#[test]
#[cfg(feature = "alloc")]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        input: &'static str,
        success: bool,
    }

    for cas in [
        TestCase {
            input: "und",
            success: true,
        },
        // Unicode keywords other than `sd` are rejected.
        TestCase {
            input: "und-u-cu-gbp",
            success: false,
        },
        TestCase {
            input: "en-ZA-u-sd-zaa",
            success: true,
        },
        // Not a well-formed locale at all.
        TestCase {
            input: "en...",
            success: false,
        },
    ] {
        let data_locale = match (DataLocale::from_str(cas.input), cas.success) {
            (Ok(l), true) => l,
            (Err(_), false) => {
                continue;
            }
            (Ok(_), false) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            (Err(_), true) => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
        };
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}
429