1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use crate::extensions::unicode as unicode_ext;
6 use crate::subtags::{Language, Region, Script, Subtag, Variant};
7 #[cfg(feature = "alloc")]
8 use crate::ParseError;
9 use crate::{LanguageIdentifier, Locale};
10 use core::cmp::Ordering;
11 use core::default::Default;
12 use core::fmt;
13 use core::hash::Hash;
14 #[cfg(feature = "alloc")]
15 use core::str::FromStr;
16
17 /// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
18 ///
19 /// [`DataLocale`] contains less functionality than [`Locale`] but more than
20 /// [`LanguageIdentifier`] for better size and performance while still meeting
21 /// the needs of the ICU4X data pipeline.
22 ///
23 /// You can create a [`DataLocale`] from a borrowed [`Locale`], which is more
24 /// efficient than cloning the [`Locale`], but less efficient than converting an owned
25 /// [`Locale`]:
26 ///
27 /// ```
28 /// use icu_locale_core::locale;
29 /// use icu_provider::DataLocale;
30 ///
31 /// let locale1 = locale!("en-u-ca-buddhist");
32 /// let data_locale = DataLocale::from(&locale1);
33 /// ```
34 ///
35 /// [`DataLocale`] only supports `-u-sd` keywords, to reflect the current state of CLDR data
36 /// lookup and fallback. This may change in the future.
37 ///
38 /// ```
39 /// use icu_locale_core::{locale, Locale};
40 /// use icu_provider::DataLocale;
41 ///
42 /// let locale = "hi-IN-t-en-h0-hybrid-u-attr-ca-buddhist-sd-inas"
43 /// .parse::<Locale>()
44 /// .unwrap();
45 ///
46 /// assert_eq!(
47 /// DataLocale::from(locale),
48 /// DataLocale::from(locale!("hi-IN-u-sd-inas"))
49 /// );
50 /// ```
#[derive(Clone, Copy, Default, PartialEq, Hash, Eq)]
#[non_exhaustive]
pub struct DataLocale {
    /// Language subtag
    pub language: Language,
    /// Script subtag, if any
    pub script: Option<Script>,
    /// Region subtag, if any
    pub region: Option<Region>,
    /// Variant subtag, if any (at most one variant is stored)
    pub variant: Option<Variant>,
    /// Subdivision (`-u-sd-`) subtag, if any
    pub subdivision: Option<Subtag>,
}
65
66 impl DataLocale {
67 /// `const` version of `Default::default`
default() -> Self68 pub const fn default() -> Self {
69 DataLocale {
70 language: Language::UND,
71 script: None,
72 region: None,
73 variant: None,
74 subdivision: None,
75 }
76 }
77 }
78
79 impl Default for &DataLocale {
default() -> Self80 fn default() -> Self {
81 static DEFAULT: DataLocale = DataLocale::default();
82 &DEFAULT
83 }
84 }
85
86 impl fmt::Debug for DataLocale {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result87 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
88 write!(f, "DataLocale{{{self}}}")
89 }
90 }
91
// Invokes a crate-internal macro (defined outside this file) for `DataLocale`.
// NOTE(review): judging by its name, it presumably implements `Writeable`/`Display` on top of
// `DataLocale::for_each_subtag_str` without emitting the macro's own test — confirm against the
// macro definition.
// The trailing arm supplies `selff.language.write_to_string()` for the case where no script,
// region, variant, or subdivision is set (presumably a fast path for string output).
impl_writeable_for_each_subtag_str_no_test!(DataLocale, selff, selff.script.is_none() && selff.region.is_none() && selff.variant.is_none() && selff.subdivision.is_none() => selff.language.write_to_string());
93
94 impl From<LanguageIdentifier> for DataLocale {
from(langid: LanguageIdentifier) -> Self95 fn from(langid: LanguageIdentifier) -> Self {
96 Self::from(&langid)
97 }
98 }
99
100 impl From<Locale> for DataLocale {
from(locale: Locale) -> Self101 fn from(locale: Locale) -> Self {
102 Self::from(&locale)
103 }
104 }
105
106 impl From<&LanguageIdentifier> for DataLocale {
from(langid: &LanguageIdentifier) -> Self107 fn from(langid: &LanguageIdentifier) -> Self {
108 Self {
109 language: langid.language,
110 script: langid.script,
111 region: langid.region,
112 variant: langid.variants.iter().copied().next(),
113 subdivision: None,
114 }
115 }
116 }
117
118 impl From<&Locale> for DataLocale {
from(locale: &Locale) -> Self119 fn from(locale: &Locale) -> Self {
120 let mut r = Self::from(&locale.id);
121
122 r.subdivision = locale
123 .extensions
124 .unicode
125 .keywords
126 .get(&unicode_ext::key!("sd"))
127 .and_then(|v| v.as_single_subtag().copied());
128 r
129 }
130 }
131
#[cfg(feature = "alloc")]
impl FromStr for DataLocale {
    type Err = ParseError;

    /// Parses a [`DataLocale`] from the UTF-8 bytes of `s`.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        DataLocale::try_from_utf8(s.as_bytes())
    }
}
140
141 impl DataLocale {
142 #[inline]
143 /// Parses a [`DataLocale`].
144 #[cfg(feature = "alloc")]
try_from_str(s: &str) -> Result<Self, ParseError>145 pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
146 Self::try_from_utf8(s.as_bytes())
147 }
148
149 /// Parses a [`DataLocale`] from a UTF-8 byte slice.
150 #[cfg(feature = "alloc")]
try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError>151 pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
152 let locale = Locale::try_from_utf8(code_units)?;
153 if locale.id.variants.len() > 1
154 || !locale.extensions.transform.is_empty()
155 || !locale.extensions.private.is_empty()
156 || !locale.extensions.other.is_empty()
157 || !locale.extensions.unicode.attributes.is_empty()
158 {
159 return Err(ParseError::InvalidExtension);
160 }
161
162 let unicode_extensions_count = locale.extensions.unicode.keywords.iter().count();
163
164 if unicode_extensions_count != 0
165 && (unicode_extensions_count != 1
166 || !locale
167 .extensions
168 .unicode
169 .keywords
170 .contains_key(&unicode_ext::key!("sd")))
171 {
172 return Err(ParseError::InvalidExtension);
173 }
174
175 Ok(locale.into())
176 }
177
for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>,178 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
179 where
180 F: FnMut(&str) -> Result<(), E>,
181 {
182 f(self.language.as_str())?;
183 if let Some(ref script) = self.script {
184 f(script.as_str())?;
185 }
186 if let Some(ref region) = self.region {
187 f(region.as_str())?;
188 }
189 if let Some(ref single_variant) = self.variant {
190 f(single_variant.as_str())?;
191 }
192 if let Some(ref subdivision) = self.subdivision {
193 f("u")?;
194 f("sd")?;
195 f(subdivision.as_str())?;
196 }
197 Ok(())
198 }
199
as_tuple( &self, ) -> ( Language, Option<Script>, Option<Region>, Option<Variant>, Option<Subtag>, )200 fn as_tuple(
201 &self,
202 ) -> (
203 Language,
204 Option<Script>,
205 Option<Region>,
206 Option<Variant>,
207 Option<Subtag>,
208 ) {
209 (
210 self.language,
211 self.script,
212 self.region,
213 self.variant,
214 self.subdivision,
215 )
216 }
217
218 /// Returns an ordering suitable for use in [`BTreeSet`].
219 ///
220 /// [`BTreeSet`]: alloc::collections::BTreeSet
total_cmp(&self, other: &Self) -> Ordering221 pub fn total_cmp(&self, other: &Self) -> Ordering {
222 self.as_tuple().cmp(&other.as_tuple())
223 }
224
225 /// Compare this [`DataLocale`] with BCP-47 bytes.
226 ///
227 /// The return value is equivalent to what would happen if you first converted this
228 /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
229 ///
230 /// This function is case-sensitive and results in a *total order*, so it is appropriate for
231 /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
232 ///
233 /// # Examples
234 ///
235 /// ```
236 /// use core::cmp::Ordering;
237 /// use icu_provider::DataLocale;
238 ///
239 /// let bcp47_strings: &[&str] = &[
240 /// "ca",
241 /// "ca-ES",
242 /// "ca-ES-u-sd-esct",
243 /// "ca-ES-valencia",
244 /// "cat",
245 /// "pl-Latn-PL",
246 /// "und",
247 /// "und-fonipa",
248 /// "zh",
249 /// ];
250 ///
251 /// for ab in bcp47_strings.windows(2) {
252 /// let a = ab[0];
253 /// let b = ab[1];
254 /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
255 /// let a_loc: DataLocale = a.parse().unwrap();
256 /// assert_eq!(
257 /// a_loc.strict_cmp(a.as_bytes()),
258 /// Ordering::Equal,
259 /// "strict_cmp: {} == {}",
260 /// a_loc,
261 /// a
262 /// );
263 /// assert_eq!(
264 /// a_loc.strict_cmp(b.as_bytes()),
265 /// Ordering::Less,
266 /// "strict_cmp: {} < {}",
267 /// a_loc,
268 /// b
269 /// );
270 /// let b_loc: DataLocale = b.parse().unwrap();
271 /// assert_eq!(
272 /// b_loc.strict_cmp(b.as_bytes()),
273 /// Ordering::Equal,
274 /// "strict_cmp: {} == {}",
275 /// b_loc,
276 /// b
277 /// );
278 /// assert_eq!(
279 /// b_loc.strict_cmp(a.as_bytes()),
280 /// Ordering::Greater,
281 /// "strict_cmp: {} > {}",
282 /// b_loc,
283 /// a
284 /// );
285 /// }
286 /// ```
287 ///
288 /// Comparison against invalid strings:
289 ///
290 /// ```
291 /// use icu_provider::DataLocale;
292 ///
293 /// let invalid_strings: &[&str] = &[
294 /// // Less than "ca-ES"
295 /// "CA",
296 /// "ar-x-gbp-FOO",
297 /// // Greater than "ca-AR"
298 /// "ca_ES",
299 /// "ca-ES-x-gbp-FOO",
300 /// ];
301 ///
302 /// let data_locale = "ca-ES".parse::<DataLocale>().unwrap();
303 ///
304 /// for s in invalid_strings.iter() {
305 /// let expected_ordering = "ca-AR".cmp(s);
306 /// let actual_ordering = data_locale.strict_cmp(s.as_bytes());
307 /// assert_eq!(expected_ordering, actual_ordering, "{}", s);
308 /// }
309 /// ```
strict_cmp(&self, other: &[u8]) -> Ordering310 pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
311 writeable::cmp_utf8(self, other)
312 }
313
314 /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
315 ///
316 /// # Examples
317 ///
318 /// ```
319 /// use icu_provider::DataLocale;
320 ///
321 /// assert!("und".parse::<DataLocale>().unwrap().is_default());
322 /// assert!(!"de-u-sd-denw".parse::<DataLocale>().unwrap().is_default());
323 /// assert!(!"und-ES".parse::<DataLocale>().unwrap().is_default());
324 /// ```
is_default(&self) -> bool325 pub fn is_default(&self) -> bool {
326 self.language.is_default()
327 && self.script.is_none()
328 && self.region.is_none()
329 && self.variant.is_none()
330 && self.subdivision.is_none()
331 }
332
333 /// Converts this `DataLocale` into a [`Locale`].
into_locale(self) -> Locale334 pub fn into_locale(self) -> Locale {
335 Locale {
336 id: LanguageIdentifier {
337 language: self.language,
338 script: self.script,
339 region: self.region,
340 variants: self
341 .variant
342 .map(crate::subtags::Variants::from_variant)
343 .unwrap_or_default(),
344 },
345 extensions: {
346 let mut extensions = crate::extensions::Extensions::default();
347 if let Some(sd) = self.subdivision {
348 extensions.unicode = unicode_ext::Unicode {
349 keywords: unicode_ext::Keywords::new_single(
350 unicode_ext::key!("sd"),
351 unicode_ext::Value::from_subtag(Some(sd)),
352 ),
353 ..Default::default()
354 }
355 }
356 extensions
357 },
358 }
359 }
360 }
361
// Parses each input as a `DataLocale` and checks its `Writeable` output.
//
// Gated on `alloc`: `str::parse::<DataLocale>()` needs the `FromStr` impl, which only exists
// when that feature is enabled, so without the gate the test target fails to compile.
#[test]
#[cfg(feature = "alloc")]
fn test_data_locale_to_string() {
    struct TestCase {
        pub locale: &'static str,
        pub expected: &'static str,
    }

    for cas in [
        TestCase {
            locale: "und",
            expected: "und",
        },
        TestCase {
            locale: "und-u-sd-sdd",
            expected: "und-u-sd-sdd",
        },
        TestCase {
            locale: "en-ZA-u-sd-zaa",
            expected: "en-ZA-u-sd-zaa",
        },
    ] {
        let locale = cas.locale.parse::<DataLocale>().unwrap();
        writeable::assert_writeable_eq!(locale, cas.expected);
    }
}
387
// Checks which inputs parse as a `DataLocale`, and that successfully parsed values write
// back out as their input string.
//
// Gated on `alloc`: both the `FromStr` import and the `FromStr` impl for `DataLocale` only
// exist when that feature is enabled, so without the gate the test target fails to compile.
#[test]
#[cfg(feature = "alloc")]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    for cas in [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            // Unicode keywords other than `sd` are rejected.
            input: "und-u-cu-gbp",
            success: false,
        },
        TestCase {
            input: "en-ZA-u-sd-zaa",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
    ] {
        let data_locale = match (DataLocale::from_str(cas.input), cas.success) {
            (Ok(l), true) => l,
            (Err(_), false) => {
                continue;
            }
            (Ok(_), false) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            (Err(_), true) => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
        };
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}