• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::provider::*;
6 
7 use icu_locale_core::subtags::{Language, Region, Script};
8 use icu_locale_core::LanguageIdentifier;
9 use icu_provider::prelude::*;
10 
11 use crate::TransformResult;
12 
/// Implements the *Add Likely Subtags* and *Remove Likely Subtags*
/// algorithms as defined in *[UTS #35: Likely Subtags]*.
///
/// # Examples
///
/// Add likely subtags:
///
/// ```
/// use icu::locale::locale;
/// use icu::locale::{LocaleExpander, TransformResult};
///
/// let lc = LocaleExpander::new_common();
///
/// let mut locale = locale!("zh-CN");
/// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
/// assert_eq!(locale, locale!("zh-Hans-CN"));
///
/// let mut locale = locale!("zh-Hant-TW");
/// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
/// assert_eq!(locale, locale!("zh-Hant-TW"));
/// ```
///
/// Remove likely subtags:
///
/// ```
/// use icu::locale::{locale, LocaleExpander, TransformResult};
///
/// let lc = LocaleExpander::new_common();
///
/// let mut locale = locale!("zh-Hans-CN");
/// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Modified);
/// assert_eq!(locale, locale!("zh"));
///
/// let mut locale = locale!("zh");
/// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Unmodified);
/// assert_eq!(locale, locale!("zh"));
/// ```
///
/// Normally, only CLDR locales with Basic or higher coverage are included. To include more
/// locales for maximization, use [`new_extended`](Self::new_extended):
///
/// ```
/// use icu::locale::{locale, LocaleExpander, TransformResult};
///
/// let lc = LocaleExpander::new_extended();
///
/// let mut locale = locale!("atj");
/// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
/// assert_eq!(locale, locale!("atj-Latn-CA"));
/// ```
///
/// [UTS #35: Likely Subtags]: https://www.unicode.org/reports/tr35/#Likely_Subtags
#[derive(Debug, Clone)]
pub struct LocaleExpander {
    // Tables keyed by language, language+script and language+region, plus the
    // default triple used for a bare `und`.
    likely_subtags_l: DataPayload<LocaleLikelySubtagsLanguageV1>,
    // Tables keyed by script, script+region and region (language unknown).
    likely_subtags_sr: DataPayload<LocaleLikelySubtagsScriptRegionV1>,
    // Optional extended tables, consulted only when the two tables above have
    // no entry; `None` for the "common" constructors.
    likely_subtags_ext: Option<DataPayload<LocaleLikelySubtagsExtendedV1>>,
}
71 
// Borrowed view over the data held by a `LocaleExpander` (built by
// `LocaleExpander::as_borrowed`), so the lookup helpers can work on plain
// references instead of going through the payloads on every call.
struct LocaleExpanderBorrowed<'a> {
    // Language-keyed tables and the `und` default.
    likely_subtags_l: &'a LikelySubtagsForLanguage<'a>,
    // Script- and region-keyed tables.
    likely_subtags_sr: &'a LikelySubtagsForScriptRegion<'a>,
    // Extended fallback tables, when the expander was built with them.
    likely_subtags_ext: Option<&'a LikelySubtagsExtended<'a>>,
}
77 
78 impl LocaleExpanderBorrowed<'_> {
get_l(&self, l: Language) -> Option<(Script, Region)>79     fn get_l(&self, l: Language) -> Option<(Script, Region)> {
80         let key = &l.to_tinystr().to_unvalidated();
81         self.likely_subtags_l.language.get_copied(key).or_else(|| {
82             self.likely_subtags_ext
83                 .and_then(|ext| ext.language.get_copied(key))
84         })
85     }
86 
get_ls(&self, l: Language, s: Script) -> Option<Region>87     fn get_ls(&self, l: Language, s: Script) -> Option<Region> {
88         let key = &(
89             l.to_tinystr().to_unvalidated(),
90             s.to_tinystr().to_unvalidated(),
91         );
92         self.likely_subtags_l
93             .language_script
94             .get_copied(key)
95             .or_else(|| {
96                 self.likely_subtags_ext
97                     .and_then(|ext| ext.language_script.get_copied(key))
98             })
99     }
100 
get_lr(&self, l: Language, r: Region) -> Option<Script>101     fn get_lr(&self, l: Language, r: Region) -> Option<Script> {
102         let key = &(
103             l.to_tinystr().to_unvalidated(),
104             r.to_tinystr().to_unvalidated(),
105         );
106         self.likely_subtags_l
107             .language_region
108             .get_copied(key)
109             .or_else(|| {
110                 self.likely_subtags_ext
111                     .and_then(|ext| ext.language_region.get_copied(key))
112             })
113     }
114 
get_s(&self, s: Script) -> Option<(Language, Region)>115     fn get_s(&self, s: Script) -> Option<(Language, Region)> {
116         let key = &s.to_tinystr().to_unvalidated();
117         self.likely_subtags_sr.script.get_copied(key).or_else(|| {
118             self.likely_subtags_ext
119                 .and_then(|ext| ext.script.get_copied(key))
120         })
121     }
122 
get_sr(&self, s: Script, r: Region) -> Option<Language>123     fn get_sr(&self, s: Script, r: Region) -> Option<Language> {
124         let key = &(
125             s.to_tinystr().to_unvalidated(),
126             r.to_tinystr().to_unvalidated(),
127         );
128         self.likely_subtags_sr
129             .script_region
130             .get_copied(key)
131             .or_else(|| {
132                 self.likely_subtags_ext
133                     .and_then(|ext| ext.script_region.get_copied(key))
134             })
135     }
136 
get_r(&self, r: Region) -> Option<(Language, Script)>137     fn get_r(&self, r: Region) -> Option<(Language, Script)> {
138         let key = &r.to_tinystr().to_unvalidated();
139         self.likely_subtags_sr.region.get_copied(key).or_else(|| {
140             self.likely_subtags_ext
141                 .and_then(|ext| ext.region.get_copied(key))
142         })
143     }
144 
get_und(&self) -> (Language, Script, Region)145     fn get_und(&self) -> (Language, Script, Region) {
146         self.likely_subtags_l.und
147     }
148 }
149 
150 #[inline]
update_langid( language: Language, script: Option<Script>, region: Option<Region>, langid: &mut LanguageIdentifier, ) -> TransformResult151 fn update_langid(
152     language: Language,
153     script: Option<Script>,
154     region: Option<Region>,
155     langid: &mut LanguageIdentifier,
156 ) -> TransformResult {
157     let mut modified = false;
158 
159     if langid.language.is_default() && !language.is_default() {
160         langid.language = language;
161         modified = true;
162     }
163 
164     if langid.script.is_none() && script.is_some() {
165         langid.script = script;
166         modified = true;
167     }
168 
169     if langid.region.is_none() && region.is_some() {
170         langid.region = region;
171         modified = true;
172     }
173 
174     if modified {
175         TransformResult::Modified
176     } else {
177         TransformResult::Unmodified
178     }
179 }
180 
181 #[inline]
update_langid_minimize( language: Language, script: Option<Script>, region: Option<Region>, langid: &mut LanguageIdentifier, ) -> TransformResult182 fn update_langid_minimize(
183     language: Language,
184     script: Option<Script>,
185     region: Option<Region>,
186     langid: &mut LanguageIdentifier,
187 ) -> TransformResult {
188     let mut modified = false;
189 
190     if langid.language != language {
191         langid.language = language;
192         modified = true;
193     }
194 
195     if langid.script != script {
196         langid.script = script;
197         modified = true;
198     }
199 
200     if langid.region != region {
201         langid.region = region;
202         modified = true;
203     }
204 
205     if modified {
206         TransformResult::Modified
207     } else {
208         TransformResult::Unmodified
209     }
210 }
211 
212 impl LocaleExpander {
213     /// Creates a [`LocaleExpander`] with compiled data for commonly-used locales
214     /// (locales with *Basic* or higher [CLDR coverage]).
215     ///
216     /// Use this constructor if you want limited likely subtags for data-oriented use cases.
217     ///
218     /// ✨ *Enabled with the `compiled_data` Cargo feature.*
219     ///
220     /// [�� Help choosing a constructor](icu_provider::constructors)
221     ///
222     /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
223     #[cfg(feature = "compiled_data")]
new_common() -> Self224     pub const fn new_common() -> Self {
225         LocaleExpander {
226             likely_subtags_l: DataPayload::from_static_ref(
227                 crate::provider::Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_LANGUAGE_V1,
228             ),
229             likely_subtags_sr: DataPayload::from_static_ref(
230                 crate::provider::Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_SCRIPT_REGION_V1,
231             ),
232             likely_subtags_ext: None,
233         }
234     }
235 
    // Generates the provider-based constructors for the "common" data set. Per the
    // function list below, the hand-written `new_common` is skipped and
    // `try_new_common_with_buffer_provider` is emitted next to `try_new_common_unstable`.
    // NOTE(review): the exact expansion is defined in `icu_provider` — confirm there.
    icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
        functions: [
        new_common: skip,
        try_new_common_with_buffer_provider,
        try_new_common_unstable,
        Self
    ]);
243 
244     #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_common)]
try_new_common_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError> where P: DataProvider<LocaleLikelySubtagsLanguageV1> + DataProvider<LocaleLikelySubtagsScriptRegionV1> + ?Sized,245     pub fn try_new_common_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError>
246     where
247         P: DataProvider<LocaleLikelySubtagsLanguageV1>
248             + DataProvider<LocaleLikelySubtagsScriptRegionV1>
249             + ?Sized,
250     {
251         let likely_subtags_l = provider.load(Default::default())?.payload;
252         let likely_subtags_sr = provider.load(Default::default())?.payload;
253 
254         Ok(LocaleExpander {
255             likely_subtags_l,
256             likely_subtags_sr,
257             likely_subtags_ext: None,
258         })
259     }
260 
261     /// Creates a [`LocaleExpander`] with compiled data for all locales.
262     ///
263     /// Use this constructor if you want to include data for all locales, including ones
264     /// that may not have data for other services (i.e. [CLDR coverage] below *Basic*).
265     ///
266     /// ✨ *Enabled with the `compiled_data` Cargo feature.*
267     ///
268     /// [�� Help choosing a constructor](icu_provider::constructors)
269     ///
270     /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
271     #[cfg(feature = "compiled_data")]
new_extended() -> Self272     pub const fn new_extended() -> Self {
273         LocaleExpander {
274             likely_subtags_l: DataPayload::from_static_ref(
275                 crate::provider::Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_LANGUAGE_V1,
276             ),
277             likely_subtags_sr: DataPayload::from_static_ref(
278                 crate::provider::Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_SCRIPT_REGION_V1,
279             ),
280             likely_subtags_ext: Some(DataPayload::from_static_ref(
281                 crate::provider::Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_EXTENDED_V1,
282             )),
283         }
284     }
285 
    // Generates the provider-based constructors for the "extended" data set. Per the
    // function list below, the hand-written `new_extended` is skipped and
    // `try_new_extended_with_buffer_provider` is emitted next to `try_new_extended_unstable`.
    // NOTE(review): the exact expansion is defined in `icu_provider` — confirm there.
    icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
        functions: [
        new_extended: skip,
        try_new_extended_with_buffer_provider,
        try_new_extended_unstable,
        Self
    ]);
293 
294     #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_extended)]
try_new_extended_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError> where P: DataProvider<LocaleLikelySubtagsLanguageV1> + DataProvider<LocaleLikelySubtagsScriptRegionV1> + DataProvider<LocaleLikelySubtagsExtendedV1> + ?Sized,295     pub fn try_new_extended_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError>
296     where
297         P: DataProvider<LocaleLikelySubtagsLanguageV1>
298             + DataProvider<LocaleLikelySubtagsScriptRegionV1>
299             + DataProvider<LocaleLikelySubtagsExtendedV1>
300             + ?Sized,
301     {
302         let likely_subtags_l = provider.load(Default::default())?.payload;
303         let likely_subtags_sr = provider.load(Default::default())?.payload;
304         let likely_subtags_ext = Some(provider.load(Default::default())?.payload);
305 
306         Ok(LocaleExpander {
307             likely_subtags_l,
308             likely_subtags_sr,
309             likely_subtags_ext,
310         })
311     }
312 
as_borrowed(&self) -> LocaleExpanderBorrowed313     fn as_borrowed(&self) -> LocaleExpanderBorrowed {
314         LocaleExpanderBorrowed {
315             likely_subtags_l: self.likely_subtags_l.get(),
316             likely_subtags_sr: self.likely_subtags_sr.get(),
317             likely_subtags_ext: self.likely_subtags_ext.as_ref().map(|p| p.get()),
318         }
319     }
320 
321     /// The maximize method potentially updates a passed in locale in place
322     /// depending up the results of running the 'Add Likely Subtags' algorithm
323     /// from <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
324     ///
325     /// If the result of running the algorithm would result in a new locale, the
326     /// locale argument is updated in place to match the result, and the method
327     /// returns [`TransformResult::Modified`]. Otherwise, the method
328     /// returns [`TransformResult::Unmodified`] and the locale argument is
329     /// unchanged.
330     ///
331     /// This function does not guarantee that any particular set of subtags
332     /// will be present in the resulting locale.
333     ///
334     /// # Examples
335     ///
336     /// ```
337     /// use icu::locale::{locale, LocaleExpander, TransformResult};
338     ///
339     /// let lc = LocaleExpander::new_common();
340     ///
341     /// let mut locale = locale!("zh-CN");
342     /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
343     /// assert_eq!(locale, locale!("zh-Hans-CN"));
344     ///
345     /// let mut locale = locale!("zh-Hant-TW");
346     /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
347     /// assert_eq!(locale, locale!("zh-Hant-TW"));
348     /// ```
349     ///
350     /// If there is no data for a particular language, the result is not
351     /// modified. Note that [`LocaleExpander::new_extended`] supports
352     /// more languages.
353     ///
354     /// ```
355     /// use icu::locale::{locale, LocaleExpander, TransformResult};
356     ///
357     /// let lc = LocaleExpander::new_common();
358     ///
359     /// // No subtags data for ccp in the default set:
360     /// let mut locale = locale!("ccp");
361     /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
362     /// assert_eq!(locale, locale!("ccp"));
363     ///
364     /// // The extended set supports it:
365     /// let lc = LocaleExpander::new_extended();
366     /// let mut locale = locale!("ccp");
367     /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
368     /// assert_eq!(locale, locale!("ccp-Cakm-BD"));
369     ///
370     /// // But even the extended set does not support all language subtags:
371     /// let mut locale = locale!("mul");
372     /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
373     /// assert_eq!(locale, locale!("mul"));
374     /// ```
maximize(&self, langid: &mut LanguageIdentifier) -> TransformResult375     pub fn maximize(&self, langid: &mut LanguageIdentifier) -> TransformResult {
376         let data = self.as_borrowed();
377 
378         if !langid.language.is_default() && langid.script.is_some() && langid.region.is_some() {
379             return TransformResult::Unmodified;
380         }
381 
382         if !langid.language.is_default() {
383             if let Some(region) = langid.region {
384                 if let Some(script) = data.get_lr(langid.language, region) {
385                     return update_langid(Language::UND, Some(script), None, langid);
386                 }
387             }
388             if let Some(script) = langid.script {
389                 if let Some(region) = data.get_ls(langid.language, script) {
390                     return update_langid(Language::UND, None, Some(region), langid);
391                 }
392             }
393             if let Some((script, region)) = data.get_l(langid.language) {
394                 return update_langid(Language::UND, Some(script), Some(region), langid);
395             }
396             // Language not found: return unmodified.
397             return TransformResult::Unmodified;
398         }
399         if let Some(script) = langid.script {
400             if let Some(region) = langid.region {
401                 if let Some(language) = data.get_sr(script, region) {
402                     return update_langid(language, None, None, langid);
403                 }
404             }
405             if let Some((language, region)) = data.get_s(script) {
406                 return update_langid(language, None, Some(region), langid);
407             }
408         }
409         if let Some(region) = langid.region {
410             if let Some((language, script)) = data.get_r(region) {
411                 return update_langid(language, Some(script), None, langid);
412             }
413         }
414 
415         // We failed to find anything in the und-SR, und-S, or und-R tables,
416         // to fall back to bare "und"
417         debug_assert!(langid.language.is_default());
418         update_langid(
419             data.get_und().0,
420             Some(data.get_und().1),
421             Some(data.get_und().2),
422             langid,
423         )
424     }
425 
    /// Runs the 'Remove Likely Subtags' algorithm from
    /// <https://www.unicode.org/reports/tr35/#Likely_Subtags> on the passed in
    /// locale, updating it in place. When both a script-only and a region-only
    /// form would work, the region is the one that is kept.
    ///
    /// If the result of running the algorithm would result in a new locale, the
    /// locale argument is updated in place to match the result, and the method
    /// returns [`TransformResult::Modified`]. Otherwise, the method
    /// returns [`TransformResult::Unmodified`] and the locale argument is
    /// unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::{locale, LocaleExpander, TransformResult};
    ///
    /// let lc = LocaleExpander::new_common();
    ///
    /// let mut locale = locale!("zh-Hans-CN");
    /// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Modified);
    /// assert_eq!(locale, locale!("zh"));
    ///
    /// let mut locale = locale!("zh");
    /// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Unmodified);
    /// assert_eq!(locale, locale!("zh"));
    /// ```
    pub fn minimize(&self, langid: &mut LanguageIdentifier) -> TransformResult {
        // `true` selects the region-favoring order in `minimize_impl`.
        self.minimize_impl(langid, true)
    }
454 
    /// Runs the 'Remove Likely Subtags, favoring script' algorithm from
    /// <https://www.unicode.org/reports/tr35/#Likely_Subtags> on the passed in
    /// locale, updating it in place. When both a script-only and a region-only
    /// form would work, the script is the one that is kept.
    ///
    /// If the result of running the algorithm would result in a new locale, the
    /// locale argument is updated in place to match the result, and the method
    /// returns [`TransformResult::Modified`]. Otherwise, the method
    /// returns [`TransformResult::Unmodified`] and the locale argument is
    /// unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::{locale, LocaleExpander, TransformResult};
    ///
    /// let lc = LocaleExpander::new_common();
    ///
    /// let mut locale = locale!("zh-TW");
    /// assert_eq!(
    ///     lc.minimize_favor_script(&mut locale.id),
    ///     TransformResult::Modified
    /// );
    /// assert_eq!(locale, locale!("zh-Hant"));
    /// ```
    pub fn minimize_favor_script(&self, langid: &mut LanguageIdentifier) -> TransformResult {
        // `false` selects the script-favoring order in `minimize_impl`.
        self.minimize_impl(langid, false)
    }
482 
minimize_impl( &self, langid: &mut LanguageIdentifier, favor_region: bool, ) -> TransformResult483     fn minimize_impl(
484         &self,
485         langid: &mut LanguageIdentifier,
486         favor_region: bool,
487     ) -> TransformResult {
488         let mut max = langid.clone();
489         self.maximize(&mut max);
490 
491         let mut trial = max.clone();
492 
493         trial.script = None;
494         trial.region = None;
495         self.maximize(&mut trial);
496         if trial == max {
497             return update_langid_minimize(max.language, None, None, langid);
498         }
499 
500         if favor_region {
501             trial.script = None;
502             trial.region = max.region;
503             self.maximize(&mut trial);
504 
505             if trial == max {
506                 return update_langid_minimize(max.language, None, max.region, langid);
507             }
508 
509             trial.script = max.script;
510             trial.region = None;
511             self.maximize(&mut trial);
512             if trial == max {
513                 return update_langid_minimize(max.language, max.script, None, langid);
514             }
515         } else {
516             trial.script = max.script;
517             trial.region = None;
518             self.maximize(&mut trial);
519             if trial == max {
520                 return update_langid_minimize(max.language, max.script, None, langid);
521             }
522 
523             trial.script = None;
524             trial.region = max.region;
525             self.maximize(&mut trial);
526 
527             if trial == max {
528                 return update_langid_minimize(max.language, None, max.region, langid);
529             }
530         }
531 
532         update_langid_minimize(max.language, max.script, max.region, langid)
533     }
534 
535     // TODO(3492): consider turning this and a future get_likely_region/get_likely_language public
536     #[inline]
get_likely_script(&self, langid: &LanguageIdentifier) -> Option<Script>537     pub(crate) fn get_likely_script(&self, langid: &LanguageIdentifier) -> Option<Script> {
538         langid
539             .script
540             .or_else(|| self.infer_likely_script(langid.language, langid.region))
541     }
542 
infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script>543     fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> {
544         let data = self.as_borrowed();
545 
546         // proceed through _all possible cases_ in order of specificity
547         // (borrowed from LocaleExpander::maximize):
548         // 1. language + region
549         // 2. language
550         // 3. region
551         // we need to check all cases, because e.g. for "en-US" the default script is associated
552         // with "en" but not "en-US"
553         if !language.is_default() {
554             if let Some(region) = region {
555                 // 1. we know both language and region
556                 if let Some(script) = data.get_lr(language, region) {
557                     return Some(script);
558                 }
559             }
560             // 2. we know language, but we either do not know region or knowing region did not help
561             if let Some((script, _)) = data.get_l(language) {
562                 return Some(script);
563             }
564         }
565         if let Some(region) = region {
566             // 3. we know region, but we either do not know language or knowing language did not help
567             if let Some((_, script)) = data.get_r(region) {
568                 return Some(script);
569             }
570         }
571         // we could not figure out the script from the given locale
572         None
573     }
574 }
575 
// Identity conversion, so APIs that are generic over `AsRef<LocaleExpander>`
// can be handed a `LocaleExpander` (or a reference to one) directly.
impl AsRef<LocaleExpander> for LocaleExpander {
    fn as_ref(&self) -> &LocaleExpander {
        self
    }
}
581 
// These tests build the expander with `LocaleExpander::new_common`, which only
// exists when the `compiled_data` feature is enabled, so the module is gated on
// that feature (rather than on `serde`, which the tests do not use).
#[cfg(all(test, feature = "compiled_data"))]
mod tests {
    use super::*;
    use icu_locale_core::locale;

    /// Favoring the script keeps `yue-Hans` as it is.
    #[test]
    fn test_minimize_favor_script() {
        let lc = LocaleExpander::new_common();
        let mut locale = locale!("yue-Hans");
        assert_eq!(
            lc.minimize_favor_script(&mut locale.id),
            TransformResult::Unmodified
        );
        assert_eq!(locale, locale!("yue-Hans"));
    }

    /// Favoring the region rewrites `yue-Hans` to `yue-CN`.
    #[test]
    fn test_minimize_favor_region() {
        let lc = LocaleExpander::new_common();
        let mut locale = locale!("yue-Hans");
        assert_eq!(lc.minimize(&mut locale.id), TransformResult::Modified);
        assert_eq!(locale, locale!("yue-CN"));
    }
}
607