1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use crate::provider::*;
6
7 use icu_locale_core::subtags::{Language, Region, Script};
8 use icu_locale_core::LanguageIdentifier;
9 use icu_provider::prelude::*;
10
11 use crate::TransformResult;
12
/// Implements the *Add Likely Subtags* and *Remove Likely Subtags*
/// algorithms as defined in *[UTS #35: Likely Subtags]*.
///
/// # Examples
///
/// Add likely subtags:
///
/// ```
/// use icu::locale::locale;
/// use icu::locale::{LocaleExpander, TransformResult};
///
/// let lc = LocaleExpander::new_common();
///
/// let mut locale = locale!("zh-CN");
/// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
/// assert_eq!(locale, locale!("zh-Hans-CN"));
///
/// let mut locale = locale!("zh-Hant-TW");
/// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
/// assert_eq!(locale, locale!("zh-Hant-TW"));
/// ```
///
/// Remove likely subtags:
///
/// ```
/// use icu::locale::{locale, LocaleExpander, TransformResult};
///
/// let lc = LocaleExpander::new_common();
///
/// let mut locale = locale!("zh-Hans-CN");
/// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Modified);
/// assert_eq!(locale, locale!("zh"));
///
/// let mut locale = locale!("zh");
/// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Unmodified);
/// assert_eq!(locale, locale!("zh"));
/// ```
///
/// Normally, only CLDR locales with Basic or higher coverage are included. To include more
/// locales for maximization, use one of the extended constructors, such as
/// [`try_new_extended_unstable`](Self::try_new_extended_unstable); the example below uses
/// its compiled-data counterpart, `new_extended`:
///
/// ```
/// use icu::locale::{locale, LocaleExpander, TransformResult};
///
/// let lc = LocaleExpander::new_extended();
///
/// let mut locale = locale!("atj");
/// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
/// assert_eq!(locale, locale!("atj-Latn-CA"));
/// ```
///
/// [UTS #35: Likely Subtags]: https://www.unicode.org/reports/tr35/#Likely_Subtags
#[derive(Debug, Clone)]
pub struct LocaleExpander {
    // Data for lookups keyed by language (language, language+script,
    // language+region); also holds the `und` fallback triple.
    likely_subtags_l: DataPayload<LocaleLikelySubtagsLanguageV1>,
    // Data for lookups keyed by script and/or region.
    likely_subtags_sr: DataPayload<LocaleLikelySubtagsScriptRegionV1>,
    // Extended data, consulted only when the two payloads above have no entry.
    // `None` when built by the "common" constructors.
    likely_subtags_ext: Option<DataPayload<LocaleLikelySubtagsExtendedV1>>,
}
71
/// Borrowed view over the data owned by a [`LocaleExpander`].
///
/// Built by `LocaleExpander::as_borrowed`, which calls `get()` on each
/// payload, so the lookup helpers operate on plain references.
struct LocaleExpanderBorrowed<'a> {
    // Language-keyed tables plus the `und` fallback.
    likely_subtags_l: &'a LikelySubtagsForLanguage<'a>,
    // Script- and region-keyed tables.
    likely_subtags_sr: &'a LikelySubtagsForScriptRegion<'a>,
    // Extended tables; `None` when the expander carries no extended payload.
    likely_subtags_ext: Option<&'a LikelySubtagsExtended<'a>>,
}
77
78 impl LocaleExpanderBorrowed<'_> {
get_l(&self, l: Language) -> Option<(Script, Region)>79 fn get_l(&self, l: Language) -> Option<(Script, Region)> {
80 let key = &l.to_tinystr().to_unvalidated();
81 self.likely_subtags_l.language.get_copied(key).or_else(|| {
82 self.likely_subtags_ext
83 .and_then(|ext| ext.language.get_copied(key))
84 })
85 }
86
get_ls(&self, l: Language, s: Script) -> Option<Region>87 fn get_ls(&self, l: Language, s: Script) -> Option<Region> {
88 let key = &(
89 l.to_tinystr().to_unvalidated(),
90 s.to_tinystr().to_unvalidated(),
91 );
92 self.likely_subtags_l
93 .language_script
94 .get_copied(key)
95 .or_else(|| {
96 self.likely_subtags_ext
97 .and_then(|ext| ext.language_script.get_copied(key))
98 })
99 }
100
get_lr(&self, l: Language, r: Region) -> Option<Script>101 fn get_lr(&self, l: Language, r: Region) -> Option<Script> {
102 let key = &(
103 l.to_tinystr().to_unvalidated(),
104 r.to_tinystr().to_unvalidated(),
105 );
106 self.likely_subtags_l
107 .language_region
108 .get_copied(key)
109 .or_else(|| {
110 self.likely_subtags_ext
111 .and_then(|ext| ext.language_region.get_copied(key))
112 })
113 }
114
get_s(&self, s: Script) -> Option<(Language, Region)>115 fn get_s(&self, s: Script) -> Option<(Language, Region)> {
116 let key = &s.to_tinystr().to_unvalidated();
117 self.likely_subtags_sr.script.get_copied(key).or_else(|| {
118 self.likely_subtags_ext
119 .and_then(|ext| ext.script.get_copied(key))
120 })
121 }
122
get_sr(&self, s: Script, r: Region) -> Option<Language>123 fn get_sr(&self, s: Script, r: Region) -> Option<Language> {
124 let key = &(
125 s.to_tinystr().to_unvalidated(),
126 r.to_tinystr().to_unvalidated(),
127 );
128 self.likely_subtags_sr
129 .script_region
130 .get_copied(key)
131 .or_else(|| {
132 self.likely_subtags_ext
133 .and_then(|ext| ext.script_region.get_copied(key))
134 })
135 }
136
get_r(&self, r: Region) -> Option<(Language, Script)>137 fn get_r(&self, r: Region) -> Option<(Language, Script)> {
138 let key = &r.to_tinystr().to_unvalidated();
139 self.likely_subtags_sr.region.get_copied(key).or_else(|| {
140 self.likely_subtags_ext
141 .and_then(|ext| ext.region.get_copied(key))
142 })
143 }
144
get_und(&self) -> (Language, Script, Region)145 fn get_und(&self) -> (Language, Script, Region) {
146 self.likely_subtags_l.und
147 }
148 }
149
150 #[inline]
update_langid( language: Language, script: Option<Script>, region: Option<Region>, langid: &mut LanguageIdentifier, ) -> TransformResult151 fn update_langid(
152 language: Language,
153 script: Option<Script>,
154 region: Option<Region>,
155 langid: &mut LanguageIdentifier,
156 ) -> TransformResult {
157 let mut modified = false;
158
159 if langid.language.is_default() && !language.is_default() {
160 langid.language = language;
161 modified = true;
162 }
163
164 if langid.script.is_none() && script.is_some() {
165 langid.script = script;
166 modified = true;
167 }
168
169 if langid.region.is_none() && region.is_some() {
170 langid.region = region;
171 modified = true;
172 }
173
174 if modified {
175 TransformResult::Modified
176 } else {
177 TransformResult::Unmodified
178 }
179 }
180
181 #[inline]
update_langid_minimize( language: Language, script: Option<Script>, region: Option<Region>, langid: &mut LanguageIdentifier, ) -> TransformResult182 fn update_langid_minimize(
183 language: Language,
184 script: Option<Script>,
185 region: Option<Region>,
186 langid: &mut LanguageIdentifier,
187 ) -> TransformResult {
188 let mut modified = false;
189
190 if langid.language != language {
191 langid.language = language;
192 modified = true;
193 }
194
195 if langid.script != script {
196 langid.script = script;
197 modified = true;
198 }
199
200 if langid.region != region {
201 langid.region = region;
202 modified = true;
203 }
204
205 if modified {
206 TransformResult::Modified
207 } else {
208 TransformResult::Unmodified
209 }
210 }
211
212 impl LocaleExpander {
/// Builds a [`LocaleExpander`] backed by compiled data for commonly-used locales
/// (locales with *Basic* or higher [CLDR coverage]).
///
/// Pick this constructor when a limited set of likely subtags is enough, e.g. for
/// data-oriented use cases.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
#[cfg(feature = "compiled_data")]
pub const fn new_common() -> Self {
    use crate::provider::Baked;

    Self {
        likely_subtags_l: DataPayload::from_static_ref(
            Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_LANGUAGE_V1,
        ),
        likely_subtags_sr: DataPayload::from_static_ref(
            Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_SCRIPT_REGION_V1,
        ),
        // The common data set carries no extended table.
        likely_subtags_ext: None,
    }
}
235
// NOTE(review): presumably expands to the buffer-provider constructor named
// below (`try_new_common_with_buffer_provider`), forwarding to
// `try_new_common_unstable`; `new_common: skip` appears to suppress generation
// of the compiled-data constructor, which this file defines by hand — confirm
// against the `icu_provider` macro documentation.
icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
    functions: [
        new_common: skip,
        try_new_common_with_buffer_provider,
        try_new_common_unstable,
        Self
    ]);
243
244 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_common)]
try_new_common_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError> where P: DataProvider<LocaleLikelySubtagsLanguageV1> + DataProvider<LocaleLikelySubtagsScriptRegionV1> + ?Sized,245 pub fn try_new_common_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError>
246 where
247 P: DataProvider<LocaleLikelySubtagsLanguageV1>
248 + DataProvider<LocaleLikelySubtagsScriptRegionV1>
249 + ?Sized,
250 {
251 let likely_subtags_l = provider.load(Default::default())?.payload;
252 let likely_subtags_sr = provider.load(Default::default())?.payload;
253
254 Ok(LocaleExpander {
255 likely_subtags_l,
256 likely_subtags_sr,
257 likely_subtags_ext: None,
258 })
259 }
260
/// Builds a [`LocaleExpander`] backed by compiled data for all locales.
///
/// Pick this constructor when you need likely subtags for every locale, including
/// those that may lack data for other services (i.e. [CLDR coverage] below *Basic*).
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
#[cfg(feature = "compiled_data")]
pub const fn new_extended() -> Self {
    use crate::provider::Baked;

    Self {
        likely_subtags_l: DataPayload::from_static_ref(
            Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_LANGUAGE_V1,
        ),
        likely_subtags_sr: DataPayload::from_static_ref(
            Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_SCRIPT_REGION_V1,
        ),
        // Same primary tables as `new_common`, plus the extended table.
        likely_subtags_ext: Some(DataPayload::from_static_ref(
            Baked::SINGLETON_LOCALE_LIKELY_SUBTAGS_EXTENDED_V1,
        )),
    }
}
285
// NOTE(review): presumably expands to the buffer-provider constructor named
// below (`try_new_extended_with_buffer_provider`), forwarding to
// `try_new_extended_unstable`; `new_extended: skip` appears to suppress
// generation of the compiled-data constructor, which this file defines by
// hand — confirm against the `icu_provider` macro documentation.
icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
    functions: [
        new_extended: skip,
        try_new_extended_with_buffer_provider,
        try_new_extended_unstable,
        Self
    ]);
293
294 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_extended)]
try_new_extended_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError> where P: DataProvider<LocaleLikelySubtagsLanguageV1> + DataProvider<LocaleLikelySubtagsScriptRegionV1> + DataProvider<LocaleLikelySubtagsExtendedV1> + ?Sized,295 pub fn try_new_extended_unstable<P>(provider: &P) -> Result<LocaleExpander, DataError>
296 where
297 P: DataProvider<LocaleLikelySubtagsLanguageV1>
298 + DataProvider<LocaleLikelySubtagsScriptRegionV1>
299 + DataProvider<LocaleLikelySubtagsExtendedV1>
300 + ?Sized,
301 {
302 let likely_subtags_l = provider.load(Default::default())?.payload;
303 let likely_subtags_sr = provider.load(Default::default())?.payload;
304 let likely_subtags_ext = Some(provider.load(Default::default())?.payload);
305
306 Ok(LocaleExpander {
307 likely_subtags_l,
308 likely_subtags_sr,
309 likely_subtags_ext,
310 })
311 }
312
as_borrowed(&self) -> LocaleExpanderBorrowed313 fn as_borrowed(&self) -> LocaleExpanderBorrowed {
314 LocaleExpanderBorrowed {
315 likely_subtags_l: self.likely_subtags_l.get(),
316 likely_subtags_sr: self.likely_subtags_sr.get(),
317 likely_subtags_ext: self.likely_subtags_ext.as_ref().map(|p| p.get()),
318 }
319 }
320
321 /// The maximize method potentially updates a passed in locale in place
322 /// depending up the results of running the 'Add Likely Subtags' algorithm
323 /// from <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
324 ///
325 /// If the result of running the algorithm would result in a new locale, the
326 /// locale argument is updated in place to match the result, and the method
327 /// returns [`TransformResult::Modified`]. Otherwise, the method
328 /// returns [`TransformResult::Unmodified`] and the locale argument is
329 /// unchanged.
330 ///
331 /// This function does not guarantee that any particular set of subtags
332 /// will be present in the resulting locale.
333 ///
334 /// # Examples
335 ///
336 /// ```
337 /// use icu::locale::{locale, LocaleExpander, TransformResult};
338 ///
339 /// let lc = LocaleExpander::new_common();
340 ///
341 /// let mut locale = locale!("zh-CN");
342 /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
343 /// assert_eq!(locale, locale!("zh-Hans-CN"));
344 ///
345 /// let mut locale = locale!("zh-Hant-TW");
346 /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
347 /// assert_eq!(locale, locale!("zh-Hant-TW"));
348 /// ```
349 ///
350 /// If there is no data for a particular language, the result is not
351 /// modified. Note that [`LocaleExpander::new_extended`] supports
352 /// more languages.
353 ///
354 /// ```
355 /// use icu::locale::{locale, LocaleExpander, TransformResult};
356 ///
357 /// let lc = LocaleExpander::new_common();
358 ///
359 /// // No subtags data for ccp in the default set:
360 /// let mut locale = locale!("ccp");
361 /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
362 /// assert_eq!(locale, locale!("ccp"));
363 ///
364 /// // The extended set supports it:
365 /// let lc = LocaleExpander::new_extended();
366 /// let mut locale = locale!("ccp");
367 /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Modified);
368 /// assert_eq!(locale, locale!("ccp-Cakm-BD"));
369 ///
370 /// // But even the extended set does not support all language subtags:
371 /// let mut locale = locale!("mul");
372 /// assert_eq!(lc.maximize(&mut locale.id), TransformResult::Unmodified);
373 /// assert_eq!(locale, locale!("mul"));
374 /// ```
maximize(&self, langid: &mut LanguageIdentifier) -> TransformResult375 pub fn maximize(&self, langid: &mut LanguageIdentifier) -> TransformResult {
376 let data = self.as_borrowed();
377
378 if !langid.language.is_default() && langid.script.is_some() && langid.region.is_some() {
379 return TransformResult::Unmodified;
380 }
381
382 if !langid.language.is_default() {
383 if let Some(region) = langid.region {
384 if let Some(script) = data.get_lr(langid.language, region) {
385 return update_langid(Language::UND, Some(script), None, langid);
386 }
387 }
388 if let Some(script) = langid.script {
389 if let Some(region) = data.get_ls(langid.language, script) {
390 return update_langid(Language::UND, None, Some(region), langid);
391 }
392 }
393 if let Some((script, region)) = data.get_l(langid.language) {
394 return update_langid(Language::UND, Some(script), Some(region), langid);
395 }
396 // Language not found: return unmodified.
397 return TransformResult::Unmodified;
398 }
399 if let Some(script) = langid.script {
400 if let Some(region) = langid.region {
401 if let Some(language) = data.get_sr(script, region) {
402 return update_langid(language, None, None, langid);
403 }
404 }
405 if let Some((language, region)) = data.get_s(script) {
406 return update_langid(language, None, Some(region), langid);
407 }
408 }
409 if let Some(region) = langid.region {
410 if let Some((language, script)) = data.get_r(region) {
411 return update_langid(language, Some(script), None, langid);
412 }
413 }
414
415 // We failed to find anything in the und-SR, und-S, or und-R tables,
416 // to fall back to bare "und"
417 debug_assert!(langid.language.is_default());
418 update_langid(
419 data.get_und().0,
420 Some(data.get_und().1),
421 Some(data.get_und().2),
422 langid,
423 )
424 }
425
426 /// This returns a new Locale that is the result of running the
427 /// 'Remove Likely Subtags' algorithm from
428 /// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
429 ///
430 /// If the result of running the algorithm would result in a new locale, the
431 /// locale argument is updated in place to match the result, and the method
432 /// returns [`TransformResult::Modified`]. Otherwise, the method
433 /// returns [`TransformResult::Unmodified`] and the locale argument is
434 /// unchanged.
435 ///
436 /// # Examples
437 ///
438 /// ```
439 /// use icu::locale::{locale, LocaleExpander, TransformResult};
440 ///
441 /// let lc = LocaleExpander::new_common();
442 ///
443 /// let mut locale = locale!("zh-Hans-CN");
444 /// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Modified);
445 /// assert_eq!(locale, locale!("zh"));
446 ///
447 /// let mut locale = locale!("zh");
448 /// assert_eq!(lc.minimize(&mut locale.id), TransformResult::Unmodified);
449 /// assert_eq!(locale, locale!("zh"));
450 /// ```
minimize(&self, langid: &mut LanguageIdentifier) -> TransformResult451 pub fn minimize(&self, langid: &mut LanguageIdentifier) -> TransformResult {
452 self.minimize_impl(langid, true)
453 }
454
455 /// This returns a new Locale that is the result of running the
456 /// 'Remove Likely Subtags, favoring script' algorithm from
457 /// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
458 ///
459 /// If the result of running the algorithm would result in a new locale, the
460 /// locale argument is updated in place to match the result, and the method
461 /// returns [`TransformResult::Modified`]. Otherwise, the method
462 /// returns [`TransformResult::Unmodified`] and the locale argument is
463 /// unchanged.
464 ///
465 /// # Examples
466 ///
467 /// ```
468 /// use icu::locale::{locale, LocaleExpander, TransformResult};
469 ///
470 /// let lc = LocaleExpander::new_common();
471 ///
472 /// let mut locale = locale!("zh-TW");
473 /// assert_eq!(
474 /// lc.minimize_favor_script(&mut locale.id),
475 /// TransformResult::Modified
476 /// );
477 /// assert_eq!(locale, locale!("zh-Hant"));
478 /// ```
minimize_favor_script(&self, langid: &mut LanguageIdentifier) -> TransformResult479 pub fn minimize_favor_script(&self, langid: &mut LanguageIdentifier) -> TransformResult {
480 self.minimize_impl(langid, false)
481 }
482
minimize_impl( &self, langid: &mut LanguageIdentifier, favor_region: bool, ) -> TransformResult483 fn minimize_impl(
484 &self,
485 langid: &mut LanguageIdentifier,
486 favor_region: bool,
487 ) -> TransformResult {
488 let mut max = langid.clone();
489 self.maximize(&mut max);
490
491 let mut trial = max.clone();
492
493 trial.script = None;
494 trial.region = None;
495 self.maximize(&mut trial);
496 if trial == max {
497 return update_langid_minimize(max.language, None, None, langid);
498 }
499
500 if favor_region {
501 trial.script = None;
502 trial.region = max.region;
503 self.maximize(&mut trial);
504
505 if trial == max {
506 return update_langid_minimize(max.language, None, max.region, langid);
507 }
508
509 trial.script = max.script;
510 trial.region = None;
511 self.maximize(&mut trial);
512 if trial == max {
513 return update_langid_minimize(max.language, max.script, None, langid);
514 }
515 } else {
516 trial.script = max.script;
517 trial.region = None;
518 self.maximize(&mut trial);
519 if trial == max {
520 return update_langid_minimize(max.language, max.script, None, langid);
521 }
522
523 trial.script = None;
524 trial.region = max.region;
525 self.maximize(&mut trial);
526
527 if trial == max {
528 return update_langid_minimize(max.language, None, max.region, langid);
529 }
530 }
531
532 update_langid_minimize(max.language, max.script, max.region, langid)
533 }
534
535 // TODO(3492): consider turning this and a future get_likely_region/get_likely_language public
536 #[inline]
get_likely_script(&self, langid: &LanguageIdentifier) -> Option<Script>537 pub(crate) fn get_likely_script(&self, langid: &LanguageIdentifier) -> Option<Script> {
538 langid
539 .script
540 .or_else(|| self.infer_likely_script(langid.language, langid.region))
541 }
542
infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script>543 fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> {
544 let data = self.as_borrowed();
545
546 // proceed through _all possible cases_ in order of specificity
547 // (borrowed from LocaleExpander::maximize):
548 // 1. language + region
549 // 2. language
550 // 3. region
551 // we need to check all cases, because e.g. for "en-US" the default script is associated
552 // with "en" but not "en-US"
553 if !language.is_default() {
554 if let Some(region) = region {
555 // 1. we know both language and region
556 if let Some(script) = data.get_lr(language, region) {
557 return Some(script);
558 }
559 }
560 // 2. we know language, but we either do not know region or knowing region did not help
561 if let Some((script, _)) = data.get_l(language) {
562 return Some(script);
563 }
564 }
565 if let Some(region) = region {
566 // 3. we know region, but we either do not know language or knowing language did not help
567 if let Some((_, script)) = data.get_r(region) {
568 return Some(script);
569 }
570 }
571 // we could not figure out the script from the given locale
572 None
573 }
574 }
575
/// Identity conversion: a `LocaleExpander` can be viewed as a reference to itself.
///
/// NOTE(review): presumably provided so APIs generic over
/// `AsRef<LocaleExpander>` accept a `LocaleExpander` directly — confirm against callers.
impl AsRef<LocaleExpander> for LocaleExpander {
    fn as_ref(&self) -> &LocaleExpander {
        self
    }
}
581
// These tests build the expander with `LocaleExpander::new_common`, which only
// exists when the `compiled_data` feature is enabled, so that is the feature
// the module is gated on (nothing in here uses `serde`).
#[cfg(all(test, feature = "compiled_data"))]
mod tests {
    use super::*;
    use icu_locale_core::locale;

    /// With the script-favoring variant, "yue-Hans" is already minimal.
    #[test]
    fn test_minimize_favor_script() {
        let lc = LocaleExpander::new_common();
        let mut locale = locale!("yue-Hans");
        assert_eq!(
            lc.minimize_favor_script(&mut locale.id),
            TransformResult::Unmodified
        );
        assert_eq!(locale, locale!("yue-Hans"));
    }

    /// With the default (region-favoring) variant, "yue-Hans" becomes "yue-CN".
    #[test]
    fn test_minimize_favor_region() {
        let lc = LocaleExpander::new_common();
        let mut locale = locale!("yue-Hans");
        assert_eq!(lc.minimize(&mut locale.id), TransformResult::Modified);
        assert_eq!(locale, locale!("yue-CN"));
    }
}
607