// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// Provider structs must be stable
#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]

//! \[Unstable\] Data provider struct definitions for this ICU4X component.
//!
//! <div class="stab unstable">
//! This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
//! to be stable, their Rust representation might not be. Use with caution.
//! </div>
//!
//! Read more about data providers: [`icu_provider`]

#[cfg(feature = "compiled_data")]
#[derive(Debug)]
/// Baked data
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
pub struct Baked;

// The anonymous `const _` block gives the glob import and the helper `icu` module a private
// scope: items declared inside it are not nameable from the rest of this module.
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
const _: () = {
    use icu_locale_data::*;
    // NOTE(review): presumably the macros invoked below expand to paths rooted at
    // `icu::locale` / `icu::collections`, which this module maps onto the current crate and
    // `icu_collections` — confirm against `icu_locale_data`.
    pub mod icu {
        pub use crate as locale;
        pub use icu_collections as collections;
    }
    make_provider!(Baked);
    impl_locale_aliases_v1!(Baked);
    impl_locale_likely_subtags_extended_v1!(Baked);
    impl_locale_likely_subtags_language_v1!(Baked);
    impl_locale_likely_subtags_script_region_v1!(Baked);
    impl_locale_parents_v1!(Baked);
    impl_locale_script_direction_v1!(Baked);

    impl_locale_exemplar_characters_auxiliary_v1!(Baked);
    impl_locale_exemplar_characters_index_v1!(Baked);
    impl_locale_exemplar_characters_main_v1!(Baked);
    impl_locale_exemplar_characters_numbers_v1!(Baked);
    impl_locale_exemplar_characters_punctuation_v1!(Baked);
};

icu_provider::data_marker!(
    /// Marker for locale alias data.
    LocaleAliasesV1,
    "locale/aliases/v1",
    Aliases<'static>,
    is_singleton = true
);
icu_provider::data_marker!(
    /// Marker for data for likely subtags for languages.
    LocaleLikelySubtagsLanguageV1,
    "locale/likely/subtags/language/v1",
    LikelySubtagsForLanguage<'static>,
    is_singleton = true
);
icu_provider::data_marker!(
    /// Marker for data for likely subtags for scripts and regions.
    LocaleLikelySubtagsScriptRegionV1,
    "locale/likely/subtags/script/region/v1",
    LikelySubtagsForScriptRegion<'static>,
    is_singleton = true
);
icu_provider::data_marker!(
    /// Marker for extended data for likely subtags.
    LocaleLikelySubtagsExtendedV1,
    "locale/likely/subtags/extended/v1",
    LikelySubtagsExtended<'static>,
    is_singleton = true
);
icu_provider::data_marker!(
    /// Marker for locale fallback parents data.
    LocaleParentsV1,
    "locale/parents/v1",
    Parents<'static>,
    is_singleton = true
);

icu_provider::data_marker!(
    /// Marker for script direction data.
    LocaleScriptDirectionV1,
    "locale/script/direction/v1",
    ScriptDirection<'static>,
    is_singleton = true
);

// Unlike the markers above, the exemplar-character markers below are declared without
// `is_singleton = true`. All five share the `ExemplarCharactersData` data struct.
icu_provider::data_marker!(
    /// Marker for auxiliary exemplar characters data.
    LocaleExemplarCharactersAuxiliaryV1,
    "locale/exemplar/characters/auxiliary/v1",
    ExemplarCharactersData<'static>,
);
icu_provider::data_marker!(
    /// Marker for index exemplar characters data.
    LocaleExemplarCharactersIndexV1,
    "locale/exemplar/characters/index/v1",
    ExemplarCharactersData<'static>,
);
icu_provider::data_marker!(
    /// Marker for main exemplar characters data.
    LocaleExemplarCharactersMainV1,
    "locale/exemplar/characters/main/v1",
    ExemplarCharactersData<'static>,
);
icu_provider::data_marker!(
    /// Marker for numbers exemplar characters data.
    LocaleExemplarCharactersNumbersV1,
    "locale/exemplar/characters/numbers/v1",
    ExemplarCharactersData<'static>,
);
icu_provider::data_marker!(
    /// Marker for punctuation exemplar characters data.
    LocaleExemplarCharactersPunctuationV1,
    "locale/exemplar/characters/punctuation/v1",
    ExemplarCharactersData<'static>,
);

#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
///
/// Contains the `INFO` of every data marker declared in this module.
pub const MARKERS: &[DataMarkerInfo] = &[
    LocaleAliasesV1::INFO,
    LocaleExemplarCharactersAuxiliaryV1::INFO,
    LocaleExemplarCharactersIndexV1::INFO,
    LocaleExemplarCharactersMainV1::INFO,
    LocaleExemplarCharactersNumbersV1::INFO,
    LocaleExemplarCharactersPunctuationV1::INFO,
    LocaleLikelySubtagsExtendedV1::INFO,
    LocaleLikelySubtagsLanguageV1::INFO,
    LocaleLikelySubtagsScriptRegionV1::INFO,
    LocaleParentsV1::INFO,
    LocaleScriptDirectionV1::INFO,
];

use alloc::borrow::Cow;
use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
use icu_locale_core::subtags::{Language, Region, Script, Variant};
use icu_provider::prelude::*;
use potential_utf::PotentialUtf8;
use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice, ZeroVec};

// We use raw TinyAsciiStrs for map keys, as we then don't have to
// validate them as subtags on deserialization. Map lookup can be
// done even if they are not valid tags (an invalid key will just
// become inaccessible).
type UnvalidatedLanguage = UnvalidatedTinyAsciiStr<3>;
type UnvalidatedScript = UnvalidatedTinyAsciiStr<4>;
type UnvalidatedRegion = UnvalidatedTinyAsciiStr<3>;
type UnvalidatedVariant = UnvalidatedTinyAsciiStr<8>;
type UnvalidatedSubdivision = UnvalidatedTinyAsciiStr<7>;
// NOTE(review): this alias uses `TinyAsciiStr` rather than `UnvalidatedTinyAsciiStr`;
// presumably the value is checked to be ASCII but not to be a valid subdivision — confirm.
type SemivalidatedSubdivision = TinyAsciiStr<7>;

// LanguageIdentifier doesn't have an AsULE implementation, so we have
// to store strs and parse when needed.
type UnvalidatedLanguageIdentifier = str;
// Unsized (VarULE) form of `StrStrPair`, generated by `#[zerovec::make_varule]` below.
type UnvalidatedLanguageIdentifierPair = StrStrPairVarULE;
// Unsized (VarULE) form of `LanguageStrStrPair`, generated by `#[zerovec::make_varule]` below.
type UnvalidatedLanguageVariantsPair = LanguageStrStrPairVarULE;

#[zerovec::make_varule(StrStrPairVarULE)]
#[zerovec::derive(Debug)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Deserialize),
    zerovec::derive(Deserialize)
)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    zerovec::derive(Serialize),
    databake(path = icu_locale::provider),
)]
/// A pair of strings with an `EncodeAsVarULE` implementation.
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
pub struct StrStrPair<'a>(
    #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
    #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
);

#[zerovec::make_varule(LanguageStrStrPairVarULE)]
#[zerovec::derive(Debug)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Deserialize),
    zerovec::derive(Deserialize)
)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    zerovec::derive(Serialize),
    databake(path = icu_locale::provider),
)]
/// A [`Language`] followed by a pair of strings, with an `EncodeAsVarULE` implementation.
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
pub struct LanguageStrStrPair<'a>(
    pub Language,
    #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
    #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
);

#[derive(PartialEq, Clone, Default, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
/// This alias data is used for locale canonicalization.
///
/// Each field defines a mapping from an old identifier to a new identifier,
/// based upon the rules in
/// <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data
/// is stored in sorted order, allowing for binary search to identify rules to
/// apply. It is broken down into smaller vectors based upon some characteristic
/// of the data, to help avoid unnecessary searches. For example, the `sgn_region`
/// field contains aliases for sign language and region, so that it is not
/// necessary to search the data unless the input is a sign language.
///
/// The algorithm in tr35 is not guaranteed to terminate on data other than what
/// is currently in CLDR. For this reason, it is not a good idea to attempt to add
/// or modify aliases for use in this structure.
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
// TODO: Use validated types as value types
// Notice: improves the alignment of `language_variants`, speeding up canonicalization by up
// to 40%. See https://github.com/unicode-org/icu4x/pull/2935 for details.
#[derive(Debug)]
pub struct Aliases<'data> {
    /// `[language, variant(-variant)*] -> [langid]`
    /// This is not a map as it's searched linearly according to the canonicalization rules.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_variants: VarZeroVec<'data, UnvalidatedLanguageVariantsPair>,
    /// `sgn-[region] -> [language]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>,
    /// `[language{2}] -> [langid]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>,
    /// `[language{3}] -> [langid]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>,
    /// `[langid] -> [langid]`
    /// This is not a map as it's searched linearly according to the canonicalization rules.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,

    /// `[script] -> [script]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script: ZeroMap<'data, UnvalidatedScript, Script>,

    /// `[region{2}] -> [region]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>,
    /// `[region{3}] -> [region]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>,

    /// `[region] -> [region]+`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>,

    /// `[variant] -> [variant]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>,

    /// `[value{7}] -> [value{7}]`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
}

icu_provider::data_struct!(
    Aliases<'_>,
    #[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for the minimize and maximize operations.
///
/// Each field defines a mapping from an old identifier to a new identifier,
/// based upon the rules in
/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
///
/// The data is broken down into smaller vectors based upon the rules
/// defined for the likely subtags maximize algorithm.
///
/// For efficiency, only the relevant part of the LanguageIdentifier is stored
/// for searching and replacing. E.g., the `language_script` field is used to store
/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
/// region.
///
/// This struct contains mappings when the input contains a language subtag.
/// Also see [`LikelySubtagsForScriptRegion`].
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsForLanguage<'data> {
    /// Language and script, mapped to a region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
    /// Language and region, mapped to a script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
    /// Just language, mapped to a script and region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
    /// Undefined (`und`): a complete language, script and region triple.
    pub und: (Language, Script, Region),
}

icu_provider::data_struct!(
    LikelySubtagsForLanguage<'_>,
    #[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for the minimize and maximize operations.
///
/// Each field defines a mapping from an old identifier to a new identifier,
/// based upon the rules in
/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
///
/// The data is broken down into smaller vectors based upon the rules
/// defined for the likely subtags maximize algorithm.
///
/// For efficiency, only the relevant part of the LanguageIdentifier is stored
/// for searching and replacing. E.g., the `script_region` field is used to store
/// rules for `LanguageIdentifier`s that contain a script and a region, but not a
/// language.
///
/// This struct contains mappings when the input does not contain a language subtag.
/// Also see [`LikelySubtagsForLanguage`].
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsForScriptRegion<'data> {
    /// Script and region, mapped to a language.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
    /// Just script, mapped to a language and region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
    /// Just region, mapped to a language and script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
}

icu_provider::data_struct!(
    LikelySubtagsForScriptRegion<'_>,
    #[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for full coverage of locales, including ones that
/// don't otherwise have data in the Common Locale Data Repository (CLDR).
///
/// The fields mirror those of [`LikelySubtagsForLanguage`] (without `und`) and
/// [`LikelySubtagsForScriptRegion`] combined.
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsExtended<'data> {
    /// Language and script, mapped to a region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
    /// Language and region, mapped to a script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
    /// Just language, mapped to a script and region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
    /// Script and region, mapped to a language.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
    /// Just script, mapped to a language and region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
    /// Just region, mapped to a language and script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
}

icu_provider::data_struct!(
    LikelySubtagsExtended<'_>,
    #[cfg(feature = "datagen")]
);

/// Locale fallback rules derived from CLDR parent locales data.
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Default, Clone, PartialEq, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct Parents<'data> {
    /// Map from language identifier to language identifier, indicating that the language on the
    /// left should inherit from the language on the right.
    ///
    /// The key is stored as a [`PotentialUtf8`] string; the value is stored as its
    /// language, optional script and optional region subtags.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub parents: ZeroMap<'data, PotentialUtf8, (Language, Option<Script>, Option<Region>)>,
}

icu_provider::data_struct!(
    Parents<'_>,
    #[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This directionality data is used to determine the script directionality of a locale.
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct ScriptDirection<'data> {
    /// Scripts in right-to-left direction.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub rtl: ZeroVec<'data, UnvalidatedScript>,
    /// Scripts in left-to-right direction.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub ltr: ZeroVec<'data, UnvalidatedScript>,
}

icu_provider::data_struct!(
    ScriptDirection<'_>,
    #[cfg(feature = "datagen")]
);

/// A set of characters and strings which share a particular property value.
///
/// This is a newtype around [`CodePointInversionListAndStringList`].
///
/// <div class="stab unstable">
/// This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_locale::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct ExemplarCharactersData<'data>(
    #[cfg_attr(feature = "serde", serde(borrow))] pub CodePointInversionListAndStringList<'data>,
);

icu_provider::data_struct!(
    ExemplarCharactersData<'_>,
    #[cfg(feature = "datagen")]
);