1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 //! This module provides APIs for getting exemplar characters for a locale. 6 //! 7 //! Exemplars are characters used by a language, separated into different sets. 8 //! The sets are: main, auxiliary, punctuation, numbers, and index. 9 //! 10 //! The sets define, according to typical usage in the language, 11 //! which characters occur in which contexts with which frequency. 12 //! For more information, see the documentation in the 13 //! [Exemplars section in Unicode Technical Standard #35](https://unicode.org/reports/tr35/tr35-general.html#Exemplars) 14 //! of the LDML specification. 15 //! 16 //! # Examples 17 //! 18 //! ``` 19 //! use icu::locale::exemplar_chars::ExemplarCharacters; 20 //! use icu::locale::locale; 21 //! 22 //! let locale = locale!("en-001").into(); 23 //! let exemplars_main = ExemplarCharacters::try_new_main(&locale) 24 //! .expect("locale should be present"); 25 //! 26 //! assert!(exemplars_main.contains('a')); 27 //! assert!(exemplars_main.contains('z')); 28 //! assert!(exemplars_main.contains_str("a")); 29 //! assert!(!exemplars_main.contains_str("ä")); 30 //! assert!(!exemplars_main.contains_str("ng")); 31 //! ``` 32 33 use crate::provider::*; 34 use core::ops::Deref; 35 use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; 36 use icu_provider::{marker::ErasedMarker, prelude::*}; 37 38 /// A wrapper around `UnicodeSet` data (characters and strings) 39 #[derive(Debug)] 40 pub struct ExemplarCharacters { 41 data: DataPayload<ErasedMarker<ExemplarCharactersData<'static>>>, 42 } 43 44 impl ExemplarCharacters { 45 /// Construct a borrowed version of this type that can be queried. 46 /// 47 /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it 48 /// up front. 49 #[inline] as_borrowed(&self) -> ExemplarCharactersBorrowed<'_>50 pub fn as_borrowed(&self) -> ExemplarCharactersBorrowed<'_> { 51 ExemplarCharactersBorrowed { 52 data: self.data.get(), 53 } 54 } 55 } 56 57 /// A borrowed wrapper around code point set data, returned by 58 /// [`ExemplarCharacters::as_borrowed()`]. More efficient to query. 59 #[derive(Clone, Copy, Debug)] 60 pub struct ExemplarCharactersBorrowed<'a> { 61 data: &'a ExemplarCharactersData<'a>, 62 } 63 64 impl<'a> Deref for ExemplarCharactersBorrowed<'a> { 65 type Target = CodePointInversionListAndStringList<'a>; 66 deref(&self) -> &Self::Target67 fn deref(&self) -> &Self::Target { 68 &self.data.0 69 } 70 } 71 72 impl ExemplarCharactersBorrowed<'static> { 73 /// Cheaply converts a [`ExemplarCharactersBorrowed<'static>`] into a [`ExemplarCharacters`]. 74 /// 75 /// Note: Due to branching and indirection, using [`ExemplarCharacters`] might inhibit some 76 /// compile-time optimizations that are possible with [`ExemplarCharactersBorrowed`]. static_to_owned(self) -> ExemplarCharacters77 pub const fn static_to_owned(self) -> ExemplarCharacters { 78 ExemplarCharacters { 79 data: DataPayload::from_static_ref(self.data), 80 } 81 } 82 } 83 84 macro_rules! make_exemplar_chars_unicode_set_property { 85 ( 86 // currently unused 87 dyn_data_marker: $d:ident; 88 data_marker: $data_marker:ty; 89 func: 90 pub fn $unstable:ident(); 91 $(#[$attr:meta])* 92 pub fn $compiled:ident(); 93 ) => { 94 impl ExemplarCharactersBorrowed<'static> { 95 $(#[$attr])* 96 #[cfg(feature = "compiled_data")] 97 #[inline] 98 pub fn $compiled( 99 locale: &DataLocale, 100 ) -> Result<Self, DataError> { 101 Ok(ExemplarCharactersBorrowed { 102 data: DataProvider::<$data_marker>::load( 103 &crate::provider::Baked, 104 DataRequest { 105 id: DataIdentifierBorrowed::for_locale(locale), 106 ..Default::default() 107 })? 108 .payload 109 .get_static() 110 .ok_or_else(|| DataError::custom("Baked provider didn't return static payload"))? 111 }) 112 } 113 114 } 115 impl ExemplarCharacters { 116 $(#[$attr])* 117 #[cfg(feature = "compiled_data")] 118 pub fn $compiled( 119 locale: &DataLocale, 120 ) -> Result<ExemplarCharactersBorrowed<'static>, DataError> { 121 ExemplarCharactersBorrowed::$compiled(locale) 122 } 123 124 #[doc = concat!("A version of [`Self::", stringify!($compiled), "()`] that uses custom data provided by a [`DataProvider`].")] 125 /// 126 /// [ Help choosing a constructor](icu_provider::constructors) 127 pub fn $unstable( 128 provider: &(impl DataProvider<$data_marker> + ?Sized), 129 locale: &DataLocale, 130 ) -> Result<Self, DataError> { 131 Ok(Self { 132 data: 133 provider.load( 134 DataRequest { 135 id: DataIdentifierBorrowed::for_locale(locale), 136 ..Default::default() 137 })? 138 .payload 139 .cast() 140 }) 141 } 142 } 143 } 144 } 145 146 make_exemplar_chars_unicode_set_property!( 147 dyn_data_marker: ExemplarCharactersMain; 148 data_marker: LocaleExemplarCharactersMainV1; 149 func: 150 pub fn try_new_main_unstable(); 151 152 /// Get the "main" set of exemplar characters. 153 /// 154 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 155 /// 156 /// [ Help choosing a constructor](icu_provider::constructors) 157 /// 158 /// # Examples 159 /// 160 /// ``` 161 /// use icu::locale::locale; 162 /// use icu::locale::exemplar_chars::ExemplarCharacters; 163 /// 164 /// let exemplars_main = ExemplarCharacters::try_new_main(&locale!("en").into()) 165 /// .expect("locale should be present"); 166 /// 167 /// assert!(exemplars_main.contains('a')); 168 /// assert!(exemplars_main.contains('z')); 169 /// assert!(exemplars_main.contains_str("a")); 170 /// assert!(!exemplars_main.contains_str("ä")); 171 /// assert!(!exemplars_main.contains_str("ng")); 172 /// assert!(!exemplars_main.contains_str("A")); 173 /// ``` 174 pub fn try_new_main(); 175 ); 176 177 make_exemplar_chars_unicode_set_property!( 178 dyn_data_marker: ExemplarCharactersAuxiliary; 179 data_marker: LocaleExemplarCharactersAuxiliaryV1; 180 func: 181 pub fn try_new_auxiliary_unstable(); 182 183 /// Get the "auxiliary" set of exemplar characters. 184 /// 185 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 186 /// 187 /// [ Help choosing a constructor](icu_provider::constructors) 188 /// 189 /// # Examples 190 /// 191 /// ``` 192 /// use icu::locale::locale; 193 /// use icu::locale::exemplar_chars::ExemplarCharacters; 194 /// 195 /// let exemplars_auxiliary = 196 /// ExemplarCharacters::try_new_auxiliary(&locale!("en").into()) 197 /// .expect("locale should be present"); 198 /// 199 /// assert!(!exemplars_auxiliary.contains('a')); 200 /// assert!(!exemplars_auxiliary.contains('z')); 201 /// assert!(!exemplars_auxiliary.contains_str("a")); 202 /// assert!(exemplars_auxiliary.contains_str("ä")); 203 /// assert!(!exemplars_auxiliary.contains_str("ng")); 204 /// assert!(!exemplars_auxiliary.contains_str("A")); 205 /// ``` 206 pub fn try_new_auxiliary(); 207 ); 208 209 make_exemplar_chars_unicode_set_property!( 210 dyn_data_marker: ExemplarCharactersPunctuation; 211 data_marker: LocaleExemplarCharactersPunctuationV1; 212 func: 213 pub fn try_new_punctuation_unstable(); 214 215 /// Get the "punctuation" set of exemplar characters. 216 /// 217 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 218 /// 219 /// [ Help choosing a constructor](icu_provider::constructors) 220 /// 221 /// # Examples 222 /// 223 /// ``` 224 /// use icu::locale::locale; 225 /// use icu::locale::exemplar_chars::ExemplarCharacters; 226 /// 227 /// let exemplars_punctuation = 228 /// ExemplarCharacters::try_new_punctuation(&locale!("en").into()) 229 /// .expect("locale should be present"); 230 /// 231 /// assert!(!exemplars_punctuation.contains('0')); 232 /// assert!(!exemplars_punctuation.contains('9')); 233 /// assert!(!exemplars_punctuation.contains('%')); 234 /// assert!(exemplars_punctuation.contains(',')); 235 /// assert!(exemplars_punctuation.contains('.')); 236 /// assert!(exemplars_punctuation.contains('!')); 237 /// assert!(exemplars_punctuation.contains('?')); 238 /// ``` 239 pub fn try_new_punctuation(); 240 ); 241 242 make_exemplar_chars_unicode_set_property!( 243 dyn_data_marker: ExemplarCharactersNumbers; 244 data_marker: LocaleExemplarCharactersNumbersV1; 245 func: 246 pub fn try_new_numbers_unstable(); 247 248 /// Get the "numbers" set of exemplar characters. 249 /// 250 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 251 /// 252 /// [ Help choosing a constructor](icu_provider::constructors) 253 /// 254 /// # Examples 255 /// 256 /// ``` 257 /// use icu::locale::locale; 258 /// use icu::locale::exemplar_chars::ExemplarCharacters; 259 /// 260 /// let exemplars_numbers = 261 /// ExemplarCharacters::try_new_numbers(&locale!("en").into()) 262 /// .expect("locale should be present"); 263 /// 264 /// assert!(exemplars_numbers.contains('0')); 265 /// assert!(exemplars_numbers.contains('9')); 266 /// assert!(exemplars_numbers.contains('%')); 267 /// assert!(exemplars_numbers.contains(',')); 268 /// assert!(exemplars_numbers.contains('.')); 269 /// assert!(!exemplars_numbers.contains('!')); 270 /// assert!(!exemplars_numbers.contains('?')); 271 /// ``` 272 pub fn try_new_numbers(); 273 ); 274 275 make_exemplar_chars_unicode_set_property!( 276 dyn_data_marker: ExemplarCharactersIndex; 277 data_marker: LocaleExemplarCharactersIndexV1; 278 func: 279 pub fn try_new_index_unstable(); 280 281 /// Get the "index" set of exemplar characters. 282 /// 283 /// ✨ *Enabled with the `compiled_data` Cargo feature.* 284 /// 285 /// [ Help choosing a constructor](icu_provider::constructors) 286 /// 287 /// # Examples 288 /// 289 /// ``` 290 /// use icu::locale::locale; 291 /// use icu::locale::exemplar_chars::ExemplarCharacters; 292 /// 293 /// let exemplars_index = 294 /// ExemplarCharacters::try_new_index(&locale!("en").into()) 295 /// .expect("locale should be present"); 296 /// 297 /// assert!(!exemplars_index.contains('a')); 298 /// assert!(!exemplars_index.contains('z')); 299 /// assert!(!exemplars_index.contains_str("a")); 300 /// assert!(!exemplars_index.contains_str("ä")); 301 /// assert!(!exemplars_index.contains_str("ng")); 302 /// assert!(exemplars_index.contains_str("A")); 303 /// ``` 304 pub fn try_new_index(); 305 ); 306