• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 //! This module provides APIs for getting exemplar characters for a locale.
6 //!
7 //! Exemplars are characters used by a language, separated into different sets.
8 //! The sets are: main, auxiliary, punctuation, numbers, and index.
9 //!
10 //! The sets define, according to typical usage in the language,
11 //! which characters occur in which contexts with which frequency.
12 //! For more information, see the documentation in the
13 //! [Exemplars section in Unicode Technical Standard #35](https://unicode.org/reports/tr35/tr35-general.html#Exemplars)
14 //! of the LDML specification.
15 //!
16 //! # Examples
17 //!
18 //! ```
19 //! use icu::locale::exemplar_chars::ExemplarCharacters;
20 //! use icu::locale::locale;
21 //!
22 //! let locale = locale!("en-001").into();
23 //! let exemplars_main = ExemplarCharacters::try_new_main(&locale)
24 //!     .expect("locale should be present");
25 //!
26 //! assert!(exemplars_main.contains('a'));
27 //! assert!(exemplars_main.contains('z'));
28 //! assert!(exemplars_main.contains_str("a"));
29 //! assert!(!exemplars_main.contains_str("ä"));
30 //! assert!(!exemplars_main.contains_str("ng"));
31 //! ```
32 
33 use crate::provider::*;
34 use core::ops::Deref;
35 use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
36 use icu_provider::{marker::ErasedMarker, prelude::*};
37 
38 /// A wrapper around `UnicodeSet` data (characters and strings)
39 #[derive(Debug)]
40 pub struct ExemplarCharacters {
41     data: DataPayload<ErasedMarker<ExemplarCharactersData<'static>>>,
42 }
43 
44 impl ExemplarCharacters {
45     /// Construct a borrowed version of this type that can be queried.
46     ///
47     /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
48     /// up front.
49     #[inline]
as_borrowed(&self) -> ExemplarCharactersBorrowed<'_>50     pub fn as_borrowed(&self) -> ExemplarCharactersBorrowed<'_> {
51         ExemplarCharactersBorrowed {
52             data: self.data.get(),
53         }
54     }
55 }
56 
57 /// A borrowed wrapper around code point set data, returned by
58 /// [`ExemplarCharacters::as_borrowed()`]. More efficient to query.
59 #[derive(Clone, Copy, Debug)]
60 pub struct ExemplarCharactersBorrowed<'a> {
61     data: &'a ExemplarCharactersData<'a>,
62 }
63 
64 impl<'a> Deref for ExemplarCharactersBorrowed<'a> {
65     type Target = CodePointInversionListAndStringList<'a>;
66 
deref(&self) -> &Self::Target67     fn deref(&self) -> &Self::Target {
68         &self.data.0
69     }
70 }
71 
72 impl ExemplarCharactersBorrowed<'static> {
73     /// Cheaply converts a [`ExemplarCharactersBorrowed<'static>`] into a [`ExemplarCharacters`].
74     ///
75     /// Note: Due to branching and indirection, using [`ExemplarCharacters`] might inhibit some
76     /// compile-time optimizations that are possible with [`ExemplarCharactersBorrowed`].
static_to_owned(self) -> ExemplarCharacters77     pub const fn static_to_owned(self) -> ExemplarCharacters {
78         ExemplarCharacters {
79             data: DataPayload::from_static_ref(self.data),
80         }
81     }
82 }
83 
// Generates the constructors for one exemplar character set:
//
// * `ExemplarCharactersBorrowed::<'static>::$compiled` and `ExemplarCharacters::$compiled`,
//   both gated on the `compiled_data` feature and both returning a `'static` borrowed set
//   loaded from the baked provider;
// * `ExemplarCharacters::$unstable`, which loads from a caller-supplied `DataProvider`.
//
// The attributes (doc comments) written before `$compiled` in the invocation are attached to
// both `$compiled` functions.
macro_rules! make_exemplar_chars_unicode_set_property {
    (
        // currently unused
        dyn_data_marker: $d:ident;
        data_marker: $data_marker:ty;
        func:
        pub fn $unstable:ident();
        $(#[$attr:meta])*
        pub fn $compiled:ident();
    ) => {
        impl ExemplarCharactersBorrowed<'static> {
            $(#[$attr])*
            #[cfg(feature = "compiled_data")]
            #[inline]
            pub fn $compiled(
                locale: &DataLocale,
            ) -> Result<Self, DataError> {
                let request = DataRequest {
                    id: DataIdentifierBorrowed::for_locale(locale),
                    ..Default::default()
                };
                let response =
                    DataProvider::<$data_marker>::load(&crate::provider::Baked, request)?;
                // Only a payload backed by static data can be held by reference here.
                let data = response
                    .payload
                    .get_static()
                    .ok_or_else(|| DataError::custom("Baked provider didn't return static payload"))?;
                Ok(ExemplarCharactersBorrowed { data })
            }
        }

        impl ExemplarCharacters {
            $(#[$attr])*
            #[cfg(feature = "compiled_data")]
            pub fn $compiled(
                locale: &DataLocale,
            ) -> Result<ExemplarCharactersBorrowed<'static>, DataError> {
                ExemplarCharactersBorrowed::$compiled(locale)
            }

            #[doc = concat!("A version of [`Self::", stringify!($compiled), "()`] that uses custom data provided by a [`DataProvider`].")]
            ///
            /// [📚 Help choosing a constructor](icu_provider::constructors)
            pub fn $unstable(
                provider: &(impl DataProvider<$data_marker> + ?Sized),
                locale: &DataLocale,
            ) -> Result<Self, DataError> {
                let request = DataRequest {
                    id: DataIdentifierBorrowed::for_locale(locale),
                    ..Default::default()
                };
                let response = provider.load(request)?;
                // Cast the marker-specific payload to the erased marker stored in the struct.
                Ok(Self {
                    data: response.payload.cast(),
                })
            }
        }
    };
}
145 
// Generates the "main" constructors: `try_new_main` on both `ExemplarCharacters` and
// `ExemplarCharactersBorrowed<'static>` (compiled data), and
// `ExemplarCharacters::try_new_main_unstable` (caller-supplied provider).
make_exemplar_chars_unicode_set_property!(
    dyn_data_marker: ExemplarCharactersMain;
    data_marker: LocaleExemplarCharactersMainV1;
    func:
    pub fn try_new_main_unstable();

    /// Get the "main" set of exemplar characters.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::locale;
    /// use icu::locale::exemplar_chars::ExemplarCharacters;
    ///
    /// let exemplars_main = ExemplarCharacters::try_new_main(&locale!("en").into())
    ///     .expect("locale should be present");
    ///
    /// assert!(exemplars_main.contains('a'));
    /// assert!(exemplars_main.contains('z'));
    /// assert!(exemplars_main.contains_str("a"));
    /// assert!(!exemplars_main.contains_str("ä"));
    /// assert!(!exemplars_main.contains_str("ng"));
    /// assert!(!exemplars_main.contains_str("A"));
    /// ```
    pub fn try_new_main();
);
176 
// Generates the "auxiliary" constructors: `try_new_auxiliary` on both `ExemplarCharacters` and
// `ExemplarCharactersBorrowed<'static>` (compiled data), and
// `ExemplarCharacters::try_new_auxiliary_unstable` (caller-supplied provider).
make_exemplar_chars_unicode_set_property!(
    dyn_data_marker: ExemplarCharactersAuxiliary;
    data_marker: LocaleExemplarCharactersAuxiliaryV1;
    func:
    pub fn try_new_auxiliary_unstable();

    /// Get the "auxiliary" set of exemplar characters.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::locale;
    /// use icu::locale::exemplar_chars::ExemplarCharacters;
    ///
    /// let exemplars_auxiliary =
    ///     ExemplarCharacters::try_new_auxiliary(&locale!("en").into())
    ///     .expect("locale should be present");
    ///
    /// assert!(!exemplars_auxiliary.contains('a'));
    /// assert!(!exemplars_auxiliary.contains('z'));
    /// assert!(!exemplars_auxiliary.contains_str("a"));
    /// assert!(exemplars_auxiliary.contains_str("ä"));
    /// assert!(!exemplars_auxiliary.contains_str("ng"));
    /// assert!(!exemplars_auxiliary.contains_str("A"));
    /// ```
    pub fn try_new_auxiliary();
);
208 
// Generates the "punctuation" constructors: `try_new_punctuation` on both `ExemplarCharacters`
// and `ExemplarCharactersBorrowed<'static>` (compiled data), and
// `ExemplarCharacters::try_new_punctuation_unstable` (caller-supplied provider).
make_exemplar_chars_unicode_set_property!(
    dyn_data_marker: ExemplarCharactersPunctuation;
    data_marker: LocaleExemplarCharactersPunctuationV1;
    func:
    pub fn try_new_punctuation_unstable();

    /// Get the "punctuation" set of exemplar characters.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::locale;
    /// use icu::locale::exemplar_chars::ExemplarCharacters;
    ///
    /// let exemplars_punctuation =
    ///     ExemplarCharacters::try_new_punctuation(&locale!("en").into())
    ///     .expect("locale should be present");
    ///
    /// assert!(!exemplars_punctuation.contains('0'));
    /// assert!(!exemplars_punctuation.contains('9'));
    /// assert!(!exemplars_punctuation.contains('%'));
    /// assert!(exemplars_punctuation.contains(','));
    /// assert!(exemplars_punctuation.contains('.'));
    /// assert!(exemplars_punctuation.contains('!'));
    /// assert!(exemplars_punctuation.contains('?'));
    /// ```
    pub fn try_new_punctuation();
);
241 
// Generates the "numbers" constructors: `try_new_numbers` on both `ExemplarCharacters` and
// `ExemplarCharactersBorrowed<'static>` (compiled data), and
// `ExemplarCharacters::try_new_numbers_unstable` (caller-supplied provider).
make_exemplar_chars_unicode_set_property!(
    dyn_data_marker: ExemplarCharactersNumbers;
    data_marker: LocaleExemplarCharactersNumbersV1;
    func:
    pub fn try_new_numbers_unstable();

    /// Get the "numbers" set of exemplar characters.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::locale;
    /// use icu::locale::exemplar_chars::ExemplarCharacters;
    ///
    /// let exemplars_numbers =
    ///     ExemplarCharacters::try_new_numbers(&locale!("en").into())
    ///     .expect("locale should be present");
    ///
    /// assert!(exemplars_numbers.contains('0'));
    /// assert!(exemplars_numbers.contains('9'));
    /// assert!(exemplars_numbers.contains('%'));
    /// assert!(exemplars_numbers.contains(','));
    /// assert!(exemplars_numbers.contains('.'));
    /// assert!(!exemplars_numbers.contains('!'));
    /// assert!(!exemplars_numbers.contains('?'));
    /// ```
    pub fn try_new_numbers();
);
274 
// Generates the "index" constructors: `try_new_index` on both `ExemplarCharacters` and
// `ExemplarCharactersBorrowed<'static>` (compiled data), and
// `ExemplarCharacters::try_new_index_unstable` (caller-supplied provider).
make_exemplar_chars_unicode_set_property!(
    dyn_data_marker: ExemplarCharactersIndex;
    data_marker: LocaleExemplarCharactersIndexV1;
    func:
    pub fn try_new_index_unstable();

    /// Get the "index" set of exemplar characters.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Examples
    ///
    /// ```
    /// use icu::locale::locale;
    /// use icu::locale::exemplar_chars::ExemplarCharacters;
    ///
    /// let exemplars_index =
    ///     ExemplarCharacters::try_new_index(&locale!("en").into())
    ///     .expect("locale should be present");
    ///
    /// assert!(!exemplars_index.contains('a'));
    /// assert!(!exemplars_index.contains('z'));
    /// assert!(!exemplars_index.contains_str("a"));
    /// assert!(!exemplars_index.contains_str("ä"));
    /// assert!(!exemplars_index.contains_str("ng"));
    /// assert!(exemplars_index.contains_str("A"));
    /// ```
    pub fn try_new_index();
);
306