1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use crate::props::*;
6 use crate::provider::names::*;
7 use core::marker::PhantomData;
8 use icu_collections::codepointtrie::TrieValue;
9 use icu_provider::marker::ErasedMarker;
10 use icu_provider::prelude::*;
11 use yoke::Yokeable;
12 use zerotrie::cursor::ZeroTrieSimpleAsciiCursor;
13
/// A struct capable of looking up a property value from a string name.
/// Access its data by calling [`Self::as_borrowed()`] and using the methods on
/// [`PropertyParserBorrowed`].
///
/// The name can be a short name (`Lu`), a long name (`Uppercase_Letter`),
/// or an alias.
///
/// Property names can be looked up using "strict" matching (looking for a name
/// that matches exactly), or "loose matching", where the name is allowed to deviate
/// in terms of ASCII casing, whitespace, underscores, and hyphens.
///
/// # Example
///
/// ```
/// use icu::properties::props::GeneralCategory;
/// use icu::properties::PropertyParser;
///
/// let lookup = PropertyParser::<GeneralCategory>::new();
/// // short name for value
/// assert_eq!(
///     lookup.get_strict("Lu"),
///     Some(GeneralCategory::UppercaseLetter)
/// );
/// assert_eq!(
///     lookup.get_strict("Pd"),
///     Some(GeneralCategory::DashPunctuation)
/// );
/// // long name for value
/// assert_eq!(
///     lookup.get_strict("Uppercase_Letter"),
///     Some(GeneralCategory::UppercaseLetter)
/// );
/// assert_eq!(
///     lookup.get_strict("Dash_Punctuation"),
///     Some(GeneralCategory::DashPunctuation)
/// );
/// // name has incorrect casing
/// assert_eq!(lookup.get_strict("dashpunctuation"), None);
/// // loose matching of name
/// assert_eq!(
///     lookup.get_loose("dash-punctuation"),
///     Some(GeneralCategory::DashPunctuation)
/// );
/// // fake property
/// assert_eq!(lookup.get_strict("Animated_Gif"), None);
/// ```
#[derive(Debug)]
pub struct PropertyParser<T> {
    // Name-to-value map payload. The marker is erased, so the same payload
    // type is used regardless of which property `T` this parser belongs to.
    map: DataPayload<ErasedMarker<PropertyValueNameToEnumMap<'static>>>,
    // Records the property type `T` at the type level without storing a `T`.
    markers: PhantomData<fn() -> T>,
}
65
/// A borrowed wrapper around property value name-to-enum data, returned by
/// [`PropertyParser::as_borrowed()`]. More efficient to query.
///
/// This type is `Copy`, and its query methods take `self` by value.
#[derive(Debug)]
pub struct PropertyParserBorrowed<'a, T> {
    // Borrowed name-to-value map that all lookups run against.
    map: &'a PropertyValueNameToEnumMap<'a>,
    // Records the property type `T` at the type level without storing a `T`.
    markers: PhantomData<fn() -> T>,
}
73
// `Clone` and `Copy` are implemented by hand rather than derived so that they
// do not pick up `T: Clone` / `T: Copy` bounds; `T` only appears in `PhantomData`.
impl<T> Clone for PropertyParserBorrowed<'_, T> {
    fn clone(&self) -> Self {
        // The type is `Copy` (see the impl right below), so a clone is a plain copy.
        *self
    }
}
impl<T> Copy for PropertyParserBorrowed<'_, T> {}
80
81 impl<T> PropertyParser<T> {
82 /// Creates a new instance of `PropertyParser<T>` using compiled data.
83 ///
84 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
85 ///
86 /// [ Help choosing a constructor](icu_provider::constructors)
87 #[cfg(feature = "compiled_data")]
88 #[allow(clippy::new_ret_no_self)]
new() -> PropertyParserBorrowed<'static, T> where T: ParseableEnumeratedProperty,89 pub fn new() -> PropertyParserBorrowed<'static, T>
90 where
91 T: ParseableEnumeratedProperty,
92 {
93 PropertyParserBorrowed::new()
94 }
95
96 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
try_new_unstable( provider: &(impl DataProvider<T::DataMarker> + ?Sized), ) -> Result<Self, DataError> where T: ParseableEnumeratedProperty,97 pub fn try_new_unstable(
98 provider: &(impl DataProvider<T::DataMarker> + ?Sized),
99 ) -> Result<Self, DataError>
100 where
101 T: ParseableEnumeratedProperty,
102 {
103 Ok(Self {
104 map: provider.load(Default::default())?.payload.cast(),
105 markers: PhantomData,
106 })
107 }
108
109 /// Construct a borrowed version of this type that can be queried.
110 ///
111 /// This avoids a potential small underlying cost per API call (like `get_strict()`) by consolidating it
112 /// up front.
113 #[inline]
as_borrowed(&self) -> PropertyParserBorrowed<'_, T>114 pub fn as_borrowed(&self) -> PropertyParserBorrowed<'_, T> {
115 PropertyParserBorrowed {
116 map: self.map.get(),
117 markers: PhantomData,
118 }
119 }
120
121 #[doc(hidden)] // used by FFI code
erase(self) -> PropertyParser<u16>122 pub fn erase(self) -> PropertyParser<u16> {
123 PropertyParser {
124 map: self.map.cast(),
125 markers: PhantomData,
126 }
127 }
128 }
129
130 impl<T: TrieValue> PropertyParserBorrowed<'_, T> {
131 /// Get the property value as a u16, doing a strict search looking for
132 /// names that match exactly
133 ///
134 /// # Example
135 ///
136 /// ```
137 /// use icu::properties::props::GeneralCategory;
138 /// use icu::properties::PropertyParser;
139 ///
140 /// let lookup = PropertyParser::<GeneralCategory>::new();
141 /// assert_eq!(
142 /// lookup.get_strict_u16("Lu"),
143 /// Some(GeneralCategory::UppercaseLetter as u16)
144 /// );
145 /// assert_eq!(
146 /// lookup.get_strict_u16("Uppercase_Letter"),
147 /// Some(GeneralCategory::UppercaseLetter as u16)
148 /// );
149 /// // does not do loose matching
150 /// assert_eq!(lookup.get_strict_u16("UppercaseLetter"), None);
151 /// ```
152 #[inline]
get_strict_u16(self, name: &str) -> Option<u16>153 pub fn get_strict_u16(self, name: &str) -> Option<u16> {
154 get_strict_u16(self.map, name)
155 }
156
157 /// Get the property value as a `T`, doing a strict search looking for
158 /// names that match exactly
159 ///
160 /// # Example
161 ///
162 /// ```
163 /// use icu::properties::props::GeneralCategory;
164 /// use icu::properties::PropertyParser;
165 ///
166 /// let lookup = PropertyParser::<GeneralCategory>::new();
167 /// assert_eq!(
168 /// lookup.get_strict("Lu"),
169 /// Some(GeneralCategory::UppercaseLetter)
170 /// );
171 /// assert_eq!(
172 /// lookup.get_strict("Uppercase_Letter"),
173 /// Some(GeneralCategory::UppercaseLetter)
174 /// );
175 /// // does not do loose matching
176 /// assert_eq!(lookup.get_strict("UppercaseLetter"), None);
177 /// ```
178 #[inline]
get_strict(self, name: &str) -> Option<T>179 pub fn get_strict(self, name: &str) -> Option<T> {
180 T::try_from_u32(self.get_strict_u16(name)? as u32).ok()
181 }
182
183 /// Get the property value as a u16, doing a loose search looking for
184 /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and
185 /// whitespaces.
186 ///
187 /// # Example
188 ///
189 /// ```
190 /// use icu::properties::props::GeneralCategory;
191 /// use icu::properties::PropertyParser;
192 ///
193 /// let lookup = PropertyParser::<GeneralCategory>::new();
194 /// assert_eq!(
195 /// lookup.get_loose_u16("Lu"),
196 /// Some(GeneralCategory::UppercaseLetter as u16)
197 /// );
198 /// assert_eq!(
199 /// lookup.get_loose_u16("Uppercase_Letter"),
200 /// Some(GeneralCategory::UppercaseLetter as u16)
201 /// );
202 /// // does do loose matching
203 /// assert_eq!(
204 /// lookup.get_loose_u16("UppercaseLetter"),
205 /// Some(GeneralCategory::UppercaseLetter as u16)
206 /// );
207 /// ```
208 #[inline]
get_loose_u16(self, name: &str) -> Option<u16>209 pub fn get_loose_u16(self, name: &str) -> Option<u16> {
210 get_loose_u16(self.map, name)
211 }
212
213 /// Get the property value as a `T`, doing a loose search looking for
214 /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and
215 /// whitespaces.
216 ///
217 /// # Example
218 ///
219 /// ```
220 /// use icu::properties::props::GeneralCategory;
221 /// use icu::properties::PropertyParser;
222 ///
223 /// let lookup = PropertyParser::<GeneralCategory>::new();
224 /// assert_eq!(
225 /// lookup.get_loose("Lu"),
226 /// Some(GeneralCategory::UppercaseLetter)
227 /// );
228 /// assert_eq!(
229 /// lookup.get_loose("Uppercase_Letter"),
230 /// Some(GeneralCategory::UppercaseLetter)
231 /// );
232 /// // does do loose matching
233 /// assert_eq!(
234 /// lookup.get_loose("UppercaseLetter"),
235 /// Some(GeneralCategory::UppercaseLetter)
236 /// );
237 /// ```
238 #[inline]
get_loose(self, name: &str) -> Option<T>239 pub fn get_loose(self, name: &str) -> Option<T> {
240 T::try_from_u32(self.get_loose_u16(name)? as u32).ok()
241 }
242 }
243
#[cfg(feature = "compiled_data")]
impl<T: ParseableEnumeratedProperty> Default for PropertyParserBorrowed<'static, T> {
    /// Same value as [`PropertyParserBorrowed::new()`]: a parser over the compiled data.
    fn default() -> Self {
        Self {
            map: T::SINGLETON,
            markers: PhantomData,
        }
    }
}
250
251 impl<T: TrieValue> PropertyParserBorrowed<'static, T> {
252 /// Creates a new instance of `PropertyParserBorrowed<T>` using compiled data.
253 ///
254 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
255 ///
256 /// [ Help choosing a constructor](icu_provider::constructors)
257 #[cfg(feature = "compiled_data")]
new() -> Self where T: ParseableEnumeratedProperty,258 pub fn new() -> Self
259 where
260 T: ParseableEnumeratedProperty,
261 {
262 Self {
263 map: T::SINGLETON,
264 markers: PhantomData,
265 }
266 }
267
268 /// Cheaply converts a [`PropertyParserBorrowed<'static>`] into a [`PropertyParser`].
269 ///
270 /// Note: Due to branching and indirection, using [`PropertyParser`] might inhibit some
271 /// compile-time optimizations that are possible with [`PropertyParserBorrowed`].
static_to_owned(self) -> PropertyParser<T>272 pub const fn static_to_owned(self) -> PropertyParser<T> {
273 PropertyParser {
274 map: DataPayload::from_static_ref(self.map),
275 markers: PhantomData,
276 }
277 }
278 }
279
280 /// Avoid monomorphizing multiple copies of this function
get_strict_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16>281 fn get_strict_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16> {
282 payload.map.get(name).and_then(|i| i.try_into().ok())
283 }
284
/// Loose lookup of `name` in the name-to-value trie: ASCII case is ignored, and
/// ASCII whitespace, `_` and `-` are ignored on both the input and the trie side.
///
/// Returns `None` if no entry matches or the stored value does not fit in a `u16`.
///
/// Avoid monomorphizing multiple copies of this function
fn get_loose_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16> {
    // Depth-first search over the trie. `cursor` is the current trie position and
    // `rest` is the part of the input that has not been consumed yet.
    fn recurse(mut cursor: ZeroTrieSimpleAsciiCursor, mut rest: &[u8]) -> Option<usize> {
        // Dead end: nothing can match from this cursor.
        if cursor.is_empty() {
            return None;
        }

        // Skip whitespace, underscore, hyphen in trie.
        // Each ignorable byte is tried on a copy of the cursor without consuming
        // any input, so this branch is explored before the input is looked at.
        for skip in [b'\t', b'\n', b'\x0C', b'\r', b' ', 0x0B, b'_', b'-'] {
            let mut skip_cursor = cursor.clone();
            skip_cursor.step(skip);
            if let Some(r) = recurse(skip_cursor, rest) {
                return Some(r);
            }
        }

        // Pull the next significant input byte; if the input runs out, the result
        // is whatever value (if any) sits at the current trie position.
        let ascii = loop {
            let Some((&a, r)) = rest.split_first() else {
                return cursor.take_value();
            };
            rest = r;

            // Skip whitespace, underscore, hyphen in input
            if !matches!(
                a,
                b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | 0x0B | b'_' | b'-'
            ) {
                break a;
            }
        };

        // Try the byte as given first, then with its ASCII case flipped.
        // (For bytes that are not ASCII letters both cursors step on the same byte.)
        let mut other_case_cursor = cursor.clone();
        cursor.step(ascii);
        other_case_cursor.step(if ascii.is_ascii_lowercase() {
            ascii.to_ascii_uppercase()
        } else {
            ascii.to_ascii_lowercase()
        });
        // This uses the call stack as the DFS stack. The recursion will terminate as
        // rest's length is strictly shrinking. The call stack's depth is limited by
        // name.len().
        // NOTE(review): the trie-side skip branch above recurses with `rest`
        // unchanged; it presumably terminates because each step moves deeper into a
        // finite trie until the cursor is empty, which also adds to the stack depth.
        recurse(cursor, rest).or_else(|| recurse(other_case_cursor, rest))
    }

    recurse(payload.map.cursor(), name.as_bytes()).and_then(|i| i.try_into().ok())
}
331
/// A struct capable of looking up a property's long name from a value.
/// Access its data by calling [`Self::as_borrowed()`] and using the methods on
/// [`PropertyNamesLongBorrowed`].
///
/// # Example
///
/// ```
/// use icu::properties::props::CanonicalCombiningClass;
/// use icu::properties::PropertyNamesLong;
///
/// let names = PropertyNamesLong::<CanonicalCombiningClass>::new();
/// assert_eq!(
///     names.get(CanonicalCombiningClass::KanaVoicing),
///     Some("Kana_Voicing")
/// );
/// assert_eq!(
///     names.get(CanonicalCombiningClass::AboveLeft),
///     Some("Above_Left")
/// );
/// ```
pub struct PropertyNamesLong<T: NamedEnumeratedProperty> {
    // Value-to-long-name payload; its concrete data struct is chosen by
    // `T::DataStructLong`.
    map: DataPayload<ErasedMarker<T::DataStructLong>>,
}
355
356 impl<T: NamedEnumeratedProperty> core::fmt::Debug for PropertyNamesLong<T> {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result357 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
358 f.debug_struct("PropertyNamesLong")
359 // .field("map", &self.map)
360 .finish()
361 }
362 }
363
/// A borrowed wrapper around property value enum-to-long-name data, returned by
/// [`PropertyNamesLong::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyNamesLongBorrowed<'a, T: NamedEnumeratedProperty> {
    // Borrowed value-to-long-name map that `get()` reads from.
    map: &'a T::DataStructLongBorrowed<'a>,
}
370
// `Clone` and `Copy` are implemented by hand rather than derived so that they
// do not pick up `T: Clone` / `T: Copy` bounds; the only field is a shared reference.
impl<T: NamedEnumeratedProperty> Clone for PropertyNamesLongBorrowed<'_, T> {
    fn clone(&self) -> Self {
        // The type is `Copy` (see the impl right below), so a clone is a plain copy.
        *self
    }
}
impl<T: NamedEnumeratedProperty> Copy for PropertyNamesLongBorrowed<'_, T> {}
377
378 impl<T: NamedEnumeratedProperty> PropertyNamesLong<T> {
379 /// Creates a new instance of `PropertyNamesLongBorrowed<T>`.
380 ///
381 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
382 ///
383 /// [ Help choosing a constructor](icu_provider::constructors)
384 #[cfg(feature = "compiled_data")]
385 #[allow(clippy::new_ret_no_self)]
new() -> PropertyNamesLongBorrowed<'static, T>386 pub fn new() -> PropertyNamesLongBorrowed<'static, T> {
387 PropertyNamesLongBorrowed::new()
388 }
389
390 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
try_new_unstable( provider: &(impl DataProvider<T::DataMarkerLong> + ?Sized), ) -> Result<Self, DataError>391 pub fn try_new_unstable(
392 provider: &(impl DataProvider<T::DataMarkerLong> + ?Sized),
393 ) -> Result<Self, DataError> {
394 Ok(Self {
395 map: provider.load(Default::default())?.payload.cast(),
396 })
397 }
398
399 /// Construct a borrowed version of this type that can be queried.
400 ///
401 /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it
402 /// up front.
403 #[inline]
as_borrowed(&self) -> PropertyNamesLongBorrowed<'_, T>404 pub fn as_borrowed(&self) -> PropertyNamesLongBorrowed<'_, T> {
405 PropertyNamesLongBorrowed {
406 map: T::nep_long_identity(self.map.get()),
407 }
408 }
409 }
410
411 impl<'a, T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'a, T> {
412 /// Get the property name given a value
413 ///
414 /// # Example
415 ///
416 /// ```rust
417 /// use icu::properties::props::CanonicalCombiningClass;
418 /// use icu::properties::PropertyNamesLong;
419 ///
420 /// let lookup = PropertyNamesLong::<CanonicalCombiningClass>::new();
421 /// assert_eq!(
422 /// lookup.get(CanonicalCombiningClass::KanaVoicing),
423 /// Some("Kana_Voicing")
424 /// );
425 /// assert_eq!(
426 /// lookup.get(CanonicalCombiningClass::AboveLeft),
427 /// Some("Above_Left")
428 /// );
429 /// ```
430 #[inline]
get(self, property: T) -> Option<&'a str>431 pub fn get(self, property: T) -> Option<&'a str> {
432 self.map.get(property.to_u32())
433 }
434 }
435
#[cfg(feature = "compiled_data")]
impl<T: NamedEnumeratedProperty> Default for PropertyNamesLongBorrowed<'static, T> {
    /// Same value as [`PropertyNamesLongBorrowed::new()`]: a lookup over the compiled data.
    fn default() -> Self {
        Self {
            map: T::SINGLETON_LONG,
        }
    }
}
442
443 impl<T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'static, T> {
444 /// Creates a new instance of `PropertyNamesLongBorrowed<T>`.
445 ///
446 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
447 ///
448 /// [ Help choosing a constructor](icu_provider::constructors)
449 #[cfg(feature = "compiled_data")]
new() -> Self450 pub fn new() -> Self {
451 Self {
452 map: T::SINGLETON_LONG,
453 }
454 }
455
456 /// Cheaply converts a [`PropertyNamesLongBorrowed<'static>`] into a [`PropertyNamesLong`].
457 ///
458 /// Note: Due to branching and indirection, using [`PropertyNamesLong`] might inhibit some
459 /// compile-time optimizations that are possible with [`PropertyNamesLongBorrowed`].
460 ///
461 /// This is currently not `const` unlike other `static_to_owned()` functions since it needs
462 /// const traits to do that safely
static_to_owned(self) -> PropertyNamesLong<T>463 pub fn static_to_owned(self) -> PropertyNamesLong<T> {
464 PropertyNamesLong {
465 map: DataPayload::from_static_ref(T::nep_long_identity_static(self.map)),
466 }
467 }
468 }
469
/// A struct capable of looking up a property's short name from a value.
/// Access its data by calling [`Self::as_borrowed()`] and using the methods on
/// [`PropertyNamesShortBorrowed`].
///
/// # Example
///
/// ```
/// use icu::properties::props::CanonicalCombiningClass;
/// use icu::properties::PropertyNamesShort;
///
/// let names = PropertyNamesShort::<CanonicalCombiningClass>::new();
/// assert_eq!(names.get(CanonicalCombiningClass::KanaVoicing), Some("KV"));
/// assert_eq!(names.get(CanonicalCombiningClass::AboveLeft), Some("AL"));
/// ```
pub struct PropertyNamesShort<T: NamedEnumeratedProperty> {
    // Value-to-short-name payload; its concrete data struct is chosen by
    // `T::DataStructShort`.
    map: DataPayload<ErasedMarker<T::DataStructShort>>,
}
487
488 impl<T: NamedEnumeratedProperty> core::fmt::Debug for PropertyNamesShort<T> {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result489 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
490 f.debug_struct("PropertyNamesShort")
491 // .field("map", &self.map)
492 .finish()
493 }
494 }
495
/// A borrowed wrapper around property value enum-to-short-name data, returned by
/// [`PropertyNamesShort::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyNamesShortBorrowed<'a, T: NamedEnumeratedProperty> {
    // Borrowed value-to-short-name map that `get()` reads from.
    map: &'a T::DataStructShortBorrowed<'a>,
}
502
// `Clone` and `Copy` are implemented by hand rather than derived so that they
// do not pick up `T: Clone` / `T: Copy` bounds; the only field is a shared reference.
impl<T: NamedEnumeratedProperty> Clone for PropertyNamesShortBorrowed<'_, T> {
    fn clone(&self) -> Self {
        // The type is `Copy` (see the impl right below), so a clone is a plain copy.
        *self
    }
}

impl<T: NamedEnumeratedProperty> Copy for PropertyNamesShortBorrowed<'_, T> {}
510
511 impl<T: NamedEnumeratedProperty> PropertyNamesShort<T> {
512 /// Creates a new instance of `PropertyNamesShortBorrowed<T>`.
513 ///
514 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
515 ///
516 /// [ Help choosing a constructor](icu_provider::constructors)
517 #[cfg(feature = "compiled_data")]
518 #[allow(clippy::new_ret_no_self)]
new() -> PropertyNamesShortBorrowed<'static, T>519 pub fn new() -> PropertyNamesShortBorrowed<'static, T> {
520 PropertyNamesShortBorrowed::new()
521 }
522
523 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
try_new_unstable( provider: &(impl DataProvider<T::DataMarkerShort> + ?Sized), ) -> Result<Self, DataError>524 pub fn try_new_unstable(
525 provider: &(impl DataProvider<T::DataMarkerShort> + ?Sized),
526 ) -> Result<Self, DataError> {
527 Ok(Self {
528 map: provider.load(Default::default())?.payload.cast(),
529 })
530 }
531
532 /// Construct a borrowed version of this type that can be queried.
533 ///
534 /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it
535 /// up front.
536 #[inline]
as_borrowed(&self) -> PropertyNamesShortBorrowed<'_, T>537 pub fn as_borrowed(&self) -> PropertyNamesShortBorrowed<'_, T> {
538 PropertyNamesShortBorrowed {
539 map: T::nep_short_identity(self.map.get()),
540 }
541 }
542 }
543
544 impl<'a, T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'a, T> {
545 /// Get the property name given a value
546 ///
547 /// # Example
548 ///
549 /// ```rust
550 /// use icu::properties::props::CanonicalCombiningClass;
551 /// use icu::properties::PropertyNamesShort;
552 ///
553 /// let lookup = PropertyNamesShort::<CanonicalCombiningClass>::new();
554 /// assert_eq!(lookup.get(CanonicalCombiningClass::KanaVoicing), Some("KV"));
555 /// assert_eq!(lookup.get(CanonicalCombiningClass::AboveLeft), Some("AL"));
556 /// ```
557 #[inline]
get(self, property: T) -> Option<&'a str>558 pub fn get(self, property: T) -> Option<&'a str> {
559 self.map.get(property.to_u32())
560 }
561 }
562
563 impl PropertyNamesShortBorrowed<'_, Script> {
564 /// Gets the "name" of a script property as a `icu::locale::subtags::Script`.
565 ///
566 /// This method is available only on `PropertyNamesShortBorrowed<Script>`.
567 ///
568 /// # Example
569 ///
570 /// ```rust
571 /// use icu::locale::subtags::script;
572 /// use icu::properties::props::Script;
573 /// use icu::properties::PropertyNamesShort;
574 ///
575 /// let lookup = PropertyNamesShort::<Script>::new();
576 /// assert_eq!(
577 /// lookup.get_locale_script(Script::Brahmi),
578 /// Some(script!("Brah"))
579 /// );
580 /// assert_eq!(
581 /// lookup.get_locale_script(Script::Hangul),
582 /// Some(script!("Hang"))
583 /// );
584 /// ```
585 ///
586 /// For the reverse direction, use property parsing as normal:
587 /// ```
588 /// use icu::locale::subtags::script;
589 /// use icu::properties::props::Script;
590 /// use icu::properties::PropertyParser;
591 ///
592 /// let parser = PropertyParser::<Script>::new();
593 /// assert_eq!(
594 /// parser.get_strict(script!("Brah").as_str()),
595 /// Some(Script::Brahmi)
596 /// );
597 /// assert_eq!(
598 /// parser.get_strict(script!("Hang").as_str()),
599 /// Some(Script::Hangul)
600 /// );
601 /// ```
602 #[inline]
get_locale_script(self, property: Script) -> Option<icu_locale_core::subtags::Script>603 pub fn get_locale_script(self, property: Script) -> Option<icu_locale_core::subtags::Script> {
604 let prop = usize::try_from(property.to_u32()).ok()?;
605 self.map.map.get(prop).and_then(|o| o.0)
606 }
607 }
608
#[cfg(feature = "compiled_data")]
impl<T: NamedEnumeratedProperty> Default for PropertyNamesShortBorrowed<'static, T> {
    /// Same value as [`PropertyNamesShortBorrowed::new()`]: a lookup over the compiled data.
    fn default() -> Self {
        Self {
            map: T::SINGLETON_SHORT,
        }
    }
}
615
616 impl<T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'static, T> {
617 /// Creates a new instance of `PropertyNamesShortBorrowed<T>`.
618 ///
619 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
620 ///
621 /// [ Help choosing a constructor](icu_provider::constructors)
622 #[cfg(feature = "compiled_data")]
new() -> Self623 pub fn new() -> Self {
624 Self {
625 map: T::SINGLETON_SHORT,
626 }
627 }
628
629 /// Cheaply converts a [`PropertyNamesShortBorrowed<'static>`] into a [`PropertyNamesShort`].
630 ///
631 /// Note: Due to branching and indirection, using [`PropertyNamesShort`] might inhibit some
632 /// compile-time optimizations that are possible with [`PropertyNamesShortBorrowed`].
633 ///
634 /// This is currently not `const` unlike other `static_to_owned()` functions since it needs
635 /// const traits to do that safely
static_to_owned(self) -> PropertyNamesShort<T>636 pub fn static_to_owned(self) -> PropertyNamesShort<T> {
637 PropertyNamesShort {
638 map: DataPayload::from_static_ref(T::nep_short_identity_static(self.map)),
639 }
640 }
641 }
642
/// A property whose value names can be parsed from strings.
///
/// The trait is sealed (it requires `crate::private::Sealed`), and its items are
/// hidden implementation details used by [`PropertyParser`].
pub trait ParseableEnumeratedProperty: crate::private::Sealed + TrieValue {
    // Data marker used to load the name-to-value map for this property.
    #[doc(hidden)]
    type DataMarker: DataMarker<DataStruct = PropertyValueNameToEnumMap<'static>>;
    // Compiled-data name-to-value map for this property.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON: &'static PropertyValueNameToEnumMap<'static>;
}
651
// Abstract over Linear/Sparse/Script representation
// This trait is implicitly sealed by not being exported.
pub trait PropertyEnumToValueNameLookup {
    // Returns the name stored for the numeric property value `prop`, or `None`
    // when the underlying map has no entry for it.
    fn get(&self, prop: u32) -> Option<&str>;
}
657
658 impl PropertyEnumToValueNameLookup for PropertyEnumToValueNameLinearMap<'_> {
get(&self, prop: u32) -> Option<&str>659 fn get(&self, prop: u32) -> Option<&str> {
660 self.map.get(usize::try_from(prop).ok()?)
661 }
662 }
663
664 impl PropertyEnumToValueNameLookup for PropertyEnumToValueNameSparseMap<'_> {
get(&self, prop: u32) -> Option<&str>665 fn get(&self, prop: u32) -> Option<&str> {
666 self.map.get(&u16::try_from(prop).ok()?)
667 }
668 }
669
670 impl PropertyEnumToValueNameLookup for PropertyScriptToIcuScriptMap<'_> {
get(&self, prop: u32) -> Option<&str>671 fn get(&self, prop: u32) -> Option<&str> {
672 self.map
673 .get_ule_ref(usize::try_from(prop).ok()?)
674 .and_then(|no| no.as_ref())
675 .map(|s| s.as_str())
676 }
677 }
678
/// A property whose value names can be represented as strings.
///
/// The hidden associated items describe where the long and short names of the
/// property's values are stored; they back [`PropertyNamesLong`] and
/// [`PropertyNamesShort`].
pub trait NamedEnumeratedProperty: ParseableEnumeratedProperty {
    // Owned (`'static`) data struct holding the long names.
    #[doc(hidden)]
    type DataStructLong: 'static
        + for<'a> Yokeable<'a, Output = Self::DataStructLongBorrowed<'a>>
        + PropertyEnumToValueNameLookup;
    // Owned (`'static`) data struct holding the short names.
    #[doc(hidden)]
    type DataStructShort: 'static
        + for<'a> Yokeable<'a, Output = Self::DataStructShortBorrowed<'a>>
        + PropertyEnumToValueNameLookup;
    // Lifetime-parameterized view of `DataStructLong` (its `Yokeable` output).
    #[doc(hidden)]
    type DataStructLongBorrowed<'a>: PropertyEnumToValueNameLookup;
    // Lifetime-parameterized view of `DataStructShort` (its `Yokeable` output).
    #[doc(hidden)]
    type DataStructShortBorrowed<'a>: PropertyEnumToValueNameLookup;
    // Data marker used to load the long-name data.
    #[doc(hidden)]
    type DataMarkerLong: DataMarker<DataStruct = Self::DataStructLong>;
    // Data marker used to load the short-name data.
    #[doc(hidden)]
    type DataMarkerShort: DataMarker<DataStruct = Self::DataStructShort>;
    // Compiled-data long-name map.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON_LONG: &'static Self::DataStructLongBorrowed<'static>;
    // Compiled-data short-name map.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON_SHORT: &'static Self::DataStructShortBorrowed<'static>;

    // These wouldn't be necessary if Yoke used GATs (#6057)
    // Converts a reference to the `Yokeable` output type into a reference to the
    // borrowed associated type.
    #[doc(hidden)]
    fn nep_long_identity<'a>(
        stat: &'a <Self::DataStructLong as Yokeable<'a>>::Output,
    ) -> &'a Self::DataStructLongBorrowed<'a>;
    // Converts a `'static` borrowed reference into a reference to the owned data
    // struct type.
    #[doc(hidden)]
    fn nep_long_identity_static(
        stat: &'static Self::DataStructLongBorrowed<'static>,
    ) -> &'static Self::DataStructLong;

    // Short-name counterparts of the two conversions above.
    #[doc(hidden)]
    fn nep_short_identity<'a>(
        stat: &'a <Self::DataStructShort as Yokeable<'a>>::Output,
    ) -> &'a Self::DataStructShortBorrowed<'a>;
    #[doc(hidden)]
    fn nep_short_identity_static(
        stat: &'static Self::DataStructShortBorrowed<'static>,
    ) -> &'static Self::DataStructShort;

    /// Convenience method for `PropertyParser::new().get_loose(s)`
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn try_from_str(s: &str) -> Option<Self> {
        PropertyParser::new().get_loose(s)
    }
    /// Convenience method for `PropertyNamesLong::new().get(*self)`.
    ///
    /// Does not panic: if the data has no long name for this value, the string
    /// `"unreachable"` is returned instead.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn long_name(&self) -> &'static str {
        PropertyNamesLong::new().get(*self).unwrap_or("unreachable")
    }
    /// Convenience method for `PropertyNamesShort::new().get(*self)`.
    ///
    /// Does not panic: if the data has no short name for this value, the string
    /// `"unreachable"` is returned instead.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn short_name(&self) -> &'static str {
        PropertyNamesShort::new()
            .get(*self)
            .unwrap_or("unreachable")
    }
}
747
// Implements `ParseableEnumeratedProperty` for `$ty` and, when the optional
// short/long lines are supplied, `NamedEnumeratedProperty` as well.
//
// Input shape:
//   impl Ty {
//       <name-to-value marker> / <name-to-value Baked singleton>;
//       <short data struct> / <short marker> / <short Baked singleton>;   (optional,
//       <long data struct>  / <long marker>  / <long Baked singleton>;     as a pair)
//   }
// Note that the SHORT line comes before the LONG line.
macro_rules! impl_value_getter {
    (
        impl $ty:ident {
            $marker_n2e:ident / $singleton_n2e:ident;
            $(
                $data_struct_s:ident / $marker_e2sn:ident / $singleton_e2sn:ident;
                $data_struct_l:ident / $marker_e2ln:ident / $singleton_e2ln:ident;
            )?
        }
    ) => {
        impl ParseableEnumeratedProperty for $ty {
            type DataMarker = $marker_n2e;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static PropertyValueNameToEnumMap<'static> = crate::provider::Baked::$singleton_n2e;
        }

        // Emitted only when the short/long lines were supplied.
        $(
            impl NamedEnumeratedProperty for $ty {
                type DataStructLong = $data_struct_l<'static>;
                type DataStructShort = $data_struct_s<'static>;
                type DataStructLongBorrowed<'a> = $data_struct_l<'a>;
                type DataStructShortBorrowed<'a> = $data_struct_s<'a>;
                type DataMarkerLong = crate::provider::$marker_e2ln;
                type DataMarkerShort = crate::provider::$marker_e2sn;
                #[cfg(feature = "compiled_data")]
                const SINGLETON_LONG: &'static Self::DataStructLong = crate::provider::Baked::$singleton_e2ln;
                #[cfg(feature = "compiled_data")]
                const SINGLETON_SHORT: &'static Self::DataStructShort = crate::provider::Baked::$singleton_e2sn;
                // The four `nep_*_identity*` functions return their argument unchanged;
                // only the way the type is spelled differs between input and output.
                fn nep_long_identity<'a>(yoked: &'a $data_struct_l<'a>) -> &'a Self::DataStructLongBorrowed<'a> {
                    yoked
                }

                fn nep_long_identity_static(stat: &'static $data_struct_l<'static>) -> &'static $data_struct_l<'static> {
                    stat
                }


                fn nep_short_identity<'a>(yoked: &'a $data_struct_s<'a>) -> &'a Self::DataStructShortBorrowed<'a> {
                    yoked
                }
                fn nep_short_identity_static(stat: &'static $data_struct_s<'static>) -> &'static $data_struct_s<'static> {
                    stat
                }

            }


        )?
    };
}
798
// Per-property wiring. In each invocation the first line is the name-to-value
// (parsing) data, the second the value-to-SHORT-name data and the third the
// value-to-LONG-name data.

impl_value_getter! {
    impl BidiClass {
        BidiClassNameToValueV2 / SINGLETON_BIDI_CLASS_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / BidiClassValueToShortNameV1 / SINGLETON_BIDI_CLASS_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / BidiClassValueToLongNameV1 / SINGLETON_BIDI_CLASS_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl GeneralCategory {
        GeneralCategoryNameToValueV2 / SINGLETON_GENERAL_CATEGORY_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / GeneralCategoryValueToShortNameV1 / SINGLETON_GENERAL_CATEGORY_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / GeneralCategoryValueToLongNameV1 / SINGLETON_GENERAL_CATEGORY_VALUE_TO_LONG_NAME_V1;
    }
}

// Parse-only: no short/long lines, so only `ParseableEnumeratedProperty` is implemented.
impl_value_getter! {
    impl GeneralCategoryGroup {
        GeneralCategoryMaskNameToValueV2 / SINGLETON_GENERAL_CATEGORY_MASK_NAME_TO_VALUE_V2;
    }
}

// Short names use the script-specific map type; long names use the linear map.
impl_value_getter! {
    impl Script {
        ScriptNameToValueV2 / SINGLETON_SCRIPT_NAME_TO_VALUE_V2;
        PropertyScriptToIcuScriptMap / ScriptValueToShortNameV1 / SINGLETON_SCRIPT_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / ScriptValueToLongNameV1 / SINGLETON_SCRIPT_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl HangulSyllableType {
        HangulSyllableTypeNameToValueV2 / SINGLETON_HANGUL_SYLLABLE_TYPE_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / HangulSyllableTypeValueToShortNameV1 / SINGLETON_HANGUL_SYLLABLE_TYPE_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / HangulSyllableTypeValueToLongNameV1 / SINGLETON_HANGUL_SYLLABLE_TYPE_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl EastAsianWidth {
        EastAsianWidthNameToValueV2 / SINGLETON_EAST_ASIAN_WIDTH_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / EastAsianWidthValueToShortNameV1 / SINGLETON_EAST_ASIAN_WIDTH_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / EastAsianWidthValueToLongNameV1 / SINGLETON_EAST_ASIAN_WIDTH_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl LineBreak {
        LineBreakNameToValueV2 / SINGLETON_LINE_BREAK_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / LineBreakValueToShortNameV1 / SINGLETON_LINE_BREAK_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / LineBreakValueToLongNameV1 / SINGLETON_LINE_BREAK_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl GraphemeClusterBreak {
        GraphemeClusterBreakNameToValueV2 / SINGLETON_GRAPHEME_CLUSTER_BREAK_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / GraphemeClusterBreakValueToShortNameV1 / SINGLETON_GRAPHEME_CLUSTER_BREAK_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / GraphemeClusterBreakValueToLongNameV1 / SINGLETON_GRAPHEME_CLUSTER_BREAK_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl WordBreak {
        WordBreakNameToValueV2 / SINGLETON_WORD_BREAK_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / WordBreakValueToShortNameV1 / SINGLETON_WORD_BREAK_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / WordBreakValueToLongNameV1 / SINGLETON_WORD_BREAK_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl SentenceBreak {
        SentenceBreakNameToValueV2 / SINGLETON_SENTENCE_BREAK_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / SentenceBreakValueToShortNameV1 / SINGLETON_SENTENCE_BREAK_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / SentenceBreakValueToLongNameV1 / SINGLETON_SENTENCE_BREAK_VALUE_TO_LONG_NAME_V1;
    }
}

// The only property here whose name data uses the sparse map representation.
impl_value_getter! {
    impl CanonicalCombiningClass {
        CanonicalCombiningClassNameToValueV2 / SINGLETON_CANONICAL_COMBINING_CLASS_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameSparseMap / CanonicalCombiningClassValueToShortNameV1 / SINGLETON_CANONICAL_COMBINING_CLASS_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameSparseMap / CanonicalCombiningClassValueToLongNameV1 / SINGLETON_CANONICAL_COMBINING_CLASS_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl IndicSyllabicCategory {
        IndicSyllabicCategoryNameToValueV2 / SINGLETON_INDIC_SYLLABIC_CATEGORY_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / IndicSyllabicCategoryValueToShortNameV1 / SINGLETON_INDIC_SYLLABIC_CATEGORY_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / IndicSyllabicCategoryValueToLongNameV1 / SINGLETON_INDIC_SYLLABIC_CATEGORY_VALUE_TO_LONG_NAME_V1;
    }
}

impl_value_getter! {
    impl JoiningType {
        JoiningTypeNameToValueV2 / SINGLETON_JOINING_TYPE_NAME_TO_VALUE_V2;
        PropertyEnumToValueNameLinearMap / JoiningTypeValueToShortNameV1 / SINGLETON_JOINING_TYPE_VALUE_TO_SHORT_NAME_V1;
        PropertyEnumToValueNameLinearMap / JoiningTypeValueToLongNameV1 / SINGLETON_JOINING_TYPE_VALUE_TO_LONG_NAME_V1;
    }
}
900