// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::props::*; use crate::provider::names::*; use core::marker::PhantomData; use icu_collections::codepointtrie::TrieValue; use icu_provider::marker::ErasedMarker; use icu_provider::prelude::*; use yoke::Yokeable; use zerotrie::cursor::ZeroTrieSimpleAsciiCursor; /// A struct capable of looking up a property value from a string name. /// Access its data by calling [`Self::as_borrowed()`] and using the methods on /// [`PropertyParserBorrowed`]. /// /// The name can be a short name (`Lu`), a long name(`Uppercase_Letter`), /// or an alias. /// /// Property names can be looked up using "strict" matching (looking for a name /// that matches exactly), or "loose matching", where the name is allowed to deviate /// in terms of ASCII casing, whitespace, underscores, and hyphens. /// /// # Example /// /// ``` /// use icu::properties::props::GeneralCategory; /// use icu::properties::PropertyParser; /// /// let lookup = PropertyParser::::new(); /// // short name for value /// assert_eq!( /// lookup.get_strict("Lu"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// assert_eq!( /// lookup.get_strict("Pd"), /// Some(GeneralCategory::DashPunctuation) /// ); /// // long name for value /// assert_eq!( /// lookup.get_strict("Uppercase_Letter"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// assert_eq!( /// lookup.get_strict("Dash_Punctuation"), /// Some(GeneralCategory::DashPunctuation) /// ); /// // name has incorrect casing /// assert_eq!(lookup.get_strict("dashpunctuation"), None); /// // loose matching of name /// assert_eq!( /// lookup.get_loose("dash-punctuation"), /// Some(GeneralCategory::DashPunctuation) /// ); /// // fake property /// assert_eq!(lookup.get_strict("Animated_Gif"), None); /// ``` #[derive(Debug)] pub struct PropertyParser { map: DataPayload>>, markers: PhantomData T>, } /// A borrowed wrapper around property value name-to-enum data, returned by /// [`PropertyParser::as_borrowed()`]. More efficient to query. #[derive(Debug)] pub struct PropertyParserBorrowed<'a, T> { map: &'a PropertyValueNameToEnumMap<'a>, markers: PhantomData T>, } impl Clone for PropertyParserBorrowed<'_, T> { fn clone(&self) -> Self { *self } } impl Copy for PropertyParserBorrowed<'_, T> {} impl PropertyParser { /// Creates a new instance of `PropertyParser` using compiled data. /// /// ✨ *Enabled with the `compiled_data` Cargo feature.* /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] #[allow(clippy::new_ret_no_self)] pub fn new() -> PropertyParserBorrowed<'static, T> where T: ParseableEnumeratedProperty, { PropertyParserBorrowed::new() } #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable( provider: &(impl DataProvider + ?Sized), ) -> Result where T: ParseableEnumeratedProperty, { Ok(Self { map: provider.load(Default::default())?.payload.cast(), markers: PhantomData, }) } /// Construct a borrowed version of this type that can be queried. /// /// This avoids a potential small underlying cost per API call (like `get_strict()`) by consolidating it /// up front. #[inline] pub fn as_borrowed(&self) -> PropertyParserBorrowed<'_, T> { PropertyParserBorrowed { map: self.map.get(), markers: PhantomData, } } #[doc(hidden)] // used by FFI code pub fn erase(self) -> PropertyParser { PropertyParser { map: self.map.cast(), markers: PhantomData, } } } impl PropertyParserBorrowed<'_, T> { /// Get the property value as a u16, doing a strict search looking for /// names that match exactly /// /// # Example /// /// ``` /// use icu::properties::props::GeneralCategory; /// use icu::properties::PropertyParser; /// /// let lookup = PropertyParser::::new(); /// assert_eq!( /// lookup.get_strict_u16("Lu"), /// Some(GeneralCategory::UppercaseLetter as u16) /// ); /// assert_eq!( /// lookup.get_strict_u16("Uppercase_Letter"), /// Some(GeneralCategory::UppercaseLetter as u16) /// ); /// // does not do loose matching /// assert_eq!(lookup.get_strict_u16("UppercaseLetter"), None); /// ``` #[inline] pub fn get_strict_u16(self, name: &str) -> Option { get_strict_u16(self.map, name) } /// Get the property value as a `T`, doing a strict search looking for /// names that match exactly /// /// # Example /// /// ``` /// use icu::properties::props::GeneralCategory; /// use icu::properties::PropertyParser; /// /// let lookup = PropertyParser::::new(); /// assert_eq!( /// lookup.get_strict("Lu"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// assert_eq!( /// lookup.get_strict("Uppercase_Letter"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// // does not do loose matching /// assert_eq!(lookup.get_strict("UppercaseLetter"), None); /// ``` #[inline] pub fn get_strict(self, name: &str) -> Option { T::try_from_u32(self.get_strict_u16(name)? as u32).ok() } /// Get the property value as a u16, doing a loose search looking for /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and /// whitespaces. /// /// # Example /// /// ``` /// use icu::properties::props::GeneralCategory; /// use icu::properties::PropertyParser; /// /// let lookup = PropertyParser::::new(); /// assert_eq!( /// lookup.get_loose_u16("Lu"), /// Some(GeneralCategory::UppercaseLetter as u16) /// ); /// assert_eq!( /// lookup.get_loose_u16("Uppercase_Letter"), /// Some(GeneralCategory::UppercaseLetter as u16) /// ); /// // does do loose matching /// assert_eq!( /// lookup.get_loose_u16("UppercaseLetter"), /// Some(GeneralCategory::UppercaseLetter as u16) /// ); /// ``` #[inline] pub fn get_loose_u16(self, name: &str) -> Option { get_loose_u16(self.map, name) } /// Get the property value as a `T`, doing a loose search looking for /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and /// whitespaces. /// /// # Example /// /// ``` /// use icu::properties::props::GeneralCategory; /// use icu::properties::PropertyParser; /// /// let lookup = PropertyParser::::new(); /// assert_eq!( /// lookup.get_loose("Lu"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// assert_eq!( /// lookup.get_loose("Uppercase_Letter"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// // does do loose matching /// assert_eq!( /// lookup.get_loose("UppercaseLetter"), /// Some(GeneralCategory::UppercaseLetter) /// ); /// ``` #[inline] pub fn get_loose(self, name: &str) -> Option { T::try_from_u32(self.get_loose_u16(name)? as u32).ok() } } #[cfg(feature = "compiled_data")] impl Default for PropertyParserBorrowed<'static, T> { fn default() -> Self { Self::new() } } impl PropertyParserBorrowed<'static, T> { /// Creates a new instance of `PropertyParserBorrowed` using compiled data. /// /// ✨ *Enabled with the `compiled_data` Cargo feature.* /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] pub fn new() -> Self where T: ParseableEnumeratedProperty, { Self { map: T::SINGLETON, markers: PhantomData, } } /// Cheaply converts a [`PropertyParserBorrowed<'static>`] into a [`PropertyParser`]. /// /// Note: Due to branching and indirection, using [`PropertyParser`] might inhibit some /// compile-time optimizations that are possible with [`PropertyParserBorrowed`]. pub const fn static_to_owned(self) -> PropertyParser { PropertyParser { map: DataPayload::from_static_ref(self.map), markers: PhantomData, } } } /// Avoid monomorphizing multiple copies of this function fn get_strict_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option { payload.map.get(name).and_then(|i| i.try_into().ok()) } /// Avoid monomorphizing multiple copies of this function fn get_loose_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option { fn recurse(mut cursor: ZeroTrieSimpleAsciiCursor, mut rest: &[u8]) -> Option { if cursor.is_empty() { return None; } // Skip whitespace, underscore, hyphen in trie. for skip in [b'\t', b'\n', b'\x0C', b'\r', b' ', 0x0B, b'_', b'-'] { let mut skip_cursor = cursor.clone(); skip_cursor.step(skip); if let Some(r) = recurse(skip_cursor, rest) { return Some(r); } } let ascii = loop { let Some((&a, r)) = rest.split_first() else { return cursor.take_value(); }; rest = r; // Skip whitespace, underscore, hyphen in input if !matches!( a, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | 0x0B | b'_' | b'-' ) { break a; } }; let mut other_case_cursor = cursor.clone(); cursor.step(ascii); other_case_cursor.step(if ascii.is_ascii_lowercase() { ascii.to_ascii_uppercase() } else { ascii.to_ascii_lowercase() }); // This uses the call stack as the DFS stack. The recursion will terminate as // rest's length is strictly shrinking. The call stack's depth is limited by // name.len(). recurse(cursor, rest).or_else(|| recurse(other_case_cursor, rest)) } recurse(payload.map.cursor(), name.as_bytes()).and_then(|i| i.try_into().ok()) } /// A struct capable of looking up a property name from a value /// Access its data by calling [`Self::as_borrowed()`] and using the methods on /// [`PropertyNamesLongBorrowed`]. /// /// # Example /// /// ``` /// use icu::properties::props::CanonicalCombiningClass; /// use icu::properties::PropertyNamesLong; /// /// let names = PropertyNamesLong::::new(); /// assert_eq!( /// names.get(CanonicalCombiningClass::KanaVoicing), /// Some("Kana_Voicing") /// ); /// assert_eq!( /// names.get(CanonicalCombiningClass::AboveLeft), /// Some("Above_Left") /// ); /// ``` pub struct PropertyNamesLong { map: DataPayload>, } impl core::fmt::Debug for PropertyNamesLong { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("PropertyNamesLong") // .field("map", &self.map) .finish() } } /// A borrowed wrapper around property value name-to-enum data, returned by /// [`PropertyNamesLong::as_borrowed()`]. More efficient to query. #[derive(Debug)] pub struct PropertyNamesLongBorrowed<'a, T: NamedEnumeratedProperty> { map: &'a T::DataStructLongBorrowed<'a>, } impl Clone for PropertyNamesLongBorrowed<'_, T> { fn clone(&self) -> Self { *self } } impl Copy for PropertyNamesLongBorrowed<'_, T> {} impl PropertyNamesLong { /// Creates a new instance of `PropertyNamesLongBorrowed`. /// /// ✨ *Enabled with the `compiled_data` Cargo feature.* /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] #[allow(clippy::new_ret_no_self)] pub fn new() -> PropertyNamesLongBorrowed<'static, T> { PropertyNamesLongBorrowed::new() } #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable( provider: &(impl DataProvider + ?Sized), ) -> Result { Ok(Self { map: provider.load(Default::default())?.payload.cast(), }) } /// Construct a borrowed version of this type that can be queried. /// /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it /// up front. #[inline] pub fn as_borrowed(&self) -> PropertyNamesLongBorrowed<'_, T> { PropertyNamesLongBorrowed { map: T::nep_long_identity(self.map.get()), } } } impl<'a, T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'a, T> { /// Get the property name given a value /// /// # Example /// /// ```rust /// use icu::properties::props::CanonicalCombiningClass; /// use icu::properties::PropertyNamesLong; /// /// let lookup = PropertyNamesLong::::new(); /// assert_eq!( /// lookup.get(CanonicalCombiningClass::KanaVoicing), /// Some("Kana_Voicing") /// ); /// assert_eq!( /// lookup.get(CanonicalCombiningClass::AboveLeft), /// Some("Above_Left") /// ); /// ``` #[inline] pub fn get(self, property: T) -> Option<&'a str> { self.map.get(property.to_u32()) } } #[cfg(feature = "compiled_data")] impl Default for PropertyNamesLongBorrowed<'static, T> { fn default() -> Self { Self::new() } } impl PropertyNamesLongBorrowed<'static, T> { /// Creates a new instance of `PropertyNamesLongBorrowed`. /// /// ✨ *Enabled with the `compiled_data` Cargo feature.* /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] pub fn new() -> Self { Self { map: T::SINGLETON_LONG, } } /// Cheaply converts a [`PropertyNamesLongBorrowed<'static>`] into a [`PropertyNamesLong`]. /// /// Note: Due to branching and indirection, using [`PropertyNamesLong`] might inhibit some /// compile-time optimizations that are possible with [`PropertyNamesLongBorrowed`]. /// /// This is currently not `const` unlike other `static_to_owned()` functions since it needs /// const traits to do that safely pub fn static_to_owned(self) -> PropertyNamesLong { PropertyNamesLong { map: DataPayload::from_static_ref(T::nep_long_identity_static(self.map)), } } } /// A struct capable of looking up a property name from a value /// Access its data by calling [`Self::as_borrowed()`] and using the methods on /// [`PropertyNamesShortBorrowed`]. /// /// # Example /// /// ``` /// use icu::properties::props::CanonicalCombiningClass; /// use icu::properties::PropertyNamesShort; /// /// let names = PropertyNamesShort::::new(); /// assert_eq!(names.get(CanonicalCombiningClass::KanaVoicing), Some("KV")); /// assert_eq!(names.get(CanonicalCombiningClass::AboveLeft), Some("AL")); /// ``` pub struct PropertyNamesShort { map: DataPayload>, } impl core::fmt::Debug for PropertyNamesShort { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("PropertyNamesShort") // .field("map", &self.map) .finish() } } /// A borrowed wrapper around property value name-to-enum data, returned by /// [`PropertyNamesShort::as_borrowed()`]. More efficient to query. #[derive(Debug)] pub struct PropertyNamesShortBorrowed<'a, T: NamedEnumeratedProperty> { map: &'a T::DataStructShortBorrowed<'a>, } impl Clone for PropertyNamesShortBorrowed<'_, T> { fn clone(&self) -> Self { *self } } impl Copy for PropertyNamesShortBorrowed<'_, T> {} impl PropertyNamesShort { /// Creates a new instance of `PropertyNamesShortBorrowed`. /// /// ✨ *Enabled with the `compiled_data` Cargo feature.* /// /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "compiled_data")] #[allow(clippy::new_ret_no_self)] pub fn new() -> PropertyNamesShortBorrowed<'static, T> { PropertyNamesShortBorrowed::new() } #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable( provider: &(impl DataProvider + ?Sized), ) -> Result { Ok(Self { map: provider.load(Default::default())?.payload.cast(), }) } /// Construct a borrowed version of this type that can be queried. /// /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it /// up front. #[inline] pub fn as_borrowed(&self) -> PropertyNamesShortBorrowed<'_, T> { PropertyNamesShortBorrowed { map: T::nep_short_identity(self.map.get()), } } } impl<'a, T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'a, T> { /// Get the property name given a value /// /// # Example /// /// ```rust /// use icu::properties::props::CanonicalCombiningClass; /// use icu::properties::PropertyNamesShort; /// /// let lookup = PropertyNamesShort::::new(); /// assert_eq!(lookup.get(CanonicalCombiningClass::KanaVoicing), Some("KV")); /// assert_eq!(lookup.get(CanonicalCombiningClass::AboveLeft), Some("AL")); /// ``` #[inline] pub fn get(self, property: T) -> Option<&'a str> { self.map.get(property.to_u32()) } } impl PropertyNamesShortBorrowed<'_, Script> { /// Gets the "name" of a script property as a `icu::locale::subtags::Script`. /// /// This method is available only on `PropertyNamesShortBorrowed