/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
///
// Derives are split by feature: `Serialize` and `Bake` are only derived when the
// `datagen` feature is enabled, `Deserialize` only when the `serde` feature is enabled.
#[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum PropertyCodePointMap<'data, T: TrieValue> {
/// A code point trie storing the data.
///
/// When the `serde` feature is enabled the field is annotated with `serde(borrow)`.
CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>),
// New variants should go BELOW existing ones.
// Serde serializes based on variant name and index in the enum, so inserting a
// variant above an existing one would shift the index of the existing variants.
// https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
}
// Invokes `icu_provider::data_struct!` for the generic `PropertyCodePointMap`.
// The leading `<T: TrieValue>` declares the type parameter used in the type path;
// without it `T` is not in scope at module level and the invocation cannot compile.
// `#[cfg(feature = "datagen")]` is passed to the macro as an extra attribute argument.
// NOTE(review): presumably the macro implements the provider traits required of data
// structs — confirm against the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    <T: TrieValue> PropertyCodePointMap<'_, T>,
    #[cfg(feature = "datagen")]
);
// See CodePointMapData for documentation of these functions
impl<'data, T: TrieValue> PropertyCodePointMap<'data, T> {
#[inline]
pub(crate) fn get32(&self, ch: u32) -> T {
match *self {
Self::CodePointTrie(ref t) => t.get32(ch),
}
}
#[inline]
#[cfg(feature = "alloc")]
pub(crate) fn try_into_converted(
self,
) -> Result, zerovec::ule::UleError>
where
P: TrieValue,
{
match self {
Self::CodePointTrie(t) => t
.try_into_converted()
.map(PropertyCodePointMap::CodePointTrie),
}
}
#[inline]
#[cfg(feature = "alloc")]
pub(crate) fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> {
match *self {
Self::CodePointTrie(ref t) => t.get_set_for_value(value),
}
}
#[inline]
pub(crate) fn iter_ranges(&self) -> impl Iterator- > + '_ {
match *self {
Self::CodePointTrie(ref t) => t.iter_ranges(),
}
}
#[inline]
pub(crate) fn iter_ranges_mapped<'a, U: Eq + 'a>(
&'a self,
map: impl FnMut(T) -> U + Copy + 'a,
) -> impl Iterator
- > + 'a {
match *self {
Self::CodePointTrie(ref t) => t.iter_ranges_mapped(map),
}
}
#[inline]
pub(crate) fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self {
Self::CodePointTrie(trie)
}
#[inline]
pub(crate) fn as_code_point_trie(&self) -> Option<&CodePointTrie<'data, T>> {
match *self {
Self::CodePointTrie(ref t) => Some(t),
// any other backing data structure that cannot return a CPT in O(1) time should return None
}
}
#[inline]
pub(crate) fn to_code_point_trie(&self) -> CodePointTrie<'_, T> {
match *self {
Self::CodePointTrie(ref t) => ZeroFrom::zero_from(t),
}
}
}
/// A set of characters and strings which share a particular property value.
///
/// The enum is marked `#[non_exhaustive]` and currently has a single variant.
///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
///
// Derives are split by feature: `Serialize` and `Bake` are only derived when the
// `datagen` feature is enabled, `Deserialize` only when the `serde` feature is enabled.
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum PropertyUnicodeSet<'data> {
/// A set representing characters in an inversion list, and the strings in a list.
///
/// When the `serde` feature is enabled the field is annotated with `serde(borrow)`.
CPInversionListStrList(
#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>,
),
// New variants should go BELOW existing ones.
// Serde serializes based on variant name and index in the enum, so inserting a
// variant above an existing one would shift the index of the existing variants.
// https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
}
// Invokes `icu_provider::data_struct!` for `PropertyUnicodeSet<'_>`, passing
// `#[cfg(feature = "datagen")]` as an extra attribute argument.
// NOTE(review): presumably the macro implements the provider traits required of data
// structs — confirm against the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
PropertyUnicodeSet<'_>,
#[cfg(feature = "datagen")]
);
impl<'data> PropertyUnicodeSet<'data> {
    /// Reports whether the backing set contains the string `s`, as answered by its
    /// `contains_str`.
    #[inline]
    pub(crate) fn contains_str(&self, s: &str) -> bool {
        match self {
            Self::CPInversionListStrList(list) => list.contains_str(s),
        }
    }

    /// Reports whether the backing set contains the code point `cp` (given as a `u32`),
    /// as answered by its `contains32`.
    #[inline]
    pub(crate) fn contains32(&self, cp: u32) -> bool {
        match self {
            Self::CPInversionListStrList(list) => list.contains32(cp),
        }
    }

    /// Reports whether the backing set contains the character `ch`, as answered by its
    /// `contains`.
    #[inline]
    pub(crate) fn contains(&self, ch: char) -> bool {
        match self {
            Self::CPInversionListStrList(list) => list.contains(ch),
        }
    }

    /// Wraps a `'static` `CodePointInversionListAndStringList` in the
    /// `CPInversionListStrList` variant.
    #[inline]
    pub(crate) fn from_code_point_inversion_list_string_list(
        l: CodePointInversionListAndStringList<'static>,
    ) -> Self {
        Self::CPInversionListStrList(l)
    }

    /// Returns a reference to the stored `CodePointInversionListAndStringList`.
    ///
    /// With the single existing variant this is always `Some`.
    #[inline]
    pub(crate) fn as_code_point_inversion_list_string_list(
        &'_ self,
    ) -> Option<&'_ CodePointInversionListAndStringList<'data>> {
        match self {
            Self::CPInversionListStrList(list) => Some(list),
            // A future backing structure that cannot hand out a
            // CPInversionListStrList in O(1) time should yield `None` here.
        }
    }

    /// Produces a `CodePointInversionListAndStringList` tied to the lifetime of
    /// `&self`, built from a reference to the stored value via `ZeroFrom::zero_from`.
    #[inline]
    pub(crate) fn to_code_point_inversion_list_string_list(
        &self,
    ) -> CodePointInversionListAndStringList<'_> {
        match self {
            Self::CPInversionListStrList(list) => ZeroFrom::zero_from(list),
        }
    }
}
/// A struct that efficiently stores `Script` and `Script_Extensions` property data.
///
///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
///
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct ScriptWithExtensionsProperty<'data> {
/// Note: The `ScriptWithExt` values in this array will assume a 12-bit layout. The 2
/// higher order bits 11..10 will indicate how to deduce the Script value and
/// Script_Extensions value, nearly matching the representation
/// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h):
///
/// | High order 2 bits value | Script | Script_Extensions |
/// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------|
/// | 3 | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits |
/// | 2 | Script=Inherited | Entire sub-array, index given by lower 10 bits |
/// | 1 | Script=Common | Entire sub-array, index given by lower 10 bits |
/// | 0 | Value in lower 10 bits | `[ Script value ]` single-element array |
///
/// When the lower 10 bits of the value are used as an index, that index is
/// used for the outer-level vector of the nested `extensions` structure.
#[cfg_attr(feature = "serde", serde(borrow))]
pub trie: CodePointTrie<'data, ScriptWithExt>,
/// This companion structure stores Script_Extensions values, which are
/// themselves arrays / vectors. This structure only stores the values for
/// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. The
/// sub-vector represents the Script_Extensions array value for a code point,
/// and may also indicate Script value, as described for the `trie` field.
#[cfg_attr(feature = "serde", serde(borrow))]
pub extensions: VarZeroVec<'data, ZeroSlice