// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. //! //!
//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, //! including in SemVer minor releases. While the serde representation of data structs is guaranteed //! to be stable, their Rust representation might not be. Use with caution. //!
//! //! Read more about data providers: [`icu_provider`] // Provider structs must be stable #![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] use icu_provider::prelude::*; use crate::provider::data::CaseMapData; use crate::provider::exceptions::CaseMapExceptions; use icu_collections::codepointtrie::CodePointTrie; #[cfg(feature = "datagen")] use icu_collections::codepointtrie::CodePointTrieHeader; pub mod data; pub mod exception_helpers; pub mod exceptions; #[cfg(feature = "datagen")] mod exceptions_builder; mod unfold; #[cfg(feature = "compiled_data")] #[derive(Debug)] /// Baked data /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. In particular, the `DataProvider` implementations are only /// guaranteed to match with this version's `*_unstable` providers. Use with caution. ///
pub struct Baked; #[cfg(feature = "compiled_data")] #[allow(unused_imports)] const _: () = { use icu_casemap_data::*; pub mod icu { pub use crate as casemap; pub use icu_collections as collections; } make_provider!(Baked); impl_case_map_v1!(Baked); impl_case_map_unfold_v1!(Baked); }; icu_provider::data_marker!( /// Marker for casemapping data. CaseMapV1, "case/map/v1", CaseMap<'static>, is_singleton = true ); icu_provider::data_marker!( /// Reverse case mapping data. CaseMapUnfoldV1, "case/map/unfold/v1", CaseMapUnfold<'static>, is_singleton = true ); #[cfg(feature = "datagen")] /// The latest minimum set of markers required by this component. pub const MARKERS: &[DataMarkerInfo] = &[CaseMapUnfoldV1::INFO, CaseMapV1::INFO]; pub use self::unfold::CaseMapUnfold; /// This type contains all of the casemapping data /// /// The methods in the provider module are primarily about accessing its data, /// however the full algorithms are also implemented as methods on this type in /// the `internals` module of this crate. /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, /// including in SemVer minor releases. While the serde representation of data structs is guaranteed /// to be stable, their Rust representation might not be. Use with caution. ///
#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] #[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))] #[yoke(prove_covariance_manually)] /// CaseMapper provides low-level access to the data necessary to /// convert characters and strings to upper, lower, or title case. pub struct CaseMap<'data> { /// Case mapping data pub trie: CodePointTrie<'data, CaseMapData>, /// Exceptions to the case mapping data pub exceptions: CaseMapExceptions<'data>, } icu_provider::data_struct!( CaseMap<'_>, #[cfg(feature = "datagen")] ); #[cfg(feature = "serde")] impl<'de> serde::Deserialize<'de> for CaseMap<'de> { fn deserialize>(deserializer: D) -> Result { #[derive(serde::Deserialize)] pub struct Raw<'data> { #[serde(borrow)] pub trie: CodePointTrie<'data, CaseMapData>, #[serde(borrow)] pub exceptions: CaseMapExceptions<'data>, } let Raw { trie, exceptions } = Raw::deserialize(deserializer)?; let result = Self { trie, exceptions }; debug_assert!(result.validate().is_ok()); Ok(result) } } impl CaseMap<'_> { /// Creates a new CaseMap using data exported by the // `icuexportdata` tool in ICU4C. Validates that the data is // consistent. #[cfg(feature = "datagen")] pub fn try_from_icu( trie_header: CodePointTrieHeader, trie_index: &[u16], trie_data: &[u16], exceptions: &[u16], ) -> Result { use self::exceptions_builder::CaseMapExceptionsBuilder; use zerovec::ZeroVec; let exceptions_builder = CaseMapExceptionsBuilder::new(exceptions); let (exceptions, idx_map) = exceptions_builder.build()?; let trie_index = ZeroVec::alloc_from_slice(trie_index); #[allow(clippy::unwrap_used)] // datagen only let trie_data = trie_data .iter() .map(|&i| { CaseMapData::try_from_icu_integer(i) .unwrap() .with_updated_exception(&idx_map) }) .collect::>(); let trie = CodePointTrie::try_new(trie_header, trie_index, trie_data) .map_err(|_| DataError::custom("Casemapping data does not form valid trie"))?; let result = Self { trie, exceptions }; result.validate().map_err(DataError::custom)?; Ok(result) } /// Given an existing CaseMapper, validates that the data is /// consistent. A CaseMapper created by the ICU transformer has /// already been validated. Calling this function is only /// necessary if you are concerned about data corruption after /// deserializing. #[cfg(any(feature = "serde", feature = "datagen"))] #[allow(unused)] // is only used in debug mode for serde pub(crate) fn validate(&self) -> Result<(), &'static str> { // First, validate that exception data is well-formed. let valid_exception_indices = self.exceptions.validate()?; let validate_delta = |c: char, delta: i32| -> Result<(), &'static str> { let new_c = u32::try_from(c as i32 + delta).map_err(|_| "Delta larger than character")?; char::from_u32(new_c).ok_or("Invalid delta")?; Ok(()) }; for i in 0..char::MAX as u32 { if let Some(c) = char::from_u32(i) { let data = self.lookup_data(c); if data.has_exception() { let idx = data.exception_index(); let exception = self.exceptions.get(idx); // Verify that the exception index points to a valid exception header. if !valid_exception_indices.contains(&idx) { return Err("Invalid exception index in trie data"); } exception.validate()?; } else { validate_delta(c, data.delta() as i32)?; } } } Ok(()) } pub(crate) fn lookup_data(&self, c: char) -> CaseMapData { self.trie.get32(c as u32) } }