1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 //! Utilities for reading CodePointTrie data from TOML files. 6 7 use crate::codepointtrie::error::Error; 8 use crate::codepointtrie::CodePointTrie; 9 use crate::codepointtrie::CodePointTrieHeader; 10 use crate::codepointtrie::TrieType; 11 use crate::codepointtrie::TrieValue; 12 use alloc::string::String; 13 use alloc::vec::Vec; 14 use core::convert::TryFrom; 15 use zerovec::ZeroVec; 16 17 /// A Serde-compatible struct for reading serialized [`CodePointTrie`] TOML files 18 /// generated by ICU4C. 19 /// 20 /// Use `TryInto` to convert [`CodePointTrieToml`] to a proper [`CodePointTrie`]. 21 #[allow(clippy::upper_case_acronyms)] 22 #[derive(serde::Deserialize)] 23 pub struct CodePointTrieToml { 24 #[serde(skip)] 25 _short_name: String, 26 #[serde(skip)] 27 _long_name: String, 28 #[serde(skip)] 29 _name: String, 30 index: Vec<u16>, 31 data_8: Option<Vec<u8>>, 32 data_16: Option<Vec<u16>>, 33 data_32: Option<Vec<u32>>, 34 #[serde(skip)] 35 _index_length: u32, 36 #[serde(skip)] 37 _data_length: u32, 38 #[serde(rename = "highStart")] 39 high_start: u32, 40 #[serde(rename = "shifted12HighStart")] 41 shifted12_high_start: u16, 42 #[serde(rename = "type")] 43 trie_type_enum_val: u8, 44 #[serde(rename = "valueWidth")] 45 _value_width_enum_val: u8, 46 #[serde(rename = "index3NullOffset")] 47 index3_null_offset: u16, 48 #[serde(rename = "dataNullOffset")] 49 data_null_offset: u32, 50 #[serde(rename = "nullValue")] 51 null_value: u32, 52 } 53 54 /// Data slice from a [`CodePointTrie`] TOML. 55 /// 56 /// ICU4C exports data as either `u8`, `u16`, or `u32`, which may be converted 57 /// to other types as appropriate. 58 #[allow(clippy::exhaustive_enums)] // based on a stable serialized form 59 pub enum CodePointDataSlice<'a> { 60 /// A serialized [`CodePointTrie`] data array 8-bit values. 61 U8(&'a [u8]), 62 /// A serialized [`CodePointTrie`] data array 16-bit values. 63 U16(&'a [u16]), 64 /// A serialized [`CodePointTrie`] data array 32-bit values. 65 U32(&'a [u32]), 66 } 67 68 impl CodePointTrieToml { 69 /// Gets the `index` slice. index_slice(&self) -> &[u16]70 pub fn index_slice(&self) -> &[u16] { 71 self.index.as_slice() 72 } 73 74 /// Gets the `data` slice. data_slice(&self) -> Result<CodePointDataSlice, Error>75 pub fn data_slice(&self) -> Result<CodePointDataSlice, Error> { 76 if let Some(data_8) = &self.data_8 { 77 Ok(CodePointDataSlice::U8(data_8.as_slice())) 78 } else if let Some(data_16) = &self.data_16 { 79 Ok(CodePointDataSlice::U16(data_16.as_slice())) 80 } else if let Some(data_32) = &self.data_32 { 81 Ok(CodePointDataSlice::U32(data_32.as_slice())) 82 } else { 83 Err(Error::FromDeserialized { 84 reason: "Did not find data array for CodePointTrie in TOML", 85 }) 86 } 87 } 88 } 89 90 impl TryFrom<&CodePointTrieToml> for CodePointTrieHeader { 91 type Error = Error; 92 try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error>93 fn try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error> { 94 let trie_type_enum: TrieType = TrieType::try_from(cpt_data.trie_type_enum_val)?; 95 Ok(CodePointTrieHeader { 96 high_start: cpt_data.high_start, 97 shifted12_high_start: cpt_data.shifted12_high_start, 98 index3_null_offset: cpt_data.index3_null_offset, 99 data_null_offset: cpt_data.data_null_offset, 100 null_value: cpt_data.null_value, 101 trie_type: trie_type_enum, 102 }) 103 } 104 } 105 106 impl<T: TrieValue> TryFrom<&CodePointTrieToml> for CodePointTrie<'static, T> { 107 type Error = Error; 108 try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error>109 fn try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error> { 110 use CodePointDataSlice::*; 111 let header = CodePointTrieHeader::try_from(cpt_data)?; 112 let index: ZeroVec<u16> = ZeroVec::alloc_from_slice(&cpt_data.index); 113 let data: Result<ZeroVec<'static, T>, T::TryFromU32Error> = match cpt_data.data_slice()? { 114 U8(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(), 115 U16(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(), 116 U32(s) => s.iter().map(|i| T::try_from_u32(*i)).collect(), 117 }; 118 119 let data = data.map_err(|_| Error::FromDeserialized { 120 reason: "Could not parse data array to typed array", 121 })?; 122 CodePointTrie::<T>::try_new(header, index, data) 123 } 124 } 125