• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 //! Utilities for reading CodePointTrie data from TOML files.
6 
7 use crate::codepointtrie::error::Error;
8 use crate::codepointtrie::CodePointTrie;
9 use crate::codepointtrie::CodePointTrieHeader;
10 use crate::codepointtrie::TrieType;
11 use crate::codepointtrie::TrieValue;
12 use alloc::string::String;
13 use alloc::vec::Vec;
14 use core::convert::TryFrom;
15 use zerovec::ZeroVec;
16 
/// A Serde-compatible struct for reading serialized [`CodePointTrie`] TOML files
/// generated by ICU4C.
///
/// Use `TryInto` to convert [`CodePointTrieToml`] to a proper [`CodePointTrie`].
///
/// Fields marked `#[serde(skip)]` are never read from the input. Every other
/// field is read from the TOML key named by its `rename` attribute, or from
/// the key matching the field name when no `rename` is given.
#[allow(clippy::upper_case_acronyms)]
#[derive(serde::Deserialize)]
pub struct CodePointTrieToml {
    /// Not deserialized (`#[serde(skip)]`).
    // NOTE(review): presumably mirrors naming metadata in the ICU4C export — confirm.
    #[serde(skip)]
    _short_name: String,
    /// Not deserialized (`#[serde(skip)]`).
    #[serde(skip)]
    _long_name: String,
    /// Not deserialized (`#[serde(skip)]`).
    #[serde(skip)]
    _name: String,
    /// The trie `index` array; exposed through `index_slice()`.
    index: Vec<u16>,
    /// Data array stored as 8-bit values, if the TOML provided `data_8`.
    data_8: Option<Vec<u8>>,
    /// Data array stored as 16-bit values, if the TOML provided `data_16`.
    data_16: Option<Vec<u16>>,
    /// Data array stored as 32-bit values, if the TOML provided `data_32`.
    data_32: Option<Vec<u32>>,
    /// Not deserialized (`#[serde(skip)]`).
    #[serde(skip)]
    _index_length: u32,
    /// Not deserialized (`#[serde(skip)]`).
    #[serde(skip)]
    _data_length: u32,
    /// Read from the `highStart` key; copied into the header's `high_start`.
    #[serde(rename = "highStart")]
    high_start: u32,
    /// Read from the `shifted12HighStart` key; copied into the header's
    /// `shifted12_high_start`.
    #[serde(rename = "shifted12HighStart")]
    shifted12_high_start: u16,
    /// Read from the `type` key; converted with `TrieType::try_from` when the
    /// header is built.
    #[serde(rename = "type")]
    trie_type_enum_val: u8,
    /// Read from the `valueWidth` key. Deserialized, but not consulted by the
    /// conversions visible in this file.
    #[serde(rename = "valueWidth")]
    _value_width_enum_val: u8,
    /// Read from the `index3NullOffset` key; copied into the header's
    /// `index3_null_offset`.
    #[serde(rename = "index3NullOffset")]
    index3_null_offset: u16,
    /// Read from the `dataNullOffset` key; copied into the header's
    /// `data_null_offset`.
    #[serde(rename = "dataNullOffset")]
    data_null_offset: u32,
    /// Read from the `nullValue` key; copied into the header's `null_value`.
    #[serde(rename = "nullValue")]
    null_value: u32,
}
53 
/// Data slice from a [`CodePointTrie`] TOML.
///
/// ICU4C exports data as either `u8`, `u16`, or `u32`, which may be converted
/// to other types as appropriate.
///
/// This is a borrowed view over one of those arrays, so it is cheap to copy
/// and can be compared or debug-printed directly.
#[allow(clippy::exhaustive_enums)] // based on a stable serialized form
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodePointDataSlice<'a> {
    /// A serialized [`CodePointTrie`] data array of 8-bit values.
    U8(&'a [u8]),
    /// A serialized [`CodePointTrie`] data array of 16-bit values.
    U16(&'a [u16]),
    /// A serialized [`CodePointTrie`] data array of 32-bit values.
    U32(&'a [u32]),
}
67 
68 impl CodePointTrieToml {
69     /// Gets the `index` slice.
index_slice(&self) -> &[u16]70     pub fn index_slice(&self) -> &[u16] {
71         self.index.as_slice()
72     }
73 
74     /// Gets the `data` slice.
data_slice(&self) -> Result<CodePointDataSlice, Error>75     pub fn data_slice(&self) -> Result<CodePointDataSlice, Error> {
76         if let Some(data_8) = &self.data_8 {
77             Ok(CodePointDataSlice::U8(data_8.as_slice()))
78         } else if let Some(data_16) = &self.data_16 {
79             Ok(CodePointDataSlice::U16(data_16.as_slice()))
80         } else if let Some(data_32) = &self.data_32 {
81             Ok(CodePointDataSlice::U32(data_32.as_slice()))
82         } else {
83             Err(Error::FromDeserialized {
84                 reason: "Did not find data array for CodePointTrie in TOML",
85             })
86         }
87     }
88 }
89 
90 impl TryFrom<&CodePointTrieToml> for CodePointTrieHeader {
91     type Error = Error;
92 
try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error>93     fn try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error> {
94         let trie_type_enum: TrieType = TrieType::try_from(cpt_data.trie_type_enum_val)?;
95         Ok(CodePointTrieHeader {
96             high_start: cpt_data.high_start,
97             shifted12_high_start: cpt_data.shifted12_high_start,
98             index3_null_offset: cpt_data.index3_null_offset,
99             data_null_offset: cpt_data.data_null_offset,
100             null_value: cpt_data.null_value,
101             trie_type: trie_type_enum,
102         })
103     }
104 }
105 
106 impl<T: TrieValue> TryFrom<&CodePointTrieToml> for CodePointTrie<'static, T> {
107     type Error = Error;
108 
try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error>109     fn try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error> {
110         use CodePointDataSlice::*;
111         let header = CodePointTrieHeader::try_from(cpt_data)?;
112         let index: ZeroVec<u16> = ZeroVec::alloc_from_slice(&cpt_data.index);
113         let data: Result<ZeroVec<'static, T>, T::TryFromU32Error> = match cpt_data.data_slice()? {
114             U8(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
115             U16(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
116             U32(s) => s.iter().map(|i| T::try_from_u32(*i)).collect(),
117         };
118 
119         let data = data.map_err(|_| Error::FromDeserialized {
120             reason: "Could not parse data array to typed array",
121         })?;
122         CodePointTrie::<T>::try_new(header, index, data)
123     }
124 }
125