1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! Data stored as a [`ZeroTrieSimpleAscii`]
6
// Byte placed between the locale and the marker attributes in a trie key.
// 0x1E is the ASCII "record separator" control character; it is a valid
// separator because `DataLocale` will never produce it.
const ID_SEPARATOR: u8 = b'\x1E';
9
10 pub use icu_provider::DynamicDataMarker;
11 use icu_provider::{
12 prelude::{zerofrom::ZeroFrom, *},
13 ule::MaybeAsVarULE,
14 };
15 pub use zerotrie::ZeroTrieSimpleAscii;
16 use zerovec::VarZeroSlice;
17
18 #[cfg(feature = "export")]
19 use icu_provider::export::ExportMarker;
20
/// Generates the baked expression for a trie-backed data store.
///
/// Every identifier in `bakes_to_ids` is encoded as the locale string,
/// followed (only when the marker attributes are non-empty) by
/// `ID_SEPARATOR` and the attribute bytes. That key is mapped to the position
/// of its payload within `bakes_to_ids`.
///
/// The payloads are emitted as a `VarZeroSlice` wrapped in `DataForVarULEs`
/// when `DataPayload::tokenize_encoded_seq` yields tokens, and otherwise as a
/// plain slice of data structs wrapped in `Data`.
///
/// Returns the generated tokens together with a size figure: the size of a
/// `Data` struct plus the `borrows_size()` reported for the trie.
#[cfg(feature = "export")]
pub(crate) fn bake(
    marker_bake: &databake::TokenStream,
    bakes_to_ids: &[(
        &DataPayload<ExportMarker>,
        &std::collections::BTreeSet<DataIdentifierCow>,
    )],
    ctx: &databake::CrateEnv,
) -> (databake::TokenStream, usize) {
    use databake::*;

    // Safety invariant upheld: the only values ever inserted into the trie are
    // `bake_index` values, i.e. positions within `bakes_to_ids`, which is also
    // the order in which the values are emitted below.
    let keyed_indices = bakes_to_ids
        .iter()
        .enumerate()
        .flat_map(|(bake_index, (_, ids))| {
            ids.iter().map(move |id| {
                let locale_bytes = id.locale.to_string().into_bytes();
                let key = if id.marker_attributes.is_empty() {
                    locale_bytes
                } else {
                    let mut with_attributes = locale_bytes;
                    with_attributes.push(ID_SEPARATOR);
                    with_attributes.extend_from_slice(id.marker_attributes.as_bytes());
                    with_attributes
                };
                (key, bake_index)
            })
        });
    let trie = ZeroTrieSimpleAscii::from_iter(keyed_indices);

    // Declaration of the `TRIE` constant inside the generated block.
    let trie_tokens = trie.as_borrowed_slice().bake(&Default::default());
    let trie_const = quote! {
        const TRIE: icu_provider_baked::zerotrie::ZeroTrieSimpleAscii<&'static [u8]> = icu_provider_baked:: #trie_tokens;
    };

    let payloads: Vec<_> = bakes_to_ids.iter().map(|&(payload, _)| payload).collect();

    // Declaration of the `VALUES` constant, plus the store type that wraps it.
    let maybe_vzv_tokens = DataPayload::tokenize_encoded_seq(&payloads, ctx);
    let (values_const, store_ty) = match maybe_vzv_tokens {
        Some(vzv_tokens) => (
            quote! {
                const VALUES: &'static zerovec::VarZeroSlice<<<#marker_bake as icu_provider_baked::zerotrie::DynamicDataMarker>::DataStruct as icu_provider::ule::MaybeAsVarULE>::EncodedStruct> = #vzv_tokens;
            },
            quote! {
                icu_provider_baked::zerotrie::DataForVarULEs
            },
        ),
        None => {
            let bakes = payloads.iter().map(|payload| payload.tokenize(ctx));
            (
                quote! {
                    const VALUES: &'static [<#marker_bake as icu_provider_baked::zerotrie::DynamicDataMarker>::DataStruct] = &[#(#bakes,)*];
                },
                quote! {
                    icu_provider_baked::zerotrie::Data
                },
            )
        }
    };

    let size = core::mem::size_of::<Data<icu_provider::hello_world::HelloWorldV1>>()
        + trie.as_borrowed_slice().borrows_size();

    let store_tokens = quote! {
        // Safety invariant upheld: see the comment on the trie construction
        #store_ty<#marker_bake> = {
            #trie_const
            #values_const
            unsafe {
                #store_ty::from_trie_and_values_unchecked(TRIE, VALUES)
            }
        }

    };

    (store_tokens, size)
}
96
get_index( trie: ZeroTrieSimpleAscii<&'static [u8]>, id: DataIdentifierBorrowed, attributes_prefix_match: bool, ) -> Option<usize>97 fn get_index(
98 trie: ZeroTrieSimpleAscii<&'static [u8]>,
99 id: DataIdentifierBorrowed,
100 attributes_prefix_match: bool,
101 ) -> Option<usize> {
102 use writeable::Writeable;
103 let mut cursor = trie.cursor();
104 let _is_ascii = id.locale.write_to(&mut cursor);
105 if !id.marker_attributes.is_empty() {
106 cursor.step(ID_SEPARATOR);
107 id.marker_attributes.write_to(&mut cursor).ok()?;
108 loop {
109 if let Some(v) = cursor.take_value() {
110 break Some(v);
111 }
112 if !attributes_prefix_match || cursor.probe(0).is_none() {
113 break None;
114 }
115 }
116 } else {
117 cursor.take_value()
118 }
119 }
120
/// Iterates over the identifiers stored as keys in `trie`.
///
/// A key containing `ID_SEPARATOR` is split at its first occurrence into a
/// locale part and a marker-attributes part; any other key is parsed as a
/// locale on its own. Keys whose parts fail to parse are skipped.
#[cfg(feature = "alloc")]
#[allow(clippy::type_complexity)]
fn iter(
    trie: &'static ZeroTrieSimpleAscii<&'static [u8]>,
) -> core::iter::FilterMap<
    zerotrie::ZeroTrieStringIterator<'static>,
    fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
> {
    use alloc::borrow::ToOwned;
    // The closure captures nothing, so it coerces to the `fn` pointer named in
    // the return type.
    trie.iter()
        .filter_map(|(key, _)| match key.split_once(ID_SEPARATOR as char) {
            Some((locale, attrs)) => {
                let attributes = DataMarkerAttributes::try_from_str(attrs).ok()?.to_owned();
                let locale = locale.parse().ok()?;
                Some(DataIdentifierCow::from_owned(attributes, locale))
            }
            None => key.parse().ok().map(DataIdentifierCow::from_locale),
        })
}
141
142 pub struct Data<M: DataMarker> {
143 // Unsafe invariant: actual values contained MUST be valid indices into `values`
144 trie: ZeroTrieSimpleAscii<&'static [u8]>,
145 values: &'static [M::DataStruct],
146 }
147
148 impl<M: DataMarker> Data<M> {
149 /// Construct from a trie and values
150 ///
151 /// # Safety
152 /// The actual values contained in the trie must be valid indices into `values`
from_trie_and_values_unchecked( trie: ZeroTrieSimpleAscii<&'static [u8]>, values: &'static [M::DataStruct], ) -> Self153 pub const unsafe fn from_trie_and_values_unchecked(
154 trie: ZeroTrieSimpleAscii<&'static [u8]>,
155 values: &'static [M::DataStruct],
156 ) -> Self {
157 Self { trie, values }
158 }
159 }
160
161 impl<M: DataMarker> super::DataStore<M> for Data<M> {
get( &self, id: DataIdentifierBorrowed, attributes_prefix_match: bool, ) -> Option<DataPayload<M>>162 fn get(
163 &self,
164 id: DataIdentifierBorrowed,
165 attributes_prefix_match: bool,
166 ) -> Option<DataPayload<M>> {
167 get_index(self.trie, id, attributes_prefix_match)
168 // Safety: Allowed since `i` came from the trie and the field safety invariant
169 .map(|i| unsafe { self.values.get_unchecked(i) })
170 .map(DataPayload::from_static_ref)
171 }
172
173 #[cfg(feature = "alloc")]
174 type IterReturn = core::iter::FilterMap<
175 zerotrie::ZeroTrieStringIterator<'static>,
176 fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
177 >;
178 #[cfg(feature = "alloc")]
iter(&'static self) -> Self::IterReturn179 fn iter(&'static self) -> Self::IterReturn {
180 iter(&self.trie)
181 }
182 }
183
184 pub struct DataForVarULEs<M: DataMarker>
185 where
186 M::DataStruct: MaybeAsVarULE,
187 M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
188 {
189 // Unsafe invariant: actual values contained MUST be valid indices into `values`
190 trie: ZeroTrieSimpleAscii<&'static [u8]>,
191 values: &'static VarZeroSlice<<M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
192 }
193
194 impl<M: DataMarker> DataForVarULEs<M>
195 where
196 M::DataStruct: MaybeAsVarULE,
197 M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
198 {
199 /// Construct from a trie and values
200 ///
201 /// # Safety
202 /// The actual values contained in the trie must be valid indices into `values`
203 pub const unsafe fn from_trie_and_values_unchecked(
204 trie: ZeroTrieSimpleAscii<&'static [u8]>,
205 values: &'static VarZeroSlice<<M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
206 ) -> Self {
207 Self { trie, values }
208 }
209 }
210
211 impl<M: DataMarker> super::DataStore<M> for DataForVarULEs<M>
212 where
213 M::DataStruct: MaybeAsVarULE,
214 M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
215 {
216 fn get(
217 &self,
218 id: DataIdentifierBorrowed,
219 attributes_prefix_match: bool,
220 ) -> Option<DataPayload<M>> {
221 get_index(self.trie, id, attributes_prefix_match)
222 // Safety: Allowed since `i` came from the trie and the field safety invariant
223 .map(|i| unsafe { self.values.get_unchecked(i) })
224 .map(M::DataStruct::zero_from)
225 .map(DataPayload::from_owned)
226 }
227
228 #[cfg(feature = "alloc")]
229 type IterReturn = core::iter::FilterMap<
230 zerotrie::ZeroTrieStringIterator<'static>,
231 fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
232 >;
233 #[cfg(feature = "alloc")]
234 fn iter(&'static self) -> Self::IterReturn {
235 iter(&self.trie)
236 }
237 }
238