• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
//! Data stored as [`ZeroTrieSimpleAscii`]
6 
/// Byte written between the locale and the marker attributes when both are
/// encoded into a single trie key.
///
/// `0x1E` is a valid separator because `DataLocale` will never produce it.
const ID_SEPARATOR: u8 = b'\x1E';
9 
10 pub use icu_provider::DynamicDataMarker;
11 use icu_provider::{
12     prelude::{zerofrom::ZeroFrom, *},
13     ule::MaybeAsVarULE,
14 };
15 pub use zerotrie::ZeroTrieSimpleAscii;
16 use zerovec::VarZeroSlice;
17 
18 #[cfg(feature = "export")]
19 use icu_provider::export::ExportMarker;
20 
/// Generates the baked tokens for a zerotrie-backed data store.
///
/// Each entry of `bakes_to_ids` pairs one payload with the set of identifiers
/// that resolve to it. Every identifier is encoded as a trie key (the locale,
/// followed by [`ID_SEPARATOR`] and the marker attributes when those are
/// non-empty) and mapped to the position of its payload in `bakes_to_ids`.
///
/// Returns the generated `<store type><marker> = { ... }` token fragment and a
/// size figure: `size_of::<Data<_>>()` plus the trie's `borrows_size()`.
#[cfg(feature = "export")]
pub(crate) fn bake(
    marker_bake: &databake::TokenStream,
    bakes_to_ids: &[(
        &DataPayload<ExportMarker>,
        &std::collections::BTreeSet<DataIdentifierCow>,
    )],
    ctx: &databake::CrateEnv,
) -> (databake::TokenStream, usize) {
    use databake::*;

    // Safety invariant upheld: the only values inserted into the trie are
    // `position` values, i.e. positions of entries in `bakes_to_ids`, which is
    // also the sequence the baked values are generated from.
    let mut entries = Vec::new();
    for (position, (_, ids)) in bakes_to_ids.iter().enumerate() {
        for id in ids.iter() {
            let mut key = id.locale.to_string().into_bytes();
            if !id.marker_attributes.is_empty() {
                key.push(ID_SEPARATOR);
                key.extend_from_slice(id.marker_attributes.as_bytes());
            }
            entries.push((key, position));
        }
    }
    let trie = ZeroTrieSimpleAscii::from_iter(entries);

    let trie_tokens = trie.as_borrowed_slice().bake(&Default::default());
    let trie_const = quote! {
        const TRIE: icu_provider_baked::zerotrie::ZeroTrieSimpleAscii<&'static [u8]> = icu_provider_baked:: #trie_tokens;
    };

    let payloads: Vec<_> = bakes_to_ids.iter().map(|&(payload, _)| payload).collect();

    // When the payloads can be tokenized as one encoded sequence, store them in a
    // `VarZeroSlice`; otherwise fall back to a plain slice of data structs.
    let (values_const, store_ty) = match DataPayload::tokenize_encoded_seq(&payloads, ctx) {
        Some(vzv_tokens) => (
            quote! {
                const VALUES: &'static zerovec::VarZeroSlice<<<#marker_bake as icu_provider_baked::zerotrie::DynamicDataMarker>::DataStruct as icu_provider::ule::MaybeAsVarULE>::EncodedStruct> = #vzv_tokens;
            },
            quote! {
                icu_provider_baked::zerotrie::DataForVarULEs
            },
        ),
        None => {
            let bakes = payloads.iter().map(|payload| payload.tokenize(ctx));
            (
                quote! {
                    const VALUES: &'static [<#marker_bake as icu_provider_baked::zerotrie::DynamicDataMarker>::DataStruct] = &[#(#bakes,)*];
                },
                quote! {
                    icu_provider_baked::zerotrie::Data
                },
            )
        }
    };

    // Safety invariant upheld for the generated `unsafe` block: see the comment
    // on trie construction above.
    let store_tokens = quote! {
        #store_ty<#marker_bake> = {
            #trie_const
            #values_const
            unsafe {
                #store_ty::from_trie_and_values_unchecked(TRIE, VALUES)
            }
        }

    };

    let size = core::mem::size_of::<Data<icu_provider::hello_world::HelloWorldV1>>()
        + trie.as_borrowed_slice().borrows_size();

    (store_tokens, size)
}
96 
get_index( trie: ZeroTrieSimpleAscii<&'static [u8]>, id: DataIdentifierBorrowed, attributes_prefix_match: bool, ) -> Option<usize>97 fn get_index(
98     trie: ZeroTrieSimpleAscii<&'static [u8]>,
99     id: DataIdentifierBorrowed,
100     attributes_prefix_match: bool,
101 ) -> Option<usize> {
102     use writeable::Writeable;
103     let mut cursor = trie.cursor();
104     let _is_ascii = id.locale.write_to(&mut cursor);
105     if !id.marker_attributes.is_empty() {
106         cursor.step(ID_SEPARATOR);
107         id.marker_attributes.write_to(&mut cursor).ok()?;
108         loop {
109             if let Some(v) = cursor.take_value() {
110                 break Some(v);
111             }
112             if !attributes_prefix_match || cursor.probe(0).is_none() {
113                 break None;
114             }
115         }
116     } else {
117         cursor.take_value()
118     }
119 }
120 
/// Iterates over the identifiers encoded in the keys of `trie`.
///
/// Keys containing [`ID_SEPARATOR`] are split into a locale and marker
/// attributes; all other keys are parsed as a bare locale. Keys that fail to
/// parse are skipped. The values stored in the trie are ignored.
#[cfg(feature = "alloc")]
#[allow(clippy::type_complexity)]
fn iter(
    trie: &'static ZeroTrieSimpleAscii<&'static [u8]>,
) -> core::iter::FilterMap<
    zerotrie::ZeroTrieStringIterator<'static>,
    fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
> {
    /// Decodes a single trie key back into a data identifier.
    fn decode((key, _): (alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>> {
        use alloc::borrow::ToOwned;
        match key.split_once(ID_SEPARATOR as char) {
            Some((locale, attrs)) => Some(DataIdentifierCow::from_owned(
                DataMarkerAttributes::try_from_str(attrs).ok()?.to_owned(),
                locale.parse().ok()?,
            )),
            None => key.parse().ok().map(DataIdentifierCow::from_locale),
        }
    }

    // The return type names a fn pointer, so the fn item is cast explicitly.
    trie.iter().filter_map(decode as fn(_) -> _)
}
141 
142 pub struct Data<M: DataMarker> {
143     // Unsafe invariant: actual values contained MUST be valid indices into `values`
144     trie: ZeroTrieSimpleAscii<&'static [u8]>,
145     values: &'static [M::DataStruct],
146 }
147 
148 impl<M: DataMarker> Data<M> {
149     /// Construct from a trie and values
150     ///
151     /// # Safety
152     /// The actual values contained in the trie must be valid indices into `values`
from_trie_and_values_unchecked( trie: ZeroTrieSimpleAscii<&'static [u8]>, values: &'static [M::DataStruct], ) -> Self153     pub const unsafe fn from_trie_and_values_unchecked(
154         trie: ZeroTrieSimpleAscii<&'static [u8]>,
155         values: &'static [M::DataStruct],
156     ) -> Self {
157         Self { trie, values }
158     }
159 }
160 
161 impl<M: DataMarker> super::DataStore<M> for Data<M> {
get( &self, id: DataIdentifierBorrowed, attributes_prefix_match: bool, ) -> Option<DataPayload<M>>162     fn get(
163         &self,
164         id: DataIdentifierBorrowed,
165         attributes_prefix_match: bool,
166     ) -> Option<DataPayload<M>> {
167         get_index(self.trie, id, attributes_prefix_match)
168             // Safety: Allowed since `i` came from the trie and the field safety invariant
169             .map(|i| unsafe { self.values.get_unchecked(i) })
170             .map(DataPayload::from_static_ref)
171     }
172 
173     #[cfg(feature = "alloc")]
174     type IterReturn = core::iter::FilterMap<
175         zerotrie::ZeroTrieStringIterator<'static>,
176         fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
177     >;
178     #[cfg(feature = "alloc")]
iter(&'static self) -> Self::IterReturn179     fn iter(&'static self) -> Self::IterReturn {
180         iter(&self.trie)
181     }
182 }
183 
184 pub struct DataForVarULEs<M: DataMarker>
185 where
186     M::DataStruct: MaybeAsVarULE,
187     M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
188 {
189     // Unsafe invariant: actual values contained MUST be valid indices into `values`
190     trie: ZeroTrieSimpleAscii<&'static [u8]>,
191     values: &'static VarZeroSlice<<M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
192 }
193 
194 impl<M: DataMarker> DataForVarULEs<M>
195 where
196     M::DataStruct: MaybeAsVarULE,
197     M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
198 {
199     /// Construct from a trie and values
200     ///
201     /// # Safety
202     /// The actual values contained in the trie must be valid indices into `values`
203     pub const unsafe fn from_trie_and_values_unchecked(
204         trie: ZeroTrieSimpleAscii<&'static [u8]>,
205         values: &'static VarZeroSlice<<M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
206     ) -> Self {
207         Self { trie, values }
208     }
209 }
210 
211 impl<M: DataMarker> super::DataStore<M> for DataForVarULEs<M>
212 where
213     M::DataStruct: MaybeAsVarULE,
214     M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
215 {
216     fn get(
217         &self,
218         id: DataIdentifierBorrowed,
219         attributes_prefix_match: bool,
220     ) -> Option<DataPayload<M>> {
221         get_index(self.trie, id, attributes_prefix_match)
222             // Safety: Allowed since `i` came from the trie and the field safety invariant
223             .map(|i| unsafe { self.values.get_unchecked(i) })
224             .map(M::DataStruct::zero_from)
225             .map(DataPayload::from_owned)
226     }
227 
228     #[cfg(feature = "alloc")]
229     type IterReturn = core::iter::FilterMap<
230         zerotrie::ZeroTrieStringIterator<'static>,
231         fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
232     >;
233     #[cfg(feature = "alloc")]
234     fn iter(&'static self) -> Self::IterReturn {
235         iter(&self.trie)
236     }
237 }
238