• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::reader;
6 
7 use core::borrow::Borrow;
8 
9 #[cfg(feature = "alloc")]
10 use crate::{
11     builder::bytestr::ByteStr, builder::nonconst::ZeroTrieBuilder, error::ZeroTrieBuildError,
12 };
13 #[cfg(feature = "alloc")]
14 use alloc::{boxed::Box, collections::BTreeMap, collections::VecDeque, string::String, vec::Vec};
15 #[cfg(feature = "litemap")]
16 use litemap::LiteMap;
17 
18 /// A data structure that compactly maps from byte sequences to integers.
19 ///
20 /// There are several variants of `ZeroTrie` which are very similar but are optimized
21 /// for different use cases:
22 ///
23 /// - [`ZeroTrieSimpleAscii`] is the most compact structure. Very fast for small data.
24 ///   Only stores ASCII-encoded strings. Can be const-constructed!
25 /// - [`ZeroTriePerfectHash`] is also compact, but it also supports arbitrary binary
26 ///   strings. It also scales better to large data. Cannot be const-constructed.
27 /// - [`ZeroTrieExtendedCapacity`] can be used if more than 2^32 bytes are required.
28 ///
29 /// You can create a `ZeroTrie` directly, in which case the most appropriate
30 /// backing implementation will be chosen.
31 ///
32 /// # Backing Store
33 ///
34 /// The data structure has a flexible backing data store. The only requirement for most
35 /// functionality is that it implement `AsRef<[u8]>`. All of the following are valid
36 /// ZeroTrie types:
37 ///
38 /// - `ZeroTrie<[u8]>` (dynamically sized type: must be stored in a reference or Box)
39 /// - `ZeroTrie<&[u8]>` (borrows its data from a u8 buffer)
40 /// - `ZeroTrie<Vec<u8>>` (fully owned data)
41 /// - `ZeroTrie<ZeroVec<u8>>` (the recommended borrowed-or-owned signature)
42 /// - `Cow<ZeroTrie<[u8]>>` (another borrowed-or-owned signature)
43 /// - `ZeroTrie<Cow<[u8]>>` (another borrowed-or-owned signature)
44 ///
45 /// # Examples
46 ///
47 /// ```
48 /// use litemap::LiteMap;
49 /// use zerotrie::ZeroTrie;
50 ///
51 /// let mut map = LiteMap::<&[u8], usize>::new_vec();
52 /// map.insert("foo".as_bytes(), 1);
53 /// map.insert("bar".as_bytes(), 2);
54 /// map.insert("bazzoo".as_bytes(), 3);
55 ///
56 /// let trie = ZeroTrie::try_from(&map)?;
57 ///
58 /// assert_eq!(trie.get("foo"), Some(1));
59 /// assert_eq!(trie.get("bar"), Some(2));
60 /// assert_eq!(trie.get("bazzoo"), Some(3));
61 /// assert_eq!(trie.get("unknown"), None);
62 ///
63 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
64 /// ```
65 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
66 // Note: The absence of the following derive does not cause any test failures in this crate
67 #[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
68 pub struct ZeroTrie<Store>(pub(crate) ZeroTrieFlavor<Store>);
69 
70 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
71 pub(crate) enum ZeroTrieFlavor<Store> {
72     SimpleAscii(ZeroTrieSimpleAscii<Store>),
73     PerfectHash(ZeroTriePerfectHash<Store>),
74     ExtendedCapacity(ZeroTrieExtendedCapacity<Store>),
75 }
76 
77 /// A data structure that compactly maps from ASCII strings to integers.
78 ///
79 /// For more information, see [`ZeroTrie`].
80 ///
81 /// # Examples
82 ///
83 /// ```
84 /// use litemap::LiteMap;
85 /// use zerotrie::ZeroTrieSimpleAscii;
86 ///
87 /// let mut map = LiteMap::new_vec();
88 /// map.insert(&b"foo"[..], 1);
89 /// map.insert(b"bar", 2);
90 /// map.insert(b"bazzoo", 3);
91 ///
92 /// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
93 ///
94 /// assert_eq!(trie.get(b"foo"), Some(1));
95 /// assert_eq!(trie.get(b"bar"), Some(2));
96 /// assert_eq!(trie.get(b"bazzoo"), Some(3));
97 /// assert_eq!(trie.get(b"unknown"), None);
98 ///
99 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
100 /// ```
101 ///
102 /// The trie can only store ASCII bytes; a string with non-ASCII always returns None:
103 ///
104 /// ```
105 /// use zerotrie::ZeroTrieSimpleAscii;
106 ///
107 /// // A trie with two values: "abc" and "abcdef"
108 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
109 ///
110 /// assert!(matches!(trie.get(b"ab\xFF"), None));
111 /// ```
112 #[repr(transparent)]
113 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
114 #[cfg_attr(feature = "databake", derive(databake::Bake))]
115 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
116 #[allow(clippy::exhaustive_structs)] // databake hidden fields
117 pub struct ZeroTrieSimpleAscii<Store: ?Sized> {
118     #[doc(hidden)] // for databake, but there are no invariants
119     pub store: Store,
120 }
121 
122 impl<Store: ?Sized> ZeroTrieSimpleAscii<Store> {
transparent_ref_from_store(s: &Store) -> &Self123     fn transparent_ref_from_store(s: &Store) -> &Self {
124         unsafe {
125             // Safety: Self is transparent over Store
126             core::mem::transmute(s)
127         }
128     }
129 }
130 
131 impl<Store> ZeroTrieSimpleAscii<Store> {
132     /// Wrap this specific ZeroTrie variant into a ZeroTrie.
133     #[inline]
into_zerotrie(self) -> ZeroTrie<Store>134     pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
135         ZeroTrie(ZeroTrieFlavor::SimpleAscii(self))
136     }
137 }
138 
139 /// A data structure that compactly maps from ASCII strings to integers
140 /// in a case-insensitive way.
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// use litemap::LiteMap;
146 /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
147 ///
148 /// let mut map = LiteMap::new_vec();
149 /// map.insert(&b"foo"[..], 1);
150 /// map.insert(b"Bar", 2);
151 /// map.insert(b"Bazzoo", 3);
152 ///
153 /// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
154 ///
155 /// assert_eq!(trie.get(b"foo"), Some(1));
156 /// assert_eq!(trie.get(b"bar"), Some(2));
157 /// assert_eq!(trie.get(b"BAR"), Some(2));
158 /// assert_eq!(trie.get(b"bazzoo"), Some(3));
159 /// assert_eq!(trie.get(b"unknown"), None);
160 ///
161 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
162 /// ```
163 ///
164 /// Strings with different cases of the same character at the same offset are not allowed:
165 ///
166 /// ```
167 /// use litemap::LiteMap;
168 /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
169 ///
170 /// let mut map = LiteMap::new_vec();
171 /// map.insert(&b"bar"[..], 1);
172 /// // OK: 'r' and 'Z' are different letters
173 /// map.insert(b"baZ", 2);
174 /// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
175 /// map.insert(b"baR", 2);
176 ///
177 /// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
178 /// ```
179 #[repr(transparent)]
180 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
181 #[cfg_attr(feature = "databake", derive(databake::Bake))]
182 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
183 #[allow(clippy::exhaustive_structs)] // databake hidden fields
184 pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
185     #[doc(hidden)] // for databake, but there are no invariants
186     pub store: Store,
187 }
188 
189 impl<Store: ?Sized> ZeroAsciiIgnoreCaseTrie<Store> {
transparent_ref_from_store(s: &Store) -> &Self190     fn transparent_ref_from_store(s: &Store) -> &Self {
191         unsafe {
192             // Safety: Self is transparent over Store
193             core::mem::transmute(s)
194         }
195     }
196 }
197 
198 // Note: ZeroAsciiIgnoreCaseTrie is not a variant of ZeroTrie so there is no `into_zerotrie`
199 
200 /// A data structure that compactly maps from byte strings to integers.
201 ///
202 /// For more information, see [`ZeroTrie`].
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use litemap::LiteMap;
208 /// use zerotrie::ZeroTriePerfectHash;
209 ///
210 /// let mut map = LiteMap::<&[u8], usize>::new_vec();
211 /// map.insert("foo".as_bytes(), 1);
212 /// map.insert("bår".as_bytes(), 2);
213 /// map.insert("båzzøø".as_bytes(), 3);
214 ///
215 /// let trie = ZeroTriePerfectHash::try_from(&map)?;
216 ///
217 /// assert_eq!(trie.get("foo".as_bytes()), Some(1));
218 /// assert_eq!(trie.get("bår".as_bytes()), Some(2));
219 /// assert_eq!(trie.get("båzzøø".as_bytes()), Some(3));
220 /// assert_eq!(trie.get("bazzoo".as_bytes()), None);
221 ///
222 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
223 /// ```
224 #[repr(transparent)]
225 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
226 #[cfg_attr(feature = "databake", derive(databake::Bake))]
227 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
228 #[allow(clippy::exhaustive_structs)] // databake hidden fields
229 pub struct ZeroTriePerfectHash<Store: ?Sized> {
230     #[doc(hidden)] // for databake, but there are no invariants
231     pub store: Store,
232 }
233 
234 impl<Store: ?Sized> ZeroTriePerfectHash<Store> {
transparent_ref_from_store(s: &Store) -> &Self235     fn transparent_ref_from_store(s: &Store) -> &Self {
236         unsafe {
237             // Safety: Self is transparent over Store
238             core::mem::transmute(s)
239         }
240     }
241 }
242 
243 impl<Store> ZeroTriePerfectHash<Store> {
244     /// Wrap this specific ZeroTrie variant into a ZeroTrie.
245     #[inline]
into_zerotrie(self) -> ZeroTrie<Store>246     pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
247         ZeroTrie(ZeroTrieFlavor::PerfectHash(self))
248     }
249 }
250 
251 /// A data structure that maps from a large number of byte strings to integers.
252 ///
253 /// For more information, see [`ZeroTrie`].
254 #[repr(transparent)]
255 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
256 #[cfg_attr(feature = "databake", derive(databake::Bake))]
257 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
258 #[allow(clippy::exhaustive_structs)] // databake hidden fields
259 pub struct ZeroTrieExtendedCapacity<Store: ?Sized> {
260     #[doc(hidden)] // for databake, but there are no invariants
261     pub store: Store,
262 }
263 
264 impl<Store: ?Sized> ZeroTrieExtendedCapacity<Store> {
transparent_ref_from_store(s: &Store) -> &Self265     fn transparent_ref_from_store(s: &Store) -> &Self {
266         unsafe {
267             // Safety: Self is transparent over Store
268             core::mem::transmute(s)
269         }
270     }
271 }
272 
273 impl<Store> ZeroTrieExtendedCapacity<Store> {
274     /// Wrap this specific ZeroTrie variant into a ZeroTrie.
275     #[inline]
into_zerotrie(self) -> ZeroTrie<Store>276     pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
277         ZeroTrie(ZeroTrieFlavor::ExtendedCapacity(self))
278     }
279 }
280 
281 macro_rules! impl_zerotrie_subtype {
282     ($name:ident, $iter_element:ty, $iter_fn:path, $iter_ty:ty, $cnv_fn:path) => {
283         impl<Store> $name<Store> {
284             /// Create a trie directly from a store.
285             ///
286             /// If the store does not contain valid bytes, unexpected behavior may occur.
287             #[inline]
288             pub const fn from_store(store: Store) -> Self {
289                 Self { store }
290             }
291             /// Takes the byte store from this trie.
292             #[inline]
293             pub fn into_store(self) -> Store {
294                 self.store
295             }
296             /// Converts this trie's store to a different store implementing the `From` trait.
297             ///
298             #[doc = concat!("For example, use this to change `", stringify!($name), "<Vec<u8>>` to `", stringify!($name), "<Cow<[u8]>>`.")]
299             ///
300             /// # Examples
301             ///
302             /// ```
303             /// use std::borrow::Cow;
304             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
305             ///
306             #[doc = concat!("let trie: ", stringify!($name), "<Vec<u8>> = ", stringify!($name), "::from_bytes(b\"abc\\x85\").to_owned();")]
307             #[doc = concat!("let cow: ", stringify!($name), "<Cow<[u8]>> = trie.convert_store();")]
308             ///
309             /// assert_eq!(cow.get(b"abc"), Some(5));
310             /// ```
311             pub fn convert_store<X: From<Store>>(self) -> $name<X> {
312                 $name::<X>::from_store(X::from(self.store))
313             }
314         }
315         impl<Store> $name<Store>
316         where
317         Store: AsRef<[u8]> + ?Sized,
318         {
319             /// Queries the trie for a string.
320             pub fn get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]> {
321                 // TODO: Should this be AsRef or Borrow?
322                 reader::get_parameterized::<Self>(self.store.as_ref(), key.as_ref())
323             }
324             /// Returns `true` if the trie is empty.
325             #[inline]
326             pub fn is_empty(&self) -> bool {
327                 self.store.as_ref().is_empty()
328             }
329             /// Returns the size of the trie in number of bytes.
330             ///
331             /// To get the number of keys in the trie, use `.iter().count()`:
332             ///
333             /// ```
334             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
335             ///
336             /// // A trie with two values: "abc" and "abcdef"
337             #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
338             ///
339             /// assert_eq!(8, trie.byte_len());
340             /// assert_eq!(2, trie.iter().count());
341             /// ```
342             #[inline]
343             pub fn byte_len(&self) -> usize {
344                 self.store.as_ref().len()
345             }
346             /// Returns the bytes contained in the underlying store.
347             #[inline]
348             pub fn as_bytes(&self) -> &[u8] {
349                 self.store.as_ref()
350             }
351             /// Returns this trie as a reference transparent over a byte slice.
352             #[inline]
353             pub fn as_borrowed(&self) -> &$name<[u8]> {
354                 $name::from_bytes(self.store.as_ref())
355             }
356             /// Returns a trie with a store borrowing from this trie.
357             #[inline]
358             pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
359                 $name::from_store(self.store.as_ref())
360             }
361         }
362         impl<Store> AsRef<$name<[u8]>> for $name<Store>
363         where
364         Store: AsRef<[u8]> + ?Sized,
365         {
366             #[inline]
367             fn as_ref(&self) -> &$name<[u8]> {
368                 self.as_borrowed()
369             }
370         }
371         #[cfg(feature = "alloc")]
372         impl<Store> $name<Store>
373         where
374         Store: AsRef<[u8]> + ?Sized,
375         {
376             /// Converts a possibly-borrowed $name to an owned one.
377             ///
378             /// ✨ *Enabled with the `alloc` Cargo feature.*
379             ///
380             /// # Examples
381             ///
382             /// ```
383             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
384             ///
385             #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x85\");")]
386             #[doc = concat!("let owned: ", stringify!($name), "<Vec<u8>> = trie.to_owned();")]
387             ///
388             /// assert_eq!(trie.get(b"abc"), Some(5));
389             /// assert_eq!(owned.get(b"abc"), Some(5));
390             /// ```
391             #[inline]
392             pub fn to_owned(&self) -> $name<Vec<u8>> {
393                 $name::from_store(
394                     Vec::from(self.store.as_ref()),
395                 )
396             }
397             /// Returns an iterator over the key/value pairs in this trie.
398             ///
399             /// ✨ *Enabled with the `alloc` Cargo feature.*
400             ///
401             /// # Examples
402             ///
403             /// ```
404             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
405             ///
406             /// // A trie with two values: "abc" and "abcdef"
407             #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
408             ///
409             /// let mut it = trie.iter();
410             /// assert_eq!(it.next(), Some(("abc".into(), 0)));
411             /// assert_eq!(it.next(), Some(("abcdef".into(), 1)));
412             /// assert_eq!(it.next(), None);
413             /// ```
414             #[inline]
415             #[allow(clippy::type_complexity)]
416             pub fn iter(&self) -> $iter_ty {
417                  $iter_fn(self.as_bytes())
418             }
419         }
420         impl $name<[u8]> {
421             /// Casts from a byte slice to a reference to a trie with the same lifetime.
422             ///
423             /// If the bytes are not a valid trie, unexpected behavior may occur.
424             #[inline]
425             pub fn from_bytes(trie: &[u8]) -> &Self {
426                 Self::transparent_ref_from_store(trie)
427             }
428         }
429         #[cfg(feature = "alloc")]
430         impl $name<Vec<u8>> {
431             pub(crate) fn try_from_tuple_slice(items: &[(&ByteStr, usize)]) -> Result<Self, ZeroTrieBuildError> {
432                 use crate::options::ZeroTrieWithOptions;
433                 ZeroTrieBuilder::<VecDeque<u8>>::from_sorted_tuple_slice(
434                     items,
435                     Self::OPTIONS,
436                 )
437                 .map(|s| Self {
438                     store: s.to_bytes(),
439                 })
440             }
441         }
442         #[cfg(feature = "alloc")]
443         impl<'a, K> FromIterator<(K, usize)> for $name<Vec<u8>>
444         where
445             K: AsRef<[u8]>
446         {
447             fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
448                 use crate::options::ZeroTrieWithOptions;
449                 use crate::builder::nonconst::ZeroTrieBuilder;
450                 ZeroTrieBuilder::<VecDeque<u8>>::from_bytes_iter(
451                     iter,
452                     Self::OPTIONS
453                 )
454                 .map(|s| Self {
455                     store: s.to_bytes(),
456                 })
457                 .unwrap()
458             }
459         }
460         #[cfg(feature = "alloc")]
461         impl<'a, K> TryFrom<&'a BTreeMap<K, usize>> for $name<Vec<u8>>
462         where
463             K: Borrow<[u8]>
464         {
465             type Error = crate::error::ZeroTrieBuildError;
466             fn try_from(map: &'a BTreeMap<K, usize>) -> Result<Self, Self::Error> {
467                 let tuples: Vec<(&[u8], usize)> = map
468                     .iter()
469                     .map(|(k, v)| (k.borrow(), *v))
470                     .collect();
471                 let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
472                 Self::try_from_tuple_slice(byte_str_slice)
473             }
474         }
475         #[cfg(feature = "alloc")]
476         impl<Store> $name<Store>
477         where
478             Store: AsRef<[u8]> + ?Sized
479         {
480             /// Exports the data from this ZeroTrie type into a BTreeMap.
481             ///
482             /// ✨ *Enabled with the `alloc` Cargo feature.*
483             ///
484             /// # Examples
485             ///
486             /// ```
487             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
488             /// use std::collections::BTreeMap;
489             ///
490             #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
491             /// let items = trie.to_btreemap();
492             ///
493             /// assert_eq!(items.len(), 2);
494             ///
495             #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
496             ///     .into_iter()
497             ///     .collect();
498             /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
499             /// ```
500             pub fn to_btreemap(&self) -> BTreeMap<$iter_element, usize> {
501                 self.iter().collect()
502             }
503             #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
504             pub(crate) fn to_btreemap_bytes(&self) -> BTreeMap<Box<[u8]>, usize> {
505                 self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
506             }
507         }
508         #[cfg(feature = "alloc")]
509         impl<Store> From<&$name<Store>> for BTreeMap<$iter_element, usize>
510         where
511             Store: AsRef<[u8]> + ?Sized,
512         {
513             #[inline]
514             fn from(other: &$name<Store>) -> Self {
515                 other.to_btreemap()
516             }
517         }
518         #[cfg(feature = "litemap")]
519         impl<'a, K, S> TryFrom<&'a LiteMap<K, usize, S>> for $name<Vec<u8>>
520         where
521             K: Borrow<[u8]>,
522             S: litemap::store::StoreIterable<'a, K, usize>,
523         {
524             type Error = crate::error::ZeroTrieBuildError;
525             fn try_from(map: &'a LiteMap<K, usize, S>) -> Result<Self, Self::Error> {
526                 let tuples: Vec<(&[u8], usize)> = map
527                     .iter()
528                     .map(|(k, v)| (k.borrow(), *v))
529                     .collect();
530                 let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
531                 Self::try_from_tuple_slice(byte_str_slice)
532             }
533         }
534         #[cfg(feature = "litemap")]
535         impl<Store> $name<Store>
536         where
537             Store: AsRef<[u8]> + ?Sized,
538         {
539             /// Exports the data from this ZeroTrie type into a LiteMap.
540             ///
541             /// ✨ *Enabled with the `litemap` Cargo feature.*
542             ///
543             /// # Examples
544             ///
545             /// ```
546             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
547             /// use litemap::LiteMap;
548             ///
549             #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
550             ///
551             /// let items = trie.to_litemap();
552             /// assert_eq!(items.len(), 2);
553             ///
554             #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
555             ///     .iter()
556             ///     .map(|(k, v)| (k, *v))
557             ///     .collect();
558             /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
559             /// ```
560             pub fn to_litemap(&self) -> LiteMap<$iter_element, usize> {
561                 self.iter().collect()
562             }
563             #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
564             pub(crate) fn to_litemap_bytes(&self) -> LiteMap<Box<[u8]>, usize> {
565                 self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
566             }
567         }
568         #[cfg(feature = "litemap")]
569         impl<Store> From<&$name<Store>> for LiteMap<$iter_element, usize>
570         where
571             Store: AsRef<[u8]> + ?Sized,
572         {
573             #[inline]
574             fn from(other: &$name<Store>) -> Self {
575                 other.to_litemap()
576             }
577         }
578         #[cfg(feature = "litemap")]
579         impl $name<Vec<u8>>
580         {
581             #[cfg(feature = "serde")]
582             pub(crate) fn try_from_serde_litemap(items: &LiteMap<Box<ByteStr>, usize>) -> Result<Self, ZeroTrieBuildError> {
583                 let lm_borrowed: LiteMap<&ByteStr, usize> = items.to_borrowed_keys();
584                 Self::try_from_tuple_slice(lm_borrowed.as_slice())
585             }
586         }
587         // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
588         impl Borrow<$name<[u8]>> for $name<&[u8]> {
589             #[inline]
590             fn borrow(&self) -> &$name<[u8]> {
591                 self.as_borrowed()
592             }
593         }
594         // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
595         #[cfg(feature = "alloc")]
596         impl Borrow<$name<[u8]>> for $name<Box<[u8]>> {
597             #[inline]
598             fn borrow(&self) -> &$name<[u8]> {
599                 self.as_borrowed()
600             }
601         }
602         // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
603         #[cfg(feature = "alloc")]
604         impl Borrow<$name<[u8]>> for $name<Vec<u8>> {
605             #[inline]
606             fn borrow(&self) -> &$name<[u8]> {
607                 self.as_borrowed()
608             }
609         }
610         #[cfg(feature = "alloc")]
611         impl alloc::borrow::ToOwned for $name<[u8]> {
612             type Owned = $name<Box<[u8]>>;
613             #[doc = concat!("This impl allows [`", stringify!($name), "`] to be used inside of a [`Cow`](alloc::borrow::Cow).")]
614             ///
615             #[doc = concat!("Note that it is also possible to use `", stringify!($name), "<ZeroVec<u8>>` for a similar result.")]
616             ///
617             /// ✨ *Enabled with the `alloc` Cargo feature.*
618             ///
619             /// # Examples
620             ///
621             /// ```
622             /// use std::borrow::Cow;
623             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
624             ///
625             #[doc = concat!("let trie: Cow<", stringify!($name), "<[u8]>> = Cow::Borrowed(", stringify!($name), "::from_bytes(b\"abc\\x85\"));")]
626             /// assert_eq!(trie.get(b"abc"), Some(5));
627             /// ```
628             fn to_owned(&self) -> Self::Owned {
629                 let bytes: &[u8] = self.store.as_ref();
630                 $name::from_store(
631                     Vec::from(bytes).into_boxed_slice(),
632                 )
633             }
634         }
635         // TODO(#2778): Auto-derive these impls based on the repr(transparent).
636         //
637         // Safety (based on the safety checklist on the VarULE trait):
638         //  1. `$name` does not include any uninitialized or padding bytes as it is `repr(transparent)`
639         //     over a `VarULE` type, `Store`, as evidenced by the existence of `transparent_ref_from_store()`
640         //  2. `$name` is aligned to 1 byte for the same reason
641         //  3. The impl of `validate_bytes()` returns an error if any byte is not valid (passed down to `VarULE` impl of `Store`)
642         //  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (passed down to `VarULE` impl of `Store`)
643         //  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
644         //  6. `parse_bytes()` is left to its default impl
645         //  7. byte equality is semantic equality
646         #[cfg(feature = "zerovec")]
647         unsafe impl<Store> zerovec::ule::VarULE for $name<Store>
648         where
649             Store: zerovec::ule::VarULE,
650         {
651             #[inline]
652             fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
653                 Store::validate_bytes(bytes)
654             }
655             #[inline]
656             unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
657                 // Safety: we can pass down the validity invariant to Store
658                 Self::transparent_ref_from_store(Store::from_bytes_unchecked(bytes))
659             }
660         }
661         #[cfg(feature = "zerofrom")]
662         impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, $name<Store1>> for $name<Store2>
663         where
664             Store2: zerofrom::ZeroFrom<'zf, Store1>,
665         {
666             #[inline]
667             fn zero_from(other: &'zf $name<Store1>) -> Self {
668                 $name::from_store(zerofrom::ZeroFrom::zero_from(&other.store))
669             }
670         }
671     };
672 }
673 
674 #[cfg(feature = "alloc")]
string_to_box_u8(input: String) -> Box<[u8]>675 fn string_to_box_u8(input: String) -> Box<[u8]> {
676     input.into_boxed_str().into_boxed_bytes()
677 }
678 
679 #[doc(hidden)] // subject to change
680 #[cfg(feature = "alloc")]
681 pub type ZeroTrieStringIterator<'a> =
682     core::iter::Map<reader::ZeroTrieIterator<'a>, fn((Vec<u8>, usize)) -> (String, usize)>;
683 
684 impl_zerotrie_subtype!(
685     ZeroTrieSimpleAscii,
686     String,
687     reader::get_iter_ascii_or_panic,
688     ZeroTrieStringIterator,
689     string_to_box_u8
690 );
691 impl_zerotrie_subtype!(
692     ZeroAsciiIgnoreCaseTrie,
693     String,
694     reader::get_iter_ascii_or_panic,
695     ZeroTrieStringIterator,
696     string_to_box_u8
697 );
698 impl_zerotrie_subtype!(
699     ZeroTriePerfectHash,
700     Vec<u8>,
701     reader::get_iter_phf,
702     reader::ZeroTrieIterator<'_>,
703     Vec::into_boxed_slice
704 );
705 impl_zerotrie_subtype!(
706     ZeroTrieExtendedCapacity,
707     Vec<u8>,
708     reader::get_iter_phf,
709     reader::ZeroTrieIterator<'_>,
710     Vec::into_boxed_slice
711 );
712 
713 macro_rules! impl_dispatch {
714     ($self:ident, $inner_fn:ident()) => {
715         match $self.0 {
716             ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
717             ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
718             ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
719         }
720     };
721     ($self:ident, $inner_fn:ident().into_zerotrie()) => {
722         match $self.0 {
723             ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn().into_zerotrie(),
724             ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn().into_zerotrie(),
725             ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn().into_zerotrie(),
726         }
727     };
728     (&$self:ident, $inner_fn:ident()) => {
729         match &$self.0 {
730             ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
731             ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
732             ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
733         }
734     };
735     ($self:ident, $inner_fn:ident($arg:ident)) => {
736         match $self.0 {
737             ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
738             ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
739             ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
740         }
741     };
742     (&$self:ident, $inner_fn:ident($arg:ident)) => {
743         match &$self.0 {
744             ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
745             ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
746             ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
747         }
748     };
749     (&$self:ident, $trait:ident::$inner_fn:ident()) => {
750         match &$self.0 {
751             ZeroTrieFlavor::SimpleAscii(subtype) => {
752                 ZeroTrie(ZeroTrieFlavor::SimpleAscii($trait::$inner_fn(subtype)))
753             }
754             ZeroTrieFlavor::PerfectHash(subtype) => {
755                 ZeroTrie(ZeroTrieFlavor::PerfectHash($trait::$inner_fn(subtype)))
756             }
757             ZeroTrieFlavor::ExtendedCapacity(subtype) => {
758                 ZeroTrie(ZeroTrieFlavor::ExtendedCapacity($trait::$inner_fn(subtype)))
759             }
760         }
761     };
762 }
763 
764 impl<Store> ZeroTrie<Store> {
765     /// Takes the byte store from this trie.
into_store(self) -> Store766     pub fn into_store(self) -> Store {
767         impl_dispatch!(self, into_store())
768     }
769     /// Converts this trie's store to a different store implementing the `From` trait.
770     ///
771     /// For example, use this to change `ZeroTrie<Vec<u8>>` to `ZeroTrie<Cow<[u8]>>`.
convert_store<NewStore>(self) -> ZeroTrie<NewStore> where NewStore: From<Store>,772     pub fn convert_store<NewStore>(self) -> ZeroTrie<NewStore>
773     where
774         NewStore: From<Store>,
775     {
776         impl_dispatch!(self, convert_store().into_zerotrie())
777     }
778 }
779 
780 impl<Store> ZeroTrie<Store>
781 where
782     Store: AsRef<[u8]>,
783 {
784     /// Queries the trie for a string.
get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]>,785     pub fn get<K>(&self, key: K) -> Option<usize>
786     where
787         K: AsRef<[u8]>,
788     {
789         impl_dispatch!(&self, get(key))
790     }
791     /// Returns `true` if the trie is empty.
is_empty(&self) -> bool792     pub fn is_empty(&self) -> bool {
793         impl_dispatch!(&self, is_empty())
794     }
795     /// Returns the size of the trie in number of bytes.
796     ///
797     /// To get the number of keys in the trie, use `.iter().count()`.
byte_len(&self) -> usize798     pub fn byte_len(&self) -> usize {
799         impl_dispatch!(&self, byte_len())
800     }
801 }
802 
803 #[cfg(feature = "alloc")]
804 impl<Store> ZeroTrie<Store>
805 where
806     Store: AsRef<[u8]>,
807 {
808     /// Exports the data from this ZeroTrie into a BTreeMap.
to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize>809     pub fn to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize> {
810         impl_dispatch!(&self, to_btreemap_bytes())
811     }
812 }
813 
814 #[cfg(feature = "litemap")]
815 impl<Store> ZeroTrie<Store>
816 where
817     Store: AsRef<[u8]>,
818 {
819     /// Exports the data from this ZeroTrie into a LiteMap.
to_litemap(&self) -> LiteMap<Box<[u8]>, usize>820     pub fn to_litemap(&self) -> LiteMap<Box<[u8]>, usize> {
821         impl_dispatch!(&self, to_litemap_bytes())
822     }
823 }
824 
825 #[cfg(feature = "alloc")]
826 impl ZeroTrie<Vec<u8>> {
try_from_tuple_slice( items: &[(&ByteStr, usize)], ) -> Result<Self, ZeroTrieBuildError>827     pub(crate) fn try_from_tuple_slice(
828         items: &[(&ByteStr, usize)],
829     ) -> Result<Self, ZeroTrieBuildError> {
830         let is_all_ascii = items.iter().all(|(s, _)| s.is_all_ascii());
831         if is_all_ascii && items.len() < 512 {
832             ZeroTrieSimpleAscii::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
833         } else {
834             ZeroTriePerfectHash::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
835         }
836     }
837 }
838 
839 #[cfg(feature = "alloc")]
840 impl<K> FromIterator<(K, usize)> for ZeroTrie<Vec<u8>>
841 where
842     K: AsRef<[u8]>,
843 {
from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self844     fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
845         // We need two Vecs because the first one anchors the `K`s that the second one borrows.
846         let items = Vec::from_iter(iter);
847         let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect();
848         items.sort();
849         let byte_str_slice = ByteStr::from_byte_slice_with_value(&items);
850         #[allow(clippy::unwrap_used)] // FromIterator is panicky
851         Self::try_from_tuple_slice(byte_str_slice).unwrap()
852     }
853 }
854 
855 #[cfg(feature = "databake")]
856 impl<Store> databake::Bake for ZeroTrie<Store>
857 where
858     Store: databake::Bake,
859 {
bake(&self, env: &databake::CrateEnv) -> databake::TokenStream860     fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
861         use databake::*;
862         let inner = impl_dispatch!(&self, bake(env));
863         quote! { #inner.into_zerotrie() }
864     }
865 }
866 
867 #[cfg(feature = "databake")]
868 impl<Store> databake::BakeSize for ZeroTrie<Store>
869 where
870     Store: databake::BakeSize,
871 {
borrows_size(&self) -> usize872     fn borrows_size(&self) -> usize {
873         impl_dispatch!(&self, borrows_size())
874     }
875 }
876 
877 #[cfg(feature = "zerofrom")]
878 impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, ZeroTrie<Store1>> for ZeroTrie<Store2>
879 where
880     Store2: zerofrom::ZeroFrom<'zf, Store1>,
881 {
zero_from(other: &'zf ZeroTrie<Store1>) -> Self882     fn zero_from(other: &'zf ZeroTrie<Store1>) -> Self {
883         use zerofrom::ZeroFrom;
884         impl_dispatch!(&other, ZeroFrom::zero_from())
885     }
886 }
887