1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use crate::reader;
6
7 use core::borrow::Borrow;
8
9 #[cfg(feature = "alloc")]
10 use crate::{
11 builder::bytestr::ByteStr, builder::nonconst::ZeroTrieBuilder, error::ZeroTrieBuildError,
12 };
13 #[cfg(feature = "alloc")]
14 use alloc::{boxed::Box, collections::BTreeMap, collections::VecDeque, string::String, vec::Vec};
15 #[cfg(feature = "litemap")]
16 use litemap::LiteMap;
17
18 /// A data structure that compactly maps from byte sequences to integers.
19 ///
20 /// There are several variants of `ZeroTrie` which are very similar but are optimized
21 /// for different use cases:
22 ///
23 /// - [`ZeroTrieSimpleAscii`] is the most compact structure. Very fast for small data.
24 /// Only stores ASCII-encoded strings. Can be const-constructed!
25 /// - [`ZeroTriePerfectHash`] is also compact, but it also supports arbitrary binary
26 /// strings. It also scales better to large data. Cannot be const-constructed.
27 /// - [`ZeroTrieExtendedCapacity`] can be used if more than 2^32 bytes are required.
28 ///
29 /// You can create a `ZeroTrie` directly, in which case the most appropriate
30 /// backing implementation will be chosen.
31 ///
32 /// # Backing Store
33 ///
34 /// The data structure has a flexible backing data store. The only requirement for most
35 /// functionality is that it implement `AsRef<[u8]>`. All of the following are valid
36 /// ZeroTrie types:
37 ///
38 /// - `ZeroTrie<[u8]>` (dynamically sized type: must be stored in a reference or Box)
39 /// - `ZeroTrie<&[u8]>` (borrows its data from a u8 buffer)
40 /// - `ZeroTrie<Vec<u8>>` (fully owned data)
41 /// - `ZeroTrie<ZeroVec<u8>>` (the recommended borrowed-or-owned signature)
42 /// - `Cow<ZeroTrie<[u8]>>` (another borrowed-or-owned signature)
43 /// - `ZeroTrie<Cow<[u8]>>` (another borrowed-or-owned signature)
44 ///
45 /// # Examples
46 ///
47 /// ```
48 /// use litemap::LiteMap;
49 /// use zerotrie::ZeroTrie;
50 ///
51 /// let mut map = LiteMap::<&[u8], usize>::new_vec();
52 /// map.insert("foo".as_bytes(), 1);
53 /// map.insert("bar".as_bytes(), 2);
54 /// map.insert("bazzoo".as_bytes(), 3);
55 ///
56 /// let trie = ZeroTrie::try_from(&map)?;
57 ///
58 /// assert_eq!(trie.get("foo"), Some(1));
59 /// assert_eq!(trie.get("bar"), Some(2));
60 /// assert_eq!(trie.get("bazzoo"), Some(3));
61 /// assert_eq!(trie.get("unknown"), None);
62 ///
63 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
64 /// ```
65 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
66 // Note: The absence of the following derive does not cause any test failures in this crate
67 #[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
68 pub struct ZeroTrie<Store>(pub(crate) ZeroTrieFlavor<Store>);
69
70 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
71 pub(crate) enum ZeroTrieFlavor<Store> {
72 SimpleAscii(ZeroTrieSimpleAscii<Store>),
73 PerfectHash(ZeroTriePerfectHash<Store>),
74 ExtendedCapacity(ZeroTrieExtendedCapacity<Store>),
75 }
76
77 /// A data structure that compactly maps from ASCII strings to integers.
78 ///
79 /// For more information, see [`ZeroTrie`].
80 ///
81 /// # Examples
82 ///
83 /// ```
84 /// use litemap::LiteMap;
85 /// use zerotrie::ZeroTrieSimpleAscii;
86 ///
87 /// let mut map = LiteMap::new_vec();
88 /// map.insert(&b"foo"[..], 1);
89 /// map.insert(b"bar", 2);
90 /// map.insert(b"bazzoo", 3);
91 ///
92 /// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
93 ///
94 /// assert_eq!(trie.get(b"foo"), Some(1));
95 /// assert_eq!(trie.get(b"bar"), Some(2));
96 /// assert_eq!(trie.get(b"bazzoo"), Some(3));
97 /// assert_eq!(trie.get(b"unknown"), None);
98 ///
99 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
100 /// ```
101 ///
102 /// The trie can only store ASCII bytes; a string with non-ASCII always returns None:
103 ///
104 /// ```
105 /// use zerotrie::ZeroTrieSimpleAscii;
106 ///
107 /// // A trie with two values: "abc" and "abcdef"
108 /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
109 ///
110 /// assert!(matches!(trie.get(b"ab\xFF"), None));
111 /// ```
112 #[repr(transparent)]
113 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
114 #[cfg_attr(feature = "databake", derive(databake::Bake))]
115 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
116 #[allow(clippy::exhaustive_structs)] // databake hidden fields
117 pub struct ZeroTrieSimpleAscii<Store: ?Sized> {
118 #[doc(hidden)] // for databake, but there are no invariants
119 pub store: Store,
120 }
121
122 impl<Store: ?Sized> ZeroTrieSimpleAscii<Store> {
transparent_ref_from_store(s: &Store) -> &Self123 fn transparent_ref_from_store(s: &Store) -> &Self {
124 unsafe {
125 // Safety: Self is transparent over Store
126 core::mem::transmute(s)
127 }
128 }
129 }
130
131 impl<Store> ZeroTrieSimpleAscii<Store> {
132 /// Wrap this specific ZeroTrie variant into a ZeroTrie.
133 #[inline]
into_zerotrie(self) -> ZeroTrie<Store>134 pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
135 ZeroTrie(ZeroTrieFlavor::SimpleAscii(self))
136 }
137 }
138
139 /// A data structure that compactly maps from ASCII strings to integers
140 /// in a case-insensitive way.
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// use litemap::LiteMap;
146 /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
147 ///
148 /// let mut map = LiteMap::new_vec();
149 /// map.insert(&b"foo"[..], 1);
150 /// map.insert(b"Bar", 2);
151 /// map.insert(b"Bazzoo", 3);
152 ///
153 /// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
154 ///
155 /// assert_eq!(trie.get(b"foo"), Some(1));
156 /// assert_eq!(trie.get(b"bar"), Some(2));
157 /// assert_eq!(trie.get(b"BAR"), Some(2));
158 /// assert_eq!(trie.get(b"bazzoo"), Some(3));
159 /// assert_eq!(trie.get(b"unknown"), None);
160 ///
161 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
162 /// ```
163 ///
164 /// Strings with different cases of the same character at the same offset are not allowed:
165 ///
166 /// ```
167 /// use litemap::LiteMap;
168 /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
169 ///
170 /// let mut map = LiteMap::new_vec();
171 /// map.insert(&b"bar"[..], 1);
172 /// // OK: 'r' and 'Z' are different letters
173 /// map.insert(b"baZ", 2);
174 /// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
175 /// map.insert(b"baR", 2);
176 ///
177 /// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
178 /// ```
179 #[repr(transparent)]
180 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
181 #[cfg_attr(feature = "databake", derive(databake::Bake))]
182 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
183 #[allow(clippy::exhaustive_structs)] // databake hidden fields
184 pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
185 #[doc(hidden)] // for databake, but there are no invariants
186 pub store: Store,
187 }
188
189 impl<Store: ?Sized> ZeroAsciiIgnoreCaseTrie<Store> {
transparent_ref_from_store(s: &Store) -> &Self190 fn transparent_ref_from_store(s: &Store) -> &Self {
191 unsafe {
192 // Safety: Self is transparent over Store
193 core::mem::transmute(s)
194 }
195 }
196 }
197
198 // Note: ZeroAsciiIgnoreCaseTrie is not a variant of ZeroTrie so there is no `into_zerotrie`
199
200 /// A data structure that compactly maps from byte strings to integers.
201 ///
202 /// For more information, see [`ZeroTrie`].
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use litemap::LiteMap;
208 /// use zerotrie::ZeroTriePerfectHash;
209 ///
210 /// let mut map = LiteMap::<&[u8], usize>::new_vec();
211 /// map.insert("foo".as_bytes(), 1);
212 /// map.insert("bår".as_bytes(), 2);
213 /// map.insert("båzzøø".as_bytes(), 3);
214 ///
215 /// let trie = ZeroTriePerfectHash::try_from(&map)?;
216 ///
217 /// assert_eq!(trie.get("foo".as_bytes()), Some(1));
218 /// assert_eq!(trie.get("bår".as_bytes()), Some(2));
219 /// assert_eq!(trie.get("båzzøø".as_bytes()), Some(3));
220 /// assert_eq!(trie.get("bazzoo".as_bytes()), None);
221 ///
222 /// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
223 /// ```
224 #[repr(transparent)]
225 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
226 #[cfg_attr(feature = "databake", derive(databake::Bake))]
227 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
228 #[allow(clippy::exhaustive_structs)] // databake hidden fields
229 pub struct ZeroTriePerfectHash<Store: ?Sized> {
230 #[doc(hidden)] // for databake, but there are no invariants
231 pub store: Store,
232 }
233
234 impl<Store: ?Sized> ZeroTriePerfectHash<Store> {
transparent_ref_from_store(s: &Store) -> &Self235 fn transparent_ref_from_store(s: &Store) -> &Self {
236 unsafe {
237 // Safety: Self is transparent over Store
238 core::mem::transmute(s)
239 }
240 }
241 }
242
243 impl<Store> ZeroTriePerfectHash<Store> {
244 /// Wrap this specific ZeroTrie variant into a ZeroTrie.
245 #[inline]
into_zerotrie(self) -> ZeroTrie<Store>246 pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
247 ZeroTrie(ZeroTrieFlavor::PerfectHash(self))
248 }
249 }
250
251 /// A data structure that maps from a large number of byte strings to integers.
252 ///
253 /// For more information, see [`ZeroTrie`].
254 #[repr(transparent)]
255 #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
256 #[cfg_attr(feature = "databake", derive(databake::Bake))]
257 #[cfg_attr(feature = "databake", databake(path = zerotrie))]
258 #[allow(clippy::exhaustive_structs)] // databake hidden fields
259 pub struct ZeroTrieExtendedCapacity<Store: ?Sized> {
260 #[doc(hidden)] // for databake, but there are no invariants
261 pub store: Store,
262 }
263
264 impl<Store: ?Sized> ZeroTrieExtendedCapacity<Store> {
transparent_ref_from_store(s: &Store) -> &Self265 fn transparent_ref_from_store(s: &Store) -> &Self {
266 unsafe {
267 // Safety: Self is transparent over Store
268 core::mem::transmute(s)
269 }
270 }
271 }
272
273 impl<Store> ZeroTrieExtendedCapacity<Store> {
274 /// Wrap this specific ZeroTrie variant into a ZeroTrie.
275 #[inline]
into_zerotrie(self) -> ZeroTrie<Store>276 pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
277 ZeroTrie(ZeroTrieFlavor::ExtendedCapacity(self))
278 }
279 }
280
281 macro_rules! impl_zerotrie_subtype {
282 ($name:ident, $iter_element:ty, $iter_fn:path, $iter_ty:ty, $cnv_fn:path) => {
283 impl<Store> $name<Store> {
284 /// Create a trie directly from a store.
285 ///
286 /// If the store does not contain valid bytes, unexpected behavior may occur.
287 #[inline]
288 pub const fn from_store(store: Store) -> Self {
289 Self { store }
290 }
291 /// Takes the byte store from this trie.
292 #[inline]
293 pub fn into_store(self) -> Store {
294 self.store
295 }
296 /// Converts this trie's store to a different store implementing the `From` trait.
297 ///
298 #[doc = concat!("For example, use this to change `", stringify!($name), "<Vec<u8>>` to `", stringify!($name), "<Cow<[u8]>>`.")]
299 ///
300 /// # Examples
301 ///
302 /// ```
303 /// use std::borrow::Cow;
304 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
305 ///
306 #[doc = concat!("let trie: ", stringify!($name), "<Vec<u8>> = ", stringify!($name), "::from_bytes(b\"abc\\x85\").to_owned();")]
307 #[doc = concat!("let cow: ", stringify!($name), "<Cow<[u8]>> = trie.convert_store();")]
308 ///
309 /// assert_eq!(cow.get(b"abc"), Some(5));
310 /// ```
311 pub fn convert_store<X: From<Store>>(self) -> $name<X> {
312 $name::<X>::from_store(X::from(self.store))
313 }
314 }
315 impl<Store> $name<Store>
316 where
317 Store: AsRef<[u8]> + ?Sized,
318 {
319 /// Queries the trie for a string.
320 pub fn get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]> {
321 // TODO: Should this be AsRef or Borrow?
322 reader::get_parameterized::<Self>(self.store.as_ref(), key.as_ref())
323 }
324 /// Returns `true` if the trie is empty.
325 #[inline]
326 pub fn is_empty(&self) -> bool {
327 self.store.as_ref().is_empty()
328 }
329 /// Returns the size of the trie in number of bytes.
330 ///
331 /// To get the number of keys in the trie, use `.iter().count()`:
332 ///
333 /// ```
334 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
335 ///
336 /// // A trie with two values: "abc" and "abcdef"
337 #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
338 ///
339 /// assert_eq!(8, trie.byte_len());
340 /// assert_eq!(2, trie.iter().count());
341 /// ```
342 #[inline]
343 pub fn byte_len(&self) -> usize {
344 self.store.as_ref().len()
345 }
346 /// Returns the bytes contained in the underlying store.
347 #[inline]
348 pub fn as_bytes(&self) -> &[u8] {
349 self.store.as_ref()
350 }
351 /// Returns this trie as a reference transparent over a byte slice.
352 #[inline]
353 pub fn as_borrowed(&self) -> &$name<[u8]> {
354 $name::from_bytes(self.store.as_ref())
355 }
356 /// Returns a trie with a store borrowing from this trie.
357 #[inline]
358 pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
359 $name::from_store(self.store.as_ref())
360 }
361 }
362 impl<Store> AsRef<$name<[u8]>> for $name<Store>
363 where
364 Store: AsRef<[u8]> + ?Sized,
365 {
366 #[inline]
367 fn as_ref(&self) -> &$name<[u8]> {
368 self.as_borrowed()
369 }
370 }
371 #[cfg(feature = "alloc")]
372 impl<Store> $name<Store>
373 where
374 Store: AsRef<[u8]> + ?Sized,
375 {
376 /// Converts a possibly-borrowed $name to an owned one.
377 ///
378 /// ✨ *Enabled with the `alloc` Cargo feature.*
379 ///
380 /// # Examples
381 ///
382 /// ```
383 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
384 ///
385 #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x85\");")]
386 #[doc = concat!("let owned: ", stringify!($name), "<Vec<u8>> = trie.to_owned();")]
387 ///
388 /// assert_eq!(trie.get(b"abc"), Some(5));
389 /// assert_eq!(owned.get(b"abc"), Some(5));
390 /// ```
391 #[inline]
392 pub fn to_owned(&self) -> $name<Vec<u8>> {
393 $name::from_store(
394 Vec::from(self.store.as_ref()),
395 )
396 }
397 /// Returns an iterator over the key/value pairs in this trie.
398 ///
399 /// ✨ *Enabled with the `alloc` Cargo feature.*
400 ///
401 /// # Examples
402 ///
403 /// ```
404 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
405 ///
406 /// // A trie with two values: "abc" and "abcdef"
407 #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
408 ///
409 /// let mut it = trie.iter();
410 /// assert_eq!(it.next(), Some(("abc".into(), 0)));
411 /// assert_eq!(it.next(), Some(("abcdef".into(), 1)));
412 /// assert_eq!(it.next(), None);
413 /// ```
414 #[inline]
415 #[allow(clippy::type_complexity)]
416 pub fn iter(&self) -> $iter_ty {
417 $iter_fn(self.as_bytes())
418 }
419 }
420 impl $name<[u8]> {
421 /// Casts from a byte slice to a reference to a trie with the same lifetime.
422 ///
423 /// If the bytes are not a valid trie, unexpected behavior may occur.
424 #[inline]
425 pub fn from_bytes(trie: &[u8]) -> &Self {
426 Self::transparent_ref_from_store(trie)
427 }
428 }
429 #[cfg(feature = "alloc")]
430 impl $name<Vec<u8>> {
431 pub(crate) fn try_from_tuple_slice(items: &[(&ByteStr, usize)]) -> Result<Self, ZeroTrieBuildError> {
432 use crate::options::ZeroTrieWithOptions;
433 ZeroTrieBuilder::<VecDeque<u8>>::from_sorted_tuple_slice(
434 items,
435 Self::OPTIONS,
436 )
437 .map(|s| Self {
438 store: s.to_bytes(),
439 })
440 }
441 }
442 #[cfg(feature = "alloc")]
443 impl<'a, K> FromIterator<(K, usize)> for $name<Vec<u8>>
444 where
445 K: AsRef<[u8]>
446 {
447 fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
448 use crate::options::ZeroTrieWithOptions;
449 use crate::builder::nonconst::ZeroTrieBuilder;
450 ZeroTrieBuilder::<VecDeque<u8>>::from_bytes_iter(
451 iter,
452 Self::OPTIONS
453 )
454 .map(|s| Self {
455 store: s.to_bytes(),
456 })
457 .unwrap()
458 }
459 }
460 #[cfg(feature = "alloc")]
461 impl<'a, K> TryFrom<&'a BTreeMap<K, usize>> for $name<Vec<u8>>
462 where
463 K: Borrow<[u8]>
464 {
465 type Error = crate::error::ZeroTrieBuildError;
466 fn try_from(map: &'a BTreeMap<K, usize>) -> Result<Self, Self::Error> {
467 let tuples: Vec<(&[u8], usize)> = map
468 .iter()
469 .map(|(k, v)| (k.borrow(), *v))
470 .collect();
471 let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
472 Self::try_from_tuple_slice(byte_str_slice)
473 }
474 }
475 #[cfg(feature = "alloc")]
476 impl<Store> $name<Store>
477 where
478 Store: AsRef<[u8]> + ?Sized
479 {
480 /// Exports the data from this ZeroTrie type into a BTreeMap.
481 ///
482 /// ✨ *Enabled with the `alloc` Cargo feature.*
483 ///
484 /// # Examples
485 ///
486 /// ```
487 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
488 /// use std::collections::BTreeMap;
489 ///
490 #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
491 /// let items = trie.to_btreemap();
492 ///
493 /// assert_eq!(items.len(), 2);
494 ///
495 #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
496 /// .into_iter()
497 /// .collect();
498 /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
499 /// ```
500 pub fn to_btreemap(&self) -> BTreeMap<$iter_element, usize> {
501 self.iter().collect()
502 }
503 #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
504 pub(crate) fn to_btreemap_bytes(&self) -> BTreeMap<Box<[u8]>, usize> {
505 self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
506 }
507 }
508 #[cfg(feature = "alloc")]
509 impl<Store> From<&$name<Store>> for BTreeMap<$iter_element, usize>
510 where
511 Store: AsRef<[u8]> + ?Sized,
512 {
513 #[inline]
514 fn from(other: &$name<Store>) -> Self {
515 other.to_btreemap()
516 }
517 }
518 #[cfg(feature = "litemap")]
519 impl<'a, K, S> TryFrom<&'a LiteMap<K, usize, S>> for $name<Vec<u8>>
520 where
521 K: Borrow<[u8]>,
522 S: litemap::store::StoreIterable<'a, K, usize>,
523 {
524 type Error = crate::error::ZeroTrieBuildError;
525 fn try_from(map: &'a LiteMap<K, usize, S>) -> Result<Self, Self::Error> {
526 let tuples: Vec<(&[u8], usize)> = map
527 .iter()
528 .map(|(k, v)| (k.borrow(), *v))
529 .collect();
530 let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
531 Self::try_from_tuple_slice(byte_str_slice)
532 }
533 }
534 #[cfg(feature = "litemap")]
535 impl<Store> $name<Store>
536 where
537 Store: AsRef<[u8]> + ?Sized,
538 {
539 /// Exports the data from this ZeroTrie type into a LiteMap.
540 ///
541 /// ✨ *Enabled with the `litemap` Cargo feature.*
542 ///
543 /// # Examples
544 ///
545 /// ```
546 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
547 /// use litemap::LiteMap;
548 ///
549 #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
550 ///
551 /// let items = trie.to_litemap();
552 /// assert_eq!(items.len(), 2);
553 ///
554 #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
555 /// .iter()
556 /// .map(|(k, v)| (k, *v))
557 /// .collect();
558 /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
559 /// ```
560 pub fn to_litemap(&self) -> LiteMap<$iter_element, usize> {
561 self.iter().collect()
562 }
563 #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
564 pub(crate) fn to_litemap_bytes(&self) -> LiteMap<Box<[u8]>, usize> {
565 self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
566 }
567 }
568 #[cfg(feature = "litemap")]
569 impl<Store> From<&$name<Store>> for LiteMap<$iter_element, usize>
570 where
571 Store: AsRef<[u8]> + ?Sized,
572 {
573 #[inline]
574 fn from(other: &$name<Store>) -> Self {
575 other.to_litemap()
576 }
577 }
578 #[cfg(feature = "litemap")]
579 impl $name<Vec<u8>>
580 {
581 #[cfg(feature = "serde")]
582 pub(crate) fn try_from_serde_litemap(items: &LiteMap<Box<ByteStr>, usize>) -> Result<Self, ZeroTrieBuildError> {
583 let lm_borrowed: LiteMap<&ByteStr, usize> = items.to_borrowed_keys();
584 Self::try_from_tuple_slice(lm_borrowed.as_slice())
585 }
586 }
587 // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
588 impl Borrow<$name<[u8]>> for $name<&[u8]> {
589 #[inline]
590 fn borrow(&self) -> &$name<[u8]> {
591 self.as_borrowed()
592 }
593 }
594 // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
595 #[cfg(feature = "alloc")]
596 impl Borrow<$name<[u8]>> for $name<Box<[u8]>> {
597 #[inline]
598 fn borrow(&self) -> &$name<[u8]> {
599 self.as_borrowed()
600 }
601 }
602 // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
603 #[cfg(feature = "alloc")]
604 impl Borrow<$name<[u8]>> for $name<Vec<u8>> {
605 #[inline]
606 fn borrow(&self) -> &$name<[u8]> {
607 self.as_borrowed()
608 }
609 }
610 #[cfg(feature = "alloc")]
611 impl alloc::borrow::ToOwned for $name<[u8]> {
612 type Owned = $name<Box<[u8]>>;
613 #[doc = concat!("This impl allows [`", stringify!($name), "`] to be used inside of a [`Cow`](alloc::borrow::Cow).")]
614 ///
615 #[doc = concat!("Note that it is also possible to use `", stringify!($name), "<ZeroVec<u8>>` for a similar result.")]
616 ///
617 /// ✨ *Enabled with the `alloc` Cargo feature.*
618 ///
619 /// # Examples
620 ///
621 /// ```
622 /// use std::borrow::Cow;
623 #[doc = concat!("use zerotrie::", stringify!($name), ";")]
624 ///
625 #[doc = concat!("let trie: Cow<", stringify!($name), "<[u8]>> = Cow::Borrowed(", stringify!($name), "::from_bytes(b\"abc\\x85\"));")]
626 /// assert_eq!(trie.get(b"abc"), Some(5));
627 /// ```
628 fn to_owned(&self) -> Self::Owned {
629 let bytes: &[u8] = self.store.as_ref();
630 $name::from_store(
631 Vec::from(bytes).into_boxed_slice(),
632 )
633 }
634 }
635 // TODO(#2778): Auto-derive these impls based on the repr(transparent).
636 //
637 // Safety (based on the safety checklist on the VarULE trait):
638 // 1. `$name` does not include any uninitialized or padding bytes as it is `repr(transparent)`
639 // over a `VarULE` type, `Store`, as evidenced by the existence of `transparent_ref_from_store()`
640 // 2. `$name` is aligned to 1 byte for the same reason
641 // 3. The impl of `validate_bytes()` returns an error if any byte is not valid (passed down to `VarULE` impl of `Store`)
642 // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (passed down to `VarULE` impl of `Store`)
643 // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
644 // 6. `parse_bytes()` is left to its default impl
645 // 7. byte equality is semantic equality
646 #[cfg(feature = "zerovec")]
647 unsafe impl<Store> zerovec::ule::VarULE for $name<Store>
648 where
649 Store: zerovec::ule::VarULE,
650 {
651 #[inline]
652 fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
653 Store::validate_bytes(bytes)
654 }
655 #[inline]
656 unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
657 // Safety: we can pass down the validity invariant to Store
658 Self::transparent_ref_from_store(Store::from_bytes_unchecked(bytes))
659 }
660 }
661 #[cfg(feature = "zerofrom")]
662 impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, $name<Store1>> for $name<Store2>
663 where
664 Store2: zerofrom::ZeroFrom<'zf, Store1>,
665 {
666 #[inline]
667 fn zero_from(other: &'zf $name<Store1>) -> Self {
668 $name::from_store(zerofrom::ZeroFrom::zero_from(&other.store))
669 }
670 }
671 };
672 }
673
674 #[cfg(feature = "alloc")]
string_to_box_u8(input: String) -> Box<[u8]>675 fn string_to_box_u8(input: String) -> Box<[u8]> {
676 input.into_boxed_str().into_boxed_bytes()
677 }
678
679 #[doc(hidden)] // subject to change
680 #[cfg(feature = "alloc")]
681 pub type ZeroTrieStringIterator<'a> =
682 core::iter::Map<reader::ZeroTrieIterator<'a>, fn((Vec<u8>, usize)) -> (String, usize)>;
683
684 impl_zerotrie_subtype!(
685 ZeroTrieSimpleAscii,
686 String,
687 reader::get_iter_ascii_or_panic,
688 ZeroTrieStringIterator,
689 string_to_box_u8
690 );
691 impl_zerotrie_subtype!(
692 ZeroAsciiIgnoreCaseTrie,
693 String,
694 reader::get_iter_ascii_or_panic,
695 ZeroTrieStringIterator,
696 string_to_box_u8
697 );
698 impl_zerotrie_subtype!(
699 ZeroTriePerfectHash,
700 Vec<u8>,
701 reader::get_iter_phf,
702 reader::ZeroTrieIterator<'_>,
703 Vec::into_boxed_slice
704 );
705 impl_zerotrie_subtype!(
706 ZeroTrieExtendedCapacity,
707 Vec<u8>,
708 reader::get_iter_phf,
709 reader::ZeroTrieIterator<'_>,
710 Vec::into_boxed_slice
711 );
712
713 macro_rules! impl_dispatch {
714 ($self:ident, $inner_fn:ident()) => {
715 match $self.0 {
716 ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
717 ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
718 ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
719 }
720 };
721 ($self:ident, $inner_fn:ident().into_zerotrie()) => {
722 match $self.0 {
723 ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn().into_zerotrie(),
724 ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn().into_zerotrie(),
725 ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn().into_zerotrie(),
726 }
727 };
728 (&$self:ident, $inner_fn:ident()) => {
729 match &$self.0 {
730 ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
731 ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
732 ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
733 }
734 };
735 ($self:ident, $inner_fn:ident($arg:ident)) => {
736 match $self.0 {
737 ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
738 ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
739 ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
740 }
741 };
742 (&$self:ident, $inner_fn:ident($arg:ident)) => {
743 match &$self.0 {
744 ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
745 ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
746 ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
747 }
748 };
749 (&$self:ident, $trait:ident::$inner_fn:ident()) => {
750 match &$self.0 {
751 ZeroTrieFlavor::SimpleAscii(subtype) => {
752 ZeroTrie(ZeroTrieFlavor::SimpleAscii($trait::$inner_fn(subtype)))
753 }
754 ZeroTrieFlavor::PerfectHash(subtype) => {
755 ZeroTrie(ZeroTrieFlavor::PerfectHash($trait::$inner_fn(subtype)))
756 }
757 ZeroTrieFlavor::ExtendedCapacity(subtype) => {
758 ZeroTrie(ZeroTrieFlavor::ExtendedCapacity($trait::$inner_fn(subtype)))
759 }
760 }
761 };
762 }
763
764 impl<Store> ZeroTrie<Store> {
765 /// Takes the byte store from this trie.
into_store(self) -> Store766 pub fn into_store(self) -> Store {
767 impl_dispatch!(self, into_store())
768 }
769 /// Converts this trie's store to a different store implementing the `From` trait.
770 ///
771 /// For example, use this to change `ZeroTrie<Vec<u8>>` to `ZeroTrie<Cow<[u8]>>`.
convert_store<NewStore>(self) -> ZeroTrie<NewStore> where NewStore: From<Store>,772 pub fn convert_store<NewStore>(self) -> ZeroTrie<NewStore>
773 where
774 NewStore: From<Store>,
775 {
776 impl_dispatch!(self, convert_store().into_zerotrie())
777 }
778 }
779
780 impl<Store> ZeroTrie<Store>
781 where
782 Store: AsRef<[u8]>,
783 {
784 /// Queries the trie for a string.
get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]>,785 pub fn get<K>(&self, key: K) -> Option<usize>
786 where
787 K: AsRef<[u8]>,
788 {
789 impl_dispatch!(&self, get(key))
790 }
791 /// Returns `true` if the trie is empty.
is_empty(&self) -> bool792 pub fn is_empty(&self) -> bool {
793 impl_dispatch!(&self, is_empty())
794 }
795 /// Returns the size of the trie in number of bytes.
796 ///
797 /// To get the number of keys in the trie, use `.iter().count()`.
byte_len(&self) -> usize798 pub fn byte_len(&self) -> usize {
799 impl_dispatch!(&self, byte_len())
800 }
801 }
802
803 #[cfg(feature = "alloc")]
804 impl<Store> ZeroTrie<Store>
805 where
806 Store: AsRef<[u8]>,
807 {
808 /// Exports the data from this ZeroTrie into a BTreeMap.
to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize>809 pub fn to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize> {
810 impl_dispatch!(&self, to_btreemap_bytes())
811 }
812 }
813
814 #[cfg(feature = "litemap")]
815 impl<Store> ZeroTrie<Store>
816 where
817 Store: AsRef<[u8]>,
818 {
819 /// Exports the data from this ZeroTrie into a LiteMap.
to_litemap(&self) -> LiteMap<Box<[u8]>, usize>820 pub fn to_litemap(&self) -> LiteMap<Box<[u8]>, usize> {
821 impl_dispatch!(&self, to_litemap_bytes())
822 }
823 }
824
825 #[cfg(feature = "alloc")]
826 impl ZeroTrie<Vec<u8>> {
try_from_tuple_slice( items: &[(&ByteStr, usize)], ) -> Result<Self, ZeroTrieBuildError>827 pub(crate) fn try_from_tuple_slice(
828 items: &[(&ByteStr, usize)],
829 ) -> Result<Self, ZeroTrieBuildError> {
830 let is_all_ascii = items.iter().all(|(s, _)| s.is_all_ascii());
831 if is_all_ascii && items.len() < 512 {
832 ZeroTrieSimpleAscii::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
833 } else {
834 ZeroTriePerfectHash::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
835 }
836 }
837 }
838
839 #[cfg(feature = "alloc")]
840 impl<K> FromIterator<(K, usize)> for ZeroTrie<Vec<u8>>
841 where
842 K: AsRef<[u8]>,
843 {
from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self844 fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
845 // We need two Vecs because the first one anchors the `K`s that the second one borrows.
846 let items = Vec::from_iter(iter);
847 let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect();
848 items.sort();
849 let byte_str_slice = ByteStr::from_byte_slice_with_value(&items);
850 #[allow(clippy::unwrap_used)] // FromIterator is panicky
851 Self::try_from_tuple_slice(byte_str_slice).unwrap()
852 }
853 }
854
855 #[cfg(feature = "databake")]
856 impl<Store> databake::Bake for ZeroTrie<Store>
857 where
858 Store: databake::Bake,
859 {
bake(&self, env: &databake::CrateEnv) -> databake::TokenStream860 fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
861 use databake::*;
862 let inner = impl_dispatch!(&self, bake(env));
863 quote! { #inner.into_zerotrie() }
864 }
865 }
866
867 #[cfg(feature = "databake")]
868 impl<Store> databake::BakeSize for ZeroTrie<Store>
869 where
870 Store: databake::BakeSize,
871 {
borrows_size(&self) -> usize872 fn borrows_size(&self) -> usize {
873 impl_dispatch!(&self, borrows_size())
874 }
875 }
876
877 #[cfg(feature = "zerofrom")]
878 impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, ZeroTrie<Store1>> for ZeroTrie<Store2>
879 where
880 Store2: zerofrom::ZeroFrom<'zf, Store1>,
881 {
zero_from(other: &'zf ZeroTrie<Store1>) -> Self882 fn zero_from(other: &'zf ZeroTrie<Store1>) -> Self {
883 use zerofrom::ZeroFrom;
884 impl_dispatch!(&other, ZeroFrom::zero_from())
885 }
886 }
887