• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use super::components::{VarZeroSliceIter, VarZeroVecComponents};
6 use super::vec::VarZeroVecInner;
7 use super::*;
8 use crate::ule::*;
9 use core::cmp::{Ord, Ordering, PartialOrd};
10 use core::fmt;
11 use core::marker::PhantomData;
12 use core::mem;
13 
14 use core::ops::Index;
15 use core::ops::Range;
16 
17 /// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
18 /// where `T` is not `Sized`.
19 ///
20 /// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
21 /// owned data and as such is ideal for deserialization since most human readable
22 /// serialization formats cannot unconditionally deserialize zero-copy.
23 ///
24 /// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
25 /// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
26 /// using `VarZeroVec<ZeroSlice<T>>`.
27 ///
28 /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
29 /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
30 ///
31 /// This type can be nested within itself to allow for multi-level nested `Vec`s.
32 ///
33 /// # Examples
34 ///
35 /// ## Nested Slices
36 ///
37 /// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
38 ///
39 /// ```rust
40 /// use zerovec::{VarZeroSlice, VarZeroVec};
41 /// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
42 /// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
43 /// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
44 /// let strings_4: Vec<&str> = vec!["w", "ω", "文", "��"];
45 /// let strings_12 = vec![&*strings_1, &*strings_2];
46 /// let strings_34 = vec![&*strings_3, &*strings_4];
47 /// let all_strings = vec![strings_12, strings_34];
48 ///
49 /// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
50 /// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
51 /// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
52 /// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
53 /// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
54 /// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
55 /// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
56 ///
57 /// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
58 ///     .iter()
59 ///     .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
60 ///         v.iter()
61 ///             .map(|x: &VarZeroSlice<_>| {
62 ///                 x.as_varzerovec()
63 ///                     .iter()
64 ///                     .map(|s| s.to_owned())
65 ///                     .collect::<Vec<String>>()
66 ///             })
67 ///             .collect::<Vec<_>>()
68 ///     })
69 ///     .collect::<Vec<_>>();
70 /// assert_eq!(reconstructed, all_strings);
71 ///
72 /// let bytes = vzv_all.as_bytes();
73 /// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
74 ///     VarZeroVec::parse_bytes(bytes).unwrap();
75 /// assert_eq!(vzv_from_bytes, vzv_all);
76 /// ```
77 ///
78 /// ## Iterate over Windows
79 ///
80 /// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
81 /// [core::slice::Windows], this behavior can be easily modeled using an iterator:
82 ///
83 /// ```
84 /// use zerovec::VarZeroVec;
85 ///
86 /// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
87 /// # let mut pairs: Vec<(&str, &str)> = Vec::new();
88 ///
89 /// let mut it = vzv.iter().peekable();
90 /// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
91 ///     // Evaluate (x, y) here.
92 /// #   pairs.push((x, y));
93 /// }
94 /// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
95 /// ```
96 //
97 // safety invariant: The slice MUST be one which parses to
98 // a valid VarZeroVecComponents<T>
99 #[repr(transparent)]
100 pub struct VarZeroSlice<T: ?Sized, F = Index16> {
101     marker: PhantomData<(F, T)>,
102     /// The original slice this was constructed from
103     entire_slice: [u8],
104 }
105 
106 impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
107     /// Construct a new empty VarZeroSlice
new_empty() -> &'static Self108     pub const fn new_empty() -> &'static Self {
109         // The empty VZV is special-cased to the empty slice
110         unsafe { mem::transmute(&[] as &[u8]) }
111     }
112 
113     /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
114     #[inline]
as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F>115     pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
116         unsafe {
117             // safety: VarZeroSlice is guaranteed to parse here
118             VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
119         }
120     }
121 
122     /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
123     ///
124     /// # Safety
125     ///
126     /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
from_bytes_unchecked(bytes: &[u8]) -> &Self127     pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
128         // self is really just a wrapper around a byte slice
129         mem::transmute(bytes)
130     }
131 
132     /// Get the number of elements in this slice
133     ///
134     /// # Example
135     ///
136     /// ```rust
137     /// # use zerovec::VarZeroVec;
138     ///
139     /// let strings = vec!["foo", "bar", "baz", "quux"];
140     /// let vec = VarZeroVec::<str>::from(&strings);
141     ///
142     /// assert_eq!(vec.len(), 4);
143     /// ```
len(&self) -> usize144     pub fn len(&self) -> usize {
145         self.as_components().len()
146     }
147 
148     /// Returns `true` if the slice contains no elements.
149     ///
150     /// # Examples
151     ///
152     /// ```
153     /// # use zerovec::VarZeroVec;
154     ///
155     /// let strings: Vec<String> = vec![];
156     /// let vec = VarZeroVec::<str>::from(&strings);
157     ///
158     /// assert!(vec.is_empty());
159     /// ```
is_empty(&self) -> bool160     pub fn is_empty(&self) -> bool {
161         self.as_components().is_empty()
162     }
163 
164     /// Obtain an iterator over this slice's elements
165     ///
166     /// # Example
167     ///
168     /// ```rust
169     /// # use zerovec::VarZeroVec;
170     ///
171     /// let strings = vec!["foo", "bar", "baz", "quux"];
172     /// let vec = VarZeroVec::<str>::from(&strings);
173     ///
174     /// let mut iter_results: Vec<&str> = vec.iter().collect();
175     /// assert_eq!(iter_results[0], "foo");
176     /// assert_eq!(iter_results[1], "bar");
177     /// assert_eq!(iter_results[2], "baz");
178     /// assert_eq!(iter_results[3], "quux");
179     /// ```
iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F>180     pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
181         self.as_components().iter()
182     }
183 
184     /// Get one of this slice's elements, returning `None` if the index is out of bounds
185     ///
186     /// # Example
187     ///
188     /// ```rust
189     /// # use zerovec::VarZeroVec;
190     ///
191     /// let strings = vec!["foo", "bar", "baz", "quux"];
192     /// let vec = VarZeroVec::<str>::from(&strings);
193     ///
194     /// let mut iter_results: Vec<&str> = vec.iter().collect();
195     /// assert_eq!(vec.get(0), Some("foo"));
196     /// assert_eq!(vec.get(1), Some("bar"));
197     /// assert_eq!(vec.get(2), Some("baz"));
198     /// assert_eq!(vec.get(3), Some("quux"));
199     /// assert_eq!(vec.get(4), None);
200     /// ```
get(&self, idx: usize) -> Option<&T>201     pub fn get(&self, idx: usize) -> Option<&T> {
202         self.as_components().get(idx)
203     }
204 
205     /// Get one of this slice's elements
206     ///
207     /// # Safety
208     ///
209     /// `index` must be in range
210     ///
211     /// # Example
212     ///
213     /// ```rust
214     /// # use zerovec::VarZeroVec;
215     ///
216     /// let strings = vec!["foo", "bar", "baz", "quux"];
217     /// let vec = VarZeroVec::<str>::from(&strings);
218     ///
219     /// let mut iter_results: Vec<&str> = vec.iter().collect();
220     /// unsafe {
221     ///     assert_eq!(vec.get_unchecked(0), "foo");
222     ///     assert_eq!(vec.get_unchecked(1), "bar");
223     ///     assert_eq!(vec.get_unchecked(2), "baz");
224     ///     assert_eq!(vec.get_unchecked(3), "quux");
225     /// }
226     /// ```
get_unchecked(&self, idx: usize) -> &T227     pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
228         self.as_components().get_unchecked(idx)
229     }
230 
231     /// Obtain an owned `Vec<Box<T>>` out of this
232     #[cfg(feature = "alloc")]
to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>>233     pub fn to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
234         self.as_components().to_vec()
235     }
236 
237     /// Get a reference to the entire encoded backing buffer of this slice
238     ///
239     /// The bytes can be passed back to [`Self::parse_bytes()`].
240     ///
241     /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
242     ///
243     /// # Example
244     ///
245     /// ```rust
246     /// # use zerovec::VarZeroVec;
247     ///
248     /// let strings = vec!["foo", "bar", "baz"];
249     /// let vzv = VarZeroVec::<str>::from(&strings);
250     ///
251     /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap());
252     /// ```
253     #[inline]
as_bytes(&self) -> &[u8]254     pub const fn as_bytes(&self) -> &[u8] {
255         &self.entire_slice
256     }
257 
258     /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
259     ///
260     /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
261     /// it is more efficient to perform this conversion first
as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F>262     pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
263         VarZeroVec(VarZeroVecInner::Borrowed(self))
264     }
265 
266     /// Parse a VarZeroSlice from a slice of the appropriate format
267     ///
268     /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError>269     pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> {
270         <Self as VarULE>::parse_bytes(slice)
271     }
272 }
273 
274 impl<T, F> VarZeroSlice<T, F>
275 where
276     T: VarULE,
277     T: ?Sized,
278     T: Ord,
279     F: VarZeroVecFormat,
280 {
281     /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
282     /// the standard library function [`binary_search`].
283     ///
284     /// # Example
285     ///
286     /// ```
287     /// # use zerovec::VarZeroVec;
288     ///
289     /// let strings = vec!["a", "b", "f", "g"];
290     /// let vec = VarZeroVec::<str>::from(&strings);
291     ///
292     /// assert_eq!(vec.binary_search("f"), Ok(2));
293     /// assert_eq!(vec.binary_search("e"), Err(2));
294     /// ```
295     ///
296     /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
297     #[inline]
binary_search(&self, x: &T) -> Result<usize, usize>298     pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
299         self.as_components().binary_search(x)
300     }
301 
302     /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
303     ///
304     /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
305     /// to the behavior of the standard library function [`binary_search`].
306     ///
307     /// The index is returned relative to the start of the range.
308     ///
309     /// # Example
310     ///
311     /// ```
312     /// # use zerovec::VarZeroVec;
313     /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
314     /// let vec = VarZeroVec::<str>::from(&strings);
315     ///
316     /// // Same behavior as binary_search when the range covers the whole slice:
317     /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
318     /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
319     ///
320     /// // Will not look outside of the range:
321     /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
322     /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
323     ///
324     /// // Will return indices relative to the start of the range:
325     /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
326     /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
327     ///
328     /// // Will return `None` if the range is out of bounds:
329     /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
330     /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
331     /// ```
332     ///
333     /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
334     #[inline]
binary_search_in_range( &self, x: &T, range: Range<usize>, ) -> Option<Result<usize, usize>>335     pub fn binary_search_in_range(
336         &self,
337         x: &T,
338         range: Range<usize>,
339     ) -> Option<Result<usize, usize>> {
340         self.as_components().binary_search_in_range(x, range)
341     }
342 }
343 
344 impl<T, F> VarZeroSlice<T, F>
345 where
346     T: VarULE,
347     T: ?Sized,
348     F: VarZeroVecFormat,
349 {
350     /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
351     /// the standard library function [`binary_search_by`].
352     ///
353     /// # Example
354     ///
355     /// ```
356     /// # use zerovec::VarZeroVec;
357     /// let strings = vec!["a", "b", "f", "g"];
358     /// let vec = VarZeroVec::<str>::from(&strings);
359     ///
360     /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
361     /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
362     /// ```
363     ///
364     /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
365     #[inline]
binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize>366     pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
367         self.as_components().binary_search_by(predicate)
368     }
369 
370     /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
371     ///
372     /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
373     /// to the behavior of the standard library function [`binary_search`].
374     ///
375     /// The index is returned relative to the start of the range.
376     ///
377     /// # Example
378     ///
379     /// ```
380     /// # use zerovec::VarZeroVec;
381     /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
382     /// let vec = VarZeroVec::<str>::from(&strings);
383     ///
384     /// // Same behavior as binary_search when the range covers the whole slice:
385     /// assert_eq!(
386     ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
387     ///     Some(Ok(3))
388     /// );
389     /// assert_eq!(
390     ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
391     ///     Some(Err(4))
392     /// );
393     ///
394     /// // Will not look outside of the range:
395     /// assert_eq!(
396     ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
397     ///     Some(Err(1))
398     /// );
399     /// assert_eq!(
400     ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
401     ///     Some(Err(0))
402     /// );
403     ///
404     /// // Will return indices relative to the start of the range:
405     /// assert_eq!(
406     ///     vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
407     ///     Some(Ok(2))
408     /// );
409     /// assert_eq!(
410     ///     vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
411     ///     Some(Err(3))
412     /// );
413     ///
414     /// // Will return `None` if the range is out of bounds:
415     /// assert_eq!(
416     ///     vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
417     ///     None
418     /// );
419     /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
420     /// ```
421     ///
422     /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range<usize>, ) -> Option<Result<usize, usize>>423     pub fn binary_search_in_range_by(
424         &self,
425         predicate: impl FnMut(&T) -> Ordering,
426         range: Range<usize>,
427     ) -> Option<Result<usize, usize>> {
428         self.as_components()
429             .binary_search_in_range_by(predicate, range)
430     }
431 }
432 // Safety (based on the safety checklist on the VarULE trait):
433 //  1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
434 //     `[u8]` slice which satisfies this invariant)
435 //  2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
436 //     `[u8]` slice which satisfies this invariant)
437 //  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
438 //  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
439 //  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
440 //  6. `as_bytes()` is equivalent to a regular transmute of the underlying data
441 //  7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
442 unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
validate_bytes(bytes: &[u8]) -> Result<(), UleError>443     fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
444         let _: VarZeroVecComponents<T, F> =
445             VarZeroVecComponents::parse_bytes(bytes).map_err(|_| UleError::parse::<Self>())?;
446         Ok(())
447     }
448 
from_bytes_unchecked(bytes: &[u8]) -> &Self449     unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
450         // self is really just a wrapper around a byte slice
451         mem::transmute(bytes)
452     }
453 
as_bytes(&self) -> &[u8]454     fn as_bytes(&self) -> &[u8] {
455         &self.entire_slice
456     }
457 }
458 
459 impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
460     type Output = T;
index(&self, index: usize) -> &Self::Output461     fn index(&self, index: usize) -> &Self::Output {
462         #[allow(clippy::panic)] // documented
463         match self.get(index) {
464             Some(x) => x,
465             None => panic!(
466                 "index out of bounds: the len is {} but the index is {index}",
467                 self.len()
468             ),
469         }
470     }
471 }
472 
473 impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
474 where
475     T: VarULE,
476     T: ?Sized,
477     T: PartialEq,
478     F: VarZeroVecFormat,
479 {
480     #[inline]
eq(&self, other: &VarZeroSlice<T, F>) -> bool481     fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
482         // VarULE has an API guarantee that this is equivalent
483         // to `T::VarULE::eq()`
484         self.entire_slice.eq(&other.entire_slice)
485     }
486 }
487 
488 impl<T, F> Eq for VarZeroSlice<T, F>
489 where
490     T: VarULE,
491     T: ?Sized,
492     T: Eq,
493     F: VarZeroVecFormat,
494 {
495 }
496 
497 impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
498     #[inline]
partial_cmp(&self, other: &Self) -> Option<Ordering>499     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
500         self.iter().partial_cmp(other.iter())
501     }
502 }
503 
504 impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
505     #[inline]
cmp(&self, other: &Self) -> Ordering506     fn cmp(&self, other: &Self) -> Ordering {
507         self.iter().cmp(other.iter())
508     }
509 }
510 
511 impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
512 where
513     T: fmt::Debug,
514 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result515     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
516         f.debug_list().entries(self.iter()).finish()
517     }
518 }
519 
520 impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
as_ref(&self) -> &VarZeroSlice<T, F>521     fn as_ref(&self) -> &VarZeroSlice<T, F> {
522         self
523     }
524 }
525