• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use super::VarZeroVecFormatError;
6 use crate::ule::*;
7 use core::cmp::Ordering;
8 use core::convert::TryFrom;
9 use core::marker::PhantomData;
10 use core::mem;
11 use core::ops::Range;
12 
13 /// This trait allows switching between different possible internal
14 /// representations of VarZeroVec.
15 ///
16 /// Currently this crate supports three formats: [`Index8`], [`Index16`] and [`Index32`],
17 /// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec)
18 /// types unless explicitly specified otherwise.
19 ///
20 /// Do not implement this trait, its internals may be changed in the future,
21 /// and all of its associated items are hidden from the docs.
22 pub trait VarZeroVecFormat: 'static + Sized {
23     /// The type to use for the indexing array
24     ///
25     /// Safety: must be a ULE for which all byte sequences are allowed
26     #[doc(hidden)]
27     type Index: IntegerULE;
28     /// The type to use for the length segment
29     ///
30     /// Safety: must be a ULE for which all byte sequences are allowed
31     #[doc(hidden)]
32     type Len: IntegerULE;
33 }
34 
35 /// This trait represents various ULE types that can be used to represent an integer
36 ///
37 /// Do not implement this trait, its internals may be changed in the future,
38 /// and all of its associated items are hidden from the docs.
39 #[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period
40 #[doc(hidden)]
41 pub unsafe trait IntegerULE: ULE {
42     /// The error to show when unable to construct a vec
43     #[doc(hidden)]
44     const TOO_LARGE_ERROR: &'static str;
45 
46     /// Safety: must be sizeof(self)
47     #[doc(hidden)]
48     const SIZE: usize;
49 
50     /// Safety: must be maximum integral value represented here
51     #[doc(hidden)]
52     const MAX_VALUE: u32;
53 
54     /// Safety: Must roundtrip with from_usize and represent the correct
55     /// integral value
56     #[doc(hidden)]
iule_to_usize(self) -> usize57     fn iule_to_usize(self) -> usize;
58 
59     #[doc(hidden)]
iule_from_usize(x: usize) -> Option<Self>60     fn iule_from_usize(x: usize) -> Option<Self>;
61 
62     /// Safety: Should always convert a buffer into an array of Self with the correct length
63     #[doc(hidden)]
64     #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]65     fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self];
66 }
67 
68 /// This is a [`VarZeroVecFormat`] that stores u8s in the index array, and a u8 for a length.
69 ///
70 /// Will have a smaller data size, but it's *extremely* likely for larger arrays
71 /// to be unrepresentable (and error on construction). Should probably be used
72 /// for known-small arrays, where all but the last field are known-small.
73 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
74 #[allow(clippy::exhaustive_structs)] // marker
75 pub struct Index8;
76 
77 /// This is a [`VarZeroVecFormat`] that stores u16s in the index array, and a u16 for a length.
78 ///
79 /// Will have a smaller data size, but it's more likely for larger arrays
80 /// to be unrepresentable (and error on construction)
81 ///
82 /// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) types.
83 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
84 #[allow(clippy::exhaustive_structs)] // marker
85 pub struct Index16;
86 
87 /// This is a [`VarZeroVecFormat`] that stores u32s in the index array, and a u32 for a length.
88 /// Will have a larger data size, but will support large arrays without
89 /// problems.
90 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
91 #[allow(clippy::exhaustive_structs)] // marker
92 pub struct Index32;
93 
94 impl VarZeroVecFormat for Index8 {
95     type Index = u8;
96     type Len = u8;
97 }
98 
99 impl VarZeroVecFormat for Index16 {
100     type Index = RawBytesULE<2>;
101     type Len = RawBytesULE<2>;
102 }
103 
104 impl VarZeroVecFormat for Index32 {
105     type Index = RawBytesULE<4>;
106     type Len = RawBytesULE<4>;
107 }
108 
109 unsafe impl IntegerULE for u8 {
110     const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
111                                      cumulatively are larger than a u8 in size";
112     const SIZE: usize = mem::size_of::<Self>();
113     const MAX_VALUE: u32 = u8::MAX as u32;
114     #[inline]
iule_to_usize(self) -> usize115     fn iule_to_usize(self) -> usize {
116         self as usize
117     }
118     #[inline]
iule_from_usize(u: usize) -> Option<Self>119     fn iule_from_usize(u: usize) -> Option<Self> {
120         u8::try_from(u).ok()
121     }
122     #[inline]
123     #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]124     fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
125         bytes
126     }
127 }
128 
129 unsafe impl IntegerULE for RawBytesULE<2> {
130     const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
131                                      cumulatively are larger than a u16 in size";
132     const SIZE: usize = mem::size_of::<Self>();
133     const MAX_VALUE: u32 = u16::MAX as u32;
134     #[inline]
iule_to_usize(self) -> usize135     fn iule_to_usize(self) -> usize {
136         self.as_unsigned_int() as usize
137     }
138     #[inline]
iule_from_usize(u: usize) -> Option<Self>139     fn iule_from_usize(u: usize) -> Option<Self> {
140         u16::try_from(u).ok().map(u16::to_unaligned)
141     }
142     #[inline]
143     #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]144     fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
145         Self::from_bytes_unchecked_mut(bytes)
146     }
147 }
148 
149 unsafe impl IntegerULE for RawBytesULE<4> {
150     const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
151                                      cumulatively are larger than a u32 in size";
152     const SIZE: usize = mem::size_of::<Self>();
153     const MAX_VALUE: u32 = u32::MAX;
154     #[inline]
iule_to_usize(self) -> usize155     fn iule_to_usize(self) -> usize {
156         self.as_unsigned_int() as usize
157     }
158     #[inline]
iule_from_usize(u: usize) -> Option<Self>159     fn iule_from_usize(u: usize) -> Option<Self> {
160         u32::try_from(u).ok().map(u32::to_unaligned)
161     }
162     #[inline]
163     #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]164     fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
165         Self::from_bytes_unchecked_mut(bytes)
166     }
167 }
168 
169 /// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec
170 /// internal representation code lies.
171 ///
172 /// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into
173 /// the buffer components. Logically this is capable of behaving as
174 /// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually
175 /// exist.
176 ///
177 /// See [`VarZeroVecComponents::parse_bytes()`] for information on the internal invariants involved
178 #[derive(Debug)]
179 pub struct VarZeroVecComponents<'a, T: ?Sized, F> {
180     /// The number of elements
181     len: u32,
182     /// The list of indices into the `things` slice
183     /// Since the first element is always at things[0], the first element of the indices array is for the *second* element
184     indices: &'a [u8],
185     /// The contiguous list of `T::VarULE`s
186     things: &'a [u8],
187     marker: PhantomData<(&'a T, F)>,
188 }
189 
190 // #[derive()] won't work here since we do not want it to be
191 // bound on T: Copy
192 impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {}
193 impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> {
clone(&self) -> Self194     fn clone(&self) -> Self {
195         *self
196     }
197 }
198 
199 impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> {
200     #[inline]
default() -> Self201     fn default() -> Self {
202         Self::new()
203     }
204 }
205 
206 impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> {
207     #[inline]
new() -> Self208     pub fn new() -> Self {
209         Self {
210             len: 0,
211             indices: &[],
212             things: &[],
213             marker: PhantomData,
214         }
215     }
216 }
217 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> {
218     /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
219     ///
220     /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize)
221     /// - There must be at least `4*(length - 1) + 4` bytes total, to form the array `indices` of indices
222     /// - `0..indices[0]` must index into a valid section of
223     ///   `things` (the data after `indices`), such that it parses to a `T::VarULE`
224     /// - `indices[i - 1]..indices[i]` must index into a valid section of
225     ///   `things` (the data after `indices`), such that it parses to a `T::VarULE`
226     /// - `indices[len - 2]..things.len()` must index into a valid section of
227     ///   `things`, such that it parses to a `T::VarULE`
228     #[inline]
parse_bytes(slice: &'a [u8]) -> Result<Self, VarZeroVecFormatError>229     pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, VarZeroVecFormatError> {
230         // The empty VZV is special-cased to the empty slice
231         if slice.is_empty() {
232             return Ok(VarZeroVecComponents {
233                 len: 0,
234                 indices: &[],
235                 things: &[],
236                 marker: PhantomData,
237             });
238         }
239         let len_bytes = slice
240             .get(0..F::Len::SIZE)
241             .ok_or(VarZeroVecFormatError::Metadata)?;
242         let len_ule =
243             F::Len::parse_bytes_to_slice(len_bytes).map_err(|_| VarZeroVecFormatError::Metadata)?;
244 
245         let len = len_ule
246             .first()
247             .ok_or(VarZeroVecFormatError::Metadata)?
248             .iule_to_usize();
249 
250         let rest = slice
251             .get(F::Len::SIZE..)
252             .ok_or(VarZeroVecFormatError::Metadata)?;
253         let len_u32 = u32::try_from(len).map_err(|_| VarZeroVecFormatError::Metadata);
254         // We pass down the rest of the invariants
255         Self::parse_bytes_with_length(len_u32?, rest)
256     }
257 
258     /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
259     ///
260     /// - There must be at least `4*len` bytes total, to form the array `indices` of indices.
261     /// - `indices[i]..indices[i+1]` must index into a valid section of
262     ///   `things` (the data after `indices`), such that it parses to a `T::VarULE`
263     /// - `indices[len - 1]..things.len()` must index into a valid section of
264     ///   `things`, such that it parses to a `T::VarULE`
265     #[inline]
parse_bytes_with_length( len: u32, slice: &'a [u8], ) -> Result<Self, VarZeroVecFormatError>266     pub fn parse_bytes_with_length(
267         len: u32,
268         slice: &'a [u8],
269     ) -> Result<Self, VarZeroVecFormatError> {
270         let len_minus_one = len.checked_sub(1);
271         // The empty VZV is special-cased to the empty slice
272         let Some(len_minus_one) = len_minus_one else {
273             return Ok(VarZeroVecComponents {
274                 len: 0,
275                 indices: &[],
276                 things: &[],
277                 marker: PhantomData,
278             });
279         };
280         // The indices array is one element shorter since the first index is always 0,
281         // so we use len_minus_one
282         let indices_bytes = slice
283             .get(..F::Index::SIZE * (len_minus_one as usize))
284             .ok_or(VarZeroVecFormatError::Metadata)?;
285         let things = slice
286             .get(F::Index::SIZE * (len_minus_one as usize)..)
287             .ok_or(VarZeroVecFormatError::Metadata)?;
288 
289         let borrowed = VarZeroVecComponents {
290             len,
291             indices: indices_bytes,
292             things,
293             marker: PhantomData,
294         };
295 
296         borrowed.check_indices_and_things()?;
297 
298         Ok(borrowed)
299     }
300 
301     /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
302     /// successfully returned a [`VarZeroVecComponents`] when passed to
303     /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same
304     /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`].
305     ///
306     /// # Safety
307     /// The bytes must have previously successfully run through
308     /// [`VarZeroVecComponents::parse_bytes()`]
from_bytes_unchecked(slice: &'a [u8]) -> Self309     pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
310         // The empty VZV is special-cased to the empty slice
311         if slice.is_empty() {
312             return VarZeroVecComponents {
313                 len: 0,
314                 indices: &[],
315                 things: &[],
316                 marker: PhantomData,
317             };
318         }
319         let (len_bytes, data_bytes) = unsafe { slice.split_at_unchecked(F::Len::SIZE) };
320         // Safety: F::Len allows all byte sequences
321         let len_ule = F::Len::slice_from_bytes_unchecked(len_bytes);
322 
323         let len = len_ule.get_unchecked(0).iule_to_usize();
324         let len_u32 = len as u32;
325 
326         // Safety: This method requires the bytes to have passed through `parse_bytes()`
327         // whereas we're calling something that asks for `parse_bytes_with_length()`.
328         // The two methods perform similar validation, with parse_bytes() validating an additional
329         // 4-byte `length` header.
330         Self::from_bytes_unchecked_with_length(len_u32, data_bytes)
331     }
332 
333     /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
334     /// successfully returned a [`VarZeroVecComponents`] when passed to
335     /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same
336     /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`].
337     ///
338     /// # Safety
339     /// The len,bytes must have previously successfully run through
340     /// [`VarZeroVecComponents::parse_bytes_with_length()`]
from_bytes_unchecked_with_length(len: u32, slice: &'a [u8]) -> Self341     pub unsafe fn from_bytes_unchecked_with_length(len: u32, slice: &'a [u8]) -> Self {
342         let len_minus_one = len.checked_sub(1);
343         // The empty VZV is special-cased to the empty slice
344         let Some(len_minus_one) = len_minus_one else {
345             return VarZeroVecComponents {
346                 len: 0,
347                 indices: &[],
348                 things: &[],
349                 marker: PhantomData,
350             };
351         };
352         // The indices array is one element shorter since the first index is always 0,
353         // so we use len_minus_one
354         let indices_bytes = slice.get_unchecked(..F::Index::SIZE * (len_minus_one as usize));
355         let things = slice.get_unchecked(F::Index::SIZE * (len_minus_one as usize)..);
356 
357         VarZeroVecComponents {
358             len,
359             indices: indices_bytes,
360             things,
361             marker: PhantomData,
362         }
363     }
364 
365     /// Get the number of elements in this vector
366     #[inline]
len(self) -> usize367     pub fn len(self) -> usize {
368         self.len as usize
369     }
370 
371     /// Returns `true` if the vector contains no elements.
372     #[inline]
is_empty(self) -> bool373     pub fn is_empty(self) -> bool {
374         self.len == 0
375     }
376 
377     /// Get the idx'th element out of this slice. Returns `None` if out of bounds.
378     #[inline]
get(self, idx: usize) -> Option<&'a T>379     pub fn get(self, idx: usize) -> Option<&'a T> {
380         if idx >= self.len() {
381             return None;
382         }
383         Some(unsafe { self.get_unchecked(idx) })
384     }
385 
386     /// Get the idx'th element out of this slice. Does not bounds check.
387     ///
388     /// Safety:
389     /// - `idx` must be in bounds (`idx < self.len()`)
390     #[inline]
get_unchecked(self, idx: usize) -> &'a T391     pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T {
392         let range = self.get_things_range(idx);
393         let things_slice = self.things.get_unchecked(range);
394         T::from_bytes_unchecked(things_slice)
395     }
396 
397     /// Get the range in `things` for the element at `idx`. Does not bounds check.
398     ///
399     /// Safety:
400     /// - `idx` must be in bounds (`idx < self.len()`)
401     #[inline]
get_things_range(self, idx: usize) -> Range<usize>402     pub(crate) unsafe fn get_things_range(self, idx: usize) -> Range<usize> {
403         let start = if let Some(idx_minus_one) = idx.checked_sub(1) {
404             self.indices_slice()
405                 .get_unchecked(idx_minus_one)
406                 .iule_to_usize()
407         } else {
408             0
409         };
410         let end = if idx + 1 == self.len() {
411             self.things.len()
412         } else {
413             self.indices_slice().get_unchecked(idx).iule_to_usize()
414         };
415         debug_assert!(start <= end);
416         start..end
417     }
418 
419     /// Get the size, in bytes, of the indices array
get_indices_size(self) -> usize420     pub(crate) unsafe fn get_indices_size(self) -> usize {
421         self.indices.len()
422     }
423 
424     /// Check the internal invariants of VarZeroVecComponents:
425     ///
426     /// - `indices[i]..indices[i+1]` must index into a valid section of
427     ///   `things`, such that it parses to a `T::VarULE`
428     /// - `indices[len - 1]..things.len()` must index into a valid section of
429     ///   `things`, such that it parses to a `T::VarULE`
430     /// - `indices` is monotonically increasing
431     ///
432     /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods
433     /// assume that the slice has been passed through check_indices_and_things
434     #[inline]
435     #[allow(clippy::len_zero)] // more explicit to enforce safety invariants
check_indices_and_things(self) -> Result<(), VarZeroVecFormatError>436     fn check_indices_and_things(self) -> Result<(), VarZeroVecFormatError> {
437         if self.len() == 0 {
438             if self.things.len() > 0 {
439                 return Err(VarZeroVecFormatError::Metadata);
440             } else {
441                 return Ok(());
442             }
443         }
444         let indices_slice = self.indices_slice();
445         assert_eq!(self.len(), indices_slice.len() + 1);
446         // Safety: i is in bounds (assertion above)
447         let mut start = 0;
448         for i in 0..self.len() {
449             // The indices array is offset by 1: indices[0] is the end of the first
450             // element and the start of the next, since the start of the first element
451             // is always things[0]. So to get the end we get element `i`.
452             let end = if let Some(end) = indices_slice.get(i) {
453                 end.iule_to_usize()
454             } else {
455                 // This only happens at i = self.len() - 1 = indices_slice.len() + 1 - 1
456                 // = indices_slice.len(). This is the last `end`, which is always the size of
457                 // `things` and thus never stored in the array
458                 self.things.len()
459             };
460 
461             if start > end {
462                 return Err(VarZeroVecFormatError::Metadata);
463             }
464             if end > self.things.len() {
465                 return Err(VarZeroVecFormatError::Metadata);
466             }
467             // Safety: start..end is a valid range in self.things
468             let bytes = unsafe { self.things.get_unchecked(start..end) };
469             T::parse_bytes(bytes).map_err(VarZeroVecFormatError::Values)?;
470             start = end;
471         }
472         Ok(())
473     }
474 
475     /// Create an iterator over the Ts contained in VarZeroVecComponents
476     #[inline]
iter(self) -> VarZeroSliceIter<'a, T, F>477     pub fn iter(self) -> VarZeroSliceIter<'a, T, F> {
478         VarZeroSliceIter::new(self)
479     }
480 
481     #[cfg(feature = "alloc")]
to_vec(self) -> alloc::vec::Vec<alloc::boxed::Box<T>>482     pub fn to_vec(self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
483         self.iter().map(T::to_boxed).collect()
484     }
485 
486     #[inline]
indices_slice(&self) -> &'a [F::Index]487     fn indices_slice(&self) -> &'a [F::Index] {
488         unsafe { F::Index::slice_from_bytes_unchecked(self.indices) }
489     }
490 
491     // Dump a debuggable representation of this type
492     #[allow(unused)] // useful for debugging
493     #[cfg(feature = "alloc")]
dump(&self) -> alloc::string::String494     pub(crate) fn dump(&self) -> alloc::string::String {
495         let indices = self
496             .indices_slice()
497             .iter()
498             .copied()
499             .map(IntegerULE::iule_to_usize)
500             .collect::<alloc::vec::Vec<_>>();
501         alloc::format!("VarZeroVecComponents {{ indices: {indices:?} }}")
502     }
503 }
504 
505 /// An iterator over VarZeroSlice
506 #[derive(Debug)]
507 pub struct VarZeroSliceIter<'a, T: ?Sized, F = Index16> {
508     components: VarZeroVecComponents<'a, T, F>,
509     index: usize,
510     // Safety invariant: must be a valid index into the data segment of `components`, or an index at the end
511     // i.e. start_index <= components.things.len()
512     //
513     // It must be a valid index into the `things` array of components, coming from `components.indices_slice()`
514     start_index: usize,
515 }
516 
517 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> {
new(c: VarZeroVecComponents<'a, T, F>) -> Self518     fn new(c: VarZeroVecComponents<'a, T, F>) -> Self {
519         Self {
520             components: c,
521             index: 0,
522             // Invariant upheld, 0 is always a valid index-or-end
523             start_index: 0,
524         }
525     }
526 }
527 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> {
528     type Item = &'a T;
529 
next(&mut self) -> Option<Self::Item>530     fn next(&mut self) -> Option<Self::Item> {
531         // Note: the indices array doesn't contain 0 or len, we need to specially handle those edges. The 0 is handled
532         // by start_index, and the len is handled by the code for `end`.
533 
534         if self.index >= self.components.len() {
535             return None;
536         }
537 
538         // Invariant established: self.index is in bounds for self.components.len(),
539         // which means it is in bounds for self.components.indices_slice() since that has the same length
540 
541         let end = if self.index + 1 == self.components.len() {
542             // We don't store the end index since it is computable, so the last element should use self.components.things.len()
543             self.components.things.len()
544         } else {
545             // Safety: self.index was known to be in bounds from the bounds check above.
546             unsafe {
547                 self.components
548                     .indices_slice()
549                     .get_unchecked(self.index)
550                     .iule_to_usize()
551             }
552         };
553         // Invariant established: end has the same invariant as self.start_index since it comes from indices_slice, which is guaranteed
554         // to only contain valid indexes
555 
556         let item = unsafe {
557             // Safety: self.start_index and end both have in-range invariants, plus they are valid indices from indices_slice
558             // which means we can treat this data as a T
559             T::from_bytes_unchecked(self.components.things.get_unchecked(self.start_index..end))
560         };
561         self.index += 1;
562         // Invariant upheld: end has the same invariant as self.start_index
563         self.start_index = end;
564         Some(item)
565     }
566 
size_hint(&self) -> (usize, Option<usize>)567     fn size_hint(&self) -> (usize, Option<usize>) {
568         let remainder = self.components.len() - self.index;
569         (remainder, Some(remainder))
570     }
571 }
572 
573 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> {
len(&self) -> usize574     fn len(&self) -> usize {
575         self.components.len() - self.index
576     }
577 }
578 
579 impl<'a, T, F> VarZeroVecComponents<'a, T, F>
580 where
581     T: VarULE,
582     T: ?Sized,
583     T: Ord,
584     F: VarZeroVecFormat,
585 {
586     /// Binary searches a sorted `VarZeroVecComponents<T>` for the given element. For more information, see
587     /// the primitive function [`binary_search`](slice::binary_search).
binary_search(&self, needle: &T) -> Result<usize, usize>588     pub fn binary_search(&self, needle: &T) -> Result<usize, usize> {
589         self.binary_search_by(|probe| probe.cmp(needle))
590     }
591 
binary_search_in_range( &self, needle: &T, range: Range<usize>, ) -> Option<Result<usize, usize>>592     pub fn binary_search_in_range(
593         &self,
594         needle: &T,
595         range: Range<usize>,
596     ) -> Option<Result<usize, usize>> {
597         self.binary_search_in_range_by(|probe| probe.cmp(needle), range)
598     }
599 }
600 
601 impl<'a, T, F> VarZeroVecComponents<'a, T, F>
602 where
603     T: VarULE,
604     T: ?Sized,
605     F: VarZeroVecFormat,
606 {
607     /// Binary searches a sorted `VarZeroVecComponents<T>` for the given predicate. For more information, see
608     /// the primitive function [`binary_search_by`](slice::binary_search_by).
binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize>609     pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
610         // Safety: 0 and len are in range
611         unsafe { self.binary_search_in_range_unchecked(predicate, 0..self.len()) }
612     }
613 
614     // Binary search within a range.
615     // Values returned are relative to the range start!
binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range<usize>, ) -> Option<Result<usize, usize>>616     pub fn binary_search_in_range_by(
617         &self,
618         predicate: impl FnMut(&T) -> Ordering,
619         range: Range<usize>,
620     ) -> Option<Result<usize, usize>> {
621         if range.end > self.len() {
622             return None;
623         }
624         if range.end < range.start {
625             return None;
626         }
627         // Safety: We bounds checked above: end is in-bounds or len, and start is <= end
628         let range_absolute =
629             unsafe { self.binary_search_in_range_unchecked(predicate, range.clone()) };
630         // The values returned are relative to the range start
631         Some(
632             range_absolute
633                 .map(|o| o - range.start)
634                 .map_err(|e| e - range.start),
635         )
636     }
637 
638     /// Safety: range must be in range for the slice (start <= len, end <= len, start <= end)
binary_search_in_range_unchecked( &self, mut predicate: impl FnMut(&T) -> Ordering, range: Range<usize>, ) -> Result<usize, usize>639     unsafe fn binary_search_in_range_unchecked(
640         &self,
641         mut predicate: impl FnMut(&T) -> Ordering,
642         range: Range<usize>,
643     ) -> Result<usize, usize> {
644         // Function invariant: size is always end - start
645         let mut start = range.start;
646         let mut end = range.end;
647         let mut size;
648 
649         // Loop invariant: 0 <= start < end <= len
650         // This invariant is initialized by the function safety invariants and the loop condition
651         while start < end {
652             size = end - start;
653             // This establishes mid < end (which implies mid < len)
654             // size is end - start. start + size is end (which is <= len).
655             // mid = start + size/2 will be less than end
656             let mid = start + size / 2;
657 
658             // Safety: mid is < end <= len, so in-range
659             let cmp = predicate(self.get_unchecked(mid));
660 
661             match cmp {
662                 Ordering::Less => {
663                     // This retains the loop invariant since it
664                     // increments start, and we already have 0 <= start
665                     // start < end is enforced by the loop condition
666                     start = mid + 1;
667                 }
668                 Ordering::Greater => {
669                     // mid < end, so this decreases end.
670                     // This means end <= len is still true, and
671                     // end > start is enforced by the loop condition
672                     end = mid;
673                 }
674                 Ordering::Equal => return Ok(mid),
675             }
676         }
677         Err(start)
678     }
679 }
680 
681 /// Collects the bytes for a VarZeroSlice into a Vec.
682 #[cfg(feature = "alloc")]
get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<alloc::vec::Vec<u8>> where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,683 pub fn get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<alloc::vec::Vec<u8>>
684 where
685     T: VarULE + ?Sized,
686     A: EncodeAsVarULE<T>,
687     F: VarZeroVecFormat,
688 {
689     debug_assert!(!elements.is_empty());
690     let len = compute_serializable_len::<T, A, F>(elements)?;
691     debug_assert!(
692         len >= F::Len::SIZE as u32,
693         "Must have at least F::Len::SIZE bytes to hold the length of the vector"
694     );
695     let mut output = alloc::vec![0u8; len as usize];
696     write_serializable_bytes::<T, A, F>(elements, &mut output);
697     Some(output)
698 }
699 
700 /// Writes the bytes for a VarZeroLengthlessSlice into an output buffer.
701 /// Usable for a VarZeroSlice if you first write the length bytes.
702 ///
703 /// Every byte in the buffer will be initialized after calling this function.
704 ///
705 /// # Panics
706 ///
707 /// Panics if the buffer is not exactly the correct length.
write_serializable_bytes_without_length<T, A, F>(elements: &[A], output: &mut [u8]) where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,708 pub fn write_serializable_bytes_without_length<T, A, F>(elements: &[A], output: &mut [u8])
709 where
710     T: VarULE + ?Sized,
711     A: EncodeAsVarULE<T>,
712     F: VarZeroVecFormat,
713 {
714     assert!(elements.len() <= F::Len::MAX_VALUE as usize);
715     if elements.is_empty() {
716         return;
717     }
718 
719     // idx_offset = offset from the start of the buffer for the next index
720     let mut idx_offset: usize = 0;
721     // first_dat_offset = offset from the start of the buffer of the first data block
722     let first_dat_offset: usize = idx_offset + (elements.len() - 1) * F::Index::SIZE;
723     // dat_offset = offset from the start of the buffer of the next data block
724     let mut dat_offset: usize = first_dat_offset;
725 
726     for (i, element) in elements.iter().enumerate() {
727         let element_len = element.encode_var_ule_len();
728 
729         // The first index is always 0. We don't write it, or update the idx offset.
730         if i != 0 {
731             let idx_limit = idx_offset + F::Index::SIZE;
732             #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
733             let idx_slice = &mut output[idx_offset..idx_limit];
734             // VZV expects data offsets to be stored relative to the first data block
735             let idx = dat_offset - first_dat_offset;
736             assert!(idx <= F::Index::MAX_VALUE as usize);
737             #[allow(clippy::expect_used)] // this function is explicitly panicky
738             let bytes_to_write = F::Index::iule_from_usize(idx).expect(F::Index::TOO_LARGE_ERROR);
739             idx_slice.copy_from_slice(ULE::slice_as_bytes(&[bytes_to_write]));
740 
741             idx_offset = idx_limit;
742         }
743 
744         let dat_limit = dat_offset + element_len;
745         #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
746         let dat_slice = &mut output[dat_offset..dat_limit];
747         element.encode_var_ule_write(dat_slice);
748         debug_assert_eq!(T::validate_bytes(dat_slice), Ok(()));
749         dat_offset = dat_limit;
750     }
751 
752     debug_assert_eq!(idx_offset, F::Index::SIZE * (elements.len() - 1));
753     assert_eq!(dat_offset, output.len());
754 }
755 
756 /// Writes the bytes for a VarZeroSlice into an output buffer.
757 ///
758 /// Every byte in the buffer will be initialized after calling this function.
759 ///
760 /// # Panics
761 ///
762 /// Panics if the buffer is not exactly the correct length.
write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8]) where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,763 pub fn write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8])
764 where
765     T: VarULE + ?Sized,
766     A: EncodeAsVarULE<T>,
767     F: VarZeroVecFormat,
768 {
769     if elements.is_empty() {
770         return;
771     }
772     assert!(elements.len() <= F::Len::MAX_VALUE as usize);
773     #[allow(clippy::expect_used)] // This function is explicitly panicky
774     let num_elements_ule = F::Len::iule_from_usize(elements.len()).expect(F::Len::TOO_LARGE_ERROR);
775     #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
776     output[0..F::Len::SIZE].copy_from_slice(ULE::slice_as_bytes(&[num_elements_ule]));
777 
778     #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
779     write_serializable_bytes_without_length::<T, A, F>(elements, &mut output[F::Len::SIZE..]);
780 }
781 
compute_serializable_len_without_length<T, A, F>(elements: &[A]) -> Option<u32> where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,782 pub fn compute_serializable_len_without_length<T, A, F>(elements: &[A]) -> Option<u32>
783 where
784     T: VarULE + ?Sized,
785     A: EncodeAsVarULE<T>,
786     F: VarZeroVecFormat,
787 {
788     let elements_len = elements.len();
789     let Some(elements_len_minus_one) = elements_len.checked_sub(1) else {
790         // Empty vec is optimized to an empty byte representation
791         return Some(0);
792     };
793     let idx_len: u32 = u32::try_from(elements_len_minus_one)
794         .ok()?
795         .checked_mul(F::Index::SIZE as u32)?;
796     let data_len: u32 = elements
797         .iter()
798         .map(|v| u32::try_from(v.encode_var_ule_len()).ok())
799         .try_fold(0u32, |s, v| s.checked_add(v?))?;
800     let ret = idx_len.checked_add(data_len);
801     if let Some(r) = ret {
802         if r >= F::Index::MAX_VALUE {
803             return None;
804         }
805     }
806     ret
807 }
808 
compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32> where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,809 pub fn compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32>
810 where
811     T: VarULE + ?Sized,
812     A: EncodeAsVarULE<T>,
813     F: VarZeroVecFormat,
814 {
815     compute_serializable_len_without_length::<T, A, F>(elements).map(|x| x + F::Len::SIZE as u32)
816 }
817