1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use super::VarZeroVecFormatError;
6 use crate::ule::*;
7 use core::cmp::Ordering;
8 use core::convert::TryFrom;
9 use core::marker::PhantomData;
10 use core::mem;
11 use core::ops::Range;
12
13 /// This trait allows switching between different possible internal
14 /// representations of VarZeroVec.
15 ///
16 /// Currently this crate supports three formats: [`Index8`], [`Index16`] and [`Index32`],
17 /// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec)
18 /// types unless explicitly specified otherwise.
19 ///
20 /// Do not implement this trait, its internals may be changed in the future,
21 /// and all of its associated items are hidden from the docs.
22 pub trait VarZeroVecFormat: 'static + Sized {
23 /// The type to use for the indexing array
24 ///
25 /// Safety: must be a ULE for which all byte sequences are allowed
26 #[doc(hidden)]
27 type Index: IntegerULE;
28 /// The type to use for the length segment
29 ///
30 /// Safety: must be a ULE for which all byte sequences are allowed
31 #[doc(hidden)]
32 type Len: IntegerULE;
33 }
34
35 /// This trait represents various ULE types that can be used to represent an integer
36 ///
37 /// Do not implement this trait, its internals may be changed in the future,
38 /// and all of its associated items are hidden from the docs.
39 #[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period
40 #[doc(hidden)]
41 pub unsafe trait IntegerULE: ULE {
42 /// The error to show when unable to construct a vec
43 #[doc(hidden)]
44 const TOO_LARGE_ERROR: &'static str;
45
46 /// Safety: must be sizeof(self)
47 #[doc(hidden)]
48 const SIZE: usize;
49
50 /// Safety: must be maximum integral value represented here
51 #[doc(hidden)]
52 const MAX_VALUE: u32;
53
54 /// Safety: Must roundtrip with from_usize and represent the correct
55 /// integral value
56 #[doc(hidden)]
iule_to_usize(self) -> usize57 fn iule_to_usize(self) -> usize;
58
59 #[doc(hidden)]
iule_from_usize(x: usize) -> Option<Self>60 fn iule_from_usize(x: usize) -> Option<Self>;
61
62 /// Safety: Should always convert a buffer into an array of Self with the correct length
63 #[doc(hidden)]
64 #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]65 fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self];
66 }
67
68 /// This is a [`VarZeroVecFormat`] that stores u8s in the index array, and a u8 for a length.
69 ///
70 /// Will have a smaller data size, but it's *extremely* likely for larger arrays
71 /// to be unrepresentable (and error on construction). Should probably be used
72 /// for known-small arrays, where all but the last field are known-small.
73 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
74 #[allow(clippy::exhaustive_structs)] // marker
75 pub struct Index8;
76
77 /// This is a [`VarZeroVecFormat`] that stores u16s in the index array, and a u16 for a length.
78 ///
79 /// Will have a smaller data size, but it's more likely for larger arrays
80 /// to be unrepresentable (and error on construction)
81 ///
82 /// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) types.
83 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
84 #[allow(clippy::exhaustive_structs)] // marker
85 pub struct Index16;
86
87 /// This is a [`VarZeroVecFormat`] that stores u32s in the index array, and a u32 for a length.
88 /// Will have a larger data size, but will support large arrays without
89 /// problems.
90 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
91 #[allow(clippy::exhaustive_structs)] // marker
92 pub struct Index32;
93
94 impl VarZeroVecFormat for Index8 {
95 type Index = u8;
96 type Len = u8;
97 }
98
99 impl VarZeroVecFormat for Index16 {
100 type Index = RawBytesULE<2>;
101 type Len = RawBytesULE<2>;
102 }
103
104 impl VarZeroVecFormat for Index32 {
105 type Index = RawBytesULE<4>;
106 type Len = RawBytesULE<4>;
107 }
108
109 unsafe impl IntegerULE for u8 {
110 const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
111 cumulatively are larger than a u8 in size";
112 const SIZE: usize = mem::size_of::<Self>();
113 const MAX_VALUE: u32 = u8::MAX as u32;
114 #[inline]
iule_to_usize(self) -> usize115 fn iule_to_usize(self) -> usize {
116 self as usize
117 }
118 #[inline]
iule_from_usize(u: usize) -> Option<Self>119 fn iule_from_usize(u: usize) -> Option<Self> {
120 u8::try_from(u).ok()
121 }
122 #[inline]
123 #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]124 fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
125 bytes
126 }
127 }
128
129 unsafe impl IntegerULE for RawBytesULE<2> {
130 const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
131 cumulatively are larger than a u16 in size";
132 const SIZE: usize = mem::size_of::<Self>();
133 const MAX_VALUE: u32 = u16::MAX as u32;
134 #[inline]
iule_to_usize(self) -> usize135 fn iule_to_usize(self) -> usize {
136 self.as_unsigned_int() as usize
137 }
138 #[inline]
iule_from_usize(u: usize) -> Option<Self>139 fn iule_from_usize(u: usize) -> Option<Self> {
140 u16::try_from(u).ok().map(u16::to_unaligned)
141 }
142 #[inline]
143 #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]144 fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
145 Self::from_bytes_unchecked_mut(bytes)
146 }
147 }
148
149 unsafe impl IntegerULE for RawBytesULE<4> {
150 const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
151 cumulatively are larger than a u32 in size";
152 const SIZE: usize = mem::size_of::<Self>();
153 const MAX_VALUE: u32 = u32::MAX;
154 #[inline]
iule_to_usize(self) -> usize155 fn iule_to_usize(self) -> usize {
156 self.as_unsigned_int() as usize
157 }
158 #[inline]
iule_from_usize(u: usize) -> Option<Self>159 fn iule_from_usize(u: usize) -> Option<Self> {
160 u32::try_from(u).ok().map(u32::to_unaligned)
161 }
162 #[inline]
163 #[cfg(feature = "alloc")]
iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]164 fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
165 Self::from_bytes_unchecked_mut(bytes)
166 }
167 }
168
169 /// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec
170 /// internal representation code lies.
171 ///
172 /// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into
173 /// the buffer components. Logically this is capable of behaving as
174 /// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually
175 /// exist.
176 ///
177 /// See [`VarZeroVecComponents::parse_bytes()`] for information on the internal invariants involved
178 #[derive(Debug)]
179 pub struct VarZeroVecComponents<'a, T: ?Sized, F> {
180 /// The number of elements
181 len: u32,
182 /// The list of indices into the `things` slice
183 /// Since the first element is always at things[0], the first element of the indices array is for the *second* element
184 indices: &'a [u8],
185 /// The contiguous list of `T::VarULE`s
186 things: &'a [u8],
187 marker: PhantomData<(&'a T, F)>,
188 }
189
190 // #[derive()] won't work here since we do not want it to be
191 // bound on T: Copy
192 impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {}
193 impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> {
clone(&self) -> Self194 fn clone(&self) -> Self {
195 *self
196 }
197 }
198
199 impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> {
200 #[inline]
default() -> Self201 fn default() -> Self {
202 Self::new()
203 }
204 }
205
206 impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> {
207 #[inline]
new() -> Self208 pub fn new() -> Self {
209 Self {
210 len: 0,
211 indices: &[],
212 things: &[],
213 marker: PhantomData,
214 }
215 }
216 }
217 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> {
218 /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
219 ///
220 /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize)
221 /// - There must be at least `4*(length - 1) + 4` bytes total, to form the array `indices` of indices
222 /// - `0..indices[0]` must index into a valid section of
223 /// `things` (the data after `indices`), such that it parses to a `T::VarULE`
224 /// - `indices[i - 1]..indices[i]` must index into a valid section of
225 /// `things` (the data after `indices`), such that it parses to a `T::VarULE`
226 /// - `indices[len - 2]..things.len()` must index into a valid section of
227 /// `things`, such that it parses to a `T::VarULE`
228 #[inline]
parse_bytes(slice: &'a [u8]) -> Result<Self, VarZeroVecFormatError>229 pub fn parse_bytes(slice: &'a [u8]) -> Result<Self, VarZeroVecFormatError> {
230 // The empty VZV is special-cased to the empty slice
231 if slice.is_empty() {
232 return Ok(VarZeroVecComponents {
233 len: 0,
234 indices: &[],
235 things: &[],
236 marker: PhantomData,
237 });
238 }
239 let len_bytes = slice
240 .get(0..F::Len::SIZE)
241 .ok_or(VarZeroVecFormatError::Metadata)?;
242 let len_ule =
243 F::Len::parse_bytes_to_slice(len_bytes).map_err(|_| VarZeroVecFormatError::Metadata)?;
244
245 let len = len_ule
246 .first()
247 .ok_or(VarZeroVecFormatError::Metadata)?
248 .iule_to_usize();
249
250 let rest = slice
251 .get(F::Len::SIZE..)
252 .ok_or(VarZeroVecFormatError::Metadata)?;
253 let len_u32 = u32::try_from(len).map_err(|_| VarZeroVecFormatError::Metadata);
254 // We pass down the rest of the invariants
255 Self::parse_bytes_with_length(len_u32?, rest)
256 }
257
258 /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
259 ///
260 /// - There must be at least `4*len` bytes total, to form the array `indices` of indices.
261 /// - `indices[i]..indices[i+1]` must index into a valid section of
262 /// `things` (the data after `indices`), such that it parses to a `T::VarULE`
263 /// - `indices[len - 1]..things.len()` must index into a valid section of
264 /// `things`, such that it parses to a `T::VarULE`
265 #[inline]
parse_bytes_with_length( len: u32, slice: &'a [u8], ) -> Result<Self, VarZeroVecFormatError>266 pub fn parse_bytes_with_length(
267 len: u32,
268 slice: &'a [u8],
269 ) -> Result<Self, VarZeroVecFormatError> {
270 let len_minus_one = len.checked_sub(1);
271 // The empty VZV is special-cased to the empty slice
272 let Some(len_minus_one) = len_minus_one else {
273 return Ok(VarZeroVecComponents {
274 len: 0,
275 indices: &[],
276 things: &[],
277 marker: PhantomData,
278 });
279 };
280 // The indices array is one element shorter since the first index is always 0,
281 // so we use len_minus_one
282 let indices_bytes = slice
283 .get(..F::Index::SIZE * (len_minus_one as usize))
284 .ok_or(VarZeroVecFormatError::Metadata)?;
285 let things = slice
286 .get(F::Index::SIZE * (len_minus_one as usize)..)
287 .ok_or(VarZeroVecFormatError::Metadata)?;
288
289 let borrowed = VarZeroVecComponents {
290 len,
291 indices: indices_bytes,
292 things,
293 marker: PhantomData,
294 };
295
296 borrowed.check_indices_and_things()?;
297
298 Ok(borrowed)
299 }
300
301 /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
302 /// successfully returned a [`VarZeroVecComponents`] when passed to
303 /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same
304 /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`].
305 ///
306 /// # Safety
307 /// The bytes must have previously successfully run through
308 /// [`VarZeroVecComponents::parse_bytes()`]
from_bytes_unchecked(slice: &'a [u8]) -> Self309 pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
310 // The empty VZV is special-cased to the empty slice
311 if slice.is_empty() {
312 return VarZeroVecComponents {
313 len: 0,
314 indices: &[],
315 things: &[],
316 marker: PhantomData,
317 };
318 }
319 let (len_bytes, data_bytes) = unsafe { slice.split_at_unchecked(F::Len::SIZE) };
320 // Safety: F::Len allows all byte sequences
321 let len_ule = F::Len::slice_from_bytes_unchecked(len_bytes);
322
323 let len = len_ule.get_unchecked(0).iule_to_usize();
324 let len_u32 = len as u32;
325
326 // Safety: This method requires the bytes to have passed through `parse_bytes()`
327 // whereas we're calling something that asks for `parse_bytes_with_length()`.
328 // The two methods perform similar validation, with parse_bytes() validating an additional
329 // 4-byte `length` header.
330 Self::from_bytes_unchecked_with_length(len_u32, data_bytes)
331 }
332
333 /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously
334 /// successfully returned a [`VarZeroVecComponents`] when passed to
335 /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same
336 /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`].
337 ///
338 /// # Safety
339 /// The len,bytes must have previously successfully run through
340 /// [`VarZeroVecComponents::parse_bytes_with_length()`]
from_bytes_unchecked_with_length(len: u32, slice: &'a [u8]) -> Self341 pub unsafe fn from_bytes_unchecked_with_length(len: u32, slice: &'a [u8]) -> Self {
342 let len_minus_one = len.checked_sub(1);
343 // The empty VZV is special-cased to the empty slice
344 let Some(len_minus_one) = len_minus_one else {
345 return VarZeroVecComponents {
346 len: 0,
347 indices: &[],
348 things: &[],
349 marker: PhantomData,
350 };
351 };
352 // The indices array is one element shorter since the first index is always 0,
353 // so we use len_minus_one
354 let indices_bytes = slice.get_unchecked(..F::Index::SIZE * (len_minus_one as usize));
355 let things = slice.get_unchecked(F::Index::SIZE * (len_minus_one as usize)..);
356
357 VarZeroVecComponents {
358 len,
359 indices: indices_bytes,
360 things,
361 marker: PhantomData,
362 }
363 }
364
365 /// Get the number of elements in this vector
366 #[inline]
len(self) -> usize367 pub fn len(self) -> usize {
368 self.len as usize
369 }
370
371 /// Returns `true` if the vector contains no elements.
372 #[inline]
is_empty(self) -> bool373 pub fn is_empty(self) -> bool {
374 self.len == 0
375 }
376
377 /// Get the idx'th element out of this slice. Returns `None` if out of bounds.
378 #[inline]
get(self, idx: usize) -> Option<&'a T>379 pub fn get(self, idx: usize) -> Option<&'a T> {
380 if idx >= self.len() {
381 return None;
382 }
383 Some(unsafe { self.get_unchecked(idx) })
384 }
385
386 /// Get the idx'th element out of this slice. Does not bounds check.
387 ///
388 /// Safety:
389 /// - `idx` must be in bounds (`idx < self.len()`)
390 #[inline]
get_unchecked(self, idx: usize) -> &'a T391 pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T {
392 let range = self.get_things_range(idx);
393 let things_slice = self.things.get_unchecked(range);
394 T::from_bytes_unchecked(things_slice)
395 }
396
397 /// Get the range in `things` for the element at `idx`. Does not bounds check.
398 ///
399 /// Safety:
400 /// - `idx` must be in bounds (`idx < self.len()`)
401 #[inline]
get_things_range(self, idx: usize) -> Range<usize>402 pub(crate) unsafe fn get_things_range(self, idx: usize) -> Range<usize> {
403 let start = if let Some(idx_minus_one) = idx.checked_sub(1) {
404 self.indices_slice()
405 .get_unchecked(idx_minus_one)
406 .iule_to_usize()
407 } else {
408 0
409 };
410 let end = if idx + 1 == self.len() {
411 self.things.len()
412 } else {
413 self.indices_slice().get_unchecked(idx).iule_to_usize()
414 };
415 debug_assert!(start <= end);
416 start..end
417 }
418
419 /// Get the size, in bytes, of the indices array
get_indices_size(self) -> usize420 pub(crate) unsafe fn get_indices_size(self) -> usize {
421 self.indices.len()
422 }
423
424 /// Check the internal invariants of VarZeroVecComponents:
425 ///
426 /// - `indices[i]..indices[i+1]` must index into a valid section of
427 /// `things`, such that it parses to a `T::VarULE`
428 /// - `indices[len - 1]..things.len()` must index into a valid section of
429 /// `things`, such that it parses to a `T::VarULE`
430 /// - `indices` is monotonically increasing
431 ///
432 /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods
433 /// assume that the slice has been passed through check_indices_and_things
434 #[inline]
435 #[allow(clippy::len_zero)] // more explicit to enforce safety invariants
check_indices_and_things(self) -> Result<(), VarZeroVecFormatError>436 fn check_indices_and_things(self) -> Result<(), VarZeroVecFormatError> {
437 if self.len() == 0 {
438 if self.things.len() > 0 {
439 return Err(VarZeroVecFormatError::Metadata);
440 } else {
441 return Ok(());
442 }
443 }
444 let indices_slice = self.indices_slice();
445 assert_eq!(self.len(), indices_slice.len() + 1);
446 // Safety: i is in bounds (assertion above)
447 let mut start = 0;
448 for i in 0..self.len() {
449 // The indices array is offset by 1: indices[0] is the end of the first
450 // element and the start of the next, since the start of the first element
451 // is always things[0]. So to get the end we get element `i`.
452 let end = if let Some(end) = indices_slice.get(i) {
453 end.iule_to_usize()
454 } else {
455 // This only happens at i = self.len() - 1 = indices_slice.len() + 1 - 1
456 // = indices_slice.len(). This is the last `end`, which is always the size of
457 // `things` and thus never stored in the array
458 self.things.len()
459 };
460
461 if start > end {
462 return Err(VarZeroVecFormatError::Metadata);
463 }
464 if end > self.things.len() {
465 return Err(VarZeroVecFormatError::Metadata);
466 }
467 // Safety: start..end is a valid range in self.things
468 let bytes = unsafe { self.things.get_unchecked(start..end) };
469 T::parse_bytes(bytes).map_err(VarZeroVecFormatError::Values)?;
470 start = end;
471 }
472 Ok(())
473 }
474
475 /// Create an iterator over the Ts contained in VarZeroVecComponents
476 #[inline]
iter(self) -> VarZeroSliceIter<'a, T, F>477 pub fn iter(self) -> VarZeroSliceIter<'a, T, F> {
478 VarZeroSliceIter::new(self)
479 }
480
481 #[cfg(feature = "alloc")]
to_vec(self) -> alloc::vec::Vec<alloc::boxed::Box<T>>482 pub fn to_vec(self) -> alloc::vec::Vec<alloc::boxed::Box<T>> {
483 self.iter().map(T::to_boxed).collect()
484 }
485
486 #[inline]
indices_slice(&self) -> &'a [F::Index]487 fn indices_slice(&self) -> &'a [F::Index] {
488 unsafe { F::Index::slice_from_bytes_unchecked(self.indices) }
489 }
490
491 // Dump a debuggable representation of this type
492 #[allow(unused)] // useful for debugging
493 #[cfg(feature = "alloc")]
dump(&self) -> alloc::string::String494 pub(crate) fn dump(&self) -> alloc::string::String {
495 let indices = self
496 .indices_slice()
497 .iter()
498 .copied()
499 .map(IntegerULE::iule_to_usize)
500 .collect::<alloc::vec::Vec<_>>();
501 alloc::format!("VarZeroVecComponents {{ indices: {indices:?} }}")
502 }
503 }
504
505 /// An iterator over VarZeroSlice
506 #[derive(Debug)]
507 pub struct VarZeroSliceIter<'a, T: ?Sized, F = Index16> {
508 components: VarZeroVecComponents<'a, T, F>,
509 index: usize,
510 // Safety invariant: must be a valid index into the data segment of `components`, or an index at the end
511 // i.e. start_index <= components.things.len()
512 //
513 // It must be a valid index into the `things` array of components, coming from `components.indices_slice()`
514 start_index: usize,
515 }
516
517 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> {
new(c: VarZeroVecComponents<'a, T, F>) -> Self518 fn new(c: VarZeroVecComponents<'a, T, F>) -> Self {
519 Self {
520 components: c,
521 index: 0,
522 // Invariant upheld, 0 is always a valid index-or-end
523 start_index: 0,
524 }
525 }
526 }
527 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> {
528 type Item = &'a T;
529
next(&mut self) -> Option<Self::Item>530 fn next(&mut self) -> Option<Self::Item> {
531 // Note: the indices array doesn't contain 0 or len, we need to specially handle those edges. The 0 is handled
532 // by start_index, and the len is handled by the code for `end`.
533
534 if self.index >= self.components.len() {
535 return None;
536 }
537
538 // Invariant established: self.index is in bounds for self.components.len(),
539 // which means it is in bounds for self.components.indices_slice() since that has the same length
540
541 let end = if self.index + 1 == self.components.len() {
542 // We don't store the end index since it is computable, so the last element should use self.components.things.len()
543 self.components.things.len()
544 } else {
545 // Safety: self.index was known to be in bounds from the bounds check above.
546 unsafe {
547 self.components
548 .indices_slice()
549 .get_unchecked(self.index)
550 .iule_to_usize()
551 }
552 };
553 // Invariant established: end has the same invariant as self.start_index since it comes from indices_slice, which is guaranteed
554 // to only contain valid indexes
555
556 let item = unsafe {
557 // Safety: self.start_index and end both have in-range invariants, plus they are valid indices from indices_slice
558 // which means we can treat this data as a T
559 T::from_bytes_unchecked(self.components.things.get_unchecked(self.start_index..end))
560 };
561 self.index += 1;
562 // Invariant upheld: end has the same invariant as self.start_index
563 self.start_index = end;
564 Some(item)
565 }
566
size_hint(&self) -> (usize, Option<usize>)567 fn size_hint(&self) -> (usize, Option<usize>) {
568 let remainder = self.components.len() - self.index;
569 (remainder, Some(remainder))
570 }
571 }
572
573 impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> {
len(&self) -> usize574 fn len(&self) -> usize {
575 self.components.len() - self.index
576 }
577 }
578
579 impl<'a, T, F> VarZeroVecComponents<'a, T, F>
580 where
581 T: VarULE,
582 T: ?Sized,
583 T: Ord,
584 F: VarZeroVecFormat,
585 {
586 /// Binary searches a sorted `VarZeroVecComponents<T>` for the given element. For more information, see
587 /// the primitive function [`binary_search`](slice::binary_search).
binary_search(&self, needle: &T) -> Result<usize, usize>588 pub fn binary_search(&self, needle: &T) -> Result<usize, usize> {
589 self.binary_search_by(|probe| probe.cmp(needle))
590 }
591
binary_search_in_range( &self, needle: &T, range: Range<usize>, ) -> Option<Result<usize, usize>>592 pub fn binary_search_in_range(
593 &self,
594 needle: &T,
595 range: Range<usize>,
596 ) -> Option<Result<usize, usize>> {
597 self.binary_search_in_range_by(|probe| probe.cmp(needle), range)
598 }
599 }
600
601 impl<'a, T, F> VarZeroVecComponents<'a, T, F>
602 where
603 T: VarULE,
604 T: ?Sized,
605 F: VarZeroVecFormat,
606 {
607 /// Binary searches a sorted `VarZeroVecComponents<T>` for the given predicate. For more information, see
608 /// the primitive function [`binary_search_by`](slice::binary_search_by).
binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize>609 pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
610 // Safety: 0 and len are in range
611 unsafe { self.binary_search_in_range_unchecked(predicate, 0..self.len()) }
612 }
613
614 // Binary search within a range.
615 // Values returned are relative to the range start!
binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range<usize>, ) -> Option<Result<usize, usize>>616 pub fn binary_search_in_range_by(
617 &self,
618 predicate: impl FnMut(&T) -> Ordering,
619 range: Range<usize>,
620 ) -> Option<Result<usize, usize>> {
621 if range.end > self.len() {
622 return None;
623 }
624 if range.end < range.start {
625 return None;
626 }
627 // Safety: We bounds checked above: end is in-bounds or len, and start is <= end
628 let range_absolute =
629 unsafe { self.binary_search_in_range_unchecked(predicate, range.clone()) };
630 // The values returned are relative to the range start
631 Some(
632 range_absolute
633 .map(|o| o - range.start)
634 .map_err(|e| e - range.start),
635 )
636 }
637
638 /// Safety: range must be in range for the slice (start <= len, end <= len, start <= end)
binary_search_in_range_unchecked( &self, mut predicate: impl FnMut(&T) -> Ordering, range: Range<usize>, ) -> Result<usize, usize>639 unsafe fn binary_search_in_range_unchecked(
640 &self,
641 mut predicate: impl FnMut(&T) -> Ordering,
642 range: Range<usize>,
643 ) -> Result<usize, usize> {
644 // Function invariant: size is always end - start
645 let mut start = range.start;
646 let mut end = range.end;
647 let mut size;
648
649 // Loop invariant: 0 <= start < end <= len
650 // This invariant is initialized by the function safety invariants and the loop condition
651 while start < end {
652 size = end - start;
653 // This establishes mid < end (which implies mid < len)
654 // size is end - start. start + size is end (which is <= len).
655 // mid = start + size/2 will be less than end
656 let mid = start + size / 2;
657
658 // Safety: mid is < end <= len, so in-range
659 let cmp = predicate(self.get_unchecked(mid));
660
661 match cmp {
662 Ordering::Less => {
663 // This retains the loop invariant since it
664 // increments start, and we already have 0 <= start
665 // start < end is enforced by the loop condition
666 start = mid + 1;
667 }
668 Ordering::Greater => {
669 // mid < end, so this decreases end.
670 // This means end <= len is still true, and
671 // end > start is enforced by the loop condition
672 end = mid;
673 }
674 Ordering::Equal => return Ok(mid),
675 }
676 }
677 Err(start)
678 }
679 }
680
681 /// Collects the bytes for a VarZeroSlice into a Vec.
682 #[cfg(feature = "alloc")]
get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<alloc::vec::Vec<u8>> where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,683 pub fn get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<alloc::vec::Vec<u8>>
684 where
685 T: VarULE + ?Sized,
686 A: EncodeAsVarULE<T>,
687 F: VarZeroVecFormat,
688 {
689 debug_assert!(!elements.is_empty());
690 let len = compute_serializable_len::<T, A, F>(elements)?;
691 debug_assert!(
692 len >= F::Len::SIZE as u32,
693 "Must have at least F::Len::SIZE bytes to hold the length of the vector"
694 );
695 let mut output = alloc::vec![0u8; len as usize];
696 write_serializable_bytes::<T, A, F>(elements, &mut output);
697 Some(output)
698 }
699
700 /// Writes the bytes for a VarZeroLengthlessSlice into an output buffer.
701 /// Usable for a VarZeroSlice if you first write the length bytes.
702 ///
703 /// Every byte in the buffer will be initialized after calling this function.
704 ///
705 /// # Panics
706 ///
707 /// Panics if the buffer is not exactly the correct length.
write_serializable_bytes_without_length<T, A, F>(elements: &[A], output: &mut [u8]) where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,708 pub fn write_serializable_bytes_without_length<T, A, F>(elements: &[A], output: &mut [u8])
709 where
710 T: VarULE + ?Sized,
711 A: EncodeAsVarULE<T>,
712 F: VarZeroVecFormat,
713 {
714 assert!(elements.len() <= F::Len::MAX_VALUE as usize);
715 if elements.is_empty() {
716 return;
717 }
718
719 // idx_offset = offset from the start of the buffer for the next index
720 let mut idx_offset: usize = 0;
721 // first_dat_offset = offset from the start of the buffer of the first data block
722 let first_dat_offset: usize = idx_offset + (elements.len() - 1) * F::Index::SIZE;
723 // dat_offset = offset from the start of the buffer of the next data block
724 let mut dat_offset: usize = first_dat_offset;
725
726 for (i, element) in elements.iter().enumerate() {
727 let element_len = element.encode_var_ule_len();
728
729 // The first index is always 0. We don't write it, or update the idx offset.
730 if i != 0 {
731 let idx_limit = idx_offset + F::Index::SIZE;
732 #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
733 let idx_slice = &mut output[idx_offset..idx_limit];
734 // VZV expects data offsets to be stored relative to the first data block
735 let idx = dat_offset - first_dat_offset;
736 assert!(idx <= F::Index::MAX_VALUE as usize);
737 #[allow(clippy::expect_used)] // this function is explicitly panicky
738 let bytes_to_write = F::Index::iule_from_usize(idx).expect(F::Index::TOO_LARGE_ERROR);
739 idx_slice.copy_from_slice(ULE::slice_as_bytes(&[bytes_to_write]));
740
741 idx_offset = idx_limit;
742 }
743
744 let dat_limit = dat_offset + element_len;
745 #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
746 let dat_slice = &mut output[dat_offset..dat_limit];
747 element.encode_var_ule_write(dat_slice);
748 debug_assert_eq!(T::validate_bytes(dat_slice), Ok(()));
749 dat_offset = dat_limit;
750 }
751
752 debug_assert_eq!(idx_offset, F::Index::SIZE * (elements.len() - 1));
753 assert_eq!(dat_offset, output.len());
754 }
755
756 /// Writes the bytes for a VarZeroSlice into an output buffer.
757 ///
758 /// Every byte in the buffer will be initialized after calling this function.
759 ///
760 /// # Panics
761 ///
762 /// Panics if the buffer is not exactly the correct length.
write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8]) where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,763 pub fn write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8])
764 where
765 T: VarULE + ?Sized,
766 A: EncodeAsVarULE<T>,
767 F: VarZeroVecFormat,
768 {
769 if elements.is_empty() {
770 return;
771 }
772 assert!(elements.len() <= F::Len::MAX_VALUE as usize);
773 #[allow(clippy::expect_used)] // This function is explicitly panicky
774 let num_elements_ule = F::Len::iule_from_usize(elements.len()).expect(F::Len::TOO_LARGE_ERROR);
775 #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
776 output[0..F::Len::SIZE].copy_from_slice(ULE::slice_as_bytes(&[num_elements_ule]));
777
778 #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
779 write_serializable_bytes_without_length::<T, A, F>(elements, &mut output[F::Len::SIZE..]);
780 }
781
compute_serializable_len_without_length<T, A, F>(elements: &[A]) -> Option<u32> where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,782 pub fn compute_serializable_len_without_length<T, A, F>(elements: &[A]) -> Option<u32>
783 where
784 T: VarULE + ?Sized,
785 A: EncodeAsVarULE<T>,
786 F: VarZeroVecFormat,
787 {
788 let elements_len = elements.len();
789 let Some(elements_len_minus_one) = elements_len.checked_sub(1) else {
790 // Empty vec is optimized to an empty byte representation
791 return Some(0);
792 };
793 let idx_len: u32 = u32::try_from(elements_len_minus_one)
794 .ok()?
795 .checked_mul(F::Index::SIZE as u32)?;
796 let data_len: u32 = elements
797 .iter()
798 .map(|v| u32::try_from(v.encode_var_ule_len()).ok())
799 .try_fold(0u32, |s, v| s.checked_add(v?))?;
800 let ret = idx_len.checked_add(data_len);
801 if let Some(r) = ret {
802 if r >= F::Index::MAX_VALUE {
803 return None;
804 }
805 }
806 ret
807 }
808
compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32> where T: VarULE + ?Sized, A: EncodeAsVarULE<T>, F: VarZeroVecFormat,809 pub fn compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32>
810 where
811 T: VarULE + ?Sized,
812 A: EncodeAsVarULE<T>,
813 F: VarZeroVecFormat,
814 {
815 compute_serializable_len_without_length::<T, A, F>(elements).map(|x| x + F::Len::SIZE as u32)
816 }
817