• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 #![allow(clippy::upper_case_acronyms)]
6 
//! Traits over unaligned little-endian data (ULE, pronounced "yule").
//!
//! The main traits for this module are [`ULE`], [`AsULE`], and [`VarULE`].
//!
//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits
//! work under the hood.
13 mod chars;
14 #[cfg(doc)]
15 pub mod custom;
16 mod encode;
17 mod macros;
18 mod multi;
19 mod niche;
20 mod option;
21 mod plain;
22 mod slices;
23 #[cfg(test)]
24 pub mod test_utils;
25 
26 pub mod tuple;
27 pub mod tuplevar;
28 pub mod vartuple;
29 pub use chars::CharULE;
30 #[cfg(feature = "alloc")]
31 pub use encode::encode_varule_to_box;
32 pub use encode::EncodeAsVarULE;
33 pub use multi::MultiFieldsULE;
34 pub use niche::{NicheBytes, NichedOption, NichedOptionULE};
35 pub use option::{OptionULE, OptionVarULE};
36 pub use plain::RawBytesULE;
37 
38 use core::{any, fmt, mem, slice};
39 
40 /// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice.
41 ///
42 /// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or
43 ///  [`#[derive(ULE)]`](macro@ULE) instead.
44 ///
45 /// Types that are not fixed-width can implement [`VarULE`] instead.
46 ///
47 /// "ULE" stands for "Unaligned little-endian"
48 ///
49 /// # Safety
50 ///
51 /// Safety checklist for `ULE`:
52 ///
53 /// 1. The type *must not* include any uninitialized or padding bytes.
54 /// 2. The type must have an alignment of 1 byte, or it is a ZST that is safe to construct.
55 /// 3. The impl of [`ULE::validate_bytes()`] *must* return an error if the given byte slice
56 ///    would not represent a valid slice of this type.
57 /// 4. The impl of [`ULE::validate_bytes()`] *must* return an error if the given byte slice
58 ///    cannot be used in its entirety (if its length is not a multiple of `size_of::<Self>()`).
59 /// 5. All other methods *must* be left with their default impl, or else implemented according to
60 ///    their respective safety guidelines.
61 /// 6. Acknowledge the following note about the equality invariant.
62 ///
63 /// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2,
64 /// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(C, packed)]` or `#[repr(transparent)]`.
65 ///
66 /// # Equality invariant
67 ///
68 /// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically
69 /// equivalent to byte equality on [`Self::slice_as_bytes()`].
70 ///
71 /// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not
72 /// equal byte equality. In such a case, [`Self::validate_bytes()`] should return an error
73 /// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and
74 /// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form
75 /// where only a single digit is allowed before `.`.
76 ///
77 /// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
78 /// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
79 pub unsafe trait ULE
80 where
81     Self: Sized,
82     Self: Copy + 'static,
83 {
84     /// Validates a byte slice, `&[u8]`.
85     ///
86     /// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
87     /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok`
88     /// should be returned; otherwise, `Err` should be returned.
validate_bytes(bytes: &[u8]) -> Result<(), UleError>89     fn validate_bytes(bytes: &[u8]) -> Result<(), UleError>;
90 
91     /// Parses a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime.
92     ///
93     /// If `Self` is not well-defined for all possible bit values, the bytes should be validated,
94     /// and an error should be returned in the same cases as [`Self::validate_bytes()`].
95     ///
96     /// The default implementation executes [`Self::validate_bytes()`] followed by
97     /// [`Self::slice_from_bytes_unchecked`].
98     ///
99     /// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This
100     /// means that the returned slice can span the entire byte slice.
parse_bytes_to_slice(bytes: &[u8]) -> Result<&[Self], UleError>101     fn parse_bytes_to_slice(bytes: &[u8]) -> Result<&[Self], UleError> {
102         Self::validate_bytes(bytes)?;
103         debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
104         Ok(unsafe { Self::slice_from_bytes_unchecked(bytes) })
105     }
106 
107     /// Takes a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime, assuming
108     /// that this byte slice has previously been run through [`Self::parse_bytes_to_slice()`] with
109     /// success.
110     ///
111     /// The default implementation performs a pointer cast to the same region of memory.
112     ///
113     /// # Safety
114     ///
115     /// ## Callers
116     ///
117     /// Callers of this method must take care to ensure that `bytes` was previously passed through
118     /// [`Self::validate_bytes()`] with success (and was not changed since then).
119     ///
120     /// ## Implementors
121     ///
122     /// Implementations of this method may call unsafe functions to cast the pointer to the correct
123     /// type, assuming the "Callers" invariant above.
124     ///
125     /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths.
126     ///
127     /// Safety checklist:
128     ///
129     /// 1. This method *must* return the same result as [`Self::parse_bytes_to_slice()`].
130     /// 2. This method *must* return a slice to the same region of memory as the argument.
131     #[inline]
slice_from_bytes_unchecked(bytes: &[u8]) -> &[Self]132     unsafe fn slice_from_bytes_unchecked(bytes: &[u8]) -> &[Self] {
133         let data = bytes.as_ptr();
134         let len = bytes.len() / mem::size_of::<Self>();
135         debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
136         core::slice::from_raw_parts(data as *const Self, len)
137     }
138 
139     /// Given `&[Self]`, returns a `&[u8]` with the same lifetime.
140     ///
141     /// The default implementation performs a pointer cast to the same region of memory.
142     ///
143     /// # Safety
144     ///
145     /// Implementations of this method should call potentially unsafe functions to cast the
146     /// pointer to the correct type.
147     ///
148     /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths.
149     #[inline]
150     #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219
slice_as_bytes(slice: &[Self]) -> &[u8]151     fn slice_as_bytes(slice: &[Self]) -> &[u8] {
152         unsafe {
153             slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice))
154         }
155     }
156 }
157 
158 /// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type.
159 ///
160 /// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead.
161 pub trait AsULE: Copy {
162     /// The ULE type corresponding to `Self`.
163     ///
164     /// Types having infallible conversions from all bit values (Plain Old Data) can use
165     /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`.
166     ///
167     /// Types that are not well-defined for all bit values should implement a custom ULE.
168     type ULE: ULE;
169 
170     /// Converts from `Self` to `Self::ULE`.
171     ///
172     /// This function may involve byte order swapping (native-endian to little-endian).
173     ///
174     /// For best performance, mark your implementation of this function `#[inline]`.
to_unaligned(self) -> Self::ULE175     fn to_unaligned(self) -> Self::ULE;
176 
177     /// Converts from `Self::ULE` to `Self`.
178     ///
179     /// This function may involve byte order swapping (little-endian to native-endian).
180     ///
181     /// For best performance, mark your implementation of this function `#[inline]`.
182     ///
183     /// # Safety
184     ///
185     /// This function is infallible because bit validation should have occurred when `Self::ULE`
186     /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like
187     /// `from_bytes_unchecked()`.
from_unaligned(unaligned: Self::ULE) -> Self188     fn from_unaligned(unaligned: Self::ULE) -> Self;
189 }
190 
191 /// A type whose byte sequence equals the byte sequence of its ULE type on
192 /// little-endian platforms.
193 ///
194 /// This enables certain performance optimizations, such as
195 /// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice).
196 ///
197 /// # Implementation safety
198 ///
199 /// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`)
200 /// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where
201 /// `*const T` can be cast to a valid `*const T::ULE`.
202 pub unsafe trait EqULE: AsULE {}
203 
204 /// A trait for a type where aligned slices can be cast to unaligned slices.
205 ///
206 /// Auto-implemented on all types implementing [`EqULE`].
207 pub trait SliceAsULE
208 where
209     Self: AsULE + Sized,
210 {
211     /// Converts from `&[Self]` to `&[Self::ULE]` if possible.
212     ///
213     /// In general, this function returns `Some` on little-endian and `None` on big-endian.
slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>214     fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>;
215 }
216 
217 #[cfg(target_endian = "little")]
218 impl<T> SliceAsULE for T
219 where
220     T: EqULE,
221 {
222     #[inline]
slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>223     fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> {
224         // This is safe because on little-endian platforms, the byte sequence of &[T]
225         // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE,
226         // and &[T::ULE] has equal or looser alignment than &[T].
227         let ule_slice =
228             unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) };
229         Some(ule_slice)
230     }
231 }
232 
#[cfg(not(target_endian = "little"))]
impl<T> SliceAsULE for T
where
    T: EqULE,
{
    /// Always returns `None` on this target.
    ///
    /// [`EqULE`] only promises matching byte sequences on little-endian platforms, so no
    /// in-place cast is offered here.
    #[inline]
    fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> {
        None
    }
}
243 
244 /// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice.
245 ///
246 /// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or
247 ///  [`#[derive(VarULE)]`](macro@VarULE) instead.
248 ///
249 /// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types;
250 /// however, it is much more preferable to use [`ULE`] for that purpose. The [`custom`] module contains
251 /// additional documentation on how this type can be implemented on custom types.
252 ///
253 /// If deserialization with `VarZeroVec` is desired is recommended to implement `Deserialize` for
254 /// `Box<T>` (serde does not do this automatically for unsized `T`).
255 ///
256 /// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom)
257 /// on some stack type to convert to and from the ULE type efficiently when necessary.
258 ///
259 /// # Safety
260 ///
261 /// Safety checklist for `VarULE`:
262 ///
263 /// 1. The type *must not* include any uninitialized or padding bytes.
264 /// 2. The type must have an alignment of 1 byte.
265 /// 3. The impl of [`VarULE::validate_bytes()`] *must* return an error if the given byte slice
266 ///    would not represent a valid slice of this type.
267 /// 4. The impl of [`VarULE::validate_bytes()`] *must* return an error if the given byte slice
268 ///    cannot be used in its entirety.
269 /// 5. The impl of [`VarULE::from_bytes_unchecked()`] must produce a reference to the same
270 ///    underlying data assuming that the given bytes previously passed validation.
271 /// 6. All other methods *must* be left with their default impl, or else implemented according to
272 ///    their respective safety guidelines.
273 /// 7. Acknowledge the following note about the equality invariant.
274 ///
275 /// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2,
276 /// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(C, packed)]` or `#[repr(transparent)]`.
277 ///
278 /// # Equality invariant
279 ///
280 /// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically
281 /// equivalent to byte equality on [`Self::as_bytes()`].
282 ///
283 /// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not
284 /// equal byte equality. In such a case, [`Self::validate_bytes()`] should return an error
285 /// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and
286 /// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form
287 /// where only a single digit is allowed before `.`.
288 ///
289 /// There may also be cases where a `VarULE` has muiltiple canonical forms, such as a faster
290 /// version and a smaller version. The cleanest way to handle this case would be separate types.
291 /// However, if this is not feasible, then the application should ensure that the data it is
292 /// deserializing is in the expected form. For example, if the data is being loaded from an
293 /// external source, then requests could carry information about the expected form of the data.
294 ///
295 /// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
296 /// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
297 pub unsafe trait VarULE: 'static {
298     /// Validates a byte slice, `&[u8]`.
299     ///
300     /// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
301     /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should
302     /// be returned; otherwise, `Self::Error` should be returned.
validate_bytes(_bytes: &[u8]) -> Result<(), UleError>303     fn validate_bytes(_bytes: &[u8]) -> Result<(), UleError>;
304 
305     /// Parses a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime.
306     ///
307     /// If `Self` is not well-defined for all possible bit values, the bytes should be validated,
308     /// and an error should be returned in the same cases as [`Self::validate_bytes()`].
309     ///
310     /// The default implementation executes [`Self::validate_bytes()`] followed by
311     /// [`Self::from_bytes_unchecked`].
312     ///
313     /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`,
314     /// where `result` is the successful return value of the method. This means that the return
315     /// value spans the entire byte slice.
parse_bytes(bytes: &[u8]) -> Result<&Self, UleError>316     fn parse_bytes(bytes: &[u8]) -> Result<&Self, UleError> {
317         Self::validate_bytes(bytes)?;
318         let result = unsafe { Self::from_bytes_unchecked(bytes) };
319         debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes));
320         Ok(result)
321     }
322 
323     /// Takes a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime, assuming
324     /// that this byte slice has previously been run through [`Self::parse_bytes()`] with
325     /// success.
326     ///
327     /// # Safety
328     ///
329     /// ## Callers
330     ///
331     /// Callers of this method must take care to ensure that `bytes` was previously passed through
332     /// [`Self::validate_bytes()`] with success (and was not changed since then).
333     ///
334     /// ## Implementors
335     ///
336     /// Implementations of this method may call unsafe functions to cast the pointer to the correct
337     /// type, assuming the "Callers" invariant above.
338     ///
339     /// Safety checklist:
340     ///
341     /// 1. This method *must* return the same result as [`Self::parse_bytes()`].
342     /// 2. This method *must* return a slice to the same region of memory as the argument.
from_bytes_unchecked(bytes: &[u8]) -> &Self343     unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self;
344 
345     /// Given `&Self`, returns a `&[u8]` with the same lifetime.
346     ///
347     /// The default implementation performs a pointer cast to the same region of memory.
348     ///
349     /// # Safety
350     ///
351     /// Implementations of this method should call potentially unsafe functions to cast the
352     /// pointer to the correct type.
353     #[inline]
as_bytes(&self) -> &[u8]354     fn as_bytes(&self) -> &[u8] {
355         unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) }
356     }
357 
358     /// Allocate on the heap as a `Box<T>`
359     #[inline]
360     #[cfg(feature = "alloc")]
to_boxed(&self) -> alloc::boxed::Box<Self>361     fn to_boxed(&self) -> alloc::boxed::Box<Self> {
362         use alloc::borrow::ToOwned;
363         use alloc::boxed::Box;
364         use core::alloc::Layout;
365         let bytesvec = self.as_bytes().to_owned().into_boxed_slice();
366         let bytesvec = mem::ManuallyDrop::new(bytesvec);
367         unsafe {
368             // Get the pointer representation
369             let ptr: *mut Self = Self::from_bytes_unchecked(&bytesvec) as *const Self as *mut Self;
370             assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec));
371             // Transmute the pointer to an owned pointer
372             Box::from_raw(ptr)
373         }
374     }
375 }
376 
377 // Proc macro reexports
378 //
379 // These exist so that our docs can use intra-doc links.
380 // Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from
381 // a submodule
382 
383 /// Custom derive for [`ULE`].
384 ///
385 /// This can be attached to [`Copy`] structs containing only [`ULE`] types.
386 ///
387 /// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining
388 /// a custom ULE type.
389 #[cfg(feature = "derive")]
390 pub use zerovec_derive::ULE;
391 
392 /// Custom derive for [`VarULE`]
393 ///
394 /// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end.
395 ///
396 /// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining
397 /// a custom [`VarULE`] type.
398 #[cfg(feature = "derive")]
399 pub use zerovec_derive::VarULE;
400 
/// An error type to be used for decoding slices of ULE types
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum UleError {
    /// Attempted to parse a buffer into a slice of the given ULE type but its
    /// length was not compatible.
    ///
    /// Typically created by a [`ULE`] impl via [`UleError::length()`].
    ///
    /// [`ULE`]: crate::ule::ULE
    InvalidLength {
        /// Name of the type that was being decoded.
        ty: &'static str,
        /// The rejected length.
        len: usize,
    },
    /// The byte sequence provided for `ty` failed to parse correctly in the
    /// given ULE type.
    ///
    /// Typically created by a [`ULE`] impl via [`UleError::parse()`].
    ///
    /// [`ULE`]: crate::ule::ULE
    ParseError {
        /// Name of the type that was being decoded.
        ty: &'static str,
    },
}
420 
421 impl fmt::Display for UleError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error>422     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
423         match *self {
424             UleError::InvalidLength { ty, len } => {
425                 write!(f, "Invalid length {len} for slice of type {ty}")
426             }
427             UleError::ParseError { ty } => {
428                 write!(f, "Could not parse bytes to slice of type {ty}")
429             }
430         }
431     }
432 }
433 
434 impl UleError {
435     /// Construct a parse error for the given type
parse<T: ?Sized + 'static>() -> UleError436     pub fn parse<T: ?Sized + 'static>() -> UleError {
437         UleError::ParseError {
438             ty: any::type_name::<T>(),
439         }
440     }
441 
442     /// Construct an "invalid length" error for the given type and length
length<T: ?Sized + 'static>(len: usize) -> UleError443     pub fn length<T: ?Sized + 'static>(len: usize) -> UleError {
444         UleError::InvalidLength {
445             ty: any::type_name::<T>(),
446             len,
447         }
448     }
449 }
450 
451 impl core::error::Error for UleError {}
452