1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 #![allow(clippy::upper_case_acronyms)] 6 7 //! Traits over unaligned little-endian data (ULE, pronounced "yule"). 8 //! 9 //! The main traits for this module are [`ULE`], [`AsULE`] and, [`VarULE`]. 10 //! 11 //! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits 12 //! works under the hood. 13 mod chars; 14 #[cfg(doc)] 15 pub mod custom; 16 mod encode; 17 mod macros; 18 mod multi; 19 mod niche; 20 mod option; 21 mod plain; 22 mod slices; 23 #[cfg(test)] 24 pub mod test_utils; 25 26 pub mod tuple; 27 pub mod tuplevar; 28 pub mod vartuple; 29 pub use chars::CharULE; 30 #[cfg(feature = "alloc")] 31 pub use encode::encode_varule_to_box; 32 pub use encode::EncodeAsVarULE; 33 pub use multi::MultiFieldsULE; 34 pub use niche::{NicheBytes, NichedOption, NichedOptionULE}; 35 pub use option::{OptionULE, OptionVarULE}; 36 pub use plain::RawBytesULE; 37 38 use core::{any, fmt, mem, slice}; 39 40 /// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice. 41 /// 42 /// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or 43 /// [`#[derive(ULE)]`](macro@ULE) instead. 44 /// 45 /// Types that are not fixed-width can implement [`VarULE`] instead. 46 /// 47 /// "ULE" stands for "Unaligned little-endian" 48 /// 49 /// # Safety 50 /// 51 /// Safety checklist for `ULE`: 52 /// 53 /// 1. The type *must not* include any uninitialized or padding bytes. 54 /// 2. The type must have an alignment of 1 byte, or it is a ZST that is safe to construct. 55 /// 3. The impl of [`ULE::validate_bytes()`] *must* return an error if the given byte slice 56 /// would not represent a valid slice of this type. 57 /// 4. The impl of [`ULE::validate_bytes()`] *must* return an error if the given byte slice 58 /// cannot be used in its entirety (if its length is not a multiple of `size_of::<Self>()`). 59 /// 5. All other methods *must* be left with their default impl, or else implemented according to 60 /// their respective safety guidelines. 61 /// 6. Acknowledge the following note about the equality invariant. 62 /// 63 /// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2, 64 /// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(C, packed)]` or `#[repr(transparent)]`. 65 /// 66 /// # Equality invariant 67 /// 68 /// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically 69 /// equivalent to byte equality on [`Self::slice_as_bytes()`]. 70 /// 71 /// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not 72 /// equal byte equality. In such a case, [`Self::validate_bytes()`] should return an error 73 /// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and 74 /// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form 75 /// where only a single digit is allowed before `.`. 76 /// 77 /// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may 78 /// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. 79 pub unsafe trait ULE 80 where 81 Self: Sized, 82 Self: Copy + 'static, 83 { 84 /// Validates a byte slice, `&[u8]`. 85 /// 86 /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. 87 /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok` 88 /// should be returned; otherwise, `Err` should be returned. validate_bytes(bytes: &[u8]) -> Result<(), UleError>89 fn validate_bytes(bytes: &[u8]) -> Result<(), UleError>; 90 91 /// Parses a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime. 92 /// 93 /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, 94 /// and an error should be returned in the same cases as [`Self::validate_bytes()`]. 95 /// 96 /// The default implementation executes [`Self::validate_bytes()`] followed by 97 /// [`Self::slice_from_bytes_unchecked`]. 98 /// 99 /// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This 100 /// means that the returned slice can span the entire byte slice. parse_bytes_to_slice(bytes: &[u8]) -> Result<&[Self], UleError>101 fn parse_bytes_to_slice(bytes: &[u8]) -> Result<&[Self], UleError> { 102 Self::validate_bytes(bytes)?; 103 debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0); 104 Ok(unsafe { Self::slice_from_bytes_unchecked(bytes) }) 105 } 106 107 /// Takes a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime, assuming 108 /// that this byte slice has previously been run through [`Self::parse_bytes_to_slice()`] with 109 /// success. 110 /// 111 /// The default implementation performs a pointer cast to the same region of memory. 112 /// 113 /// # Safety 114 /// 115 /// ## Callers 116 /// 117 /// Callers of this method must take care to ensure that `bytes` was previously passed through 118 /// [`Self::validate_bytes()`] with success (and was not changed since then). 119 /// 120 /// ## Implementors 121 /// 122 /// Implementations of this method may call unsafe functions to cast the pointer to the correct 123 /// type, assuming the "Callers" invariant above. 124 /// 125 /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. 126 /// 127 /// Safety checklist: 128 /// 129 /// 1. This method *must* return the same result as [`Self::parse_bytes_to_slice()`]. 130 /// 2. This method *must* return a slice to the same region of memory as the argument. 131 #[inline] slice_from_bytes_unchecked(bytes: &[u8]) -> &[Self]132 unsafe fn slice_from_bytes_unchecked(bytes: &[u8]) -> &[Self] { 133 let data = bytes.as_ptr(); 134 let len = bytes.len() / mem::size_of::<Self>(); 135 debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0); 136 core::slice::from_raw_parts(data as *const Self, len) 137 } 138 139 /// Given `&[Self]`, returns a `&[u8]` with the same lifetime. 140 /// 141 /// The default implementation performs a pointer cast to the same region of memory. 142 /// 143 /// # Safety 144 /// 145 /// Implementations of this method should call potentially unsafe functions to cast the 146 /// pointer to the correct type. 147 /// 148 /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. 149 #[inline] 150 #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219 slice_as_bytes(slice: &[Self]) -> &[u8]151 fn slice_as_bytes(slice: &[Self]) -> &[u8] { 152 unsafe { 153 slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice)) 154 } 155 } 156 } 157 158 /// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type. 159 /// 160 /// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead. 161 pub trait AsULE: Copy { 162 /// The ULE type corresponding to `Self`. 163 /// 164 /// Types having infallible conversions from all bit values (Plain Old Data) can use 165 /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`. 166 /// 167 /// Types that are not well-defined for all bit values should implement a custom ULE. 168 type ULE: ULE; 169 170 /// Converts from `Self` to `Self::ULE`. 171 /// 172 /// This function may involve byte order swapping (native-endian to little-endian). 173 /// 174 /// For best performance, mark your implementation of this function `#[inline]`. to_unaligned(self) -> Self::ULE175 fn to_unaligned(self) -> Self::ULE; 176 177 /// Converts from `Self::ULE` to `Self`. 178 /// 179 /// This function may involve byte order swapping (little-endian to native-endian). 180 /// 181 /// For best performance, mark your implementation of this function `#[inline]`. 182 /// 183 /// # Safety 184 /// 185 /// This function is infallible because bit validation should have occurred when `Self::ULE` 186 /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like 187 /// `from_bytes_unchecked()`. from_unaligned(unaligned: Self::ULE) -> Self188 fn from_unaligned(unaligned: Self::ULE) -> Self; 189 } 190 191 /// A type whose byte sequence equals the byte sequence of its ULE type on 192 /// little-endian platforms. 193 /// 194 /// This enables certain performance optimizations, such as 195 /// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice). 196 /// 197 /// # Implementation safety 198 /// 199 /// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`) 200 /// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where 201 /// `*const T` can be cast to a valid `*const T::ULE`. 202 pub unsafe trait EqULE: AsULE {} 203 204 /// A trait for a type where aligned slices can be cast to unaligned slices. 205 /// 206 /// Auto-implemented on all types implementing [`EqULE`]. 207 pub trait SliceAsULE 208 where 209 Self: AsULE + Sized, 210 { 211 /// Converts from `&[Self]` to `&[Self::ULE]` if possible. 212 /// 213 /// In general, this function returns `Some` on little-endian and `None` on big-endian. slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>214 fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>; 215 } 216 217 #[cfg(target_endian = "little")] 218 impl<T> SliceAsULE for T 219 where 220 T: EqULE, 221 { 222 #[inline] slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>223 fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> { 224 // This is safe because on little-endian platforms, the byte sequence of &[T] 225 // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE, 226 // and &[T::ULE] has equal or looser alignment than &[T]. 227 let ule_slice = 228 unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) }; 229 Some(ule_slice) 230 } 231 } 232 233 #[cfg(not(target_endian = "little"))] 234 impl<T> SliceAsULE for T 235 where 236 T: EqULE, 237 { 238 #[inline] slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]>239 fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> { 240 None 241 } 242 } 243 244 /// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice. 245 /// 246 /// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or 247 /// [`#[derive(VarULE)]`](macro@VarULE) instead. 248 /// 249 /// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types; 250 /// however, it is much more preferable to use [`ULE`] for that purpose. The [`custom`] module contains 251 /// additional documentation on how this type can be implemented on custom types. 252 /// 253 /// If deserialization with `VarZeroVec` is desired is recommended to implement `Deserialize` for 254 /// `Box<T>` (serde does not do this automatically for unsized `T`). 255 /// 256 /// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom) 257 /// on some stack type to convert to and from the ULE type efficiently when necessary. 258 /// 259 /// # Safety 260 /// 261 /// Safety checklist for `VarULE`: 262 /// 263 /// 1. The type *must not* include any uninitialized or padding bytes. 264 /// 2. The type must have an alignment of 1 byte. 265 /// 3. The impl of [`VarULE::validate_bytes()`] *must* return an error if the given byte slice 266 /// would not represent a valid slice of this type. 267 /// 4. The impl of [`VarULE::validate_bytes()`] *must* return an error if the given byte slice 268 /// cannot be used in its entirety. 269 /// 5. The impl of [`VarULE::from_bytes_unchecked()`] must produce a reference to the same 270 /// underlying data assuming that the given bytes previously passed validation. 271 /// 6. All other methods *must* be left with their default impl, or else implemented according to 272 /// their respective safety guidelines. 273 /// 7. Acknowledge the following note about the equality invariant. 274 /// 275 /// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2, 276 /// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(C, packed)]` or `#[repr(transparent)]`. 277 /// 278 /// # Equality invariant 279 /// 280 /// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically 281 /// equivalent to byte equality on [`Self::as_bytes()`]. 282 /// 283 /// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not 284 /// equal byte equality. In such a case, [`Self::validate_bytes()`] should return an error 285 /// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and 286 /// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form 287 /// where only a single digit is allowed before `.`. 288 /// 289 /// There may also be cases where a `VarULE` has muiltiple canonical forms, such as a faster 290 /// version and a smaller version. The cleanest way to handle this case would be separate types. 291 /// However, if this is not feasible, then the application should ensure that the data it is 292 /// deserializing is in the expected form. For example, if the data is being loaded from an 293 /// external source, then requests could carry information about the expected form of the data. 294 /// 295 /// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may 296 /// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. 297 pub unsafe trait VarULE: 'static { 298 /// Validates a byte slice, `&[u8]`. 299 /// 300 /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. 301 /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should 302 /// be returned; otherwise, `Self::Error` should be returned. validate_bytes(_bytes: &[u8]) -> Result<(), UleError>303 fn validate_bytes(_bytes: &[u8]) -> Result<(), UleError>; 304 305 /// Parses a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime. 306 /// 307 /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, 308 /// and an error should be returned in the same cases as [`Self::validate_bytes()`]. 309 /// 310 /// The default implementation executes [`Self::validate_bytes()`] followed by 311 /// [`Self::from_bytes_unchecked`]. 312 /// 313 /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`, 314 /// where `result` is the successful return value of the method. This means that the return 315 /// value spans the entire byte slice. parse_bytes(bytes: &[u8]) -> Result<&Self, UleError>316 fn parse_bytes(bytes: &[u8]) -> Result<&Self, UleError> { 317 Self::validate_bytes(bytes)?; 318 let result = unsafe { Self::from_bytes_unchecked(bytes) }; 319 debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes)); 320 Ok(result) 321 } 322 323 /// Takes a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime, assuming 324 /// that this byte slice has previously been run through [`Self::parse_bytes()`] with 325 /// success. 326 /// 327 /// # Safety 328 /// 329 /// ## Callers 330 /// 331 /// Callers of this method must take care to ensure that `bytes` was previously passed through 332 /// [`Self::validate_bytes()`] with success (and was not changed since then). 333 /// 334 /// ## Implementors 335 /// 336 /// Implementations of this method may call unsafe functions to cast the pointer to the correct 337 /// type, assuming the "Callers" invariant above. 338 /// 339 /// Safety checklist: 340 /// 341 /// 1. This method *must* return the same result as [`Self::parse_bytes()`]. 342 /// 2. This method *must* return a slice to the same region of memory as the argument. from_bytes_unchecked(bytes: &[u8]) -> &Self343 unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self; 344 345 /// Given `&Self`, returns a `&[u8]` with the same lifetime. 346 /// 347 /// The default implementation performs a pointer cast to the same region of memory. 348 /// 349 /// # Safety 350 /// 351 /// Implementations of this method should call potentially unsafe functions to cast the 352 /// pointer to the correct type. 353 #[inline] as_bytes(&self) -> &[u8]354 fn as_bytes(&self) -> &[u8] { 355 unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) } 356 } 357 358 /// Allocate on the heap as a `Box<T>` 359 #[inline] 360 #[cfg(feature = "alloc")] to_boxed(&self) -> alloc::boxed::Box<Self>361 fn to_boxed(&self) -> alloc::boxed::Box<Self> { 362 use alloc::borrow::ToOwned; 363 use alloc::boxed::Box; 364 use core::alloc::Layout; 365 let bytesvec = self.as_bytes().to_owned().into_boxed_slice(); 366 let bytesvec = mem::ManuallyDrop::new(bytesvec); 367 unsafe { 368 // Get the pointer representation 369 let ptr: *mut Self = Self::from_bytes_unchecked(&bytesvec) as *const Self as *mut Self; 370 assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec)); 371 // Transmute the pointer to an owned pointer 372 Box::from_raw(ptr) 373 } 374 } 375 } 376 377 // Proc macro reexports 378 // 379 // These exist so that our docs can use intra-doc links. 380 // Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from 381 // a submodule 382 383 /// Custom derive for [`ULE`]. 384 /// 385 /// This can be attached to [`Copy`] structs containing only [`ULE`] types. 386 /// 387 /// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining 388 /// a custom ULE type. 389 #[cfg(feature = "derive")] 390 pub use zerovec_derive::ULE; 391 392 /// Custom derive for [`VarULE`] 393 /// 394 /// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end. 395 /// 396 /// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining 397 /// a custom [`VarULE`] type. 398 #[cfg(feature = "derive")] 399 pub use zerovec_derive::VarULE; 400 401 /// An error type to be used for decoding slices of ULE types 402 #[derive(Copy, Clone, Debug, PartialEq, Eq)] 403 #[non_exhaustive] 404 pub enum UleError { 405 /// Attempted to parse a buffer into a slice of the given ULE type but its 406 /// length was not compatible. 407 /// 408 /// Typically created by a [`ULE`] impl via [`UleError::length()`]. 409 /// 410 /// [`ULE`]: crate::ule::ULE 411 InvalidLength { ty: &'static str, len: usize }, 412 /// The byte sequence provided for `ty` failed to parse correctly in the 413 /// given ULE type. 414 /// 415 /// Typically created by a [`ULE`] impl via [`UleError::parse()`]. 416 /// 417 /// [`ULE`]: crate::ule::ULE 418 ParseError { ty: &'static str }, 419 } 420 421 impl fmt::Display for UleError { fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error>422 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { 423 match *self { 424 UleError::InvalidLength { ty, len } => { 425 write!(f, "Invalid length {len} for slice of type {ty}") 426 } 427 UleError::ParseError { ty } => { 428 write!(f, "Could not parse bytes to slice of type {ty}") 429 } 430 } 431 } 432 } 433 434 impl UleError { 435 /// Construct a parse error for the given type parse<T: ?Sized + 'static>() -> UleError436 pub fn parse<T: ?Sized + 'static>() -> UleError { 437 UleError::ParseError { 438 ty: any::type_name::<T>(), 439 } 440 } 441 442 /// Construct an "invalid length" error for the given type and length length<T: ?Sized + 'static>(len: usize) -> UleError443 pub fn length<T: ?Sized + 'static>(len: usize) -> UleError { 444 UleError::InvalidLength { 445 ty: any::type_name::<T>(), 446 len, 447 } 448 } 449 } 450 451 impl core::error::Error for UleError {} 452