1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 use crate::ule::{EncodeAsVarULE, UleError, VarULE}; 6 #[cfg(feature = "alloc")] 7 use alloc::boxed::Box; 8 use core::fmt; 9 use core::marker::PhantomData; 10 #[cfg(feature = "alloc")] 11 use core::mem::ManuallyDrop; 12 use core::ops::Deref; 13 use core::ptr::NonNull; 14 use zerofrom::ZeroFrom; 15 16 /// Copy-on-write type that efficiently represents [`VarULE`] types as their bitstream representation. 17 /// 18 /// The primary use case for [`VarULE`] types is the ability to store complex variable-length datastructures 19 /// inside variable-length collections like [`crate::VarZeroVec`]. 20 /// 21 /// Underlying this ability is the fact that [`VarULE`] types can be efficiently represented as a flat 22 /// bytestream. 23 /// 24 /// In zero-copy cases, sometimes one wishes to unconditionally use this bytestream representation, for example 25 /// to save stack size. A struct with five `Cow<'a, str>`s is not as stack-efficient as a single `Cow` containing 26 /// the bytestream representation of, say, `Tuple5VarULE<str, str, str, str, str>`. 27 /// 28 /// This type helps in this case: It is logically a `Cow<'a, V>`, with some optimizations, that is guaranteed 29 /// to serialize as a byte stream in machine-readable scenarios. 30 /// 31 /// During human-readable serialization, it will fall back to the serde impls on `V`, which ought to have 32 /// a human-readable variant. 33 pub struct VarZeroCow<'a, V: ?Sized> { 34 /// Safety invariant: Contained slice must be a valid V 35 /// It may or may not have a lifetime valid for 'a, it must be valid for as long as this type is around. 36 raw: RawVarZeroCow, 37 marker1: PhantomData<&'a V>, 38 #[cfg(feature = "alloc")] 39 marker2: PhantomData<Box<V>>, 40 } 41 42 /// VarZeroCow without the `V` to simulate a dropck eyepatch 43 /// (i.e., prove to rustc that the dtor is not able to observe V or 'a) 44 /// 45 /// This is effectively `Cow<'a, [u8]>`, with the lifetime managed externally 46 struct RawVarZeroCow { 47 /// Pointer to data 48 /// 49 /// # Safety Invariants 50 /// 51 /// 1. This slice must always be valid as a byte slice 52 /// 2. If `owned` is true, this slice can be freed. 53 /// 3. VarZeroCow, the only user of this type, will impose an additional invariant that the buffer is a valid V 54 buf: NonNull<[u8]>, 55 /// The buffer is `Box<[u8]>` if true 56 #[cfg(feature = "alloc")] 57 owned: bool, 58 // Safety: We do not need any PhantomDatas here, since the Drop impl does not observe borrowed data 59 // if there is any. 60 } 61 62 #[cfg(feature = "alloc")] 63 impl Drop for RawVarZeroCow { drop(&mut self)64 fn drop(&mut self) { 65 // Note: this drop impl NEVER observes borrowed data (which may have already been cleaned up by the time the impl is called) 66 if self.owned { 67 unsafe { 68 // Safety: (Invariant 2 on buf) 69 // since owned is true, this is a valid Box<[u8]> and can be cleaned up 70 let _ = Box::<[u8]>::from_raw(self.buf.as_ptr()); 71 } 72 } 73 } 74 } 75 76 // This is mostly just a `Cow<[u8]>`, safe to implement Send and Sync on 77 unsafe impl Send for RawVarZeroCow {} 78 unsafe impl Sync for RawVarZeroCow {} 79 80 impl Clone for RawVarZeroCow { clone(&self) -> Self81 fn clone(&self) -> Self { 82 #[cfg(feature = "alloc")] 83 if self.is_owned() { 84 // This clones the box 85 let b: Box<[u8]> = self.as_bytes().into(); 86 let b = ManuallyDrop::new(b); 87 let buf: NonNull<[u8]> = (&**b).into(); 88 return Self { 89 // Invariants upheld: 90 // 1 & 3: The bytes came from `self` so they're a valid value and byte slice 91 // 2: This is owned (we cloned it), so we set owned to true. 92 buf, 93 owned: true, 94 }; 95 } 96 // Unfortunately we can't just use `new_borrowed(self.deref())` since the lifetime is shorter 97 Self { 98 // Invariants upheld: 99 // 1 & 3: The bytes came from `self` so they're a valid value and byte slice 100 // 2: This is borrowed (we're sharing a borrow), so we set owned to false. 101 buf: self.buf, 102 #[cfg(feature = "alloc")] 103 owned: false, 104 } 105 } 106 } 107 108 impl<'a, V: ?Sized> Clone for VarZeroCow<'a, V> { clone(&self) -> Self109 fn clone(&self) -> Self { 110 let raw = self.raw.clone(); 111 // Invariant upheld: raw came from a valid VarZeroCow, so it 112 // is a valid V 113 unsafe { Self::from_raw(raw) } 114 } 115 } 116 117 impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> { 118 /// Construct from a slice. Errors if the slice doesn't represent a valid `V` parse_bytes(bytes: &'a [u8]) -> Result<Self, UleError>119 pub fn parse_bytes(bytes: &'a [u8]) -> Result<Self, UleError> { 120 let val = V::parse_bytes(bytes)?; 121 Ok(Self::new_borrowed(val)) 122 } 123 124 /// Construct from an owned slice. Errors if the slice doesn't represent a valid `V` 125 #[cfg(feature = "alloc")] parse_owned_bytes(bytes: Box<[u8]>) -> Result<Self, UleError>126 pub fn parse_owned_bytes(bytes: Box<[u8]>) -> Result<Self, UleError> { 127 V::validate_bytes(&bytes)?; 128 let bytes = ManuallyDrop::new(bytes); 129 let buf: NonNull<[u8]> = (&**bytes).into(); 130 let raw = RawVarZeroCow { 131 // Invariants upheld: 132 // 1 & 3: The bytes came from `val` so they're a valid value and byte slice 133 // 2: This is owned, so we set owned to true. 134 buf, 135 owned: true, 136 }; 137 Ok(Self { 138 raw, 139 marker1: PhantomData, 140 #[cfg(feature = "alloc")] 141 marker2: PhantomData, 142 }) 143 } 144 145 /// Construct from a slice that is known to represent a valid `V` 146 /// 147 /// # Safety 148 /// 149 /// `bytes` must be a valid `V`, i.e. it must successfully pass through 150 /// `V::parse_bytes()` or `V::validate_bytes()`. from_bytes_unchecked(bytes: &'a [u8]) -> Self151 pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { 152 unsafe { 153 // Safety: bytes is an &T which is always non-null 154 let buf: NonNull<[u8]> = NonNull::new_unchecked(bytes as *const [u8] as *mut [u8]); 155 let raw = RawVarZeroCow { 156 // Invariants upheld: 157 // 1 & 3: Passed upstream to caller 158 // 2: This is borrowed, so we set owned to false. 159 buf, 160 #[cfg(feature = "alloc")] 161 owned: false, 162 }; 163 // Invariant passed upstream to caller 164 Self::from_raw(raw) 165 } 166 } 167 168 /// Construct this from an [`EncodeAsVarULE`] version of the contained type 169 /// 170 /// Will always construct an owned version 171 #[cfg(feature = "alloc")] from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self172 pub fn from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self { 173 let b = crate::ule::encode_varule_to_box(encodeable); 174 Self::new_owned(b) 175 } 176 177 /// Construct a new borrowed version of this new_borrowed(val: &'a V) -> Self178 pub fn new_borrowed(val: &'a V) -> Self { 179 unsafe { 180 // Safety: val is a valid V, by type 181 Self::from_bytes_unchecked(val.as_bytes()) 182 } 183 } 184 185 /// Construct a new borrowed version of this 186 #[cfg(feature = "alloc")] new_owned(val: Box<V>) -> Self187 pub fn new_owned(val: Box<V>) -> Self { 188 let val = ManuallyDrop::new(val); 189 let buf: NonNull<[u8]> = val.as_bytes().into(); 190 let raw = RawVarZeroCow { 191 // Invariants upheld: 192 // 1 & 3: The bytes came from `val` so they're a valid value and byte slice 193 // 2: This is owned, so we set owned to true. 194 buf, 195 #[cfg(feature = "alloc")] 196 owned: true, 197 }; 198 // The bytes came from `val`, so it's a valid value 199 unsafe { Self::from_raw(raw) } 200 } 201 } 202 203 impl<'a, V: ?Sized> VarZeroCow<'a, V> { 204 /// Whether or not this is owned is_owned(&self) -> bool205 pub fn is_owned(&self) -> bool { 206 self.raw.is_owned() 207 } 208 209 /// Get the byte representation of this type 210 /// 211 /// Is also always a valid `V` and can be passed to 212 /// `V::from_bytes_unchecked()` as_bytes(&self) -> &[u8]213 pub fn as_bytes(&self) -> &[u8] { 214 // The valid V invariant comes from Invariant 2 215 self.raw.as_bytes() 216 } 217 218 /// Invariant: `raw` must wrap a valid V, either owned or borrowed for 'a from_raw(raw: RawVarZeroCow) -> Self219 const unsafe fn from_raw(raw: RawVarZeroCow) -> Self { 220 Self { 221 // Invariant passed up to caller 222 raw, 223 marker1: PhantomData, 224 #[cfg(feature = "alloc")] 225 marker2: PhantomData, 226 } 227 } 228 } 229 230 impl RawVarZeroCow { 231 /// Whether or not this is owned 232 #[inline] is_owned(&self) -> bool233 pub fn is_owned(&self) -> bool { 234 #[cfg(feature = "alloc")] 235 return self.owned; 236 #[cfg(not(feature = "alloc"))] 237 return false; 238 } 239 240 /// Get the byte representation of this type 241 #[inline] as_bytes(&self) -> &[u8]242 pub fn as_bytes(&self) -> &[u8] { 243 // Safety: Invariant 1 on self.buf 244 unsafe { self.buf.as_ref() } 245 } 246 } 247 248 impl<'a, V: VarULE + ?Sized> Deref for VarZeroCow<'a, V> { 249 type Target = V; deref(&self) -> &V250 fn deref(&self) -> &V { 251 // Safety: From invariant 2 on self.buf 252 unsafe { V::from_bytes_unchecked(self.as_bytes()) } 253 } 254 } 255 256 impl<'a, V: VarULE + ?Sized> From<&'a V> for VarZeroCow<'a, V> { from(other: &'a V) -> Self257 fn from(other: &'a V) -> Self { 258 Self::new_borrowed(other) 259 } 260 } 261 262 #[cfg(feature = "alloc")] 263 impl<'a, V: VarULE + ?Sized> From<Box<V>> for VarZeroCow<'a, V> { from(other: Box<V>) -> Self264 fn from(other: Box<V>) -> Self { 265 Self::new_owned(other) 266 } 267 } 268 269 impl<'a, V: VarULE + ?Sized + fmt::Debug> fmt::Debug for VarZeroCow<'a, V> { fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error>270 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { 271 self.deref().fmt(f) 272 } 273 } 274 275 // We need manual impls since `#[derive()]` is disallowed on packed types 276 impl<'a, V: VarULE + ?Sized + PartialEq> PartialEq for VarZeroCow<'a, V> { eq(&self, other: &Self) -> bool277 fn eq(&self, other: &Self) -> bool { 278 self.deref().eq(other.deref()) 279 } 280 } 281 282 impl<'a, V: VarULE + ?Sized + Eq> Eq for VarZeroCow<'a, V> {} 283 284 impl<'a, V: VarULE + ?Sized + PartialOrd> PartialOrd for VarZeroCow<'a, V> { partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering>285 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { 286 self.deref().partial_cmp(other.deref()) 287 } 288 } 289 290 impl<'a, V: VarULE + ?Sized + Ord> Ord for VarZeroCow<'a, V> { cmp(&self, other: &Self) -> core::cmp::Ordering291 fn cmp(&self, other: &Self) -> core::cmp::Ordering { 292 self.deref().cmp(other.deref()) 293 } 294 } 295 296 // # Safety 297 // 298 // encode_var_ule_len: Produces the length of the contained bytes, which are known to be a valid V by invariant 299 // 300 // encode_var_ule_write: Writes the contained bytes, which are known to be a valid V by invariant 301 unsafe impl<'a, V: VarULE + ?Sized> EncodeAsVarULE<V> for VarZeroCow<'a, V> { encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R302 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { 303 // unnecessary if the other two are implemented 304 unreachable!() 305 } 306 307 #[inline] encode_var_ule_len(&self) -> usize308 fn encode_var_ule_len(&self) -> usize { 309 self.as_bytes().len() 310 } 311 312 #[inline] encode_var_ule_write(&self, dst: &mut [u8])313 fn encode_var_ule_write(&self, dst: &mut [u8]) { 314 dst.copy_from_slice(self.as_bytes()) 315 } 316 } 317 318 #[cfg(feature = "serde")] 319 impl<'a, V: VarULE + ?Sized + serde::Serialize> serde::Serialize for VarZeroCow<'a, V> { serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,320 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 321 where 322 S: serde::Serializer, 323 { 324 if serializer.is_human_readable() { 325 <V as serde::Serialize>::serialize(self.deref(), serializer) 326 } else { 327 serializer.serialize_bytes(self.as_bytes()) 328 } 329 } 330 } 331 332 #[cfg(feature = "serde")] 333 impl<'a, 'de: 'a, V: VarULE + ?Sized> serde::Deserialize<'de> for VarZeroCow<'a, V> 334 where 335 Box<V>: serde::Deserialize<'de>, 336 { deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error> where Des: serde::Deserializer<'de>,337 fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error> 338 where 339 Des: serde::Deserializer<'de>, 340 { 341 if deserializer.is_human_readable() { 342 let b = Box::<V>::deserialize(deserializer)?; 343 Ok(Self::new_owned(b)) 344 } else { 345 let bytes = <&[u8]>::deserialize(deserializer)?; 346 Self::parse_bytes(bytes).map_err(serde::de::Error::custom) 347 } 348 } 349 } 350 351 #[cfg(feature = "databake")] 352 impl<'a, V: VarULE + ?Sized> databake::Bake for VarZeroCow<'a, V> { bake(&self, env: &databake::CrateEnv) -> databake::TokenStream353 fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { 354 env.insert("zerovec"); 355 let bytes = self.as_bytes().bake(env); 356 databake::quote! { 357 // Safety: Known to come from a valid V since self.as_bytes() is always a valid V 358 unsafe { 359 zerovec::VarZeroCow::from_bytes_unchecked(#bytes) 360 } 361 } 362 } 363 } 364 365 #[cfg(feature = "databake")] 366 impl<'a, V: VarULE + ?Sized> databake::BakeSize for VarZeroCow<'a, V> { borrows_size(&self) -> usize367 fn borrows_size(&self) -> usize { 368 self.as_bytes().len() 369 } 370 } 371 372 impl<'a, V: VarULE + ?Sized> ZeroFrom<'a, V> for VarZeroCow<'a, V> { 373 #[inline] zero_from(other: &'a V) -> Self374 fn zero_from(other: &'a V) -> Self { 375 Self::new_borrowed(other) 376 } 377 } 378 379 impl<'a, 'b, V: VarULE + ?Sized> ZeroFrom<'a, VarZeroCow<'b, V>> for VarZeroCow<'a, V> { 380 #[inline] zero_from(other: &'a VarZeroCow<'b, V>) -> Self381 fn zero_from(other: &'a VarZeroCow<'b, V>) -> Self { 382 Self::new_borrowed(other) 383 } 384 } 385 386 #[cfg(test)] 387 mod tests { 388 use super::VarZeroCow; 389 use crate::ule::tuplevar::Tuple3VarULE; 390 use crate::vecs::VarZeroSlice; 391 #[test] test_cow_roundtrip()392 fn test_cow_roundtrip() { 393 type Messy = Tuple3VarULE<str, [u8], VarZeroSlice<str>>; 394 let vec = vec!["one", "two", "three"]; 395 let messy: VarZeroCow<Messy> = 396 VarZeroCow::from_encodeable(&("hello", &b"g\xFF\xFFdbye"[..], vec)); 397 398 assert_eq!(messy.a(), "hello"); 399 assert_eq!(messy.b(), b"g\xFF\xFFdbye"); 400 assert_eq!(&messy.c()[1], "two"); 401 402 #[cfg(feature = "serde")] 403 { 404 let bincode = bincode::serialize(&messy).unwrap(); 405 let deserialized: VarZeroCow<Messy> = bincode::deserialize(&bincode).unwrap(); 406 assert_eq!( 407 messy, deserialized, 408 "Single element roundtrips with bincode" 409 ); 410 assert!(!deserialized.is_owned()); 411 412 let json = serde_json::to_string(&messy).unwrap(); 413 let deserialized: VarZeroCow<Messy> = serde_json::from_str(&json).unwrap(); 414 assert_eq!(messy, deserialized, "Single element roundtrips with serde"); 415 } 416 } 417 418 struct TwoCows<'a> { 419 cow1: VarZeroCow<'a, str>, 420 cow2: VarZeroCow<'a, str>, 421 } 422 423 #[test] test_eyepatch_works()424 fn test_eyepatch_works() { 425 // This code should compile 426 let mut two = TwoCows { 427 cow1: VarZeroCow::new_borrowed("hello"), 428 cow2: VarZeroCow::new_owned("world".into()), 429 }; 430 let three = VarZeroCow::new_borrowed(&*two.cow2); 431 two.cow1 = three; 432 433 // Without the eyepatch, dropck will be worried that the dtor of two.cow1 can observe the 434 // data it borrowed from two.cow2, which may have already been deleted 435 436 // This test will fail if you add an empty `impl<'a, V: ?Sized> Drop for VarZeroCow<'a, V>` 437 } 438 } 439