• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use crate::ule::{EncodeAsVarULE, UleError, VarULE};
6 #[cfg(feature = "alloc")]
7 use alloc::boxed::Box;
8 use core::fmt;
9 use core::marker::PhantomData;
10 #[cfg(feature = "alloc")]
11 use core::mem::ManuallyDrop;
12 use core::ops::Deref;
13 use core::ptr::NonNull;
14 use zerofrom::ZeroFrom;
15 
16 /// Copy-on-write type that efficiently represents [`VarULE`] types as their bitstream representation.
17 ///
18 /// The primary use case for [`VarULE`] types is the ability to store complex variable-length datastructures
19 /// inside variable-length collections like [`crate::VarZeroVec`].
20 ///
21 /// Underlying this ability is the fact that [`VarULE`] types can be efficiently represented as a flat
22 /// bytestream.
23 ///
24 /// In zero-copy cases, sometimes one wishes to unconditionally use this bytestream representation, for example
25 /// to save stack size. A struct with five `Cow<'a, str>`s is not as stack-efficient as a single `Cow` containing
26 /// the bytestream representation of, say, `Tuple5VarULE<str, str, str, str, str>`.
27 ///
28 /// This type helps in this case: It is logically a `Cow<'a, V>`, with some optimizations, that is guaranteed
29 /// to serialize as a byte stream in machine-readable scenarios.
30 ///
31 /// During human-readable serialization, it will fall back to the serde impls on `V`, which ought to have
32 /// a human-readable variant.
33 pub struct VarZeroCow<'a, V: ?Sized> {
34     /// Safety invariant: Contained slice must be a valid V
35     /// It may or may not have a lifetime valid for 'a, it must be valid for as long as this type is around.
36     raw: RawVarZeroCow,
37     marker1: PhantomData<&'a V>,
38     #[cfg(feature = "alloc")]
39     marker2: PhantomData<Box<V>>,
40 }
41 
/// The untyped core of `VarZeroCow`, kept free of `V` to simulate a dropck eyepatch
/// (i.e. to prove to rustc that the destructor is not able to observe `V` or `'a`).
///
/// This is effectively a `Cow<'a, [u8]>` whose lifetime is managed externally.
struct RawVarZeroCow {
    /// Pointer to the data.
    ///
    /// # Safety Invariants
    ///
    /// 1. This slice must always be valid as a byte slice.
    /// 2. If `owned` is true, this slice can be freed.
    /// 3. `VarZeroCow`, the only user of this type, imposes the additional invariant that the
    ///    buffer is a valid `V`.
    buf: NonNull<[u8]>,
    /// When true, the buffer is a `Box<[u8]>`.
    #[cfg(feature = "alloc")]
    owned: bool,
    // Safety: no `PhantomData` is needed here, since the `Drop` impl does not observe borrowed
    // data, if there is any.
}
61 
#[cfg(feature = "alloc")]
impl Drop for RawVarZeroCow {
    /// Frees the buffer if (and only if) it is owned.
    fn drop(&mut self) {
        // Note: this impl NEVER observes borrowed data, which may already have been cleaned up
        // by the time it runs. A borrowed buffer is simply left alone.
        if !self.owned {
            return;
        }
        // SAFETY: (Invariant 2 on `buf`) since `owned` is true, the buffer is a valid
        // `Box<[u8]>` and can be cleaned up.
        drop(unsafe { Box::<[u8]>::from_raw(self.buf.as_ptr()) });
    }
}
75 
76 // This is mostly just a `Cow<[u8]>`, safe to implement Send and Sync on
77 unsafe impl Send for RawVarZeroCow {}
78 unsafe impl Sync for RawVarZeroCow {}
79 
80 impl Clone for RawVarZeroCow {
clone(&self) -> Self81     fn clone(&self) -> Self {
82         #[cfg(feature = "alloc")]
83         if self.is_owned() {
84             // This clones the box
85             let b: Box<[u8]> = self.as_bytes().into();
86             let b = ManuallyDrop::new(b);
87             let buf: NonNull<[u8]> = (&**b).into();
88             return Self {
89                 // Invariants upheld:
90                 // 1 & 3: The bytes came from `self` so they're a valid value and byte slice
91                 // 2: This is owned (we cloned it), so we set owned to true.
92                 buf,
93                 owned: true,
94             };
95         }
96         // Unfortunately we can't just use `new_borrowed(self.deref())` since the lifetime is shorter
97         Self {
98             // Invariants upheld:
99             // 1 & 3: The bytes came from `self` so they're a valid value and byte slice
100             // 2: This is borrowed (we're sharing a borrow), so we set owned to false.
101             buf: self.buf,
102             #[cfg(feature = "alloc")]
103             owned: false,
104         }
105     }
106 }
107 
108 impl<'a, V: ?Sized> Clone for VarZeroCow<'a, V> {
clone(&self) -> Self109     fn clone(&self) -> Self {
110         let raw = self.raw.clone();
111         // Invariant upheld: raw came from a valid VarZeroCow, so it
112         // is a valid V
113         unsafe { Self::from_raw(raw) }
114     }
115 }
116 
117 impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> {
118     /// Construct from a slice. Errors if the slice doesn't represent a valid `V`
parse_bytes(bytes: &'a [u8]) -> Result<Self, UleError>119     pub fn parse_bytes(bytes: &'a [u8]) -> Result<Self, UleError> {
120         let val = V::parse_bytes(bytes)?;
121         Ok(Self::new_borrowed(val))
122     }
123 
124     /// Construct from an owned slice. Errors if the slice doesn't represent a valid `V`
125     #[cfg(feature = "alloc")]
parse_owned_bytes(bytes: Box<[u8]>) -> Result<Self, UleError>126     pub fn parse_owned_bytes(bytes: Box<[u8]>) -> Result<Self, UleError> {
127         V::validate_bytes(&bytes)?;
128         let bytes = ManuallyDrop::new(bytes);
129         let buf: NonNull<[u8]> = (&**bytes).into();
130         let raw = RawVarZeroCow {
131             // Invariants upheld:
132             // 1 & 3: The bytes came from `val` so they're a valid value and byte slice
133             // 2: This is owned, so we set owned to true.
134             buf,
135             owned: true,
136         };
137         Ok(Self {
138             raw,
139             marker1: PhantomData,
140             #[cfg(feature = "alloc")]
141             marker2: PhantomData,
142         })
143     }
144 
145     /// Construct from a slice that is known to represent a valid `V`
146     ///
147     /// # Safety
148     ///
149     /// `bytes` must be a valid `V`, i.e. it must successfully pass through
150     /// `V::parse_bytes()` or `V::validate_bytes()`.
from_bytes_unchecked(bytes: &'a [u8]) -> Self151     pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
152         unsafe {
153             // Safety: bytes is an &T which is always non-null
154             let buf: NonNull<[u8]> = NonNull::new_unchecked(bytes as *const [u8] as *mut [u8]);
155             let raw = RawVarZeroCow {
156                 // Invariants upheld:
157                 // 1 & 3: Passed upstream to caller
158                 // 2: This is borrowed, so we set owned to false.
159                 buf,
160                 #[cfg(feature = "alloc")]
161                 owned: false,
162             };
163             // Invariant passed upstream to caller
164             Self::from_raw(raw)
165         }
166     }
167 
168     /// Construct this from an [`EncodeAsVarULE`] version of the contained type
169     ///
170     /// Will always construct an owned version
171     #[cfg(feature = "alloc")]
from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self172     pub fn from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self {
173         let b = crate::ule::encode_varule_to_box(encodeable);
174         Self::new_owned(b)
175     }
176 
177     /// Construct a new borrowed version of this
new_borrowed(val: &'a V) -> Self178     pub fn new_borrowed(val: &'a V) -> Self {
179         unsafe {
180             // Safety: val is a valid V, by type
181             Self::from_bytes_unchecked(val.as_bytes())
182         }
183     }
184 
185     /// Construct a new borrowed version of this
186     #[cfg(feature = "alloc")]
new_owned(val: Box<V>) -> Self187     pub fn new_owned(val: Box<V>) -> Self {
188         let val = ManuallyDrop::new(val);
189         let buf: NonNull<[u8]> = val.as_bytes().into();
190         let raw = RawVarZeroCow {
191             // Invariants upheld:
192             // 1 & 3: The bytes came from `val` so they're a valid value and byte slice
193             // 2: This is owned, so we set owned to true.
194             buf,
195             #[cfg(feature = "alloc")]
196             owned: true,
197         };
198         // The bytes came from `val`, so it's a valid value
199         unsafe { Self::from_raw(raw) }
200     }
201 }
202 
203 impl<'a, V: ?Sized> VarZeroCow<'a, V> {
204     /// Whether or not this is owned
is_owned(&self) -> bool205     pub fn is_owned(&self) -> bool {
206         self.raw.is_owned()
207     }
208 
209     /// Get the byte representation of this type
210     ///
211     /// Is also always a valid `V` and can be passed to
212     /// `V::from_bytes_unchecked()`
as_bytes(&self) -> &[u8]213     pub fn as_bytes(&self) -> &[u8] {
214         // The valid V invariant comes from Invariant 2
215         self.raw.as_bytes()
216     }
217 
218     /// Invariant: `raw` must wrap a valid V, either owned or borrowed for 'a
from_raw(raw: RawVarZeroCow) -> Self219     const unsafe fn from_raw(raw: RawVarZeroCow) -> Self {
220         Self {
221             // Invariant passed up to caller
222             raw,
223             marker1: PhantomData,
224             #[cfg(feature = "alloc")]
225             marker2: PhantomData,
226         }
227     }
228 }
229 
230 impl RawVarZeroCow {
231     /// Whether or not this is owned
232     #[inline]
is_owned(&self) -> bool233     pub fn is_owned(&self) -> bool {
234         #[cfg(feature = "alloc")]
235         return self.owned;
236         #[cfg(not(feature = "alloc"))]
237         return false;
238     }
239 
240     /// Get the byte representation of this type
241     #[inline]
as_bytes(&self) -> &[u8]242     pub fn as_bytes(&self) -> &[u8] {
243         // Safety: Invariant 1 on self.buf
244         unsafe { self.buf.as_ref() }
245     }
246 }
247 
248 impl<'a, V: VarULE + ?Sized> Deref for VarZeroCow<'a, V> {
249     type Target = V;
deref(&self) -> &V250     fn deref(&self) -> &V {
251         // Safety: From invariant 2 on self.buf
252         unsafe { V::from_bytes_unchecked(self.as_bytes()) }
253     }
254 }
255 
256 impl<'a, V: VarULE + ?Sized> From<&'a V> for VarZeroCow<'a, V> {
from(other: &'a V) -> Self257     fn from(other: &'a V) -> Self {
258         Self::new_borrowed(other)
259     }
260 }
261 
#[cfg(feature = "alloc")]
impl<'a, V: VarULE + ?Sized> From<Box<V>> for VarZeroCow<'a, V> {
    /// Takes ownership of `other`; equivalent to `VarZeroCow::new_owned`.
    fn from(other: Box<V>) -> Self {
        VarZeroCow::new_owned(other)
    }
}
268 
269 impl<'a, V: VarULE + ?Sized + fmt::Debug> fmt::Debug for VarZeroCow<'a, V> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error>270     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
271         self.deref().fmt(f)
272     }
273 }
274 
275 // We need manual impls since `#[derive()]` is disallowed on packed types
276 impl<'a, V: VarULE + ?Sized + PartialEq> PartialEq for VarZeroCow<'a, V> {
eq(&self, other: &Self) -> bool277     fn eq(&self, other: &Self) -> bool {
278         self.deref().eq(other.deref())
279     }
280 }
281 
282 impl<'a, V: VarULE + ?Sized + Eq> Eq for VarZeroCow<'a, V> {}
283 
284 impl<'a, V: VarULE + ?Sized + PartialOrd> PartialOrd for VarZeroCow<'a, V> {
partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering>285     fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
286         self.deref().partial_cmp(other.deref())
287     }
288 }
289 
290 impl<'a, V: VarULE + ?Sized + Ord> Ord for VarZeroCow<'a, V> {
cmp(&self, other: &Self) -> core::cmp::Ordering291     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
292         self.deref().cmp(other.deref())
293     }
294 }
295 
296 // # Safety
297 //
298 // encode_var_ule_len: Produces the length of the contained bytes, which are known to be a valid V by invariant
299 //
300 // encode_var_ule_write: Writes the contained bytes, which are known to be a valid V by invariant
301 unsafe impl<'a, V: VarULE + ?Sized> EncodeAsVarULE<V> for VarZeroCow<'a, V> {
encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R302     fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
303         // unnecessary if the other two are implemented
304         unreachable!()
305     }
306 
307     #[inline]
encode_var_ule_len(&self) -> usize308     fn encode_var_ule_len(&self) -> usize {
309         self.as_bytes().len()
310     }
311 
312     #[inline]
encode_var_ule_write(&self, dst: &mut [u8])313     fn encode_var_ule_write(&self, dst: &mut [u8]) {
314         dst.copy_from_slice(self.as_bytes())
315     }
316 }
317 
#[cfg(feature = "serde")]
impl<'a, V: VarULE + ?Sized + serde::Serialize> serde::Serialize for VarZeroCow<'a, V> {
    /// Serializes as raw bytes for machine-readable formats, and via `V`'s own
    /// `Serialize` impl for human-readable ones.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.as_bytes());
        }
        let inner: &V = self;
        <V as serde::Serialize>::serialize(inner, serializer)
    }
}
331 
#[cfg(feature = "serde")]
impl<'a, 'de: 'a, V: VarULE + ?Sized> serde::Deserialize<'de> for VarZeroCow<'a, V>
where
    Box<V>: serde::Deserialize<'de>,
{
    /// Deserializes a borrowed value from bytes for machine-readable formats, and an
    /// owned value via `Box<V>` for human-readable ones.
    fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error>
    where
        Des: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Machine-readable: borrow the bytes from the input and validate them as a `V`.
            let bytes = <&[u8]>::deserialize(deserializer)?;
            return Self::parse_bytes(bytes).map_err(serde::de::Error::custom);
        }
        // Human-readable: defer to `Box<V>` and take ownership of the result.
        let boxed = Box::<V>::deserialize(deserializer)?;
        Ok(Self::new_owned(boxed))
    }
}
350 
#[cfg(feature = "databake")]
impl<'a, V: VarULE + ?Sized> databake::Bake for VarZeroCow<'a, V> {
    /// Emits code that rebuilds this value from its baked bytes via `from_bytes_unchecked`.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        env.insert("zerovec");
        let baked_bytes = self.as_bytes().bake(env);
        // Safety (of the emitted `unsafe` block): the bytes are known to come from a valid V,
        // since `self.as_bytes()` is always a valid V.
        databake::quote! {
            unsafe {
                zerovec::VarZeroCow::from_bytes_unchecked(#baked_bytes)
            }
        }
    }
}
364 
#[cfg(feature = "databake")]
impl<'a, V: VarULE + ?Sized> databake::BakeSize for VarZeroCow<'a, V> {
    /// Reports the byte length of the contained value.
    fn borrows_size(&self) -> usize {
        let bytes: &[u8] = self.as_bytes();
        bytes.len()
    }
}
371 
372 impl<'a, V: VarULE + ?Sized> ZeroFrom<'a, V> for VarZeroCow<'a, V> {
373     #[inline]
zero_from(other: &'a V) -> Self374     fn zero_from(other: &'a V) -> Self {
375         Self::new_borrowed(other)
376     }
377 }
378 
379 impl<'a, 'b, V: VarULE + ?Sized> ZeroFrom<'a, VarZeroCow<'b, V>> for VarZeroCow<'a, V> {
380     #[inline]
zero_from(other: &'a VarZeroCow<'b, V>) -> Self381     fn zero_from(other: &'a VarZeroCow<'b, V>) -> Self {
382         Self::new_borrowed(other)
383     }
384 }
385 
#[cfg(test)]
mod tests {
    use super::VarZeroCow;
    use crate::ule::tuplevar::Tuple3VarULE;
    use crate::vecs::VarZeroSlice;

    /// Builds an owned cow from an encodeable tuple, checks field access, and (with serde)
    /// round-trips it through bincode and JSON.
    #[test]
    fn test_cow_roundtrip() {
        type Messy = Tuple3VarULE<str, [u8], VarZeroSlice<str>>;
        let words = vec!["one", "two", "three"];
        let raw_bytes: &[u8] = b"g\xFF\xFFdbye";
        let messy: VarZeroCow<Messy> = VarZeroCow::from_encodeable(&("hello", raw_bytes, words));

        assert_eq!(messy.a(), "hello");
        assert_eq!(messy.b(), b"g\xFF\xFFdbye");
        assert_eq!(&messy.c()[1], "two");

        #[cfg(feature = "serde")]
        {
            // Machine-readable format: the deserialized value must borrow from the input.
            let encoded = bincode::serialize(&messy).unwrap();
            let decoded: VarZeroCow<Messy> = bincode::deserialize(&encoded).unwrap();
            assert_eq!(messy, decoded, "Single element roundtrips with bincode");
            assert!(!decoded.is_owned());

            // Human-readable format.
            let text = serde_json::to_string(&messy).unwrap();
            let decoded: VarZeroCow<Messy> = serde_json::from_str(&text).unwrap();
            assert_eq!(messy, decoded, "Single element roundtrips with serde");
        }
    }

    /// Two cows sharing one lifetime, used by the dropck test below.
    struct TwoCows<'a> {
        cow1: VarZeroCow<'a, str>,
        cow2: VarZeroCow<'a, str>,
    }

    /// Compile-time check that one field of a struct may borrow from a sibling field.
    #[test]
    fn test_eyepatch_works() {
        // This code should compile
        let mut two = TwoCows {
            cow1: VarZeroCow::new_borrowed("hello"),
            cow2: VarZeroCow::new_owned("world".into()),
        };
        let borrowed_from_cow2 = VarZeroCow::new_borrowed(&*two.cow2);
        two.cow1 = borrowed_from_cow2;

        // Without the eyepatch, dropck would worry that the destructor of `two.cow1` can
        // observe the data it borrowed from `two.cow2`, which may already have been deleted.

        // This test will fail if you add an empty `impl<'a, V: ?Sized> Drop for VarZeroCow<'a, V>`
    }
}
439