1 // This file is part of ICU4X. For terms of use, please see the file 2 // called LICENSE at the top level of the ICU4X source tree 3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 4 5 use super::components::{VarZeroSliceIter, VarZeroVecComponents}; 6 use super::vec::VarZeroVecInner; 7 use super::*; 8 use crate::ule::*; 9 use core::cmp::{Ord, Ordering, PartialOrd}; 10 use core::fmt; 11 use core::marker::PhantomData; 12 use core::mem; 13 14 use core::ops::Index; 15 use core::ops::Range; 16 17 /// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]` 18 /// where `T` is not `Sized`. 19 /// 20 /// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain 21 /// owned data and as such is ideal for deserialization since most human readable 22 /// serialization formats cannot unconditionally deserialize zero-copy. 23 /// 24 /// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): 25 /// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead 26 /// using `VarZeroVec<ZeroSlice<T>>`. 27 /// 28 /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the 29 /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. 30 /// 31 /// This type can be nested within itself to allow for multi-level nested `Vec`s. 32 /// 33 /// # Examples 34 /// 35 /// ## Nested Slices 36 /// 37 /// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>` 38 /// 39 /// ```rust 40 /// use zerovec::{VarZeroSlice, VarZeroVec}; 41 /// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"]; 42 /// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"]; 43 /// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"]; 44 /// let strings_4: Vec<&str> = vec!["w", "ω", "文", ""]; 45 /// let strings_12 = vec![&*strings_1, &*strings_2]; 46 /// let strings_34 = vec![&*strings_3, &*strings_4]; 47 /// let all_strings = vec![strings_12, strings_34]; 48 /// 49 /// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1); 50 /// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2); 51 /// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3); 52 /// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4); 53 /// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]); 54 /// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]); 55 /// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]); 56 /// 57 /// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all 58 /// .iter() 59 /// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| { 60 /// v.iter() 61 /// .map(|x: &VarZeroSlice<_>| { 62 /// x.as_varzerovec() 63 /// .iter() 64 /// .map(|s| s.to_owned()) 65 /// .collect::<Vec<String>>() 66 /// }) 67 /// .collect::<Vec<_>>() 68 /// }) 69 /// .collect::<Vec<_>>(); 70 /// assert_eq!(reconstructed, all_strings); 71 /// 72 /// let bytes = vzv_all.as_bytes(); 73 /// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> = 74 /// VarZeroVec::parse_bytes(bytes).unwrap(); 75 /// assert_eq!(vzv_from_bytes, vzv_all); 76 /// ``` 77 /// 78 /// ## Iterate over Windows 79 /// 80 /// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like 81 /// [core::slice::Windows], this behavior can be easily modeled using an iterator: 82 /// 83 /// ``` 84 /// use zerovec::VarZeroVec; 85 /// 86 /// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]); 87 /// # let mut pairs: Vec<(&str, &str)> = Vec::new(); 88 /// 89 /// let mut it = vzv.iter().peekable(); 90 /// while let (Some(x), Some(y)) = (it.next(), it.peek()) { 91 /// // Evaluate (x, y) here. 92 /// # pairs.push((x, y)); 93 /// } 94 /// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]); 95 /// ``` 96 // 97 // safety invariant: The slice MUST be one which parses to 98 // a valid VarZeroVecComponents<T> 99 #[repr(transparent)] 100 pub struct VarZeroSlice<T: ?Sized, F = Index16> { 101 marker: PhantomData<(F, T)>, 102 /// The original slice this was constructed from 103 entire_slice: [u8], 104 } 105 106 impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> { 107 /// Construct a new empty VarZeroSlice new_empty() -> &'static Self108 pub const fn new_empty() -> &'static Self { 109 // The empty VZV is special-cased to the empty slice 110 unsafe { mem::transmute(&[] as &[u8]) } 111 } 112 113 /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer 114 #[inline] as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F>115 pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> { 116 unsafe { 117 // safety: VarZeroSlice is guaranteed to parse here 118 VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice) 119 } 120 } 121 122 /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification. 123 /// 124 /// # Safety 125 /// 126 /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. from_bytes_unchecked(bytes: &[u8]) -> &Self127 pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { 128 // self is really just a wrapper around a byte slice 129 mem::transmute(bytes) 130 } 131 132 /// Get the number of elements in this slice 133 /// 134 /// # Example 135 /// 136 /// ```rust 137 /// # use zerovec::VarZeroVec; 138 /// 139 /// let strings = vec!["foo", "bar", "baz", "quux"]; 140 /// let vec = VarZeroVec::<str>::from(&strings); 141 /// 142 /// assert_eq!(vec.len(), 4); 143 /// ``` len(&self) -> usize144 pub fn len(&self) -> usize { 145 self.as_components().len() 146 } 147 148 /// Returns `true` if the slice contains no elements. 149 /// 150 /// # Examples 151 /// 152 /// ``` 153 /// # use zerovec::VarZeroVec; 154 /// 155 /// let strings: Vec<String> = vec![]; 156 /// let vec = VarZeroVec::<str>::from(&strings); 157 /// 158 /// assert!(vec.is_empty()); 159 /// ``` is_empty(&self) -> bool160 pub fn is_empty(&self) -> bool { 161 self.as_components().is_empty() 162 } 163 164 /// Obtain an iterator over this slice's elements 165 /// 166 /// # Example 167 /// 168 /// ```rust 169 /// # use zerovec::VarZeroVec; 170 /// 171 /// let strings = vec!["foo", "bar", "baz", "quux"]; 172 /// let vec = VarZeroVec::<str>::from(&strings); 173 /// 174 /// let mut iter_results: Vec<&str> = vec.iter().collect(); 175 /// assert_eq!(iter_results[0], "foo"); 176 /// assert_eq!(iter_results[1], "bar"); 177 /// assert_eq!(iter_results[2], "baz"); 178 /// assert_eq!(iter_results[3], "quux"); 179 /// ``` iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F>180 pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> { 181 self.as_components().iter() 182 } 183 184 /// Get one of this slice's elements, returning `None` if the index is out of bounds 185 /// 186 /// # Example 187 /// 188 /// ```rust 189 /// # use zerovec::VarZeroVec; 190 /// 191 /// let strings = vec!["foo", "bar", "baz", "quux"]; 192 /// let vec = VarZeroVec::<str>::from(&strings); 193 /// 194 /// let mut iter_results: Vec<&str> = vec.iter().collect(); 195 /// assert_eq!(vec.get(0), Some("foo")); 196 /// assert_eq!(vec.get(1), Some("bar")); 197 /// assert_eq!(vec.get(2), Some("baz")); 198 /// assert_eq!(vec.get(3), Some("quux")); 199 /// assert_eq!(vec.get(4), None); 200 /// ``` get(&self, idx: usize) -> Option<&T>201 pub fn get(&self, idx: usize) -> Option<&T> { 202 self.as_components().get(idx) 203 } 204 205 /// Get one of this slice's elements 206 /// 207 /// # Safety 208 /// 209 /// `index` must be in range 210 /// 211 /// # Example 212 /// 213 /// ```rust 214 /// # use zerovec::VarZeroVec; 215 /// 216 /// let strings = vec!["foo", "bar", "baz", "quux"]; 217 /// let vec = VarZeroVec::<str>::from(&strings); 218 /// 219 /// let mut iter_results: Vec<&str> = vec.iter().collect(); 220 /// unsafe { 221 /// assert_eq!(vec.get_unchecked(0), "foo"); 222 /// assert_eq!(vec.get_unchecked(1), "bar"); 223 /// assert_eq!(vec.get_unchecked(2), "baz"); 224 /// assert_eq!(vec.get_unchecked(3), "quux"); 225 /// } 226 /// ``` get_unchecked(&self, idx: usize) -> &T227 pub unsafe fn get_unchecked(&self, idx: usize) -> &T { 228 self.as_components().get_unchecked(idx) 229 } 230 231 /// Obtain an owned `Vec<Box<T>>` out of this 232 #[cfg(feature = "alloc")] to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>>233 pub fn to_vec(&self) -> alloc::vec::Vec<alloc::boxed::Box<T>> { 234 self.as_components().to_vec() 235 } 236 237 /// Get a reference to the entire encoded backing buffer of this slice 238 /// 239 /// The bytes can be passed back to [`Self::parse_bytes()`]. 240 /// 241 /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. 242 /// 243 /// # Example 244 /// 245 /// ```rust 246 /// # use zerovec::VarZeroVec; 247 /// 248 /// let strings = vec!["foo", "bar", "baz"]; 249 /// let vzv = VarZeroVec::<str>::from(&strings); 250 /// 251 /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap()); 252 /// ``` 253 #[inline] as_bytes(&self) -> &[u8]254 pub const fn as_bytes(&self) -> &[u8] { 255 &self.entire_slice 256 } 257 258 /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`] 259 /// 260 /// If you wish to repeatedly call methods on this [`VarZeroSlice`], 261 /// it is more efficient to perform this conversion first as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F>262 pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { 263 VarZeroVec(VarZeroVecInner::Borrowed(self)) 264 } 265 266 /// Parse a VarZeroSlice from a slice of the appropriate format 267 /// 268 /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError>269 pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> { 270 <Self as VarULE>::parse_bytes(slice) 271 } 272 } 273 274 impl<T, F> VarZeroSlice<T, F> 275 where 276 T: VarULE, 277 T: ?Sized, 278 T: Ord, 279 F: VarZeroVecFormat, 280 { 281 /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see 282 /// the standard library function [`binary_search`]. 283 /// 284 /// # Example 285 /// 286 /// ``` 287 /// # use zerovec::VarZeroVec; 288 /// 289 /// let strings = vec!["a", "b", "f", "g"]; 290 /// let vec = VarZeroVec::<str>::from(&strings); 291 /// 292 /// assert_eq!(vec.binary_search("f"), Ok(2)); 293 /// assert_eq!(vec.binary_search("e"), Err(2)); 294 /// ``` 295 /// 296 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search 297 #[inline] binary_search(&self, x: &T) -> Result<usize, usize>298 pub fn binary_search(&self, x: &T) -> Result<usize, usize> { 299 self.as_components().binary_search(x) 300 } 301 302 /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range. 303 /// 304 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according 305 /// to the behavior of the standard library function [`binary_search`]. 306 /// 307 /// The index is returned relative to the start of the range. 308 /// 309 /// # Example 310 /// 311 /// ``` 312 /// # use zerovec::VarZeroVec; 313 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; 314 /// let vec = VarZeroVec::<str>::from(&strings); 315 /// 316 /// // Same behavior as binary_search when the range covers the whole slice: 317 /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3))); 318 /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4))); 319 /// 320 /// // Will not look outside of the range: 321 /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1))); 322 /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0))); 323 /// 324 /// // Will return indices relative to the start of the range: 325 /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2))); 326 /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3))); 327 /// 328 /// // Will return `None` if the range is out of bounds: 329 /// assert_eq!(vec.binary_search_in_range("x", 100..200), None); 330 /// assert_eq!(vec.binary_search_in_range("x", 0..200), None); 331 /// ``` 332 /// 333 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search 334 #[inline] binary_search_in_range( &self, x: &T, range: Range<usize>, ) -> Option<Result<usize, usize>>335 pub fn binary_search_in_range( 336 &self, 337 x: &T, 338 range: Range<usize>, 339 ) -> Option<Result<usize, usize>> { 340 self.as_components().binary_search_in_range(x, range) 341 } 342 } 343 344 impl<T, F> VarZeroSlice<T, F> 345 where 346 T: VarULE, 347 T: ?Sized, 348 F: VarZeroVecFormat, 349 { 350 /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see 351 /// the standard library function [`binary_search_by`]. 352 /// 353 /// # Example 354 /// 355 /// ``` 356 /// # use zerovec::VarZeroVec; 357 /// let strings = vec!["a", "b", "f", "g"]; 358 /// let vec = VarZeroVec::<str>::from(&strings); 359 /// 360 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2)); 361 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2)); 362 /// ``` 363 /// 364 /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by 365 #[inline] binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize>366 pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> { 367 self.as_components().binary_search_by(predicate) 368 } 369 370 /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range. 371 /// 372 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according 373 /// to the behavior of the standard library function [`binary_search`]. 374 /// 375 /// The index is returned relative to the start of the range. 376 /// 377 /// # Example 378 /// 379 /// ``` 380 /// # use zerovec::VarZeroVec; 381 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; 382 /// let vec = VarZeroVec::<str>::from(&strings); 383 /// 384 /// // Same behavior as binary_search when the range covers the whole slice: 385 /// assert_eq!( 386 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7), 387 /// Some(Ok(3)) 388 /// ); 389 /// assert_eq!( 390 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7), 391 /// Some(Err(4)) 392 /// ); 393 /// 394 /// // Will not look outside of the range: 395 /// assert_eq!( 396 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1), 397 /// Some(Err(1)) 398 /// ); 399 /// assert_eq!( 400 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7), 401 /// Some(Err(0)) 402 /// ); 403 /// 404 /// // Will return indices relative to the start of the range: 405 /// assert_eq!( 406 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6), 407 /// Some(Ok(2)) 408 /// ); 409 /// assert_eq!( 410 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6), 411 /// Some(Err(3)) 412 /// ); 413 /// 414 /// // Will return `None` if the range is out of bounds: 415 /// assert_eq!( 416 /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200), 417 /// None 418 /// ); 419 /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None); 420 /// ``` 421 /// 422 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range<usize>, ) -> Option<Result<usize, usize>>423 pub fn binary_search_in_range_by( 424 &self, 425 predicate: impl FnMut(&T) -> Ordering, 426 range: Range<usize>, 427 ) -> Option<Result<usize, usize>> { 428 self.as_components() 429 .binary_search_in_range_by(predicate, range) 430 } 431 } 432 // Safety (based on the safety checklist on the VarULE trait): 433 // 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a 434 // `[u8]` slice which satisfies this invariant) 435 // 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a 436 // `[u8]` slice which satisfies this invariant) 437 // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. 438 // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety 439 // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. 440 // 6. `as_bytes()` is equivalent to a regular transmute of the underlying data 441 // 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type) 442 unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> { validate_bytes(bytes: &[u8]) -> Result<(), UleError>443 fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { 444 let _: VarZeroVecComponents<T, F> = 445 VarZeroVecComponents::parse_bytes(bytes).map_err(|_| UleError::parse::<Self>())?; 446 Ok(()) 447 } 448 from_bytes_unchecked(bytes: &[u8]) -> &Self449 unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { 450 // self is really just a wrapper around a byte slice 451 mem::transmute(bytes) 452 } 453 as_bytes(&self) -> &[u8]454 fn as_bytes(&self) -> &[u8] { 455 &self.entire_slice 456 } 457 } 458 459 impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> { 460 type Output = T; index(&self, index: usize) -> &Self::Output461 fn index(&self, index: usize) -> &Self::Output { 462 #[allow(clippy::panic)] // documented 463 match self.get(index) { 464 Some(x) => x, 465 None => panic!( 466 "index out of bounds: the len is {} but the index is {index}", 467 self.len() 468 ), 469 } 470 } 471 } 472 473 impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F> 474 where 475 T: VarULE, 476 T: ?Sized, 477 T: PartialEq, 478 F: VarZeroVecFormat, 479 { 480 #[inline] eq(&self, other: &VarZeroSlice<T, F>) -> bool481 fn eq(&self, other: &VarZeroSlice<T, F>) -> bool { 482 // VarULE has an API guarantee that this is equivalent 483 // to `T::VarULE::eq()` 484 self.entire_slice.eq(&other.entire_slice) 485 } 486 } 487 488 impl<T, F> Eq for VarZeroSlice<T, F> 489 where 490 T: VarULE, 491 T: ?Sized, 492 T: Eq, 493 F: VarZeroVecFormat, 494 { 495 } 496 497 impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> { 498 #[inline] partial_cmp(&self, other: &Self) -> Option<Ordering>499 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 500 self.iter().partial_cmp(other.iter()) 501 } 502 } 503 504 impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> { 505 #[inline] cmp(&self, other: &Self) -> Ordering506 fn cmp(&self, other: &Self) -> Ordering { 507 self.iter().cmp(other.iter()) 508 } 509 } 510 511 impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F> 512 where 513 T: fmt::Debug, 514 { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result515 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 516 f.debug_list().entries(self.iter()).finish() 517 } 518 } 519 520 impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> { as_ref(&self) -> &VarZeroSlice<T, F>521 fn as_ref(&self) -> &VarZeroSlice<T, F> { 522 self 523 } 524 } 525