1 // Copyright © 2019 The Rust Fuzz Project Developers. 2 // 3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 // option. This file may not be copied, modified, or distributed 7 // except according to those terms. 8 9 //! Wrappers around raw, unstructured bytes. 10 11 use crate::{Arbitrary, Error, Result}; 12 use std::marker::PhantomData; 13 use std::ops::ControlFlow; 14 use std::{mem, ops}; 15 16 /// A source of unstructured data. 17 /// 18 /// An `Unstructured` helps `Arbitrary` implementations interpret raw data 19 /// (typically provided by a fuzzer) as a "DNA string" that describes how to 20 /// construct the `Arbitrary` type. The goal is that a small change to the "DNA 21 /// string" (the raw data wrapped by an `Unstructured`) results in a small 22 /// change to the generated `Arbitrary` instance. This helps a fuzzer 23 /// efficiently explore the `Arbitrary`'s input space. 24 /// 25 /// `Unstructured` is deterministic: given the same raw data, the same series of 26 /// API calls will return the same results (modulo system resource constraints, 27 /// like running out of memory). However, `Unstructured` does not guarantee 28 /// anything beyond that: it makes not guarantee that it will yield bytes from 29 /// the underlying data in any particular order. 30 /// 31 /// You shouldn't generally need to use an `Unstructured` unless you are writing 32 /// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly, 33 /// you should just be passing it through to nested `Arbitrary::arbitrary` 34 /// calls. 35 /// 36 /// # Example 37 /// 38 /// Imagine you were writing a color conversion crate. You might want to write 39 /// fuzz tests that take a random RGB color and assert various properties, run 40 /// functions and make sure nothing panics, etc. 41 /// 42 /// Below is what translating the fuzzer's raw input into an `Unstructured` and 43 /// using that to generate an arbitrary RGB color might look like: 44 /// 45 /// ``` 46 /// # #[cfg(feature = "derive")] fn foo() { 47 /// use arbitrary::{Arbitrary, Unstructured}; 48 /// 49 /// /// An RGB color. 50 /// #[derive(Arbitrary)] 51 /// pub struct Rgb { 52 /// r: u8, 53 /// g: u8, 54 /// b: u8, 55 /// } 56 /// 57 /// // Get the raw bytes from the fuzzer. 58 /// # let get_input_from_fuzzer = || &[]; 59 /// let raw_data: &[u8] = get_input_from_fuzzer(); 60 /// 61 /// // Wrap it in an `Unstructured`. 62 /// let mut unstructured = Unstructured::new(raw_data); 63 /// 64 /// // Generate an `Rgb` color and run our checks. 65 /// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) { 66 /// # let run_my_color_conversion_checks = |_| {}; 67 /// run_my_color_conversion_checks(rgb); 68 /// } 69 /// # } 70 /// ``` 71 #[derive(Debug)] 72 pub struct Unstructured<'a> { 73 data: &'a [u8], 74 } 75 76 impl<'a> Unstructured<'a> { 77 /// Create a new `Unstructured` from the given raw data. 78 /// 79 /// # Example 80 /// 81 /// ``` 82 /// use arbitrary::Unstructured; 83 /// 84 /// let u = Unstructured::new(&[1, 2, 3, 4]); 85 /// ``` new(data: &'a [u8]) -> Self86 pub fn new(data: &'a [u8]) -> Self { 87 Unstructured { data } 88 } 89 90 /// Get the number of remaining bytes of underlying data that are still 91 /// available. 92 /// 93 /// # Example 94 /// 95 /// ``` 96 /// use arbitrary::{Arbitrary, Unstructured}; 97 /// 98 /// let mut u = Unstructured::new(&[1, 2, 3]); 99 /// 100 /// // Initially have three bytes of data. 101 /// assert_eq!(u.len(), 3); 102 /// 103 /// // Generating a `bool` consumes one byte from the underlying data, so 104 /// // we are left with two bytes afterwards. 105 /// let _ = bool::arbitrary(&mut u); 106 /// assert_eq!(u.len(), 2); 107 /// ``` 108 #[inline] len(&self) -> usize109 pub fn len(&self) -> usize { 110 self.data.len() 111 } 112 113 /// Is the underlying unstructured data exhausted? 114 /// 115 /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`. 116 /// 117 /// # Example 118 /// 119 /// ``` 120 /// use arbitrary::{Arbitrary, Unstructured}; 121 /// 122 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 123 /// 124 /// // Initially, we are not empty. 125 /// assert!(!u.is_empty()); 126 /// 127 /// // Generating a `u32` consumes all four bytes of the underlying data, so 128 /// // we become empty afterwards. 129 /// let _ = u32::arbitrary(&mut u); 130 /// assert!(u.is_empty()); 131 /// ``` 132 #[inline] is_empty(&self) -> bool133 pub fn is_empty(&self) -> bool { 134 self.len() == 0 135 } 136 137 /// Generate an arbitrary instance of `A`. 138 /// 139 /// This is simply a helper method that is equivalent to `<A as 140 /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise, 141 /// and can be used in situations where Rust's type inference will figure 142 /// out what `A` should be. 143 /// 144 /// # Example 145 /// 146 /// ``` 147 /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> { 148 /// use arbitrary::{Arbitrary, Unstructured}; 149 /// 150 /// #[derive(Arbitrary)] 151 /// struct MyType { 152 /// // ... 153 /// } 154 /// 155 /// fn do_stuff(value: MyType) { 156 /// # let _ = value; 157 /// // ... 158 /// } 159 /// 160 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 161 /// 162 /// // Rust's type inference can figure out that `value` should be of type 163 /// // `MyType` here: 164 /// let value = u.arbitrary()?; 165 /// do_stuff(value); 166 /// # Ok(()) } 167 /// ``` arbitrary<A>(&mut self) -> Result<A> where A: Arbitrary<'a>,168 pub fn arbitrary<A>(&mut self) -> Result<A> 169 where 170 A: Arbitrary<'a>, 171 { 172 <A as Arbitrary<'a>>::arbitrary(self) 173 } 174 175 /// Get the number of elements to insert when building up a collection of 176 /// arbitrary `ElementType`s. 177 /// 178 /// This uses the [`<ElementType as 179 /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly 180 /// choose a length such that we most likely have enough underlying bytes to 181 /// construct that many arbitrary `ElementType`s. 182 /// 183 /// This should only be called within an `Arbitrary` implementation. 184 /// 185 /// # Example 186 /// 187 /// ``` 188 /// use arbitrary::{Arbitrary, Result, Unstructured}; 189 /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> } 190 /// # impl<T> MyCollection<T> { 191 /// # pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } } 192 /// # pub fn insert(&mut self, element: T) {} 193 /// # } 194 /// 195 /// impl<'a, T> Arbitrary<'a> for MyCollection<T> 196 /// where 197 /// T: Arbitrary<'a>, 198 /// { 199 /// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> { 200 /// // Get the number of `T`s we should insert into our collection. 201 /// let len = u.arbitrary_len::<T>()?; 202 /// 203 /// // And then create a collection of that length! 204 /// let mut my_collection = MyCollection::with_capacity(len); 205 /// for _ in 0..len { 206 /// let element = T::arbitrary(u)?; 207 /// my_collection.insert(element); 208 /// } 209 /// 210 /// Ok(my_collection) 211 /// } 212 /// } 213 /// ``` arbitrary_len<ElementType>(&mut self) -> Result<usize> where ElementType: Arbitrary<'a>,214 pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize> 215 where 216 ElementType: Arbitrary<'a>, 217 { 218 let byte_size = self.arbitrary_byte_size()?; 219 let (lower, upper) = <ElementType as Arbitrary>::size_hint(0); 220 let elem_size = upper.unwrap_or(lower * 2); 221 let elem_size = std::cmp::max(1, elem_size); 222 Ok(byte_size / elem_size) 223 } 224 arbitrary_byte_size(&mut self) -> Result<usize>225 fn arbitrary_byte_size(&mut self) -> Result<usize> { 226 if self.data.is_empty() { 227 Ok(0) 228 } else if self.data.len() == 1 { 229 self.data = &[]; 230 Ok(0) 231 } else { 232 // Take lengths from the end of the data, since the `libFuzzer` folks 233 // found that this lets fuzzers more efficiently explore the input 234 // space. 235 // 236 // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97 237 238 // We only consume as many bytes as necessary to cover the entire 239 // range of the byte string. 240 // Note: We cast to u64 so we don't overflow when checking u32::MAX + 4 on 32-bit archs 241 let len = if self.data.len() as u64 <= u8::MAX as u64 + 1 { 242 let bytes = 1; 243 let max_size = self.data.len() - bytes; 244 let (rest, for_size) = self.data.split_at(max_size); 245 self.data = rest; 246 Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize 247 } else if self.data.len() as u64 <= u16::MAX as u64 + 2 { 248 let bytes = 2; 249 let max_size = self.data.len() - bytes; 250 let (rest, for_size) = self.data.split_at(max_size); 251 self.data = rest; 252 Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize 253 } else if self.data.len() as u64 <= u32::MAX as u64 + 4 { 254 let bytes = 4; 255 let max_size = self.data.len() - bytes; 256 let (rest, for_size) = self.data.split_at(max_size); 257 self.data = rest; 258 Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize 259 } else { 260 let bytes = 8; 261 let max_size = self.data.len() - bytes; 262 let (rest, for_size) = self.data.split_at(max_size); 263 self.data = rest; 264 Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize 265 }; 266 267 Ok(len) 268 } 269 } 270 271 /// Generate an integer within the given range. 272 /// 273 /// Do not use this to generate the size of a collection. Use 274 /// `arbitrary_len` instead. 275 /// 276 /// # Panics 277 /// 278 /// Panics if `range.start > range.end`. That is, the given range must be 279 /// non-empty. 280 /// 281 /// # Example 282 /// 283 /// ``` 284 /// # fn foo() -> arbitrary::Result<()> { 285 /// use arbitrary::{Arbitrary, Unstructured}; 286 /// 287 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 288 /// 289 /// let x: i32 = u.int_in_range(-5_000..=-1_000)?; 290 /// 291 /// assert!(-5_000 <= x); 292 /// assert!(x <= -1_000); 293 /// # Ok(()) } 294 /// ``` int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T> where T: Int,295 pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T> 296 where 297 T: Int, 298 { 299 let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?; 300 self.data = &self.data[bytes_consumed..]; 301 Ok(result) 302 } 303 int_in_range_impl<T>( range: ops::RangeInclusive<T>, mut bytes: impl Iterator<Item = u8>, ) -> Result<(T, usize)> where T: Int,304 fn int_in_range_impl<T>( 305 range: ops::RangeInclusive<T>, 306 mut bytes: impl Iterator<Item = u8>, 307 ) -> Result<(T, usize)> 308 where 309 T: Int, 310 { 311 let start = *range.start(); 312 let end = *range.end(); 313 assert!( 314 start <= end, 315 "`arbitrary::Unstructured::int_in_range` requires a non-empty range" 316 ); 317 318 // When there is only one possible choice, don't waste any entropy from 319 // the underlying data. 320 if start == end { 321 return Ok((start, 0)); 322 } 323 324 // From here on out we work with the unsigned representation. All of the 325 // operations performed below work out just as well whether or not `T` 326 // is a signed or unsigned integer. 327 let start = start.to_unsigned(); 328 let end = end.to_unsigned(); 329 330 let delta = end.wrapping_sub(start); 331 debug_assert_ne!(delta, T::Unsigned::ZERO); 332 333 // Compute an arbitrary integer offset from the start of the range. We 334 // do this by consuming `size_of(T)` bytes from the input to create an 335 // arbitrary integer and then clamping that int into our range bounds 336 // with a modulo operation. 337 let mut arbitrary_int = T::Unsigned::ZERO; 338 let mut bytes_consumed: usize = 0; 339 340 while (bytes_consumed < mem::size_of::<T>()) 341 && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO 342 { 343 let byte = match bytes.next() { 344 None => break, 345 Some(b) => b, 346 }; 347 bytes_consumed += 1; 348 349 // Combine this byte into our arbitrary integer, but avoid 350 // overflowing the shift for `u8` and `i8`. 351 arbitrary_int = if mem::size_of::<T>() == 1 { 352 T::Unsigned::from_u8(byte) 353 } else { 354 (arbitrary_int << 8) | T::Unsigned::from_u8(byte) 355 }; 356 } 357 358 let offset = if delta == T::Unsigned::MAX { 359 arbitrary_int 360 } else { 361 arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap()) 362 }; 363 364 // Finally, we add `start` to our offset from `start` to get the result 365 // actual value within the range. 366 let result = start.wrapping_add(offset); 367 368 // And convert back to our maybe-signed representation. 369 let result = T::from_unsigned(result); 370 debug_assert!(*range.start() <= result); 371 debug_assert!(result <= *range.end()); 372 373 Ok((result, bytes_consumed)) 374 } 375 376 /// Choose one of the given choices. 377 /// 378 /// This should only be used inside of `Arbitrary` implementations. 379 /// 380 /// Returns an error if there is not enough underlying data to make a 381 /// choice or if no choices are provided. 382 /// 383 /// # Examples 384 /// 385 /// Selecting from an array of choices: 386 /// 387 /// ``` 388 /// use arbitrary::Unstructured; 389 /// 390 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 391 /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g']; 392 /// 393 /// let choice = u.choose(&choices).unwrap(); 394 /// 395 /// println!("chose {}", choice); 396 /// ``` 397 /// 398 /// An error is returned if no choices are provided: 399 /// 400 /// ``` 401 /// use arbitrary::Unstructured; 402 /// 403 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 404 /// let choices: [char; 0] = []; 405 /// 406 /// let result = u.choose(&choices); 407 /// 408 /// assert!(result.is_err()); 409 /// ``` choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T>410 pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> { 411 let idx = self.choose_index(choices.len())?; 412 Ok(&choices[idx]) 413 } 414 415 /// Choose one of the given iterator choices. 416 /// 417 /// This should only be used inside of `Arbitrary` implementations. 418 /// 419 /// Returns an error if there is not enough underlying data to make a 420 /// choice or if no choices are provided. 421 /// 422 /// # Examples 423 /// 424 /// Selecting a random item from a set: 425 /// 426 /// ``` 427 /// use std::collections::BTreeSet; 428 /// use arbitrary::Unstructured; 429 /// 430 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 431 /// let set = BTreeSet::from(['a', 'b', 'c']); 432 /// 433 /// let choice = u.choose_iter(set.iter()).unwrap(); 434 /// 435 /// println!("chose {}", choice); 436 /// ``` choose_iter<T, I>(&mut self, choices: I) -> Result<T> where I: IntoIterator<Item = T>, I::IntoIter: ExactSizeIterator,437 pub fn choose_iter<T, I>(&mut self, choices: I) -> Result<T> 438 where 439 I: IntoIterator<Item = T>, 440 I::IntoIter: ExactSizeIterator, 441 { 442 let mut choices = choices.into_iter(); 443 let idx = self.choose_index(choices.len())?; 444 let choice = choices 445 .nth(idx) 446 .expect("ExactSizeIterator should have correct len"); 447 Ok(choice) 448 } 449 450 /// Choose a value in `0..len`. 451 /// 452 /// Returns an error if the `len` is zero. 453 /// 454 /// # Examples 455 /// 456 /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation. 457 /// 458 /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle 459 /// 460 /// ``` 461 /// use arbitrary::Unstructured; 462 /// 463 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 464 /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g']; 465 /// let mut to_permute = &mut permutation[..]; 466 /// while to_permute.len() > 1 { 467 /// let idx = u.choose_index(to_permute.len()).unwrap(); 468 /// to_permute.swap(0, idx); 469 /// to_permute = &mut to_permute[1..]; 470 /// } 471 /// 472 /// println!("permutation: {:?}", permutation); 473 /// ``` 474 /// 475 /// An error is returned if the length is zero: 476 /// 477 /// ``` 478 /// use arbitrary::Unstructured; 479 /// 480 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 481 /// let array: [i32; 0] = []; 482 /// 483 /// let result = u.choose_index(array.len()); 484 /// 485 /// assert!(result.is_err()); 486 /// ``` choose_index(&mut self, len: usize) -> Result<usize>487 pub fn choose_index(&mut self, len: usize) -> Result<usize> { 488 if len == 0 { 489 return Err(Error::EmptyChoose); 490 } 491 let idx = self.int_in_range(0..=len - 1)?; 492 Ok(idx) 493 } 494 495 /// Generate a boolean according to the given ratio. 496 /// 497 /// # Panics 498 /// 499 /// Panics when the numerator and denominator do not meet these constraints: 500 /// 501 /// * `0 < numerator <= denominator` 502 /// 503 /// # Example 504 /// 505 /// Generate a boolean that is `true` five sevenths of the time: 506 /// 507 /// ``` 508 /// # fn foo() -> arbitrary::Result<()> { 509 /// use arbitrary::Unstructured; 510 /// 511 /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; 512 /// let mut u = Unstructured::new(&my_data); 513 /// 514 /// if u.ratio(5, 7)? { 515 /// // Take this branch 5/7 of the time. 516 /// } 517 /// # Ok(()) 518 /// # } 519 /// ``` ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool> where T: Int,520 pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool> 521 where 522 T: Int, 523 { 524 assert!(T::ZERO < numerator); 525 assert!(numerator <= denominator); 526 let x = self.int_in_range(T::ONE..=denominator)?; 527 Ok(x <= numerator) 528 } 529 530 /// Fill a `buffer` with bytes from the underlying raw data. 531 /// 532 /// This should only be called within an `Arbitrary` implementation. This is 533 /// a very low-level operation. You should generally prefer calling nested 534 /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and 535 /// `String::arbitrary` over using this method directly. 536 /// 537 /// If this `Unstructured` does not have enough underlying data to fill the 538 /// whole `buffer`, it pads the buffer out with zeros. 539 /// 540 /// # Example 541 /// 542 /// ``` 543 /// use arbitrary::Unstructured; 544 /// 545 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 546 /// 547 /// let mut buf = [0; 2]; 548 /// 549 /// assert!(u.fill_buffer(&mut buf).is_ok()); 550 /// assert_eq!(buf, [1, 2]); 551 /// 552 /// assert!(u.fill_buffer(&mut buf).is_ok()); 553 /// assert_eq!(buf, [3, 4]); 554 /// 555 /// assert!(u.fill_buffer(&mut buf).is_ok()); 556 /// assert_eq!(buf, [0, 0]); 557 /// ``` fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()>558 pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> { 559 let n = std::cmp::min(buffer.len(), self.data.len()); 560 buffer[..n].copy_from_slice(&self.data[..n]); 561 for byte in buffer[n..].iter_mut() { 562 *byte = 0; 563 } 564 self.data = &self.data[n..]; 565 Ok(()) 566 } 567 568 /// Provide `size` bytes from the underlying raw data. 569 /// 570 /// This should only be called within an `Arbitrary` implementation. This is 571 /// a very low-level operation. You should generally prefer calling nested 572 /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and 573 /// `String::arbitrary` over using this method directly. 574 /// 575 /// # Example 576 /// 577 /// ``` 578 /// use arbitrary::Unstructured; 579 /// 580 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 581 /// 582 /// assert!(u.bytes(2).unwrap() == &[1, 2]); 583 /// assert!(u.bytes(2).unwrap() == &[3, 4]); 584 /// ``` bytes(&mut self, size: usize) -> Result<&'a [u8]>585 pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> { 586 if self.data.len() < size { 587 return Err(Error::NotEnoughData); 588 } 589 590 let (for_buf, rest) = self.data.split_at(size); 591 self.data = rest; 592 Ok(for_buf) 593 } 594 595 /// Peek at `size` number of bytes of the underlying raw input. 596 /// 597 /// Does not consume the bytes, only peeks at them. 598 /// 599 /// Returns `None` if there are not `size` bytes left in the underlying raw 600 /// input. 601 /// 602 /// # Example 603 /// 604 /// ``` 605 /// use arbitrary::Unstructured; 606 /// 607 /// let u = Unstructured::new(&[1, 2, 3]); 608 /// 609 /// assert_eq!(u.peek_bytes(0).unwrap(), []); 610 /// assert_eq!(u.peek_bytes(1).unwrap(), [1]); 611 /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]); 612 /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]); 613 /// 614 /// assert!(u.peek_bytes(4).is_none()); 615 /// ``` peek_bytes(&self, size: usize) -> Option<&'a [u8]>616 pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> { 617 self.data.get(..size) 618 } 619 620 /// Consume all of the rest of the remaining underlying bytes. 621 /// 622 /// Returns a slice of all the remaining, unconsumed bytes. 623 /// 624 /// # Example 625 /// 626 /// ``` 627 /// use arbitrary::Unstructured; 628 /// 629 /// let mut u = Unstructured::new(&[1, 2, 3]); 630 /// 631 /// let mut remaining = u.take_rest(); 632 /// 633 /// assert_eq!(remaining, [1, 2, 3]); 634 /// ``` take_rest(mut self) -> &'a [u8]635 pub fn take_rest(mut self) -> &'a [u8] { 636 mem::take(&mut self.data) 637 } 638 639 /// Provide an iterator over elements for constructing a collection 640 /// 641 /// This is useful for implementing [`Arbitrary::arbitrary`] on collections 642 /// since the implementation is simply `u.arbitrary_iter()?.collect()` arbitrary_iter<'b, ElementType: Arbitrary<'a>>( &'b mut self, ) -> Result<ArbitraryIter<'a, 'b, ElementType>>643 pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>( 644 &'b mut self, 645 ) -> Result<ArbitraryIter<'a, 'b, ElementType>> { 646 Ok(ArbitraryIter { 647 u: &mut *self, 648 _marker: PhantomData, 649 }) 650 } 651 652 /// Provide an iterator over elements for constructing a collection from 653 /// all the remaining bytes. 654 /// 655 /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections 656 /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()` arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>( self, ) -> Result<ArbitraryTakeRestIter<'a, ElementType>>657 pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>( 658 self, 659 ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> { 660 Ok(ArbitraryTakeRestIter { 661 u: self, 662 _marker: PhantomData, 663 }) 664 } 665 666 /// Call the given function an arbitrary number of times. 667 /// 668 /// The function is given this `Unstructured` so that it can continue to 669 /// generate arbitrary data and structures. 670 /// 671 /// You may optionaly specify minimum and maximum bounds on the number of 672 /// times the function is called. 673 /// 674 /// You may break out of the loop early by returning 675 /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return 676 /// `Ok(std::ops::ControlFlow::Continue)`. 677 /// 678 /// # Panics 679 /// 680 /// Panics if `min > max`. 681 /// 682 /// # Example 683 /// 684 /// Call a closure that generates an arbitrary type inside a context an 685 /// arbitrary number of times: 686 /// 687 /// ``` 688 /// use arbitrary::{Result, Unstructured}; 689 /// use std::ops::ControlFlow; 690 /// 691 /// enum Type { 692 /// /// A boolean type. 693 /// Bool, 694 /// 695 /// /// An integer type. 696 /// Int, 697 /// 698 /// /// A list of the `i`th type in this type's context. 699 /// List(usize), 700 /// } 701 /// 702 /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> { 703 /// let mut context = vec![]; 704 /// 705 /// u.arbitrary_loop(Some(10), Some(20), |u| { 706 /// let num_choices = if context.is_empty() { 707 /// 2 708 /// } else { 709 /// 3 710 /// }; 711 /// let ty = match u.int_in_range::<u8>(1..=num_choices)? { 712 /// 1 => Type::Bool, 713 /// 2 => Type::Int, 714 /// 3 => Type::List(u.int_in_range(0..=context.len() - 1)?), 715 /// _ => unreachable!(), 716 /// }; 717 /// context.push(ty); 718 /// Ok(ControlFlow::Continue(())) 719 /// })?; 720 /// 721 /// // The number of loop iterations are constrained by the min/max 722 /// // bounds that we provided. 723 /// assert!(context.len() >= 10); 724 /// assert!(context.len() <= 20); 725 /// 726 /// Ok(context) 727 /// } 728 /// ``` arbitrary_loop( &mut self, min: Option<u32>, max: Option<u32>, mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>, ) -> Result<()>729 pub fn arbitrary_loop( 730 &mut self, 731 min: Option<u32>, 732 max: Option<u32>, 733 mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>, 734 ) -> Result<()> { 735 let min = min.unwrap_or(0); 736 let max = max.unwrap_or(u32::MAX); 737 738 for _ in 0..self.int_in_range(min..=max)? { 739 match f(self)? { 740 ControlFlow::Continue(_) => continue, 741 ControlFlow::Break(_) => break, 742 } 743 } 744 745 Ok(()) 746 } 747 } 748 749 /// Utility iterator produced by [`Unstructured::arbitrary_iter`] 750 pub struct ArbitraryIter<'a, 'b, ElementType> { 751 u: &'b mut Unstructured<'a>, 752 _marker: PhantomData<ElementType>, 753 } 754 755 impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> { 756 type Item = Result<ElementType>; next(&mut self) -> Option<Result<ElementType>>757 fn next(&mut self) -> Option<Result<ElementType>> { 758 let keep_going = self.u.arbitrary().unwrap_or(false); 759 if keep_going { 760 Some(Arbitrary::arbitrary(self.u)) 761 } else { 762 None 763 } 764 } 765 } 766 767 /// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`] 768 pub struct ArbitraryTakeRestIter<'a, ElementType> { 769 u: Unstructured<'a>, 770 _marker: PhantomData<ElementType>, 771 } 772 773 impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> { 774 type Item = Result<ElementType>; next(&mut self) -> Option<Result<ElementType>>775 fn next(&mut self) -> Option<Result<ElementType>> { 776 let keep_going = self.u.arbitrary().unwrap_or(false); 777 if keep_going { 778 Some(Arbitrary::arbitrary(&mut self.u)) 779 } else { 780 None 781 } 782 } 783 } 784 785 /// A trait that is implemented for all of the primitive integers: 786 /// 787 /// * `u8` 788 /// * `u16` 789 /// * `u32` 790 /// * `u64` 791 /// * `u128` 792 /// * `usize` 793 /// * `i8` 794 /// * `i16` 795 /// * `i32` 796 /// * `i64` 797 /// * `i128` 798 /// * `isize` 799 /// 800 /// Don't implement this trait yourself. 801 pub trait Int: 802 Copy 803 + std::fmt::Debug 804 + PartialOrd 805 + Ord 806 + ops::Sub<Self, Output = Self> 807 + ops::Rem<Self, Output = Self> 808 + ops::Shr<Self, Output = Self> 809 + ops::Shl<usize, Output = Self> 810 + ops::BitOr<Self, Output = Self> 811 { 812 #[doc(hidden)] 813 type Unsigned: Int; 814 815 #[doc(hidden)] 816 const ZERO: Self; 817 818 #[doc(hidden)] 819 const ONE: Self; 820 821 #[doc(hidden)] 822 const MAX: Self; 823 824 #[doc(hidden)] from_u8(b: u8) -> Self825 fn from_u8(b: u8) -> Self; 826 827 #[doc(hidden)] from_usize(u: usize) -> Self828 fn from_usize(u: usize) -> Self; 829 830 #[doc(hidden)] checked_add(self, rhs: Self) -> Option<Self>831 fn checked_add(self, rhs: Self) -> Option<Self>; 832 833 #[doc(hidden)] wrapping_add(self, rhs: Self) -> Self834 fn wrapping_add(self, rhs: Self) -> Self; 835 836 #[doc(hidden)] wrapping_sub(self, rhs: Self) -> Self837 fn wrapping_sub(self, rhs: Self) -> Self; 838 839 #[doc(hidden)] to_unsigned(self) -> Self::Unsigned840 fn to_unsigned(self) -> Self::Unsigned; 841 842 #[doc(hidden)] from_unsigned(unsigned: Self::Unsigned) -> Self843 fn from_unsigned(unsigned: Self::Unsigned) -> Self; 844 } 845 846 macro_rules! impl_int { 847 ( $( $ty:ty : $unsigned_ty: ty ; )* ) => { 848 $( 849 impl Int for $ty { 850 type Unsigned = $unsigned_ty; 851 852 const ZERO: Self = 0; 853 854 const ONE: Self = 1; 855 856 const MAX: Self = Self::MAX; 857 858 fn from_u8(b: u8) -> Self { 859 b as Self 860 } 861 862 fn from_usize(u: usize) -> Self { 863 u as Self 864 } 865 866 fn checked_add(self, rhs: Self) -> Option<Self> { 867 <$ty>::checked_add(self, rhs) 868 } 869 870 fn wrapping_add(self, rhs: Self) -> Self { 871 <$ty>::wrapping_add(self, rhs) 872 } 873 874 fn wrapping_sub(self, rhs: Self) -> Self { 875 <$ty>::wrapping_sub(self, rhs) 876 } 877 878 fn to_unsigned(self) -> Self::Unsigned { 879 self as $unsigned_ty 880 } 881 882 fn from_unsigned(unsigned: $unsigned_ty) -> Self { 883 unsigned as Self 884 } 885 } 886 )* 887 } 888 } 889 890 impl_int! { 891 u8: u8; 892 u16: u16; 893 u32: u32; 894 u64: u64; 895 u128: u128; 896 usize: usize; 897 i8: u8; 898 i16: u16; 899 i32: u32; 900 i64: u64; 901 i128: u128; 902 isize: usize; 903 } 904 905 #[cfg(test)] 906 mod tests { 907 use super::*; 908 909 #[test] test_byte_size()910 fn test_byte_size() { 911 let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]); 912 // Should take one byte off the end 913 assert_eq!(u.arbitrary_byte_size().unwrap(), 6); 914 assert_eq!(u.len(), 9); 915 let mut v = vec![0; 260]; 916 v.push(1); 917 v.push(4); 918 let mut u = Unstructured::new(&v); 919 // Should read two bytes off the end 920 assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104); 921 assert_eq!(u.len(), 260); 922 } 923 924 #[test] int_in_range_of_one()925 fn int_in_range_of_one() { 926 let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]); 927 let x = u.int_in_range(0..=0).unwrap(); 928 assert_eq!(x, 0); 929 let choice = *u.choose(&[42]).unwrap(); 930 assert_eq!(choice, 42) 931 } 932 933 #[test] int_in_range_uses_minimal_amount_of_bytes()934 fn int_in_range_uses_minimal_amount_of_bytes() { 935 let mut u = Unstructured::new(&[1, 2]); 936 assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap()); 937 assert_eq!(u.len(), 1); 938 939 let mut u = Unstructured::new(&[1, 2]); 940 assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap()); 941 assert_eq!(u.len(), 1); 942 943 let mut u = Unstructured::new(&[1]); 944 assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap()); 945 assert!(u.is_empty()); 946 } 947 948 #[test] int_in_range_in_bounds()949 fn int_in_range_in_bounds() { 950 for input in u8::MIN..=u8::MAX { 951 let input = [input]; 952 953 let mut u = Unstructured::new(&input); 954 let x = u.int_in_range(1..=u8::MAX).unwrap(); 955 assert_ne!(x, 0); 956 957 let mut u = Unstructured::new(&input); 958 let x = u.int_in_range(0..=u8::MAX - 1).unwrap(); 959 assert_ne!(x, u8::MAX); 960 } 961 } 962 963 #[test] int_in_range_covers_unsigned_range()964 fn int_in_range_covers_unsigned_range() { 965 // Test that we generate all values within the range given to 966 // `int_in_range`. 967 968 let mut full = [false; u8::MAX as usize + 1]; 969 let mut no_zero = [false; u8::MAX as usize]; 970 let mut no_max = [false; u8::MAX as usize]; 971 let mut narrow = [false; 10]; 972 973 for input in u8::MIN..=u8::MAX { 974 let input = [input]; 975 976 let mut u = Unstructured::new(&input); 977 let x = u.int_in_range(0..=u8::MAX).unwrap(); 978 full[x as usize] = true; 979 980 let mut u = Unstructured::new(&input); 981 let x = u.int_in_range(1..=u8::MAX).unwrap(); 982 no_zero[x as usize - 1] = true; 983 984 let mut u = Unstructured::new(&input); 985 let x = u.int_in_range(0..=u8::MAX - 1).unwrap(); 986 no_max[x as usize] = true; 987 988 let mut u = Unstructured::new(&input); 989 let x = u.int_in_range(100..=109).unwrap(); 990 narrow[x as usize - 100] = true; 991 } 992 993 for (i, covered) in full.iter().enumerate() { 994 assert!(covered, "full[{}] should have been generated", i); 995 } 996 for (i, covered) in no_zero.iter().enumerate() { 997 assert!(covered, "no_zero[{}] should have been generated", i); 998 } 999 for (i, covered) in no_max.iter().enumerate() { 1000 assert!(covered, "no_max[{}] should have been generated", i); 1001 } 1002 for (i, covered) in narrow.iter().enumerate() { 1003 assert!(covered, "narrow[{}] should have been generated", i); 1004 } 1005 } 1006 1007 #[test] int_in_range_covers_signed_range()1008 fn int_in_range_covers_signed_range() { 1009 // Test that we generate all values within the range given to 1010 // `int_in_range`. 1011 1012 let mut full = [false; u8::MAX as usize + 1]; 1013 let mut no_min = [false; u8::MAX as usize]; 1014 let mut no_max = [false; u8::MAX as usize]; 1015 let mut narrow = [false; 21]; 1016 1017 let abs_i8_min: isize = 128; 1018 1019 for input in 0..=u8::MAX { 1020 let input = [input]; 1021 1022 let mut u = Unstructured::new(&input); 1023 let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap(); 1024 full[(x as isize + abs_i8_min) as usize] = true; 1025 1026 let mut u = Unstructured::new(&input); 1027 let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap(); 1028 no_min[(x as isize + abs_i8_min - 1) as usize] = true; 1029 1030 let mut u = Unstructured::new(&input); 1031 let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap(); 1032 no_max[(x as isize + abs_i8_min) as usize] = true; 1033 1034 let mut u = Unstructured::new(&input); 1035 let x = u.int_in_range(-10..=10).unwrap(); 1036 narrow[(x as isize + 10) as usize] = true; 1037 } 1038 1039 for (i, covered) in full.iter().enumerate() { 1040 assert!(covered, "full[{}] should have been generated", i); 1041 } 1042 for (i, covered) in no_min.iter().enumerate() { 1043 assert!(covered, "no_min[{}] should have been generated", i); 1044 } 1045 for (i, covered) in no_max.iter().enumerate() { 1046 assert!(covered, "no_max[{}] should have been generated", i); 1047 } 1048 for (i, covered) in narrow.iter().enumerate() { 1049 assert!(covered, "narrow[{}] should have been generated", i); 1050 } 1051 } 1052 } 1053