• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright © 2019 The Rust Fuzz Project Developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! Wrappers around raw, unstructured bytes.
10 
11 use crate::{Arbitrary, Error, Result};
12 use std::marker::PhantomData;
13 use std::ops::ControlFlow;
14 use std::{mem, ops};
15 
16 /// A source of unstructured data.
17 ///
18 /// An `Unstructured` helps `Arbitrary` implementations interpret raw data
19 /// (typically provided by a fuzzer) as a "DNA string" that describes how to
20 /// construct the `Arbitrary` type. The goal is that a small change to the "DNA
21 /// string" (the raw data wrapped by an `Unstructured`) results in a small
22 /// change to the generated `Arbitrary` instance. This helps a fuzzer
23 /// efficiently explore the `Arbitrary`'s input space.
24 ///
25 /// `Unstructured` is deterministic: given the same raw data, the same series of
26 /// API calls will return the same results (modulo system resource constraints,
27 /// like running out of memory). However, `Unstructured` does not guarantee
28 /// anything beyond that: it makes not guarantee that it will yield bytes from
29 /// the underlying data in any particular order.
30 ///
31 /// You shouldn't generally need to use an `Unstructured` unless you are writing
32 /// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly,
33 /// you should just be passing it through to nested `Arbitrary::arbitrary`
34 /// calls.
35 ///
36 /// # Example
37 ///
38 /// Imagine you were writing a color conversion crate. You might want to write
39 /// fuzz tests that take a random RGB color and assert various properties, run
40 /// functions and make sure nothing panics, etc.
41 ///
42 /// Below is what translating the fuzzer's raw input into an `Unstructured` and
43 /// using that to generate an arbitrary RGB color might look like:
44 ///
45 /// ```
46 /// # #[cfg(feature = "derive")] fn foo() {
47 /// use arbitrary::{Arbitrary, Unstructured};
48 ///
49 /// /// An RGB color.
50 /// #[derive(Arbitrary)]
51 /// pub struct Rgb {
52 ///     r: u8,
53 ///     g: u8,
54 ///     b: u8,
55 /// }
56 ///
57 /// // Get the raw bytes from the fuzzer.
58 /// #   let get_input_from_fuzzer = || &[];
59 /// let raw_data: &[u8] = get_input_from_fuzzer();
60 ///
61 /// // Wrap it in an `Unstructured`.
62 /// let mut unstructured = Unstructured::new(raw_data);
63 ///
64 /// // Generate an `Rgb` color and run our checks.
65 /// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) {
66 /// #   let run_my_color_conversion_checks = |_| {};
67 ///     run_my_color_conversion_checks(rgb);
68 /// }
69 /// # }
70 /// ```
71 #[derive(Debug)]
72 pub struct Unstructured<'a> {
73     data: &'a [u8],
74 }
75 
76 impl<'a> Unstructured<'a> {
77     /// Create a new `Unstructured` from the given raw data.
78     ///
79     /// # Example
80     ///
81     /// ```
82     /// use arbitrary::Unstructured;
83     ///
84     /// let u = Unstructured::new(&[1, 2, 3, 4]);
85     /// ```
new(data: &'a [u8]) -> Self86     pub fn new(data: &'a [u8]) -> Self {
87         Unstructured { data }
88     }
89 
90     /// Get the number of remaining bytes of underlying data that are still
91     /// available.
92     ///
93     /// # Example
94     ///
95     /// ```
96     /// use arbitrary::{Arbitrary, Unstructured};
97     ///
98     /// let mut u = Unstructured::new(&[1, 2, 3]);
99     ///
100     /// // Initially have three bytes of data.
101     /// assert_eq!(u.len(), 3);
102     ///
103     /// // Generating a `bool` consumes one byte from the underlying data, so
104     /// // we are left with two bytes afterwards.
105     /// let _ = bool::arbitrary(&mut u);
106     /// assert_eq!(u.len(), 2);
107     /// ```
108     #[inline]
len(&self) -> usize109     pub fn len(&self) -> usize {
110         self.data.len()
111     }
112 
113     /// Is the underlying unstructured data exhausted?
114     ///
115     /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`.
116     ///
117     /// # Example
118     ///
119     /// ```
120     /// use arbitrary::{Arbitrary, Unstructured};
121     ///
122     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
123     ///
124     /// // Initially, we are not empty.
125     /// assert!(!u.is_empty());
126     ///
127     /// // Generating a `u32` consumes all four bytes of the underlying data, so
128     /// // we become empty afterwards.
129     /// let _ = u32::arbitrary(&mut u);
130     /// assert!(u.is_empty());
131     /// ```
132     #[inline]
is_empty(&self) -> bool133     pub fn is_empty(&self) -> bool {
134         self.len() == 0
135     }
136 
137     /// Generate an arbitrary instance of `A`.
138     ///
139     /// This is simply a helper method that is equivalent to `<A as
140     /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise,
141     /// and can be used in situations where Rust's type inference will figure
142     /// out what `A` should be.
143     ///
144     /// # Example
145     ///
146     /// ```
147     /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> {
148     /// use arbitrary::{Arbitrary, Unstructured};
149     ///
150     /// #[derive(Arbitrary)]
151     /// struct MyType {
152     ///     // ...
153     /// }
154     ///
155     /// fn do_stuff(value: MyType) {
156     /// #   let _ = value;
157     ///     // ...
158     /// }
159     ///
160     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
161     ///
162     /// // Rust's type inference can figure out that `value` should be of type
163     /// // `MyType` here:
164     /// let value = u.arbitrary()?;
165     /// do_stuff(value);
166     /// # Ok(()) }
167     /// ```
arbitrary<A>(&mut self) -> Result<A> where A: Arbitrary<'a>,168     pub fn arbitrary<A>(&mut self) -> Result<A>
169     where
170         A: Arbitrary<'a>,
171     {
172         <A as Arbitrary<'a>>::arbitrary(self)
173     }
174 
175     /// Get the number of elements to insert when building up a collection of
176     /// arbitrary `ElementType`s.
177     ///
178     /// This uses the [`<ElementType as
179     /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly
180     /// choose a length such that we most likely have enough underlying bytes to
181     /// construct that many arbitrary `ElementType`s.
182     ///
183     /// This should only be called within an `Arbitrary` implementation.
184     ///
185     /// # Example
186     ///
187     /// ```
188     /// use arbitrary::{Arbitrary, Result, Unstructured};
189     /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
190     /// # impl<T> MyCollection<T> {
191     /// #     pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } }
192     /// #     pub fn insert(&mut self, element: T) {}
193     /// # }
194     ///
195     /// impl<'a, T> Arbitrary<'a> for MyCollection<T>
196     /// where
197     ///     T: Arbitrary<'a>,
198     /// {
199     ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
200     ///         // Get the number of `T`s we should insert into our collection.
201     ///         let len = u.arbitrary_len::<T>()?;
202     ///
203     ///         // And then create a collection of that length!
204     ///         let mut my_collection = MyCollection::with_capacity(len);
205     ///         for _ in 0..len {
206     ///             let element = T::arbitrary(u)?;
207     ///             my_collection.insert(element);
208     ///         }
209     ///
210     ///         Ok(my_collection)
211     ///     }
212     /// }
213     /// ```
arbitrary_len<ElementType>(&mut self) -> Result<usize> where ElementType: Arbitrary<'a>,214     pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize>
215     where
216         ElementType: Arbitrary<'a>,
217     {
218         let byte_size = self.arbitrary_byte_size()?;
219         let (lower, upper) = <ElementType as Arbitrary>::size_hint(0);
220         let elem_size = upper.unwrap_or(lower * 2);
221         let elem_size = std::cmp::max(1, elem_size);
222         Ok(byte_size / elem_size)
223     }
224 
arbitrary_byte_size(&mut self) -> Result<usize>225     fn arbitrary_byte_size(&mut self) -> Result<usize> {
226         if self.data.is_empty() {
227             Ok(0)
228         } else if self.data.len() == 1 {
229             self.data = &[];
230             Ok(0)
231         } else {
232             // Take lengths from the end of the data, since the `libFuzzer` folks
233             // found that this lets fuzzers more efficiently explore the input
234             // space.
235             //
236             // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97
237 
238             // We only consume as many bytes as necessary to cover the entire
239             // range of the byte string.
240             // Note: We cast to u64 so we don't overflow when checking u32::MAX + 4 on 32-bit archs
241             let len = if self.data.len() as u64 <= u8::MAX as u64 + 1 {
242                 let bytes = 1;
243                 let max_size = self.data.len() - bytes;
244                 let (rest, for_size) = self.data.split_at(max_size);
245                 self.data = rest;
246                 Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize
247             } else if self.data.len() as u64 <= u16::MAX as u64 + 2 {
248                 let bytes = 2;
249                 let max_size = self.data.len() - bytes;
250                 let (rest, for_size) = self.data.split_at(max_size);
251                 self.data = rest;
252                 Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize
253             } else if self.data.len() as u64 <= u32::MAX as u64 + 4 {
254                 let bytes = 4;
255                 let max_size = self.data.len() - bytes;
256                 let (rest, for_size) = self.data.split_at(max_size);
257                 self.data = rest;
258                 Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize
259             } else {
260                 let bytes = 8;
261                 let max_size = self.data.len() - bytes;
262                 let (rest, for_size) = self.data.split_at(max_size);
263                 self.data = rest;
264                 Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize
265             };
266 
267             Ok(len)
268         }
269     }
270 
271     /// Generate an integer within the given range.
272     ///
273     /// Do not use this to generate the size of a collection. Use
274     /// `arbitrary_len` instead.
275     ///
276     /// # Panics
277     ///
278     /// Panics if `range.start > range.end`. That is, the given range must be
279     /// non-empty.
280     ///
281     /// # Example
282     ///
283     /// ```
284     /// # fn foo() -> arbitrary::Result<()> {
285     /// use arbitrary::{Arbitrary, Unstructured};
286     ///
287     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
288     ///
289     /// let x: i32 = u.int_in_range(-5_000..=-1_000)?;
290     ///
291     /// assert!(-5_000 <= x);
292     /// assert!(x <= -1_000);
293     /// # Ok(()) }
294     /// ```
int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T> where T: Int,295     pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T>
296     where
297         T: Int,
298     {
299         let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?;
300         self.data = &self.data[bytes_consumed..];
301         Ok(result)
302     }
303 
int_in_range_impl<T>( range: ops::RangeInclusive<T>, mut bytes: impl Iterator<Item = u8>, ) -> Result<(T, usize)> where T: Int,304     fn int_in_range_impl<T>(
305         range: ops::RangeInclusive<T>,
306         mut bytes: impl Iterator<Item = u8>,
307     ) -> Result<(T, usize)>
308     where
309         T: Int,
310     {
311         let start = *range.start();
312         let end = *range.end();
313         assert!(
314             start <= end,
315             "`arbitrary::Unstructured::int_in_range` requires a non-empty range"
316         );
317 
318         // When there is only one possible choice, don't waste any entropy from
319         // the underlying data.
320         if start == end {
321             return Ok((start, 0));
322         }
323 
324         // From here on out we work with the unsigned representation. All of the
325         // operations performed below work out just as well whether or not `T`
326         // is a signed or unsigned integer.
327         let start = start.to_unsigned();
328         let end = end.to_unsigned();
329 
330         let delta = end.wrapping_sub(start);
331         debug_assert_ne!(delta, T::Unsigned::ZERO);
332 
333         // Compute an arbitrary integer offset from the start of the range. We
334         // do this by consuming `size_of(T)` bytes from the input to create an
335         // arbitrary integer and then clamping that int into our range bounds
336         // with a modulo operation.
337         let mut arbitrary_int = T::Unsigned::ZERO;
338         let mut bytes_consumed: usize = 0;
339 
340         while (bytes_consumed < mem::size_of::<T>())
341             && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO
342         {
343             let byte = match bytes.next() {
344                 None => break,
345                 Some(b) => b,
346             };
347             bytes_consumed += 1;
348 
349             // Combine this byte into our arbitrary integer, but avoid
350             // overflowing the shift for `u8` and `i8`.
351             arbitrary_int = if mem::size_of::<T>() == 1 {
352                 T::Unsigned::from_u8(byte)
353             } else {
354                 (arbitrary_int << 8) | T::Unsigned::from_u8(byte)
355             };
356         }
357 
358         let offset = if delta == T::Unsigned::MAX {
359             arbitrary_int
360         } else {
361             arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap())
362         };
363 
364         // Finally, we add `start` to our offset from `start` to get the result
365         // actual value within the range.
366         let result = start.wrapping_add(offset);
367 
368         // And convert back to our maybe-signed representation.
369         let result = T::from_unsigned(result);
370         debug_assert!(*range.start() <= result);
371         debug_assert!(result <= *range.end());
372 
373         Ok((result, bytes_consumed))
374     }
375 
376     /// Choose one of the given choices.
377     ///
378     /// This should only be used inside of `Arbitrary` implementations.
379     ///
380     /// Returns an error if there is not enough underlying data to make a
381     /// choice or if no choices are provided.
382     ///
383     /// # Examples
384     ///
385     /// Selecting from an array of choices:
386     ///
387     /// ```
388     /// use arbitrary::Unstructured;
389     ///
390     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
391     /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
392     ///
393     /// let choice = u.choose(&choices).unwrap();
394     ///
395     /// println!("chose {}", choice);
396     /// ```
397     ///
398     /// An error is returned if no choices are provided:
399     ///
400     /// ```
401     /// use arbitrary::Unstructured;
402     ///
403     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
404     /// let choices: [char; 0] = [];
405     ///
406     /// let result = u.choose(&choices);
407     ///
408     /// assert!(result.is_err());
409     /// ```
choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T>410     pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> {
411         let idx = self.choose_index(choices.len())?;
412         Ok(&choices[idx])
413     }
414 
415     /// Choose one of the given iterator choices.
416     ///
417     /// This should only be used inside of `Arbitrary` implementations.
418     ///
419     /// Returns an error if there is not enough underlying data to make a
420     /// choice or if no choices are provided.
421     ///
422     /// # Examples
423     ///
424     /// Selecting a random item from a set:
425     ///
426     /// ```
427     /// use std::collections::BTreeSet;
428     /// use arbitrary::Unstructured;
429     ///
430     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
431     /// let set = BTreeSet::from(['a', 'b', 'c']);
432     ///
433     /// let choice = u.choose_iter(set.iter()).unwrap();
434     ///
435     /// println!("chose {}", choice);
436     /// ```
choose_iter<T, I>(&mut self, choices: I) -> Result<T> where I: IntoIterator<Item = T>, I::IntoIter: ExactSizeIterator,437     pub fn choose_iter<T, I>(&mut self, choices: I) -> Result<T>
438     where
439         I: IntoIterator<Item = T>,
440         I::IntoIter: ExactSizeIterator,
441     {
442         let mut choices = choices.into_iter();
443         let idx = self.choose_index(choices.len())?;
444         let choice = choices
445             .nth(idx)
446             .expect("ExactSizeIterator should have correct len");
447         Ok(choice)
448     }
449 
450     /// Choose a value in `0..len`.
451     ///
452     /// Returns an error if the `len` is zero.
453     ///
454     /// # Examples
455     ///
456     /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation.
457     ///
458     /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle
459     ///
460     /// ```
461     /// use arbitrary::Unstructured;
462     ///
463     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
464     /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
465     /// let mut to_permute = &mut permutation[..];
466     /// while to_permute.len() > 1 {
467     ///     let idx = u.choose_index(to_permute.len()).unwrap();
468     ///     to_permute.swap(0, idx);
469     ///     to_permute = &mut to_permute[1..];
470     /// }
471     ///
472     /// println!("permutation: {:?}", permutation);
473     /// ```
474     ///
475     /// An error is returned if the length is zero:
476     ///
477     /// ```
478     /// use arbitrary::Unstructured;
479     ///
480     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
481     /// let array: [i32; 0] = [];
482     ///
483     /// let result = u.choose_index(array.len());
484     ///
485     /// assert!(result.is_err());
486     /// ```
choose_index(&mut self, len: usize) -> Result<usize>487     pub fn choose_index(&mut self, len: usize) -> Result<usize> {
488         if len == 0 {
489             return Err(Error::EmptyChoose);
490         }
491         let idx = self.int_in_range(0..=len - 1)?;
492         Ok(idx)
493     }
494 
495     /// Generate a boolean according to the given ratio.
496     ///
497     /// # Panics
498     ///
499     /// Panics when the numerator and denominator do not meet these constraints:
500     ///
501     /// * `0 < numerator <= denominator`
502     ///
503     /// # Example
504     ///
505     /// Generate a boolean that is `true` five sevenths of the time:
506     ///
507     /// ```
508     /// # fn foo() -> arbitrary::Result<()> {
509     /// use arbitrary::Unstructured;
510     ///
511     /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
512     /// let mut u = Unstructured::new(&my_data);
513     ///
514     /// if u.ratio(5, 7)? {
515     ///     // Take this branch 5/7 of the time.
516     /// }
517     /// # Ok(())
518     /// # }
519     /// ```
ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool> where T: Int,520     pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool>
521     where
522         T: Int,
523     {
524         assert!(T::ZERO < numerator);
525         assert!(numerator <= denominator);
526         let x = self.int_in_range(T::ONE..=denominator)?;
527         Ok(x <= numerator)
528     }
529 
530     /// Fill a `buffer` with bytes from the underlying raw data.
531     ///
532     /// This should only be called within an `Arbitrary` implementation. This is
533     /// a very low-level operation. You should generally prefer calling nested
534     /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
535     /// `String::arbitrary` over using this method directly.
536     ///
537     /// If this `Unstructured` does not have enough underlying data to fill the
538     /// whole `buffer`, it pads the buffer out with zeros.
539     ///
540     /// # Example
541     ///
542     /// ```
543     /// use arbitrary::Unstructured;
544     ///
545     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
546     ///
547     /// let mut buf = [0; 2];
548     ///
549     /// assert!(u.fill_buffer(&mut buf).is_ok());
550     /// assert_eq!(buf, [1, 2]);
551     ///
552     /// assert!(u.fill_buffer(&mut buf).is_ok());
553     /// assert_eq!(buf, [3, 4]);
554     ///
555     /// assert!(u.fill_buffer(&mut buf).is_ok());
556     /// assert_eq!(buf, [0, 0]);
557     /// ```
fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()>558     pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> {
559         let n = std::cmp::min(buffer.len(), self.data.len());
560         buffer[..n].copy_from_slice(&self.data[..n]);
561         for byte in buffer[n..].iter_mut() {
562             *byte = 0;
563         }
564         self.data = &self.data[n..];
565         Ok(())
566     }
567 
568     /// Provide `size` bytes from the underlying raw data.
569     ///
570     /// This should only be called within an `Arbitrary` implementation. This is
571     /// a very low-level operation. You should generally prefer calling nested
572     /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
573     /// `String::arbitrary` over using this method directly.
574     ///
575     /// # Example
576     ///
577     /// ```
578     /// use arbitrary::Unstructured;
579     ///
580     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
581     ///
582     /// assert!(u.bytes(2).unwrap() == &[1, 2]);
583     /// assert!(u.bytes(2).unwrap() == &[3, 4]);
584     /// ```
bytes(&mut self, size: usize) -> Result<&'a [u8]>585     pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> {
586         if self.data.len() < size {
587             return Err(Error::NotEnoughData);
588         }
589 
590         let (for_buf, rest) = self.data.split_at(size);
591         self.data = rest;
592         Ok(for_buf)
593     }
594 
595     /// Peek at `size` number of bytes of the underlying raw input.
596     ///
597     /// Does not consume the bytes, only peeks at them.
598     ///
599     /// Returns `None` if there are not `size` bytes left in the underlying raw
600     /// input.
601     ///
602     /// # Example
603     ///
604     /// ```
605     /// use arbitrary::Unstructured;
606     ///
607     /// let u = Unstructured::new(&[1, 2, 3]);
608     ///
609     /// assert_eq!(u.peek_bytes(0).unwrap(), []);
610     /// assert_eq!(u.peek_bytes(1).unwrap(), [1]);
611     /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]);
612     /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]);
613     ///
614     /// assert!(u.peek_bytes(4).is_none());
615     /// ```
peek_bytes(&self, size: usize) -> Option<&'a [u8]>616     pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> {
617         self.data.get(..size)
618     }
619 
620     /// Consume all of the rest of the remaining underlying bytes.
621     ///
622     /// Returns a slice of all the remaining, unconsumed bytes.
623     ///
624     /// # Example
625     ///
626     /// ```
627     /// use arbitrary::Unstructured;
628     ///
629     /// let mut u = Unstructured::new(&[1, 2, 3]);
630     ///
631     /// let mut remaining = u.take_rest();
632     ///
633     /// assert_eq!(remaining, [1, 2, 3]);
634     /// ```
take_rest(mut self) -> &'a [u8]635     pub fn take_rest(mut self) -> &'a [u8] {
636         mem::take(&mut self.data)
637     }
638 
639     /// Provide an iterator over elements for constructing a collection
640     ///
641     /// This is useful for implementing [`Arbitrary::arbitrary`] on collections
642     /// since the implementation is simply `u.arbitrary_iter()?.collect()`
arbitrary_iter<'b, ElementType: Arbitrary<'a>>( &'b mut self, ) -> Result<ArbitraryIter<'a, 'b, ElementType>>643     pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>(
644         &'b mut self,
645     ) -> Result<ArbitraryIter<'a, 'b, ElementType>> {
646         Ok(ArbitraryIter {
647             u: &mut *self,
648             _marker: PhantomData,
649         })
650     }
651 
652     /// Provide an iterator over elements for constructing a collection from
653     /// all the remaining bytes.
654     ///
655     /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections
656     /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()`
arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>( self, ) -> Result<ArbitraryTakeRestIter<'a, ElementType>>657     pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
658         self,
659     ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
660         Ok(ArbitraryTakeRestIter {
661             u: self,
662             _marker: PhantomData,
663         })
664     }
665 
666     /// Call the given function an arbitrary number of times.
667     ///
668     /// The function is given this `Unstructured` so that it can continue to
669     /// generate arbitrary data and structures.
670     ///
671     /// You may optionaly specify minimum and maximum bounds on the number of
672     /// times the function is called.
673     ///
674     /// You may break out of the loop early by returning
675     /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return
676     /// `Ok(std::ops::ControlFlow::Continue)`.
677     ///
678     /// # Panics
679     ///
680     /// Panics if `min > max`.
681     ///
682     /// # Example
683     ///
684     /// Call a closure that generates an arbitrary type inside a context an
685     /// arbitrary number of times:
686     ///
687     /// ```
688     /// use arbitrary::{Result, Unstructured};
689     /// use std::ops::ControlFlow;
690     ///
691     /// enum Type {
692     ///     /// A boolean type.
693     ///     Bool,
694     ///
695     ///     /// An integer type.
696     ///     Int,
697     ///
698     ///     /// A list of the `i`th type in this type's context.
699     ///     List(usize),
700     /// }
701     ///
702     /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> {
703     ///     let mut context = vec![];
704     ///
705     ///     u.arbitrary_loop(Some(10), Some(20), |u| {
706     ///         let num_choices = if context.is_empty() {
707     ///             2
708     ///         } else {
709     ///             3
710     ///         };
711     ///         let ty = match u.int_in_range::<u8>(1..=num_choices)? {
712     ///             1 => Type::Bool,
713     ///             2 => Type::Int,
714     ///             3 => Type::List(u.int_in_range(0..=context.len() - 1)?),
715     ///             _ => unreachable!(),
716     ///         };
717     ///         context.push(ty);
718     ///         Ok(ControlFlow::Continue(()))
719     ///     })?;
720     ///
721     ///     // The number of loop iterations are constrained by the min/max
722     ///     // bounds that we provided.
723     ///     assert!(context.len() >= 10);
724     ///     assert!(context.len() <= 20);
725     ///
726     ///     Ok(context)
727     /// }
728     /// ```
arbitrary_loop( &mut self, min: Option<u32>, max: Option<u32>, mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>, ) -> Result<()>729     pub fn arbitrary_loop(
730         &mut self,
731         min: Option<u32>,
732         max: Option<u32>,
733         mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>,
734     ) -> Result<()> {
735         let min = min.unwrap_or(0);
736         let max = max.unwrap_or(u32::MAX);
737 
738         for _ in 0..self.int_in_range(min..=max)? {
739             match f(self)? {
740                 ControlFlow::Continue(_) => continue,
741                 ControlFlow::Break(_) => break,
742             }
743         }
744 
745         Ok(())
746     }
747 }
748 
749 /// Utility iterator produced by [`Unstructured::arbitrary_iter`]
750 pub struct ArbitraryIter<'a, 'b, ElementType> {
751     u: &'b mut Unstructured<'a>,
752     _marker: PhantomData<ElementType>,
753 }
754 
755 impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> {
756     type Item = Result<ElementType>;
next(&mut self) -> Option<Result<ElementType>>757     fn next(&mut self) -> Option<Result<ElementType>> {
758         let keep_going = self.u.arbitrary().unwrap_or(false);
759         if keep_going {
760             Some(Arbitrary::arbitrary(self.u))
761         } else {
762             None
763         }
764     }
765 }
766 
767 /// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
768 pub struct ArbitraryTakeRestIter<'a, ElementType> {
769     u: Unstructured<'a>,
770     _marker: PhantomData<ElementType>,
771 }
772 
773 impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
774     type Item = Result<ElementType>;
next(&mut self) -> Option<Result<ElementType>>775     fn next(&mut self) -> Option<Result<ElementType>> {
776         let keep_going = self.u.arbitrary().unwrap_or(false);
777         if keep_going {
778             Some(Arbitrary::arbitrary(&mut self.u))
779         } else {
780             None
781         }
782     }
783 }
784 
785 /// A trait that is implemented for all of the primitive integers:
786 ///
787 /// * `u8`
788 /// * `u16`
789 /// * `u32`
790 /// * `u64`
791 /// * `u128`
792 /// * `usize`
793 /// * `i8`
794 /// * `i16`
795 /// * `i32`
796 /// * `i64`
797 /// * `i128`
798 /// * `isize`
799 ///
800 /// Don't implement this trait yourself.
801 pub trait Int:
802     Copy
803     + std::fmt::Debug
804     + PartialOrd
805     + Ord
806     + ops::Sub<Self, Output = Self>
807     + ops::Rem<Self, Output = Self>
808     + ops::Shr<Self, Output = Self>
809     + ops::Shl<usize, Output = Self>
810     + ops::BitOr<Self, Output = Self>
811 {
812     #[doc(hidden)]
813     type Unsigned: Int;
814 
815     #[doc(hidden)]
816     const ZERO: Self;
817 
818     #[doc(hidden)]
819     const ONE: Self;
820 
821     #[doc(hidden)]
822     const MAX: Self;
823 
824     #[doc(hidden)]
from_u8(b: u8) -> Self825     fn from_u8(b: u8) -> Self;
826 
827     #[doc(hidden)]
from_usize(u: usize) -> Self828     fn from_usize(u: usize) -> Self;
829 
830     #[doc(hidden)]
checked_add(self, rhs: Self) -> Option<Self>831     fn checked_add(self, rhs: Self) -> Option<Self>;
832 
833     #[doc(hidden)]
wrapping_add(self, rhs: Self) -> Self834     fn wrapping_add(self, rhs: Self) -> Self;
835 
836     #[doc(hidden)]
wrapping_sub(self, rhs: Self) -> Self837     fn wrapping_sub(self, rhs: Self) -> Self;
838 
839     #[doc(hidden)]
to_unsigned(self) -> Self::Unsigned840     fn to_unsigned(self) -> Self::Unsigned;
841 
842     #[doc(hidden)]
from_unsigned(unsigned: Self::Unsigned) -> Self843     fn from_unsigned(unsigned: Self::Unsigned) -> Self;
844 }
845 
846 macro_rules! impl_int {
847     ( $( $ty:ty : $unsigned_ty: ty ; )* ) => {
848         $(
849             impl Int for $ty {
850                 type Unsigned = $unsigned_ty;
851 
852                 const ZERO: Self = 0;
853 
854                 const ONE: Self = 1;
855 
856                 const MAX: Self = Self::MAX;
857 
858                 fn from_u8(b: u8) -> Self {
859                     b as Self
860                 }
861 
862                 fn from_usize(u: usize) -> Self {
863                     u as Self
864                 }
865 
866                 fn checked_add(self, rhs: Self) -> Option<Self> {
867                     <$ty>::checked_add(self, rhs)
868                 }
869 
870                 fn wrapping_add(self, rhs: Self) -> Self {
871                     <$ty>::wrapping_add(self, rhs)
872                 }
873 
874                 fn wrapping_sub(self, rhs: Self) -> Self {
875                     <$ty>::wrapping_sub(self, rhs)
876                 }
877 
878                 fn to_unsigned(self) -> Self::Unsigned {
879                     self as $unsigned_ty
880                 }
881 
882                 fn from_unsigned(unsigned: $unsigned_ty) -> Self {
883                     unsigned as Self
884                 }
885             }
886         )*
887     }
888 }
889 
890 impl_int! {
891     u8: u8;
892     u16: u16;
893     u32: u32;
894     u64: u64;
895     u128: u128;
896     usize: usize;
897     i8: u8;
898     i16: u16;
899     i32: u32;
900     i64: u64;
901     i128: u128;
902     isize: usize;
903 }
904 
905 #[cfg(test)]
906 mod tests {
907     use super::*;
908 
909     #[test]
test_byte_size()910     fn test_byte_size() {
911         let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
912         // Should take one byte off the end
913         assert_eq!(u.arbitrary_byte_size().unwrap(), 6);
914         assert_eq!(u.len(), 9);
915         let mut v = vec![0; 260];
916         v.push(1);
917         v.push(4);
918         let mut u = Unstructured::new(&v);
919         // Should read two bytes off the end
920         assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104);
921         assert_eq!(u.len(), 260);
922     }
923 
924     #[test]
int_in_range_of_one()925     fn int_in_range_of_one() {
926         let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
927         let x = u.int_in_range(0..=0).unwrap();
928         assert_eq!(x, 0);
929         let choice = *u.choose(&[42]).unwrap();
930         assert_eq!(choice, 42)
931     }
932 
933     #[test]
int_in_range_uses_minimal_amount_of_bytes()934     fn int_in_range_uses_minimal_amount_of_bytes() {
935         let mut u = Unstructured::new(&[1, 2]);
936         assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap());
937         assert_eq!(u.len(), 1);
938 
939         let mut u = Unstructured::new(&[1, 2]);
940         assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap());
941         assert_eq!(u.len(), 1);
942 
943         let mut u = Unstructured::new(&[1]);
944         assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap());
945         assert!(u.is_empty());
946     }
947 
948     #[test]
int_in_range_in_bounds()949     fn int_in_range_in_bounds() {
950         for input in u8::MIN..=u8::MAX {
951             let input = [input];
952 
953             let mut u = Unstructured::new(&input);
954             let x = u.int_in_range(1..=u8::MAX).unwrap();
955             assert_ne!(x, 0);
956 
957             let mut u = Unstructured::new(&input);
958             let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
959             assert_ne!(x, u8::MAX);
960         }
961     }
962 
963     #[test]
int_in_range_covers_unsigned_range()964     fn int_in_range_covers_unsigned_range() {
965         // Test that we generate all values within the range given to
966         // `int_in_range`.
967 
968         let mut full = [false; u8::MAX as usize + 1];
969         let mut no_zero = [false; u8::MAX as usize];
970         let mut no_max = [false; u8::MAX as usize];
971         let mut narrow = [false; 10];
972 
973         for input in u8::MIN..=u8::MAX {
974             let input = [input];
975 
976             let mut u = Unstructured::new(&input);
977             let x = u.int_in_range(0..=u8::MAX).unwrap();
978             full[x as usize] = true;
979 
980             let mut u = Unstructured::new(&input);
981             let x = u.int_in_range(1..=u8::MAX).unwrap();
982             no_zero[x as usize - 1] = true;
983 
984             let mut u = Unstructured::new(&input);
985             let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
986             no_max[x as usize] = true;
987 
988             let mut u = Unstructured::new(&input);
989             let x = u.int_in_range(100..=109).unwrap();
990             narrow[x as usize - 100] = true;
991         }
992 
993         for (i, covered) in full.iter().enumerate() {
994             assert!(covered, "full[{}] should have been generated", i);
995         }
996         for (i, covered) in no_zero.iter().enumerate() {
997             assert!(covered, "no_zero[{}] should have been generated", i);
998         }
999         for (i, covered) in no_max.iter().enumerate() {
1000             assert!(covered, "no_max[{}] should have been generated", i);
1001         }
1002         for (i, covered) in narrow.iter().enumerate() {
1003             assert!(covered, "narrow[{}] should have been generated", i);
1004         }
1005     }
1006 
1007     #[test]
int_in_range_covers_signed_range()1008     fn int_in_range_covers_signed_range() {
1009         // Test that we generate all values within the range given to
1010         // `int_in_range`.
1011 
1012         let mut full = [false; u8::MAX as usize + 1];
1013         let mut no_min = [false; u8::MAX as usize];
1014         let mut no_max = [false; u8::MAX as usize];
1015         let mut narrow = [false; 21];
1016 
1017         let abs_i8_min: isize = 128;
1018 
1019         for input in 0..=u8::MAX {
1020             let input = [input];
1021 
1022             let mut u = Unstructured::new(&input);
1023             let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap();
1024             full[(x as isize + abs_i8_min) as usize] = true;
1025 
1026             let mut u = Unstructured::new(&input);
1027             let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap();
1028             no_min[(x as isize + abs_i8_min - 1) as usize] = true;
1029 
1030             let mut u = Unstructured::new(&input);
1031             let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap();
1032             no_max[(x as isize + abs_i8_min) as usize] = true;
1033 
1034             let mut u = Unstructured::new(&input);
1035             let x = u.int_in_range(-10..=10).unwrap();
1036             narrow[(x as isize + 10) as usize] = true;
1037         }
1038 
1039         for (i, covered) in full.iter().enumerate() {
1040             assert!(covered, "full[{}] should have been generated", i);
1041         }
1042         for (i, covered) in no_min.iter().enumerate() {
1043             assert!(covered, "no_min[{}] should have been generated", i);
1044         }
1045         for (i, covered) in no_max.iter().enumerate() {
1046             assert!(covered, "no_max[{}] should have been generated", i);
1047         }
1048         for (i, covered) in narrow.iter().enumerate() {
1049             assert!(covered, "narrow[{}] should have been generated", i);
1050         }
1051     }
1052 }
1053