#[cfg(test)]
mod tests;

use std::hash;
use std::iter;
use std::ops::Range;

use rustc_serialize::{Decodable, Encodable};
use rustc_target::abi::Size;
use rustc_type_ir::{TyDecoder, TyEncoder};

use super::AllocRange;

type Block = u64;

/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
/// is initialized. If it is `false`, the byte is uninitialized.
/// The actual bits are only materialized when needed, and we try to keep this data lazy as long as
/// possible. Currently, if all the blocks have the same value, then the mask represents either a
/// fully initialized or fully uninitialized const allocation, so we can only store that single
/// value.
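///
/// An illustrative sketch of the lazy behavior (not compiled; the sizes are hypothetical):
/// ```ignore (illustrative)
/// // A new mask starts out lazy: no bit blocks are allocated.
/// let mut mask = InitMask::new(Size::from_bytes(16), false);
/// // A partial write mixes initialization states, so the blocks get materialized.
/// mask.set_range(AllocRange::from(Size::from_bytes(0)..Size::from_bytes(4)), true);
/// assert!(mask.get(Size::from_bytes(0)));
/// assert!(!mask.get(Size::from_bytes(8)));
/// ```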
#[derive(Clone, Debug, Eq, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
pub struct InitMask {
    blocks: InitMaskBlocks,
    len: Size,
}

#[derive(Clone, Debug, Eq, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
enum InitMaskBlocks {
    Lazy {
        /// Whether the lazy init mask is fully initialized or uninitialized.
        state: bool,
    },
    Materialized(InitMaskMaterialized),
}

impl InitMask {
    pub fn new(size: Size, state: bool) -> Self {
        // Blocks start lazily allocated, until we have to materialize them.
        let blocks = InitMaskBlocks::Lazy { state };
        InitMask { len: size, blocks }
    }

    /// Checks whether the `range` is entirely initialized.
    ///
    /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte
    /// indexes for the first contiguous span of the uninitialized access.
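    ///
    /// For example (illustrative): for a fully uninitialized 16-byte mask, checking bytes `4..8`
    /// returns `Err` covering that whole span, since byte `4` is the first uninitialized byte and
    /// the uninitialized run extends past `8`.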
    #[inline]
    pub fn is_range_initialized(&self, range: AllocRange) -> Result<(), AllocRange> {
        let end = range.end();
        if end > self.len {
            return Err(AllocRange::from(self.len..end));
        }

        match self.blocks {
            InitMaskBlocks::Lazy { state } => {
                // Lazily allocated blocks represent the full mask, and cover the requested range by
                // definition.
                if state { Ok(()) } else { Err(range) }
            }
            InitMaskBlocks::Materialized(ref blocks) => {
                blocks.is_range_initialized(range.start, end)
            }
        }
    }

    /// Sets a specified range to a value. If the range is out-of-bounds, the mask will grow to
    /// accommodate it entirely.
    pub fn set_range(&mut self, range: AllocRange, new_state: bool) {
        let start = range.start;
        let end = range.end();

        let is_full_overwrite = start == Size::ZERO && end >= self.len;

        // Optimize the cases of a full init/uninit state, while handling growth if needed.
        match self.blocks {
            InitMaskBlocks::Lazy { ref mut state } if is_full_overwrite => {
                // This is fully overwriting the mask, and we'll still have a single initialization
                // state: the blocks can stay lazy.
                *state = new_state;
                self.len = end;
            }
            InitMaskBlocks::Materialized(_) if is_full_overwrite => {
                // This is also fully overwriting materialized blocks with a single initialization
                // state: we'll have no need for these blocks anymore and can make them lazy.
                self.blocks = InitMaskBlocks::Lazy { state: new_state };
                self.len = end;
            }
            InitMaskBlocks::Lazy { state } if state == new_state => {
                // Here we're partially overwriting the mask but the initialization state doesn't
                // change: the blocks can stay lazy.
                if end > self.len {
                    self.len = end;
                }
            }
            _ => {
                // Otherwise, we have a partial overwrite that can result in a mix of initialization
                // states, so we'll need materialized blocks.
                let len = self.len;
                let blocks = self.materialize_blocks();

                // There are 3 cases of interest here, if we have:
                //
                //         [--------]
                //         ^        ^
                //         0        len
                //
                // 1) the range to set can be in-bounds:
                //
                //            xxxx = [start, end]
                //         [--------]
                //         ^        ^
                //         0        len
                //
                // Here, we'll simply set the single `start` to `end` range.
                //
                // 2) the range to set can be partially out-of-bounds:
                //
                //                xxxx = [start, end]
                //         [--------]
                //         ^        ^
                //         0        len
                //
                // We have 2 subranges to handle:
                // - we'll set the existing `start` to `len` range.
                // - we'll grow and set the `len` to `end` range.
                //
                // 3) the range to set can be fully out-of-bounds:
                //
                //                   ---xxxx = [start, end]
                //         [--------]
                //         ^        ^
                //         0        len
                //
                // Since we're growing the mask to a single `new_state` value, we consider the gap
                // from `len` to `start` to be part of the range, and have a single subrange to
                // handle: we'll grow and set the `len` to `end` range.
                //
                // Note that we have to materialize, set blocks, and grow the mask. We could
                // therefore slightly optimize things in situations where these writes overlap.
                // However, as of writing this, growing the mask doesn't happen in practice yet, so
                // we don't do this micro-optimization.
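                //
                // For example (hypothetical sizes): with `len = 8` bytes, setting `4..12` here is
                // case 2: bytes `4..8` are set in-bounds, then the mask grows by 4 bytes set to
                // `new_state`, and `len` becomes 12.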

                if end <= len {
                    // Handle case 1.
                    blocks.set_range_inbounds(start, end, new_state);
                } else {
                    if start < len {
                        // Handle the first subrange of case 2.
                        blocks.set_range_inbounds(start, len, new_state);
                    }

                    // Handle the second subrange of case 2, and case 3.
                    blocks.grow(len, end - len, new_state); // `Size` operation
                    self.len = end;
                }
            }
        }
    }

    /// Materializes this mask's blocks when the mask is lazy.
    #[inline]
    fn materialize_blocks(&mut self) -> &mut InitMaskMaterialized {
        if let InitMaskBlocks::Lazy { state } = self.blocks {
            self.blocks = InitMaskBlocks::Materialized(InitMaskMaterialized::new(self.len, state));
        }

        let InitMaskBlocks::Materialized(ref mut blocks) = self.blocks else {
            bug!("initmask blocks must be materialized here")
        };
        blocks
    }

    /// Returns the initialization state at the specified in-bounds index.
    #[inline]
    pub fn get(&self, idx: Size) -> bool {
        match self.blocks {
            InitMaskBlocks::Lazy { state } => state,
            InitMaskBlocks::Materialized(ref blocks) => blocks.get(idx),
        }
    }
}

/// The actual materialized blocks of the bitmask, when we can't keep the `InitMask` lazy.
// Note: for performance reasons when interning, some of the fields can be partially
// hashed (see the `Hash` impl below for more details), so the impl is not derived.
#[derive(Clone, Debug, Eq, PartialEq, HashStable)]
struct InitMaskMaterialized {
    blocks: Vec<Block>,
}

// `Block` is a `u64`, but it is a bitmask, not a numeric value. If we were to just derive
// Encodable and Decodable we would apply varint encoding to the bitmasks, which is slower
// and also produces more output when the high bits of each `u64` are occupied.
// Note: There is probably a remaining optimization for masks that do not use an entire
// `Block`.
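// For example, a two-block mask `[0x00ff, u64::MAX]` is encoded as the `usize` block count 2,
// followed by 16 raw little-endian bytes, instead of two varint-encoded integers.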
impl<E: TyEncoder> Encodable<E> for InitMaskMaterialized {
    fn encode(&self, encoder: &mut E) {
        encoder.emit_usize(self.blocks.len());
        for block in &self.blocks {
            encoder.emit_raw_bytes(&block.to_le_bytes());
        }
    }
}

// This implementation is deliberately not derived, see the matching `Encodable` impl.
impl<D: TyDecoder> Decodable<D> for InitMaskMaterialized {
    fn decode(decoder: &mut D) -> Self {
        let num_blocks = decoder.read_usize();
        let mut blocks = Vec::with_capacity(num_blocks);
        for _ in 0..num_blocks {
            let bytes = decoder.read_raw_bytes(8);
            let block = u64::from_le_bytes(bytes.try_into().unwrap());
            blocks.push(block);
        }
        InitMaskMaterialized { blocks }
    }
}

// Const allocations are only hashed for interning. However, they can be large, making the hashing
// expensive, especially since it uses `FxHash`, which is better suited to short keys than to
// potentially big buffers like the allocation's init mask. We can partially hash some fields when
// they're large.
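// (Hypothetical illustration: if `MAX_BLOCKS_LEN` were 16 and `MAX_BLOCKS_TO_HASH` were 8, a
// 1000-block mask would hash only its length plus its first and last 8 blocks, not all 1000.)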
impl hash::Hash for InitMaskMaterialized {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        const MAX_BLOCKS_TO_HASH: usize = super::MAX_BYTES_TO_HASH / std::mem::size_of::<Block>();
        const MAX_BLOCKS_LEN: usize = super::MAX_HASHED_BUFFER_LEN / std::mem::size_of::<Block>();

        // Partially hash the `blocks` buffer when it is large. To limit collisions with common
        // prefixes and suffixes, we hash the length and some slices of the buffer.
        let block_count = self.blocks.len();
        if block_count > MAX_BLOCKS_LEN {
            // Hash the buffer's length.
            block_count.hash(state);

            // And its head and tail.
            self.blocks[..MAX_BLOCKS_TO_HASH].hash(state);
            self.blocks[block_count - MAX_BLOCKS_TO_HASH..].hash(state);
        } else {
            self.blocks.hash(state);
        }
    }
}

impl InitMaskMaterialized {
    pub const BLOCK_SIZE: u64 = 64;

    fn new(size: Size, state: bool) -> Self {
        let mut m = InitMaskMaterialized { blocks: vec![] };
        m.grow(Size::ZERO, size, state);
        m
    }

    #[inline]
    fn bit_index(bits: Size) -> (usize, usize) {
        // BLOCK_SIZE is the number of bits that can fit in a `Block`.
        // Each bit in a `Block` represents the initialization state of one byte of an allocation,
        // so we use `.bytes()` here.
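        // For example, byte index 70 lands in block 70 / 64 = 1, at bit 70 % 64 = 6.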
        let bits = bits.bytes();
        let a = bits / Self::BLOCK_SIZE;
        let b = bits % Self::BLOCK_SIZE;
        (usize::try_from(a).unwrap(), usize::try_from(b).unwrap())
    }

    #[inline]
    fn size_from_bit_index(block: impl TryInto<u64>, bit: impl TryInto<u64>) -> Size {
        let block = block.try_into().ok().unwrap();
        let bit = bit.try_into().ok().unwrap();
        Size::from_bytes(block * Self::BLOCK_SIZE + bit)
    }

    /// Checks whether the range `start..end` is entirely initialized.
    ///
    /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte
    /// indexes for the first contiguous span of the uninitialized access.
    #[inline]
    fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), AllocRange> {
        let uninit_start = self.find_bit(start, end, false);

        match uninit_start {
            Some(uninit_start) => {
                let uninit_end = self.find_bit(uninit_start, end, true).unwrap_or(end);
                Err(AllocRange::from(uninit_start..uninit_end))
            }
            None => Ok(()),
        }
    }

    fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
        let (block_a, bit_a) = Self::bit_index(start);
        let (block_b, bit_b) = Self::bit_index(end);
        if block_a == block_b {
            // First set all bits except the first `bit_a`,
            // then unset the last `64 - bit_b` bits.
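            // For example, with `bit_a = 3` and `bit_b = 6`, `range` is
            // `(u64::MAX << 3) & (u64::MAX >> 58)`, i.e. bits 3, 4 and 5 (end-exclusive).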
            let range = if bit_b == 0 {
                u64::MAX << bit_a
            } else {
                (u64::MAX << bit_a) & (u64::MAX >> (64 - bit_b))
            };
            if new_state {
                self.blocks[block_a] |= range;
            } else {
                self.blocks[block_a] &= !range;
            }
            return;
        }
        // across block boundaries
        if new_state {
            // Set `bit_a..64` to `1`.
            self.blocks[block_a] |= u64::MAX << bit_a;
            // Set `0..bit_b` to `1`.
            if bit_b != 0 {
                self.blocks[block_b] |= u64::MAX >> (64 - bit_b);
            }
            // Fill in all the other blocks (much faster than one bit at a time).
            for block in (block_a + 1)..block_b {
                self.blocks[block] = u64::MAX;
            }
        } else {
            // Set `bit_a..64` to `0`.
            self.blocks[block_a] &= !(u64::MAX << bit_a);
            // Set `0..bit_b` to `0`.
            if bit_b != 0 {
                self.blocks[block_b] &= !(u64::MAX >> (64 - bit_b));
            }
            // Fill in all the other blocks (much faster than one bit at a time).
            for block in (block_a + 1)..block_b {
                self.blocks[block] = 0;
            }
        }
    }

    #[inline]
    fn get(&self, i: Size) -> bool {
        let (block, bit) = Self::bit_index(i);
        (self.blocks[block] & (1 << bit)) != 0
    }

    fn grow(&mut self, len: Size, amount: Size, new_state: bool) {
        if amount.bytes() == 0 {
            return;
        }
        let unused_trailing_bits =
            u64::try_from(self.blocks.len()).unwrap() * Self::BLOCK_SIZE - len.bytes();
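        // For example (hypothetical sizes): with one 64-bit block and `len` = 60 bytes, there are
        // 4 unused trailing bits. Growing by 10 bytes appends 10 / 64 + 1 = 1 pre-filled block and
        // then sets those 4 trailing bits of the old last block to `new_state`.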

        // If there's not enough capacity in the currently allocated blocks, allocate some more.
        if amount.bytes() > unused_trailing_bits {
            let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;

            // We allocate the blocks to the correct value for the requested init state, so we won't
            // have to manually set them with another write.
            let block = if new_state { u64::MAX } else { 0 };
            self.blocks
                .extend(iter::repeat(block).take(usize::try_from(additional_blocks).unwrap()));
        }

        // New blocks have already been set here, so we only need to set the unused trailing bits,
        // if any.
        if unused_trailing_bits > 0 {
            let in_bounds_tail = Size::from_bytes(unused_trailing_bits);
            self.set_range_inbounds(len, len + in_bounds_tail, new_state); // `Size` operation
        }
    }

    /// Returns the index of the first bit in `start..end` (end-exclusive) that is equal to `is_init`.
    fn find_bit(&self, start: Size, end: Size, is_init: bool) -> Option<Size> {
        /// A fast implementation of `find_bit`,
        /// which skips over an entire block at a time if it's all 0s (resp. 1s),
        /// and finds the first 1 (resp. 0) bit inside a block using `trailing_zeros` instead of a loop.
        ///
        /// Note that all examples below are written with 8 (instead of 64) bit blocks for simplicity,
        /// and with the least significant bit (and lowest block) first:
        /// ```text
        ///        00000000|00000000
        ///        ^      ^ ^      ^
        /// index: 0      7 8      15
        /// ```
        /// Also, if not stated, assume that `is_init = true`, that is, we are searching for the first 1 bit.
        fn find_bit_fast(
            init_mask: &InitMaskMaterialized,
            start: Size,
            end: Size,
            is_init: bool,
        ) -> Option<Size> {
            /// Search one block, returning the index of the first bit equal to `is_init`.
            fn search_block(
                bits: Block,
                block: usize,
                start_bit: usize,
                is_init: bool,
            ) -> Option<Size> {
                // For the following examples, assume this function was called with:
                //   bits = 0b00111011
                //   start_bit = 3
                //   is_init = false
                // Note that, for the examples in this function, the most significant bit is written first,
                // which is backwards compared to the comments in `find_bit`/`find_bit_fast`.

                // Invert bits so we're always looking for the first set bit.
                //        ! 0b00111011
                //   bits = 0b11000100
                let bits = if is_init { bits } else { !bits };
                // Mask off unused start bits.
                //          0b11000100
                //        & 0b11111000
                //   bits = 0b11000000
                let bits = bits & (!0 << start_bit);
                // Find set bit, if any.
                //   bit = trailing_zeros(0b11000000)
                //   bit = 6
                if bits == 0 {
                    None
                } else {
                    let bit = bits.trailing_zeros();
                    Some(InitMaskMaterialized::size_from_bit_index(block, bit))
                }
            }

            if start >= end {
                return None;
            }

            // Convert `start` and `end` to block indexes and bit indexes within each block.
            // We must convert `end` to an inclusive bound to handle block boundaries correctly.
            //
            // For example:
            //
            //   (a) 00000000|00000000    (b) 00000000|
            //       ^~~~~~~~~~~^             ^~~~~~~~~^
            //     start       end          start     end
            //
            // In both cases, the block index of `end` is 1.
            // But we do want to search block 1 in (a), and we don't in (b).
            //
            // We subtract 1 from both end positions to make them inclusive:
            //
            //   (a) 00000000|00000000    (b) 00000000|
            //       ^~~~~~~~~~^              ^~~~~~~^
            //     start    end_inclusive   start end_inclusive
            //
            // For (a), the block index of `end_inclusive` is 1, and for (b), it's 0.
            // This provides the desired behavior of searching blocks 0 and 1 for (a),
            // and searching only block 0 for (b).
            // There is no concern of overflows since we checked for `start >= end` above.
            let (start_block, start_bit) = InitMaskMaterialized::bit_index(start);
            let end_inclusive = Size::from_bytes(end.bytes() - 1);
            let (end_block_inclusive, _) = InitMaskMaterialized::bit_index(end_inclusive);

            // Handle first block: need to skip `start_bit` bits.
            //
            // We need to handle the first block separately,
            // because there may be bits earlier in the block that should be ignored,
            // such as the bit marked (1) in this example:
            //
            //       (1)
            //       -|------
            //   (c) 01000000|00000000|00000001
            //          ^~~~~~~~~~~~~~~~~~^
            //        start              end
            if let Some(i) =
                search_block(init_mask.blocks[start_block], start_block, start_bit, is_init)
            {
                // If the range is less than a block, we may find a matching bit after `end`.
                //
                // For example, we shouldn't successfully find bit (2), because it's after `end`:
                //
                //             (2)
                //       -------|
                //   (d) 00000001|00000000|00000001
                //        ^~~~~^
                //      start end
                //
                // An alternative would be to mask off end bits in the same way as we do for start bits,
                // but performing this check afterwards is faster and simpler to implement.
                if i < end {
                    return Some(i);
                } else {
                    return None;
                }
            }

            // Handle remaining blocks.
            //
            // We can skip over an entire block at once if it's all 0s (resp. 1s).
            // The block marked (3) in this example is the first block that will be handled by this loop,
            // and it will be skipped for that reason:
            //
            //                   (3)
            //                --------
            //   (e) 01000000|00000000|00000001
            //          ^~~~~~~~~~~~~~~~~~^
            //        start              end
            if start_block < end_block_inclusive {
                // This loop is written in a specific way for performance.
                // Notably: `..end_block_inclusive + 1` is used for an inclusive range instead of `..=end_block_inclusive`,
                // and `.zip(start_block + 1..)` is used to track the index instead of `.enumerate().skip().take()`,
                // because both alternatives result in significantly worse codegen.
                // `end_block_inclusive + 1` is guaranteed not to wrap, because `end_block_inclusive <= end / BLOCK_SIZE`,
                // and `BLOCK_SIZE` (the number of bits per block) will always be at least 8 (1 byte).
                for (&bits, block) in init_mask.blocks[start_block + 1..end_block_inclusive + 1]
                    .iter()
                    .zip(start_block + 1..)
                {
                    if let Some(i) = search_block(bits, block, 0, is_init) {
                        // If this is the last block, we may find a matching bit after `end`.
                        //
                        // For example, we shouldn't successfully find bit (4), because it's after `end`:
                        //
                        //                               (4)
                        //                         -------|
                        //   (f) 00000001|00000000|00000001
                        //          ^~~~~~~~~~~~~~~~~~^
                        //        start              end
                        //
                        // As above with example (d), we could handle the end block separately and mask off end bits,
                        // but unconditionally searching an entire block at once and performing this check afterwards
                        // is faster and much simpler to implement.
                        if i < end {
                            return Some(i);
                        } else {
                            return None;
                        }
                    }
                }
            }

            None
        }

        #[cfg_attr(not(debug_assertions), allow(dead_code))]
        fn find_bit_slow(
            init_mask: &InitMaskMaterialized,
            start: Size,
            end: Size,
            is_init: bool,
        ) -> Option<Size> {
            (start..end).find(|&i| init_mask.get(i) == is_init)
        }

        let result = find_bit_fast(self, start, end, is_init);

        debug_assert_eq!(
            result,
            find_bit_slow(self, start, end, is_init),
            "optimized implementation of find_bit is wrong for start={:?} end={:?} is_init={} init_mask={:#?}",
            start,
            end,
            is_init,
            self
        );

        result
    }
}

/// A contiguous chunk of initialized or uninitialized memory.
pub enum InitChunk {
    Init(Range<Size>),
    Uninit(Range<Size>),
}

impl InitChunk {
    #[inline]
    pub fn is_init(&self) -> bool {
        match self {
            Self::Init(_) => true,
            Self::Uninit(_) => false,
        }
    }

    #[inline]
    pub fn range(&self) -> Range<Size> {
        match self {
            Self::Init(r) => r.clone(),
            Self::Uninit(r) => r.clone(),
        }
    }
}

impl InitMask {
    /// Returns an iterator, yielding a range of byte indexes for each contiguous region
    /// of initialized or uninitialized bytes inside the range `start..end` (end-exclusive).
    ///
    /// The iterator guarantees the following:
    /// - Chunks are nonempty.
    /// - Chunks are adjacent (each range's start is equal to the previous range's end).
    /// - Chunks span exactly `start..end` (the first starts at `start`, the last ends at `end`).
    /// - Chunks alternate between [`InitChunk::Init`] and [`InitChunk::Uninit`].
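    ///
    /// For example (illustrative): for a 6-byte mask whose bytes `0..2` are initialized and
    /// `2..6` are not, iterating over the full range yields `Init(0..2)` followed by
    /// `Uninit(2..6)`.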
    #[inline]
    pub fn range_as_init_chunks(&self, range: AllocRange) -> InitChunkIter<'_> {
        let start = range.start;
        let end = range.end();
        assert!(end <= self.len);

        let is_init = if start < end {
            self.get(start)
        } else {
            // `start..end` is empty: there are no chunks, so use some arbitrary value
            false
        };

        InitChunkIter { init_mask: self, is_init, start, end }
    }
}

/// Yields [`InitChunk`]s. See [`InitMask::range_as_init_chunks`].
#[derive(Clone)]
pub struct InitChunkIter<'a> {
    init_mask: &'a InitMask,
    /// Whether the next chunk we will return is initialized.
    /// If there are no more chunks, contains some arbitrary value.
    is_init: bool,
    /// The current byte index into `init_mask`.
    start: Size,
    /// The end byte index into `init_mask`.
    end: Size,
}

impl<'a> Iterator for InitChunkIter<'a> {
    type Item = InitChunk;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.start >= self.end {
            return None;
        }

        let end_of_chunk = match self.init_mask.blocks {
            InitMaskBlocks::Lazy { .. } => {
                // If we're iterating over the chunks of lazy blocks, we just emit a single
                // full-size chunk.
                self.end
            }
            InitMaskBlocks::Materialized(ref blocks) => {
                let end_of_chunk =
                    blocks.find_bit(self.start, self.end, !self.is_init).unwrap_or(self.end);
                end_of_chunk
            }
        };
        let range = self.start..end_of_chunk;
        let ret =
            Some(if self.is_init { InitChunk::Init(range) } else { InitChunk::Uninit(range) });

        self.is_init = !self.is_init;
        self.start = end_of_chunk;

        ret
    }
}

/// Run-length encoding of the uninit mask.
/// Used to copy parts of a mask multiple times to another allocation.
pub struct InitCopy {
    /// Whether the first range is initialized.
    initial: bool,
    /// The lengths of ranges that are run-length encoded.
    /// The initialization states of the ranges alternate, starting with `initial`.
    ranges: smallvec::SmallVec<[u64; 1]>,
}

impl InitCopy {
    pub fn no_bytes_init(&self) -> bool {
        // The `ranges` are run-length encoded and of alternating initialization state.
        // So if `ranges.len() > 1` then the second range is an initialized one.
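        // For example, a fully-uninitialized 8-byte copy is encoded as `initial = false` with
        // `ranges = [8]`, for which this returns true.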
        !self.initial && self.ranges.len() == 1
    }
}

/// Transferring the initialization mask to other allocations.
impl InitMask {
    /// Creates a run-length encoding of the initialization mask; panics if range is empty.
    ///
    /// This is essentially a more space-efficient version of
    /// `InitMask::range_as_init_chunks(...).collect::<Vec<_>>()`.
    pub fn prepare_copy(&self, range: AllocRange) -> InitCopy {
        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
        // a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from
        // the source and write it to the destination. Even if we optimized the memory accesses,
        // we'd be doing all of this `repeat` times.
        // Therefore we precompute a compressed version of the initialization mask of the source value and
        // then write it back `repeat` times without computing any more information from the source.

        // A precomputed cache for ranges of initialized / uninitialized bits
        // 0000010010001110 will become
        // `[5, 1, 2, 1, 3, 3, 1]`,
        // where each element toggles the state.

        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();

        let mut chunks = self.range_as_init_chunks(range).peekable();

        let initial = chunks.peek().expect("range should be nonempty").is_init();

        // Here we rely on `range_as_init_chunks` to yield alternating init/uninit chunks.
        for chunk in chunks {
            let len = chunk.range().end.bytes() - chunk.range().start.bytes();
            ranges.push(len);
        }

        InitCopy { ranges, initial }
    }

    /// Applies multiple instances of the run-length encoding to the initialization mask.
    pub fn apply_copy(&mut self, defined: InitCopy, range: AllocRange, repeat: u64) {
        // An optimization where we can just overwrite an entire range of initialization bits if
        // they are going to be uniformly `1` or `0`. If this happens to be a full-range overwrite,
        // we won't need materialized blocks either.
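        // For example (hypothetical sizes): repeating a fully-initialized 4-byte source 3 times
        // is a single 12-byte `set_range` to `true`, with no materialization needed.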
        if defined.ranges.len() <= 1 {
            let start = range.start;
            let end = range.start + range.size * repeat; // `Size` operations
            self.set_range(AllocRange::from(start..end), defined.initial);
            return;
        }

        // We're about to do one or more partial writes, so we ensure the blocks are materialized.
        let blocks = self.materialize_blocks();

        for mut j in 0..repeat {
            j *= range.size.bytes();
            j += range.start.bytes();
            let mut cur = defined.initial;
            for range in &defined.ranges {
                let old_j = j;
                j += range;
                blocks.set_range_inbounds(Size::from_bytes(old_j), Size::from_bytes(j), cur);
                cur = !cur;
            }
        }
    }
}