// Copyright (c) 2017 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

use crate::buffer::sys::BufferCreationError;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::buffer::BufferUsage;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPool;
use crate::memory::DedicatedAlloc;
use crate::memory::DeviceMemoryAllocError;
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use crate::OomError;
use std::cmp;
use std::hash::Hash;
use std::hash::Hasher;
use std::iter;
use std::marker::PhantomData;
use std::mem;
use std::ptr;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

// TODO: Add `CpuBufferPoolSubbuffer::read` to read the content of a subbuffer.
//       But that's hard to do because we must prevent `increase_gpu_lock` from working while a
//       buffer is locked.

/// Ring buffer from which "sub-buffers" can be individually allocated.
///
/// This buffer is especially suitable when you want to upload or download some data regularly
/// (for example, at each frame for a video game).
///
/// # Usage
///
/// A `CpuBufferPool` is similar to a ring buffer. You start by creating an empty pool, then you
/// grab elements from the pool and use them, and if the pool is full it will automatically grow
/// in size.
///
/// Unlike a `Vec`, elements automatically free themselves when they are dropped (i.e. usually
/// when you call `cleanup_finished()` on a future, or when you drop that future).
///
/// # Arc-like
///
/// The `CpuBufferPool` struct internally contains an `Arc`. You can clone the `CpuBufferPool`
/// cheaply, and all the clones will share the same underlying buffer.
///
/// # Example
///
/// ```
/// use vulkano::buffer::CpuBufferPool;
/// use vulkano::command_buffer::AutoCommandBufferBuilder;
/// use vulkano::command_buffer::CommandBufferUsage;
/// use vulkano::command_buffer::PrimaryCommandBuffer;
/// use vulkano::sync::GpuFuture;
/// # let device: std::sync::Arc<vulkano::device::Device> = return;
/// # let queue: std::sync::Arc<vulkano::device::Queue> = return;
///
/// // Create the ring buffer.
/// let buffer = CpuBufferPool::upload(device.clone());
///
/// for n in 0 .. 25u32 {
///     // Each loop grabs a new entry from that ring buffer and stores `data` in it.
///     let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0];
///     let sub_buffer = buffer.next(data).unwrap();
///
///     // You can then use `sub_buffer` as if it was an entirely separate buffer.
///     AutoCommandBufferBuilder::primary(device.clone(), queue.family(), CommandBufferUsage::OneTimeSubmit)
///         .unwrap()
///         // For the sake of the example we just call `update_buffer` on the buffer, even though
///         // it is pointless to do that.
///         .update_buffer(sub_buffer.clone(), &[0.2, 0.3, 0.4, 0.5])
///         .unwrap()
///         .build().unwrap()
///         .execute(queue.clone())
///         .unwrap()
///         .then_signal_fence_and_flush()
///         .unwrap();
/// }
/// ```
///
pub struct CpuBufferPool<T, A = Arc<StdMemoryPool>>
where
    A: MemoryPool,
{
    // The device of the pool.
    device: Arc<Device>,

    // The memory pool to use for allocations.
    pool: A,

    // Current buffer from which elements are grabbed.
    current_buffer: Mutex<Option<Arc<ActualBuffer<A>>>>,

    // Buffer usage.
    usage: BufferUsage,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

// One buffer of the pool.
struct ActualBuffer<A>
where
    A: MemoryPool,
{
    // Inner content.
    inner: UnsafeBuffer,

    // The memory held by the buffer.
    memory: PotentialDedicatedAllocation<A::Alloc>,

    // List of the chunks that are reserved.
    chunks_in_use: Mutex<Vec<ActualBufferChunk>>,

    // The index of the chunk that should be available next for the ring buffer.
    next_index: AtomicU64,

    // Number of elements in the buffer.
    capacity: DeviceSize,
}

// Access pattern of one subbuffer.
#[derive(Debug)]
struct ActualBufferChunk {
    // First element number within the actual buffer.
    index: DeviceSize,

    // Number of occupied elements within the actual buffer.
    len: DeviceSize,

    // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer.
    num_cpu_accesses: usize,

    // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer and that have been
    // GPU-locked.
    num_gpu_accesses: usize,
}

/// A subbuffer allocated from a `CpuBufferPool`.
///
/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool.
pub struct CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    buffer: Arc<ActualBuffer<A>>,

    // Index of the subbuffer within `buffer`. In number of elements.
    index: DeviceSize,

    // Number of bytes to add to `index * mem::size_of::<T>()` to obtain the start of the data in
    // the buffer. Necessary for alignment purposes.
    align_offset: DeviceSize,

    // Size of the subbuffer in number of elements, as requested by the user.
    // If this is 0, then no entry was added to `chunks_in_use`.
    requested_len: DeviceSize,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

/// A subbuffer allocated from a `CpuBufferPool`.
///
/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool.
pub struct CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    // This struct is just a wrapper around `CpuBufferPoolChunk`.
    chunk: CpuBufferPoolChunk<T, A>,
}

impl<T> CpuBufferPool<T> {
    /// Builds a `CpuBufferPool`.
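    ///
    /// # Example
    ///
    /// A minimal, illustrative sketch of building a pool with a custom `BufferUsage`; `device`
    /// is assumed to be an already-created logical device:
    ///
    /// ```
    /// use vulkano::buffer::{BufferUsage, CpuBufferPool};
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// // Sub-buffers from this pool can be used both as a transfer source and as a uniform buffer.
    /// let usage = BufferUsage {
    ///     transfer_source: true,
    ///     uniform_buffer: true,
    ///     ..BufferUsage::none()
    /// };
    /// let pool: CpuBufferPool<[f32; 4]> = CpuBufferPool::new(device.clone(), usage);
    /// ```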
    #[inline]
    pub fn new(device: Arc<Device>, usage: BufferUsage) -> CpuBufferPool<T> {
        let pool = Device::standard_pool(&device);

        CpuBufferPool {
            device: device,
            pool: pool,
            current_buffer: Mutex::new(None),
            usage: usage.clone(),
            marker: PhantomData,
        }
    }

    /// Builds a `CpuBufferPool` meant for simple uploads.
    ///
    /// Shortcut for a pool that can only be used as transfer source and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn upload(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::transfer_source())
    }

    /// Builds a `CpuBufferPool` meant for simple downloads.
    ///
    /// Shortcut for a pool that can only be used as transfer destination and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn download(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::transfer_destination())
    }

    /// Builds a `CpuBufferPool` meant for usage as a uniform buffer.
    ///
    /// Shortcut for a pool that can only be used as uniform buffer and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn uniform_buffer(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::uniform_buffer())
    }

    /// Builds a `CpuBufferPool` meant for usage as a vertex buffer.
    ///
    /// Shortcut for a pool that can only be used as vertex buffer and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn vertex_buffer(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::vertex_buffer())
    }

    /// Builds a `CpuBufferPool` meant for usage as an indirect buffer.
    ///
    /// Shortcut for a pool that can only be used as indirect buffer and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn indirect_buffer(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::indirect_buffer())
    }
}

impl<T, A> CpuBufferPool<T, A>
where
    A: MemoryPool,
{
    /// Returns the current capacity of the pool, in number of elements.
    pub fn capacity(&self) -> DeviceSize {
        match *self.current_buffer.lock().unwrap() {
            None => 0,
            Some(ref buf) => buf.capacity,
        }
    }

    /// Makes sure that the capacity is at least `capacity`. Allocates memory if it is not the
    /// case.
    ///
    /// Since this can involve a memory allocation, an `OomError` can happen.
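    ///
    /// # Example
    ///
    /// A minimal sketch of pre-allocating capacity up front; `device` is assumed to be an
    /// already-created logical device:
    ///
    /// ```
    /// use vulkano::buffer::CpuBufferPool;
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// let pool: CpuBufferPool<[f32; 4]> = CpuBufferPool::upload(device.clone());
    ///
    /// // Reserve room for 64 elements so the first frames don't have to allocate.
    /// pool.reserve(64).unwrap();
    /// assert!(pool.capacity() >= 64);
    /// ```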
    pub fn reserve(&self, capacity: DeviceSize) -> Result<(), DeviceMemoryAllocError> {
        let mut cur_buf = self.current_buffer.lock().unwrap();

        // Check current capacity.
        match *cur_buf {
            Some(ref buf) if buf.capacity >= capacity => {
                return Ok(());
            }
            _ => (),
        };

        self.reset_buf(&mut cur_buf, capacity)
    }

    /// Grants access to a new subbuffer and puts `data` in it.
    ///
    /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will
    /// automatically be allocated.
    ///
    /// > **Note**: You can think of it like a `Vec`. If you insert an element and the `Vec` is not
    /// > large enough, a new chunk of memory is automatically allocated.
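    ///
    /// # Example
    ///
    /// A minimal sketch; `device` is assumed to be an already-created logical device:
    ///
    /// ```
    /// use vulkano::buffer::CpuBufferPool;
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// let pool = CpuBufferPool::upload(device.clone());
    ///
    /// // Each call hands back an independent sub-buffer containing `data`.
    /// let sub_buffer = pool.next([0.0f32; 4]).unwrap();
    /// ```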
    #[inline]
    pub fn next(&self, data: T) -> Result<CpuBufferPoolSubbuffer<T, A>, DeviceMemoryAllocError> {
        Ok(CpuBufferPoolSubbuffer {
            chunk: self.chunk(iter::once(data))?,
        })
    }

    /// Grants access to a new subbuffer and puts `data` in it.
    ///
    /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will
    /// automatically be allocated.
    ///
    /// > **Note**: You can think of it like a `Vec`. If you insert elements and the `Vec` is not
    /// > large enough, a new chunk of memory is automatically allocated.
    ///
    /// # Panic
    ///
    /// Panics if the length of the iterator didn't match the actual number of elements.
    ///
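    /// # Example
    ///
    /// A minimal sketch of uploading several elements at once; `device` is assumed to be an
    /// already-created logical device:
    ///
    /// ```
    /// use vulkano::buffer::CpuBufferPool;
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// let pool: CpuBufferPool<u32> = CpuBufferPool::upload(device.clone());
    ///
    /// // The four values end up in one contiguous sub-buffer typed as `[u32]`.
    /// let chunk = pool.chunk(vec![1, 2, 3, 4]).unwrap();
    /// ```
    ///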
    pub fn chunk<I>(&self, data: I) -> Result<CpuBufferPoolChunk<T, A>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = T>,
        I::IntoIter: ExactSizeIterator,
    {
        let data = data.into_iter();

        let mut mutex = self.current_buffer.lock().unwrap();

        let data = match self.try_next_impl(&mut mutex, data) {
            Ok(n) => return Ok(n),
            Err(d) => d,
        };

        let next_capacity = match *mutex {
            Some(ref b) if (data.len() as DeviceSize) < b.capacity => 2 * b.capacity,
            _ => 2 * data.len() as DeviceSize,
        };

        self.reset_buf(&mut mutex, next_capacity)?;

        match self.try_next_impl(&mut mutex, data) {
            Ok(n) => Ok(n),
            Err(_) => unreachable!(),
        }
    }

    /// Grants access to a new subbuffer and puts `data` in it.
    ///
    /// Returns `None` if no subbuffer is available.
    ///
    /// A `CpuBufferPool` is always empty the first time you use it, so `try_next` will always
    /// return `None` the first time you use it.
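    ///
    /// # Example
    ///
    /// A minimal sketch; `device` is assumed to be an already-created logical device:
    ///
    /// ```
    /// use vulkano::buffer::CpuBufferPool;
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// let pool = CpuBufferPool::upload(device.clone());
    ///
    /// // The pool starts out empty, so this first attempt cannot succeed.
    /// assert!(pool.try_next(5u32).is_none());
    ///
    /// // `next` allocates a buffer on demand; afterwards `try_next` can allocate from it.
    /// let _first = pool.next(5u32).unwrap();
    /// let _maybe_second = pool.try_next(5u32);
    /// ```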
    #[inline]
    pub fn try_next(&self, data: T) -> Option<CpuBufferPoolSubbuffer<T, A>> {
        let mut mutex = self.current_buffer.lock().unwrap();
        self.try_next_impl(&mut mutex, iter::once(data))
            .map(|c| CpuBufferPoolSubbuffer { chunk: c })
            .ok()
    }

    // Creates a new buffer and sets it as current. The capacity is in number of elements.
    //
    // `cur_buf_mutex` must be an active lock of `self.current_buffer`.
    fn reset_buf(
        &self,
        cur_buf_mutex: &mut MutexGuard<Option<Arc<ActualBuffer<A>>>>,
        capacity: DeviceSize,
    ) -> Result<(), DeviceMemoryAllocError> {
        unsafe {
            let (buffer, mem_reqs) = {
                let size_bytes = match (mem::size_of::<T>() as DeviceSize).checked_mul(capacity) {
                    Some(s) => s,
                    None => {
                        return Err(DeviceMemoryAllocError::OomError(
                            OomError::OutOfDeviceMemory,
                        ))
                    }
                };

                match UnsafeBuffer::new(
                    self.device.clone(),
                    size_bytes as DeviceSize,
                    self.usage,
                    Sharing::Exclusive::<iter::Empty<_>>,
                    None,
                ) {
                    Ok(b) => b,
                    Err(BufferCreationError::AllocError(err)) => return Err(err),
                    Err(_) => unreachable!(), // We don't use sparse binding, therefore the other
                                              // errors can't happen
                }
            };

            let mem = MemoryPool::alloc_from_requirements(
                &self.pool,
                &mem_reqs,
                AllocLayout::Linear,
                MappingRequirement::Map,
                DedicatedAlloc::Buffer(&buffer),
                |_| AllocFromRequirementsFilter::Allowed,
            )?;
            debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
            debug_assert!(mem.mapped_memory().is_some());
            buffer.bind_memory(mem.memory(), mem.offset())?;

            **cur_buf_mutex = Some(Arc::new(ActualBuffer {
                inner: buffer,
                memory: mem,
                chunks_in_use: Mutex::new(vec![]),
                next_index: AtomicU64::new(0),
                capacity: capacity,
            }));

            Ok(())
        }
    }

    // Tries to lock a subbuffer from the current buffer.
    //
    // `cur_buf_mutex` must be an active lock of `self.current_buffer`.
    //
    // Returns `data` wrapped inside an `Err` if there is no slot available in the current buffer.
    //
    // # Panic
    //
    // Panics if the length of the iterator didn't match the actual number of elements.
    //
    fn try_next_impl<I>(
        &self,
        cur_buf_mutex: &mut MutexGuard<Option<Arc<ActualBuffer<A>>>>,
        mut data: I,
    ) -> Result<CpuBufferPoolChunk<T, A>, I>
    where
        I: ExactSizeIterator<Item = T>,
    {
        // Grab the current buffer. Return `Err` if the pool wasn't "initialized" yet.
        let current_buffer = match cur_buf_mutex.clone() {
            Some(b) => b,
            None => return Err(data),
        };

        let mut chunks_in_use = current_buffer.chunks_in_use.lock().unwrap();
        debug_assert!(!chunks_in_use.iter().any(|c| c.len == 0));

        // Number of elements requested by the user.
        let requested_len = data.len() as DeviceSize;

        // We special-case when 0 elements are requested. Polluting the list of allocated chunks
        // with chunks of length 0 means that we will have trouble deallocating.
        if requested_len == 0 {
            assert!(
                data.next().is_none(),
                "Expected iterator passed to CpuBufferPool::chunk to be empty"
            );
            return Ok(CpuBufferPoolChunk {
                // TODO: remove .clone() once non-lexical borrows land
                buffer: current_buffer.clone(),
                index: 0,
                align_offset: 0,
                requested_len: 0,
                marker: PhantomData,
            });
        }

        // Find a suitable offset and len, or return if none is available.
        let (index, occupied_len, align_offset) = {
            let (tentative_index, tentative_len, tentative_align_offset) = {
                // Since the only place that touches `next_index` is this code, and since we
                // own a mutex lock to the buffer, it means that `next_index` can't be accessed
                // concurrently.
                // TODO: ^ eventually should be put inside the mutex
                let idx = current_buffer.next_index.load(Ordering::SeqCst);

                // Find the required alignment in bytes.
                let align_bytes = cmp::max(
                    if self.usage.uniform_buffer {
                        self.device()
                            .physical_device()
                            .properties()
                            .min_uniform_buffer_offset_alignment
                    } else {
                        1
                    },
                    if self.usage.storage_buffer {
                        self.device()
                            .physical_device()
                            .properties()
                            .min_storage_buffer_offset_alignment
                    } else {
                        1
                    },
                );

                let tentative_align_offset = (align_bytes
                    - ((idx * mem::size_of::<T>() as DeviceSize) % align_bytes))
                    % align_bytes;
                let additional_len = if tentative_align_offset == 0 {
                    0
                } else {
                    1 + (tentative_align_offset - 1) / mem::size_of::<T>() as DeviceSize
                };
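                // Worked example (illustrative, not from the original code): with `T = [f32; 4]`
                // (16 bytes), `idx = 3` and a 256-byte alignment requirement, the chunk would
                // normally start at byte 48, so `tentative_align_offset` is 208 bytes and
                // `additional_len` rounds that up to 13 extra elements.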

                (idx, requested_len + additional_len, tentative_align_offset)
            };

            // Find out whether any chunk in use overlaps this range.
            if tentative_index + tentative_len <= current_buffer.capacity
                && !chunks_in_use.iter().any(|c| {
                    (c.index >= tentative_index && c.index < tentative_index + tentative_len)
                        || (c.index <= tentative_index && c.index + c.len > tentative_index)
                })
            {
                (tentative_index, tentative_len, tentative_align_offset)
            } else {
                // Impossible to allocate at `tentative_index`. Let's try 0 instead.
                if requested_len <= current_buffer.capacity
                    && !chunks_in_use.iter().any(|c| c.index < requested_len)
                {
                    (0, requested_len, 0)
                } else {
                    // Buffer is full. Return.
                    return Err(data);
                }
            }
        };

        // Write `data` into the memory.
        unsafe {
            let mem_off = current_buffer.memory.offset();
            let range_start = index * mem::size_of::<T>() as DeviceSize + align_offset + mem_off;
            let range_end = (index + requested_len) * mem::size_of::<T>() as DeviceSize
                + align_offset
                + mem_off;
            let mut mapping = current_buffer
                .memory
                .mapped_memory()
                .unwrap()
                .read_write::<[T]>(range_start..range_end);

            let mut written = 0;
            for (o, i) in mapping.iter_mut().zip(data) {
                ptr::write(o, i);
                written += 1;
            }
            assert_eq!(
                written, requested_len,
                "Iterator passed to CpuBufferPool::chunk has a mismatch between reported \
                        length and actual number of elements"
            );
        }

        // Mark the chunk as in use.
        current_buffer
            .next_index
            .store(index + occupied_len, Ordering::SeqCst);
        chunks_in_use.push(ActualBufferChunk {
            index,
            len: occupied_len,
            num_cpu_accesses: 1,
            num_gpu_accesses: 0,
        });

        Ok(CpuBufferPoolChunk {
            // TODO: remove .clone() once non-lexical borrows land
            buffer: current_buffer.clone(),
            index: index,
            align_offset,
            requested_len,
            marker: PhantomData,
        })
    }
}

// Can't automatically derive `Clone`, otherwise the compiler adds a `T: Clone` requirement.
impl<T, A> Clone for CpuBufferPool<T, A>
where
    A: MemoryPool + Clone,
{
    fn clone(&self) -> Self {
        let buf = self.current_buffer.lock().unwrap();

        CpuBufferPool {
            device: self.device.clone(),
            pool: self.pool.clone(),
            current_buffer: Mutex::new(buf.clone()),
            usage: self.usage.clone(),
            marker: PhantomData,
        }
    }
}

unsafe impl<T, A> DeviceOwned for CpuBufferPool<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn device(&self) -> &Arc<Device> {
        &self.device
    }
}

impl<T, A> Clone for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    fn clone(&self) -> CpuBufferPoolChunk<T, A> {
        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        debug_assert!(chunk.num_cpu_accesses >= 1);
        chunk.num_cpu_accesses = chunk
            .num_cpu_accesses
            .checked_add(1)
            .expect("Overflow in CPU accesses");

        CpuBufferPoolChunk {
            buffer: self.buffer.clone(),
            index: self.index,
            align_offset: self.align_offset,
            requested_len: self.requested_len,
            marker: PhantomData,
        }
    }
}

unsafe impl<T, A> BufferAccess for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        BufferInner {
            buffer: &self.buffer.inner,
            offset: self.index * mem::size_of::<T>() as DeviceSize + self.align_offset,
        }
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.requested_len * mem::size_of::<T>() as DeviceSize
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        (
            self.buffer.inner.key(),
            // ensure the special cased empty buffers don't collide with a regular buffer starting at 0
            if self.requested_len == 0 {
                u64::MAX
            } else {
                self.index
            },
        )
    }

    #[inline]
    fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> {
        if self.requested_len == 0 {
            return Ok(());
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        if chunk.num_gpu_accesses != 0 {
            return Err(AccessError::AlreadyInUse);
        }

        chunk.num_gpu_accesses = 1;
        Ok(())
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        if self.requested_len == 0 {
            return;
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        debug_assert!(chunk.num_gpu_accesses >= 1);
        chunk.num_gpu_accesses = chunk
            .num_gpu_accesses
            .checked_add(1)
            .expect("Overflow in GPU usages");
    }

    #[inline]
    unsafe fn unlock(&self) {
        if self.requested_len == 0 {
            return;
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        debug_assert!(chunk.num_gpu_accesses >= 1);
        chunk.num_gpu_accesses -= 1;
    }
}

impl<T, A> Drop for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    fn drop(&mut self) {
        // If `requested_len` is 0, then no entry was added in the chunks.
        if self.requested_len == 0 {
            return;
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk_num = chunks_in_use_lock
            .iter_mut()
            .position(|c| c.index == self.index)
            .unwrap();

        if chunks_in_use_lock[chunk_num].num_cpu_accesses >= 2 {
            chunks_in_use_lock[chunk_num].num_cpu_accesses -= 1;
        } else {
            debug_assert_eq!(chunks_in_use_lock[chunk_num].num_gpu_accesses, 0);
            chunks_in_use_lock.remove(chunk_num);
        }
    }
}

unsafe impl<T, A> TypedBufferAccess for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    type Content = [T];
}

unsafe impl<T, A> DeviceOwned for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.buffer.inner.device()
    }
}

impl<T, A> PartialEq for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T, A> Eq for CpuBufferPoolChunk<T, A> where A: MemoryPool {}

impl<T, A> Hash for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}

impl<T, A> Clone for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    fn clone(&self) -> CpuBufferPoolSubbuffer<T, A> {
        CpuBufferPoolSubbuffer {
            chunk: self.chunk.clone(),
        }
    }
}

unsafe impl<T, A> BufferAccess for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        self.chunk.inner()
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.chunk.size()
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        self.chunk.conflict_key()
    }

    #[inline]
    fn try_gpu_lock(&self, e: bool, q: &Queue) -> Result<(), AccessError> {
        self.chunk.try_gpu_lock(e, q)
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        self.chunk.increase_gpu_lock()
    }

    #[inline]
    unsafe fn unlock(&self) {
        self.chunk.unlock()
    }
}

unsafe impl<T, A> TypedBufferAccess for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    type Content = T;
}

unsafe impl<T, A> DeviceOwned for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.chunk.buffer.inner.device()
    }
}

impl<T, A> PartialEq for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T, A> Eq for CpuBufferPoolSubbuffer<T, A> where A: MemoryPool {}

impl<T, A> Hash for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}

#[cfg(test)]
mod tests {
    use crate::buffer::CpuBufferPool;
    use std::mem;

    #[test]
    fn basic_create() {
        let (device, _) = gfx_dev_and_queue!();
        let _ = CpuBufferPool::<u8>::upload(device);
    }

    #[test]
    fn reserve() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::<u8>::upload(device);
        assert_eq!(pool.capacity(), 0);

        pool.reserve(83).unwrap();
        assert_eq!(pool.capacity(), 83);
    }

    #[test]
    fn capacity_increase() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::upload(device);
        assert_eq!(pool.capacity(), 0);

        pool.next(12).unwrap();
        let first_cap = pool.capacity();
        assert!(first_cap >= 1);

        for _ in 0..first_cap + 5 {
            mem::forget(pool.next(12).unwrap());
        }

        assert!(pool.capacity() > first_cap);
    }

    #[test]
    fn reuse_subbuffers() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::upload(device);
        assert_eq!(pool.capacity(), 0);

        let mut capacity = None;
        for _ in 0..64 {
            pool.next(12).unwrap();

            let new_cap = pool.capacity();
            assert!(new_cap >= 1);
            match capacity {
                None => capacity = Some(new_cap),
                Some(c) => assert_eq!(c, new_cap),
            }
        }
    }

    #[test]
    fn chunk_loopback() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::<u8>::upload(device);
        pool.reserve(5).unwrap();

        let a = pool.chunk(vec![0, 0]).unwrap();
        let b = pool.chunk(vec![0, 0]).unwrap();
        assert_eq!(b.index, 2);
        drop(a);

        let c = pool.chunk(vec![0, 0]).unwrap();
        assert_eq!(c.index, 0);

        assert_eq!(pool.capacity(), 5);
    }

    #[test]
    fn chunk_0_elems_doesnt_pollute() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::<u8>::upload(device);

        let _ = pool.chunk(vec![]).unwrap();
        let _ = pool.chunk(vec![0, 0]).unwrap();
    }
}