// Copyright (c) 2017 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

use crate::buffer::sys::BufferCreationError;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::buffer::BufferUsage;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPool;
use crate::memory::DedicatedAlloc;
use crate::memory::DeviceMemoryAllocError;
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use crate::OomError;
use std::cmp;
use std::hash::Hash;
use std::hash::Hasher;
use std::iter;
use std::marker::PhantomData;
use std::mem;
use std::ptr;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

// TODO: Add `CpuBufferPoolSubbuffer::read` to read the content of a subbuffer.
// But that's hard to do because we must prevent `increase_gpu_lock` from working while a
// buffer is locked.

/// Ring buffer from which "sub-buffers" can be individually allocated.
///
/// This buffer is especially suitable when you want to upload or download some data regularly
/// (for example, at each frame for a video game).
///
/// # Usage
///
/// A `CpuBufferPool` is similar to a ring buffer. You start by creating an empty pool, then you
/// grab elements from the pool and use them, and if the pool is full it will automatically grow
/// in size.
///
/// Contrary to a `Vec`, elements automatically free themselves when they are dropped (i.e.
/// usually when you call `cleanup_finished()` on a future, or when you drop that future).
///
/// # Arc-like
///
/// The `CpuBufferPool` struct internally contains an `Arc`. You can clone the `CpuBufferPool`
/// cheaply, and all the clones will share the same underlying buffer.
///
/// # Example
///
/// ```
/// use vulkano::buffer::CpuBufferPool;
/// use vulkano::command_buffer::AutoCommandBufferBuilder;
/// use vulkano::command_buffer::CommandBufferUsage;
/// use vulkano::command_buffer::PrimaryCommandBuffer;
/// use vulkano::sync::GpuFuture;
/// # let device: std::sync::Arc<vulkano::device::Device> = return;
/// # let queue: std::sync::Arc<vulkano::device::Queue> = return;
///
/// // Create the ring buffer.
/// let buffer = CpuBufferPool::upload(device.clone());
///
/// for n in 0 .. 25u32 {
///     // Each loop grabs a new entry from that ring buffer and stores `data` in it.
///     let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0];
///     let sub_buffer = buffer.next(data).unwrap();
///
///     // You can then use `sub_buffer` as if it was an entirely separate buffer.
///     AutoCommandBufferBuilder::primary(device.clone(), queue.family(), CommandBufferUsage::OneTimeSubmit)
///         .unwrap()
///         // For the sake of the example we just call `update_buffer` on the buffer, even though
///         // it is pointless to do that.
///         .update_buffer(sub_buffer.clone(), &[0.2, 0.3, 0.4, 0.5])
///         .unwrap()
///         .build().unwrap()
///         .execute(queue.clone())
///         .unwrap()
///         .then_signal_fence_and_flush()
///         .unwrap();
/// }
/// ```
///
pub struct CpuBufferPool<T, A = Arc<StdMemoryPool>>
where
    A: MemoryPool,
{
    // The device of the pool.
    device: Arc<Device>,

    // The memory pool to use for allocations.
    pool: A,

    // Current buffer from which elements are grabbed.
    current_buffer: Mutex<Option<Arc<ActualBuffer<A>>>>,

    // Buffer usage.
    usage: BufferUsage,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

// One buffer of the pool.
struct ActualBuffer<A>
where
    A: MemoryPool,
{
    // Inner content.
    inner: UnsafeBuffer,

    // The memory held by the buffer.
    memory: PotentialDedicatedAllocation<A::Alloc>,

    // List of the chunks that are reserved.
    chunks_in_use: Mutex<Vec<ActualBufferChunk>>,

    // The index of the chunk that should be available next for the ring buffer.
    next_index: AtomicU64,

    // Number of elements in the buffer.
    capacity: DeviceSize,
}

// Access pattern of one subbuffer.
#[derive(Debug)]
struct ActualBufferChunk {
    // First element number within the actual buffer.
    index: DeviceSize,

    // Number of occupied elements within the actual buffer.
    len: DeviceSize,

    // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer.
    num_cpu_accesses: usize,

    // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer and that have been
    // GPU-locked.
    num_gpu_accesses: usize,
}

/// A subbuffer allocated from a `CpuBufferPool`.
///
/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool.
pub struct CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    buffer: Arc<ActualBuffer<A>>,

    // Index of the subbuffer within `buffer`. In number of elements.
    index: DeviceSize,

    // Number of bytes to add to `index * mem::size_of::<T>()` to obtain the start of the data in
    // the buffer. Necessary for alignment purposes.
    align_offset: DeviceSize,

    // Size of the subbuffer in number of elements, as requested by the user.
    // If this is 0, then no entry was added to `chunks_in_use`.
    requested_len: DeviceSize,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

/// A subbuffer allocated from a `CpuBufferPool`.
///
/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool.
pub struct CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    // This struct is just a wrapper around `CpuBufferPoolChunk`.
    chunk: CpuBufferPoolChunk<T, A>,
}

impl<T> CpuBufferPool<T> {
    /// Builds a `CpuBufferPool`.
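    ///
    /// # Example
    ///
    /// A minimal sketch (the `device` setup is assumed, as in the type-level example above;
    /// `BufferUsage::transfer_source()` is just one possible usage):
    ///
    /// ```
    /// use vulkano::buffer::{BufferUsage, CpuBufferPool};
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// // Equivalent to `CpuBufferPool::<f32>::upload(device.clone())`.
    /// let pool = CpuBufferPool::<f32>::new(device.clone(), BufferUsage::transfer_source());
    /// ```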
    #[inline]
    pub fn new(device: Arc<Device>, usage: BufferUsage) -> CpuBufferPool<T> {
        let pool = Device::standard_pool(&device);

        CpuBufferPool {
            device: device,
            pool: pool,
            current_buffer: Mutex::new(None),
            usage: usage.clone(),
            marker: PhantomData,
        }
    }

    /// Builds a `CpuBufferPool` meant for simple uploads.
    ///
    /// Shortcut for a pool that can only be used as transfer source and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn upload(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::transfer_source())
    }

    /// Builds a `CpuBufferPool` meant for simple downloads.
    ///
    /// Shortcut for a pool that can only be used as transfer destination and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn download(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::transfer_destination())
    }

    /// Builds a `CpuBufferPool` meant for usage as a uniform buffer.
    ///
    /// Shortcut for a pool that can only be used as uniform buffer and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn uniform_buffer(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::uniform_buffer())
    }

    /// Builds a `CpuBufferPool` meant for usage as a vertex buffer.
    ///
    /// Shortcut for a pool that can only be used as vertex buffer and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn vertex_buffer(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::vertex_buffer())
    }

    /// Builds a `CpuBufferPool` meant for usage as an indirect buffer.
    ///
    /// Shortcut for a pool that can only be used as indirect buffer and with exclusive queue
    /// family accesses.
    #[inline]
    pub fn indirect_buffer(device: Arc<Device>) -> CpuBufferPool<T> {
        CpuBufferPool::new(device, BufferUsage::indirect_buffer())
    }
}

impl<T, A> CpuBufferPool<T, A>
where
    A: MemoryPool,
{
    /// Returns the current capacity of the pool, in number of elements.
    pub fn capacity(&self) -> DeviceSize {
        match *self.current_buffer.lock().unwrap() {
            None => 0,
            Some(ref buf) => buf.capacity,
        }
    }

    /// Makes sure that the capacity is at least `capacity`. Allocates memory if it is not the
    /// case.
    ///
    /// Since this can involve a memory allocation, an `OomError` can happen.
    pub fn reserve(&self, capacity: DeviceSize) -> Result<(), DeviceMemoryAllocError> {
        let mut cur_buf = self.current_buffer.lock().unwrap();

        // Check current capacity.
        match *cur_buf {
            Some(ref buf) if buf.capacity >= capacity => {
                return Ok(());
            }
            _ => (),
        };

        self.reset_buf(&mut cur_buf, capacity)
    }

    /// Grants access to a new subbuffer and puts `data` in it.
    ///
    /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will
    /// automatically be allocated.
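    ///
    /// A minimal usage sketch (the `device` setup is assumed, as in the type-level example):
    ///
    /// ```
    /// use vulkano::buffer::CpuBufferPool;
    /// # let device: std::sync::Arc<vulkano::device::Device> = return;
    ///
    /// let pool = CpuBufferPool::<u32>::upload(device.clone());
    ///
    /// // Grabs one subbuffer containing the single value `42`.
    /// let sub_buffer = pool.next(42u32).unwrap();
    /// ```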
    ///
    /// > **Note**: You can think of it like a `Vec`. If you insert an element and the `Vec` is not
    /// > large enough, a new chunk of memory is automatically allocated.
    #[inline]
    pub fn next(&self, data: T) -> Result<CpuBufferPoolSubbuffer<T, A>, DeviceMemoryAllocError> {
        Ok(CpuBufferPoolSubbuffer {
            chunk: self.chunk(iter::once(data))?,
        })
    }

    /// Grants access to a new subbuffer and puts `data` in it.
    ///
    /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will
    /// automatically be allocated.
    ///
    /// > **Note**: You can think of it like a `Vec`. If you insert elements and the `Vec` is not
    /// > large enough, a new chunk of memory is automatically allocated.
    ///
    /// # Panic
    ///
    /// Panics if the length reported by the iterator doesn't match the actual number of elements.
    ///
    pub fn chunk<I>(&self, data: I) -> Result<CpuBufferPoolChunk<T, A>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = T>,
        I::IntoIter: ExactSizeIterator,
    {
        let data = data.into_iter();

        let mut mutex = self.current_buffer.lock().unwrap();

        let data = match self.try_next_impl(&mut mutex, data) {
            Ok(n) => return Ok(n),
            Err(d) => d,
        };

        let next_capacity = match *mutex {
            Some(ref b) if (data.len() as DeviceSize) < b.capacity => 2 * b.capacity,
            _ => 2 * data.len() as DeviceSize,
        };

        self.reset_buf(&mut mutex, next_capacity)?;

        match self.try_next_impl(&mut mutex, data) {
            Ok(n) => Ok(n),
            Err(_) => unreachable!(),
        }
    }

    /// Grants access to a new subbuffer and puts `data` in it.
    ///
    /// Returns `None` if no subbuffer is available.
    ///
    /// A `CpuBufferPool` is always empty the first time you use it, so you shouldn't use
    /// `try_next` the first time you use it.
    #[inline]
    pub fn try_next(&self, data: T) -> Option<CpuBufferPoolSubbuffer<T, A>> {
        let mut mutex = self.current_buffer.lock().unwrap();
        self.try_next_impl(&mut mutex, iter::once(data))
            .map(|c| CpuBufferPoolSubbuffer { chunk: c })
            .ok()
    }

    // Creates a new buffer and sets it as current. The capacity is in number of elements.
    //
    // `cur_buf_mutex` must be an active lock of `self.current_buffer`.
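    //
    // The new buffer is `capacity * size_of::<T>()` bytes long and is allocated from `self.pool`
    // with `MappingRequirement::Map`, so the CPU can write into it directly. Chunks that still
    // point to the previous buffer keep it alive through their own `Arc`.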
    fn reset_buf(
        &self,
        cur_buf_mutex: &mut MutexGuard<Option<Arc<ActualBuffer<A>>>>,
        capacity: DeviceSize,
    ) -> Result<(), DeviceMemoryAllocError> {
        unsafe {
            let (buffer, mem_reqs) = {
                let size_bytes = match (mem::size_of::<T>() as DeviceSize).checked_mul(capacity) {
                    Some(s) => s,
                    None => {
                        return Err(DeviceMemoryAllocError::OomError(
                            OomError::OutOfDeviceMemory,
                        ))
                    }
                };

                match UnsafeBuffer::new(
                    self.device.clone(),
                    size_bytes as DeviceSize,
                    self.usage,
                    Sharing::Exclusive::<iter::Empty<_>>,
                    None,
                ) {
                    Ok(b) => b,
                    Err(BufferCreationError::AllocError(err)) => return Err(err),
                    Err(_) => unreachable!(), // We don't use sparse binding, therefore the other
                                              // errors can't happen
                }
            };

            let mem = MemoryPool::alloc_from_requirements(
                &self.pool,
                &mem_reqs,
                AllocLayout::Linear,
                MappingRequirement::Map,
                DedicatedAlloc::Buffer(&buffer),
                |_| AllocFromRequirementsFilter::Allowed,
            )?;
            debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
            debug_assert!(mem.mapped_memory().is_some());
            buffer.bind_memory(mem.memory(), mem.offset())?;

            **cur_buf_mutex = Some(Arc::new(ActualBuffer {
                inner: buffer,
                memory: mem,
                chunks_in_use: Mutex::new(vec![]),
                next_index: AtomicU64::new(0),
                capacity: capacity,
            }));

            Ok(())
        }
    }

    // Tries to lock a subbuffer from the current buffer.
    //
    // `cur_buf_mutex` must be an active lock of `self.current_buffer`.
    //
    // Returns `data` wrapped inside an `Err` if there is no slot available in the current buffer.
    //
    // # Panic
    //
    // Panics if the length reported by the iterator doesn't match the actual number of elements.
    //
    fn try_next_impl<I>(
        &self,
        cur_buf_mutex: &mut MutexGuard<Option<Arc<ActualBuffer<A>>>>,
        mut data: I,
    ) -> Result<CpuBufferPoolChunk<T, A>, I>
    where
        I: ExactSizeIterator<Item = T>,
    {
        // Grab the current buffer. Return `Err` if the pool wasn't "initialized" yet.
        let current_buffer = match cur_buf_mutex.clone() {
            Some(b) => b,
            None => return Err(data),
        };

        let mut chunks_in_use = current_buffer.chunks_in_use.lock().unwrap();
        debug_assert!(!chunks_in_use.iter().any(|c| c.len == 0));

        // Number of elements requested by the user.
        let requested_len = data.len() as DeviceSize;

        // We special case when 0 elements are requested. Polluting the list of allocated chunks
        // with chunks of length 0 means that we will have trouble deallocating.
        if requested_len == 0 {
            assert!(
                data.next().is_none(),
                "Expected iterator passed to CpuBufferPool::chunk to be empty"
            );
            return Ok(CpuBufferPoolChunk {
                // TODO: remove .clone() once non-lexical borrows land
                buffer: current_buffer.clone(),
                index: 0,
                align_offset: 0,
                requested_len: 0,
                marker: PhantomData,
            });
        }

        // Find a suitable offset and len, or return `data` if none is available.
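        // The tentative allocation starts at `next_index` and is padded so that the start of the
        // data is aligned to the device's minimum uniform/storage buffer offset alignment (when
        // the corresponding usages are enabled). If that range runs past the end of the buffer or
        // overlaps a chunk still in use, we retry from index 0; if that also fails, the iterator
        // is handed back to the caller (`chunk` then grows the buffer and retries).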
        let (index, occupied_len, align_offset) = {
            let (tentative_index, tentative_len, tentative_align_offset) = {
                // Since the only place that touches `next_index` is this code, and since we
                // own a mutex lock to the buffer, it means that `next_index` can't be accessed
                // concurrently.
                // TODO: ^ eventually should be put inside the mutex
                let idx = current_buffer.next_index.load(Ordering::SeqCst);

                // Find the required alignment in bytes.
                let align_bytes = cmp::max(
                    if self.usage.uniform_buffer {
                        self.device()
                            .physical_device()
                            .properties()
                            .min_uniform_buffer_offset_alignment
                    } else {
                        1
                    },
                    if self.usage.storage_buffer {
                        self.device()
                            .physical_device()
                            .properties()
                            .min_storage_buffer_offset_alignment
                    } else {
                        1
                    },
                );

                let tentative_align_offset = (align_bytes
                    - ((idx * mem::size_of::<T>() as DeviceSize) % align_bytes))
                    % align_bytes;
                let additional_len = if tentative_align_offset == 0 {
                    0
                } else {
                    1 + (tentative_align_offset - 1) / mem::size_of::<T>() as DeviceSize
                };

                (idx, requested_len + additional_len, tentative_align_offset)
            };

            // Find out whether any chunk in use overlaps this range.
            if tentative_index + tentative_len <= current_buffer.capacity
                && !chunks_in_use.iter().any(|c| {
                    (c.index >= tentative_index && c.index < tentative_index + tentative_len)
                        || (c.index <= tentative_index && c.index + c.len > tentative_index)
                })
            {
                (tentative_index, tentative_len, tentative_align_offset)
            } else {
                // Impossible to allocate at `tentative_index`. Let's try 0 instead.
                if requested_len <= current_buffer.capacity
                    && !chunks_in_use.iter().any(|c| c.index < requested_len)
                {
                    (0, requested_len, 0)
                } else {
                    // Buffer is full. Return.
                    return Err(data);
                }
            }
        };

        // Write `data` in the memory.
        unsafe {
            let mem_off = current_buffer.memory.offset();
            let range_start = index * mem::size_of::<T>() as DeviceSize + align_offset + mem_off;
            let range_end = (index + requested_len) * mem::size_of::<T>() as DeviceSize
                + align_offset
                + mem_off;
            let mut mapping = current_buffer
                .memory
                .mapped_memory()
                .unwrap()
                .read_write::<[T]>(range_start..range_end);

            let mut written = 0;
            for (o, i) in mapping.iter_mut().zip(data) {
                ptr::write(o, i);
                written += 1;
            }
            assert_eq!(
                written, requested_len,
                "Iterator passed to CpuBufferPool::chunk has a mismatch between reported \
                 length and actual number of elements"
            );
        }

        // Mark the chunk as in use.
        current_buffer
            .next_index
            .store(index + occupied_len, Ordering::SeqCst);
        chunks_in_use.push(ActualBufferChunk {
            index,
            len: occupied_len,
            num_cpu_accesses: 1,
            num_gpu_accesses: 0,
        });

        Ok(CpuBufferPoolChunk {
            // TODO: remove .clone() once non-lexical borrows land
            buffer: current_buffer.clone(),
            index: index,
            align_offset,
            requested_len,
            marker: PhantomData,
        })
    }
}

// Can't automatically derive `Clone`, otherwise the compiler adds a `T: Clone` requirement.
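// Clones share the same underlying ring buffer: the current buffer handle is an `Arc`, and with
// the default `Arc<StdMemoryPool>` memory pool the pool handle is reference-counted as well.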
impl<T, A> Clone for CpuBufferPool<T, A>
where
    A: MemoryPool + Clone,
{
    fn clone(&self) -> Self {
        let buf = self.current_buffer.lock().unwrap();

        CpuBufferPool {
            device: self.device.clone(),
            pool: self.pool.clone(),
            current_buffer: Mutex::new(buf.clone()),
            usage: self.usage.clone(),
            marker: PhantomData,
        }
    }
}

unsafe impl<T, A> DeviceOwned for CpuBufferPool<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn device(&self) -> &Arc<Device> {
        &self.device
    }
}

impl<T, A> Clone for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    fn clone(&self) -> CpuBufferPoolChunk<T, A> {
        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        debug_assert!(chunk.num_cpu_accesses >= 1);
        chunk.num_cpu_accesses = chunk
            .num_cpu_accesses
            .checked_add(1)
            .expect("Overflow in CPU accesses");

        CpuBufferPoolChunk {
            buffer: self.buffer.clone(),
            index: self.index,
            align_offset: self.align_offset,
            requested_len: self.requested_len,
            marker: PhantomData,
        }
    }
}

unsafe impl<T, A> BufferAccess for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        BufferInner {
            buffer: &self.buffer.inner,
            offset: self.index * mem::size_of::<T>() as DeviceSize + self.align_offset,
        }
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.requested_len * mem::size_of::<T>() as DeviceSize
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        (
            self.buffer.inner.key(),
            // ensure the special cased empty buffers don't collide with a regular buffer starting at 0
            if self.requested_len == 0 {
                u64::MAX
            } else {
                self.index
            },
        )
    }

    #[inline]
    fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> {
        if self.requested_len == 0 {
            return Ok(());
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        if chunk.num_gpu_accesses != 0 {
            return Err(AccessError::AlreadyInUse);
        }

        chunk.num_gpu_accesses = 1;
        Ok(())
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        if self.requested_len == 0 {
            return;
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        debug_assert!(chunk.num_gpu_accesses >= 1);
        chunk.num_gpu_accesses = chunk
            .num_gpu_accesses
            .checked_add(1)
            .expect("Overflow in GPU usages");
    }

    #[inline]
    unsafe fn unlock(&self) {
        if self.requested_len == 0 {
            return;
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk = chunks_in_use_lock
            .iter_mut()
            .find(|c| c.index == self.index)
            .unwrap();

        debug_assert!(chunk.num_gpu_accesses >= 1);
        chunk.num_gpu_accesses -= 1;
    }
}

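// Dropping the last `CpuBufferPoolChunk` that points to a chunk removes its entry from
// `chunks_in_use`, which makes that range of the ring buffer available to later allocations.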
impl<T, A> Drop for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    fn drop(&mut self) {
        // If `requested_len` is 0, then no entry was added in the chunks.
        if self.requested_len == 0 {
            return;
        }

        let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
        let chunk_num = chunks_in_use_lock
            .iter_mut()
            .position(|c| c.index == self.index)
            .unwrap();

        if chunks_in_use_lock[chunk_num].num_cpu_accesses >= 2 {
            chunks_in_use_lock[chunk_num].num_cpu_accesses -= 1;
        } else {
            debug_assert_eq!(chunks_in_use_lock[chunk_num].num_gpu_accesses, 0);
            chunks_in_use_lock.remove(chunk_num);
        }
    }
}

unsafe impl<T, A> TypedBufferAccess for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    type Content = [T];
}

unsafe impl<T, A> DeviceOwned for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.buffer.inner.device()
    }
}

impl<T, A> PartialEq for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T, A> Eq for CpuBufferPoolChunk<T, A> where A: MemoryPool {}

impl<T, A> Hash for CpuBufferPoolChunk<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}

impl<T, A> Clone for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    fn clone(&self) -> CpuBufferPoolSubbuffer<T, A> {
        CpuBufferPoolSubbuffer {
            chunk: self.chunk.clone(),
        }
    }
}

unsafe impl<T, A> BufferAccess for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        self.chunk.inner()
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.chunk.size()
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        self.chunk.conflict_key()
    }

    #[inline]
    fn try_gpu_lock(&self, e: bool, q: &Queue) -> Result<(), AccessError> {
        self.chunk.try_gpu_lock(e, q)
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        self.chunk.increase_gpu_lock()
    }

    #[inline]
    unsafe fn unlock(&self) {
        self.chunk.unlock()
    }
}

unsafe impl<T, A> TypedBufferAccess for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    type Content = T;
}

unsafe impl<T, A> DeviceOwned for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.chunk.buffer.inner.device()
    }
}

impl<T, A> PartialEq for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T, A> Eq for CpuBufferPoolSubbuffer<T, A> where A: MemoryPool {}

impl<T, A> Hash for CpuBufferPoolSubbuffer<T, A>
where
    A: MemoryPool,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}

#[cfg(test)]
mod tests {
    use crate::buffer::CpuBufferPool;
    use std::mem;

    #[test]
    fn basic_create() {
        let (device, _) = gfx_dev_and_queue!();
        let _ = CpuBufferPool::<u8>::upload(device);
    }

    #[test]
    fn reserve() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::<u8>::upload(device);
        assert_eq!(pool.capacity(), 0);

        pool.reserve(83).unwrap();
        assert_eq!(pool.capacity(), 83);
    }

    #[test]
    fn capacity_increase() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::upload(device);
        assert_eq!(pool.capacity(), 0);

        pool.next(12).unwrap();
        let first_cap = pool.capacity();
        assert!(first_cap >= 1);

        for _ in 0..first_cap + 5 {
            mem::forget(pool.next(12).unwrap());
        }

        assert!(pool.capacity() > first_cap);
    }

    #[test]
    fn reuse_subbuffers() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::upload(device);
        assert_eq!(pool.capacity(), 0);

        let mut capacity = None;
        for _ in 0..64 {
            pool.next(12).unwrap();

            let new_cap = pool.capacity();
            assert!(new_cap >= 1);
            match capacity {
                None => capacity = Some(new_cap),
                Some(c) => assert_eq!(c, new_cap),
            }
        }
    }

    #[test]
    fn chunk_loopback() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::<u8>::upload(device);
        pool.reserve(5).unwrap();

        let a = pool.chunk(vec![0, 0]).unwrap();
        let b = pool.chunk(vec![0, 0]).unwrap();
        assert_eq!(b.index, 2);
        drop(a);

        let c = pool.chunk(vec![0, 0]).unwrap();
        assert_eq!(c.index, 0);

        assert_eq!(pool.capacity(), 5);
    }

    #[test]
    fn chunk_0_elems_doesnt_pollute() {
        let (device, _) = gfx_dev_and_queue!();

        let pool = CpuBufferPool::<u8>::upload(device);

        let _ = pool.chunk(vec![]).unwrap();
        let _ = pool.chunk(vec![0, 0]).unwrap();
    }
}