// Copyright (c) 2016 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0>
// or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! Buffer whose content is accessible to the CPU.
//!
//! The `CpuAccessibleBuffer` is a basic general-purpose buffer. It can be used in any situation
//! but may not perform as well as other buffer types.
//!
//! Each access from the CPU or from the GPU locks the whole buffer for either reading or writing.
//! You can read the buffer multiple times simultaneously. Trying to read and write simultaneously,
//! or to write and write simultaneously, will block.

use crate::buffer::sys::BufferCreationError;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::buffer::BufferUsage;
use crate::device::physical::QueueFamily;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPoolAlloc;
use crate::memory::Content;
use crate::memory::CpuAccess as MemCpuAccess;
use crate::memory::DedicatedAlloc;
use crate::memory::DeviceMemoryAllocError;
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use parking_lot::RwLock;
use parking_lot::RwLockReadGuard;
use parking_lot::RwLockWriteGuard;
use smallvec::SmallVec;
use std::error;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
use std::iter;
use std::marker::PhantomData;
use std::mem;
use std::ops::Deref;
use std::ops::DerefMut;
use std::ptr;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;

/// Buffer whose content is accessible by the CPU.
///
/// Setting the `host_cached` parameter on the various constructors to `true` will make the
/// `CpuAccessibleBuffer` prefer to allocate from host-cached memory. Host-cached memory caches
/// GPU data on the CPU side. This can be more performant in cases where the CPU needs to read
/// data coming off the GPU.
#[derive(Debug)]
pub struct CpuAccessibleBuffer<T: ?Sized, A = PotentialDedicatedAllocation<StdMemoryPoolAlloc>> {
    // Inner content.
    inner: UnsafeBuffer,

    // The memory held by the buffer.
    memory: A,

    // Access pattern of the buffer.
    // Every time the user tries to read or write the buffer from the CPU, this `RwLock` is kept
    // locked and its content is checked to verify that we are allowed access. Every time the user
    // tries to submit this buffer for the GPU, this `RwLock` is briefly locked and modified.
    access: RwLock<CurrentGpuAccess>,

    // Queue families allowed to access this buffer.
    queue_families: SmallVec<[u32; 4]>,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

#[derive(Debug)]
enum CurrentGpuAccess {
    NonExclusive {
        // Number of non-exclusive GPU accesses. Can be 0.
        num: AtomicUsize,
    },
    Exclusive {
        // Number of exclusive locks. Cannot be 0. If 0 is reached, we must jump to `NonExclusive`.
        num: usize,
    },
}

impl<T> CpuAccessibleBuffer<T> {
    /// Builds a new buffer with some data in it. Only allowed for sized data.
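    ///
    /// # Example
    ///
    /// A minimal sketch of typical usage; it assumes a `device` (an `Arc<Device>`) has already
    /// been created elsewhere, which is why the example is not compiled:
    ///
    /// ```ignore
    /// use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
    ///
    /// let data: u32 = 12;
    /// let buffer = CpuAccessibleBuffer::from_data(
    ///     device.clone(),
    ///     BufferUsage::all(),
    ///     false, // host_cached: prefer non-host-cached memory
    ///     data,
    /// )
    /// .expect("failed to allocate buffer");
    /// ```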
    pub fn from_data(
        device: Arc<Device>,
        usage: BufferUsage,
        host_cached: bool,
        data: T,
    ) -> Result<Arc<CpuAccessibleBuffer<T>>, DeviceMemoryAllocError>
    where
        T: Content + Copy + 'static,
    {
        unsafe {
            let uninitialized = CpuAccessibleBuffer::raw(
                device,
                mem::size_of::<T>() as DeviceSize,
                usage,
                host_cached,
                iter::empty(),
            )?;

            // Note that we are in panic-unsafety land here. However a panic should never ever
            // happen here, so in theory we are safe.
            // TODO: check whether that's true ^

            {
                let mut mapping = uninitialized.write().unwrap();
                ptr::write::<T>(&mut *mapping, data)
            }

            Ok(uninitialized)
        }
    }

    /// Builds a new uninitialized buffer. Only allowed for sized data.
    #[inline]
    pub unsafe fn uninitialized(
        device: Arc<Device>,
        usage: BufferUsage,
        host_cached: bool,
    ) -> Result<Arc<CpuAccessibleBuffer<T>>, DeviceMemoryAllocError> {
        CpuAccessibleBuffer::raw(
            device,
            mem::size_of::<T>() as DeviceSize,
            usage,
            host_cached,
            iter::empty(),
        )
    }
}

impl<T> CpuAccessibleBuffer<[T]> {
    /// Builds a new buffer that contains an array of `T`. The initial data comes from an iterator
    /// that produces that list of `T`s.
    pub fn from_iter<I>(
        device: Arc<Device>,
        usage: BufferUsage,
        host_cached: bool,
        data: I,
    ) -> Result<Arc<CpuAccessibleBuffer<[T]>>, DeviceMemoryAllocError>
    where
        I: ExactSizeIterator<Item = T>,
        T: Content + 'static,
    {
        unsafe {
            let uninitialized = CpuAccessibleBuffer::uninitialized_array(
                device,
                data.len() as DeviceSize,
                usage,
                host_cached,
            )?;

            // Note that we are in panic-unsafety land here. However a panic should never ever
            // happen here, so in theory we are safe.
            // TODO: check whether that's true ^

            {
                let mut mapping = uninitialized.write().unwrap();

                for (i, o) in data.zip(mapping.iter_mut()) {
                    ptr::write(o, i);
                }
            }

            Ok(uninitialized)
        }
    }

    /// Builds a new uninitialized buffer. Can be used for arrays.
    #[inline]
    pub unsafe fn uninitialized_array(
        device: Arc<Device>,
        len: DeviceSize,
        usage: BufferUsage,
        host_cached: bool,
    ) -> Result<Arc<CpuAccessibleBuffer<[T]>>, DeviceMemoryAllocError> {
        CpuAccessibleBuffer::raw(
            device,
            len * mem::size_of::<T>() as DeviceSize,
            usage,
            host_cached,
            iter::empty(),
        )
    }
}

impl<T: ?Sized> CpuAccessibleBuffer<T> {
    /// Builds a new buffer without checking the size.
    ///
    /// # Safety
    ///
    /// You must ensure that the size that you pass is correct for `T`.
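    ///
    /// # Example
    ///
    /// A sketch showing how the sized constructors above use this function; `device` is assumed
    /// to be an existing `Arc<Device>`:
    ///
    /// ```ignore
    /// use std::{iter, mem};
    /// use vulkano::DeviceSize;
    ///
    /// let buffer = unsafe {
    ///     CpuAccessibleBuffer::<u32>::raw(
    ///         device.clone(),
    ///         mem::size_of::<u32>() as DeviceSize, // size must be correct for `T`
    ///         BufferUsage::all(),
    ///         false,         // host_cached
    ///         iter::empty(), // no explicit queue families: exclusive sharing
    ///     )
    /// }
    /// .expect("failed to allocate buffer");
    /// ```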
    pub unsafe fn raw<'a, I>(
        device: Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        host_cached: bool,
        queue_families: I,
    ) -> Result<Arc<CpuAccessibleBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        let queue_families = queue_families
            .into_iter()
            .map(|f| f.id())
            .collect::<SmallVec<[u32; 4]>>();

        let (buffer, mem_reqs) = {
            let sharing = if queue_families.len() >= 2 {
                Sharing::Concurrent(queue_families.iter().cloned())
            } else {
                Sharing::Exclusive
            };

            match UnsafeBuffer::new(device.clone(), size, usage, sharing, None) {
                Ok(b) => b,
                Err(BufferCreationError::AllocError(err)) => return Err(err),
                Err(_) => unreachable!(), // We don't use sparse binding, therefore the other
                                          // errors can't happen
            }
        };

        let mem = MemoryPool::alloc_from_requirements(
            &Device::standard_pool(&device),
            &mem_reqs,
            AllocLayout::Linear,
            MappingRequirement::Map,
            DedicatedAlloc::Buffer(&buffer),
            |m| {
                if m.is_host_cached() {
                    if host_cached {
                        AllocFromRequirementsFilter::Preferred
                    } else {
                        AllocFromRequirementsFilter::Allowed
                    }
                } else {
                    if host_cached {
                        AllocFromRequirementsFilter::Allowed
                    } else {
                        AllocFromRequirementsFilter::Preferred
                    }
                }
            },
        )?;
        debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
        debug_assert!(mem.mapped_memory().is_some());
        buffer.bind_memory(mem.memory(), mem.offset())?;

        Ok(Arc::new(CpuAccessibleBuffer {
            inner: buffer,
            memory: mem,
            access: RwLock::new(CurrentGpuAccess::NonExclusive {
                num: AtomicUsize::new(0),
            }),
            queue_families,
            marker: PhantomData,
        }))
    }
}

impl<T: ?Sized, A> CpuAccessibleBuffer<T, A> {
    /// Returns the queue families this buffer can be used on.
    // TODO: use a custom iterator
    #[inline]
    pub fn queue_families(&self) -> Vec<QueueFamily> {
        self.queue_families
            .iter()
            .map(|&num| {
                self.device()
                    .physical_device()
                    .queue_family_by_id(num)
                    .unwrap()
            })
            .collect()
    }
}

impl<T: ?Sized, A> CpuAccessibleBuffer<T, A>
where
    T: Content + 'static,
    A: MemoryPoolAlloc,
{
    /// Locks the buffer in order to read its content from the CPU.
    ///
    /// If the buffer is currently used in exclusive mode by the GPU, this function will return
    /// an error. Similarly if you called `write()` on the buffer and haven't dropped the lock,
    /// this function will return an error as well.
    ///
    /// After this function successfully locks the buffer, any attempt to submit a command buffer
    /// that uses it in exclusive mode will fail. You can still submit this buffer for
    /// non-exclusive accesses (i.e. reads).
    #[inline]
    pub fn read(&self) -> Result<ReadLock<T>, ReadLockError> {
        let lock = match self.access.try_read() {
            Some(l) => l,
            // TODO: if a user simultaneously calls .write(), and write() is currently finding out
            //       that the buffer is in fact GPU locked, then we will return a CpuWriteLocked
            //       error instead of a GpuWriteLocked ; is this a problem? how do we fix this?
            None => return Err(ReadLockError::CpuWriteLocked),
        };

        if let CurrentGpuAccess::Exclusive { .. } = *lock {
            return Err(ReadLockError::GpuWriteLocked);
        }

        let offset = self.memory.offset();
        let range = offset..offset + self.inner.size();

        Ok(ReadLock {
            inner: unsafe { self.memory.mapped_memory().unwrap().read_write(range) },
            lock,
        })
    }

    /// Locks the buffer in order to write its content from the CPU.
    ///
    /// If the buffer is currently in use by the GPU, this function will return an error.
    /// Similarly if you called `read()` on the buffer and haven't dropped the lock, this function
    /// will return an error as well.
    ///
    /// After this function successfully locks the buffer, any attempt to submit a command buffer
    /// that uses it and any attempt to call `read()` will return an error.
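    ///
    /// # Example
    ///
    /// A sketch of a CPU-side update; it assumes a `buffer` of type
    /// `Arc<CpuAccessibleBuffer<u32>>` that the GPU is not currently using:
    ///
    /// ```ignore
    /// {
    ///     let mut content = buffer.write().expect("buffer is locked");
    ///     *content = 42;
    /// } // the CPU write lock is released when `content` goes out of scope
    ///
    /// assert_eq!(*buffer.read().expect("buffer is locked"), 42);
    /// ```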
    #[inline]
    pub fn write(&self) -> Result<WriteLock<T>, WriteLockError> {
        let lock = match self.access.try_write() {
            Some(l) => l,
            // TODO: if a user simultaneously calls .read() or .write(), and the function is
            //       currently finding out that the buffer is in fact GPU locked, then we will
            //       return a CpuLocked error instead of a GpuLocked ; is this a problem?
            //       how do we fix this?
            None => return Err(WriteLockError::CpuLocked),
        };

        match *lock {
            CurrentGpuAccess::NonExclusive { ref num } if num.load(Ordering::SeqCst) == 0 => (),
            _ => return Err(WriteLockError::GpuLocked),
        }

        let offset = self.memory.offset();
        let range = offset..offset + self.inner.size();

        Ok(WriteLock {
            inner: unsafe { self.memory.mapped_memory().unwrap().read_write(range) },
            lock,
        })
    }
}

unsafe impl<T: ?Sized, A> BufferAccess for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        BufferInner {
            buffer: &self.inner,
            offset: 0,
        }
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.inner.size()
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        (self.inner.key(), 0)
    }

    #[inline]
    fn try_gpu_lock(&self, exclusive_access: bool, _: &Queue) -> Result<(), AccessError> {
        if exclusive_access {
            let mut lock = match self.access.try_write() {
                Some(lock) => lock,
                None => return Err(AccessError::AlreadyInUse),
            };

            match *lock {
                CurrentGpuAccess::NonExclusive { ref num } if num.load(Ordering::SeqCst) == 0 => {
                    ()
                }
                _ => return Err(AccessError::AlreadyInUse),
            };

            *lock = CurrentGpuAccess::Exclusive { num: 1 };
            Ok(())
        } else {
            let lock = match self.access.try_read() {
                Some(lock) => lock,
                None => return Err(AccessError::AlreadyInUse),
            };

            match *lock {
                CurrentGpuAccess::Exclusive { .. } => return Err(AccessError::AlreadyInUse),
                CurrentGpuAccess::NonExclusive { ref num } => num.fetch_add(1, Ordering::SeqCst),
            };

            Ok(())
        }
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        // First, handle if we have a non-exclusive access.
        {
            // Since the buffer is in use by the GPU, it is invalid to hold a write-lock to
            // the buffer. The buffer can still be briefly in a write-locked state for the
            // duration of the check though.
            let read_lock = self.access.read();
            if let CurrentGpuAccess::NonExclusive { ref num } = *read_lock {
                let prev = num.fetch_add(1, Ordering::SeqCst);
                debug_assert!(prev >= 1);
                return;
            }
        }

        // If we reach here, this means that `access` contains `CurrentGpuAccess::Exclusive`.
        {
            // Same remark as above, but for writing.
            let mut write_lock = self.access.write();
            if let CurrentGpuAccess::Exclusive { ref mut num } = *write_lock {
                *num += 1;
            } else {
                unreachable!()
            }
        }
    }

    #[inline]
    unsafe fn unlock(&self) {
        // First, handle if we had a non-exclusive access.
        {
            // Since the buffer is in use by the GPU, it is invalid to hold a write-lock to
            // the buffer. The buffer can still be briefly in a write-locked state for the
            // duration of the check though.
            let read_lock = self.access.read();
            if let CurrentGpuAccess::NonExclusive { ref num } = *read_lock {
                let prev = num.fetch_sub(1, Ordering::SeqCst);
                debug_assert!(prev >= 1);
                return;
            }
        }

        // If we reach here, this means that `access` contains `CurrentGpuAccess::Exclusive`.
        {
            // Same remark as above, but for writing.
            let mut write_lock = self.access.write();
            if let CurrentGpuAccess::Exclusive { ref mut num } = *write_lock {
                if *num != 1 {
                    *num -= 1;
                    return;
                }
            } else {
                // Can happen if we lock in exclusive mode N times, and unlock N+1 times, with
                // the last two unlocks happening simultaneously.
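                // Panicking is the only sound option left: the access bookkeeping no longer
                // matches reality, so returning normally could let the CPU and the GPU access
                // the buffer concurrently.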
                panic!()
            }

            *write_lock = CurrentGpuAccess::NonExclusive {
                num: AtomicUsize::new(0),
            };
        }
    }
}

unsafe impl<T: ?Sized, A> TypedBufferAccess for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    type Content = T;
}

unsafe impl<T: ?Sized, A> DeviceOwned for CpuAccessibleBuffer<T, A> {
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.inner.device()
    }
}

impl<T: ?Sized, A> PartialEq for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T: ?Sized, A> Eq for CpuAccessibleBuffer<T, A> where T: 'static + Send + Sync {}

impl<T: ?Sized, A> Hash for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}

/// Object that can be used to read the content of a `CpuAccessibleBuffer`.
///
/// Note that this object holds a rwlock read guard on the chunk. If another thread tries to
/// access this buffer's content or tries to submit a GPU command that uses this buffer, it will
/// block.
pub struct ReadLock<'a, T: ?Sized + 'a> {
    inner: MemCpuAccess<'a, T>,
    lock: RwLockReadGuard<'a, CurrentGpuAccess>,
}

impl<'a, T: ?Sized + 'a> ReadLock<'a, T> {
    /// Makes a new `ReadLock` to access a sub-part of the current `ReadLock`.
    #[inline]
    pub fn map<U: ?Sized + 'a, F>(self, f: F) -> ReadLock<'a, U>
    where
        F: FnOnce(&mut T) -> &mut U,
    {
        ReadLock {
            inner: self.inner.map(|ptr| unsafe { f(&mut *ptr) as *mut _ }),
            lock: self.lock,
        }
    }
}

impl<'a, T: ?Sized + 'a> Deref for ReadLock<'a, T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        self.inner.deref()
    }
}

/// Error when attempting to CPU-read a buffer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ReadLockError {
    /// The buffer is already locked for write mode by the CPU.
    CpuWriteLocked,
    /// The buffer is already locked for write mode by the GPU.
    GpuWriteLocked,
}

impl error::Error for ReadLockError {}

impl fmt::Display for ReadLockError {
    #[inline]
    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(
            fmt,
            "{}",
            match *self {
                ReadLockError::CpuWriteLocked => {
                    "the buffer is already locked for write mode by the CPU"
                }
                ReadLockError::GpuWriteLocked => {
                    "the buffer is already locked for write mode by the GPU"
                }
            }
        )
    }
}

/// Object that can be used to read or write the content of a `CpuAccessibleBuffer`.
///
/// Note that this object holds a rwlock write guard on the chunk. If another thread tries to
/// access this buffer's content or tries to submit a GPU command that uses this buffer, it will
/// block.
pub struct WriteLock<'a, T: ?Sized + 'a> {
    inner: MemCpuAccess<'a, T>,
    lock: RwLockWriteGuard<'a, CurrentGpuAccess>,
}

impl<'a, T: ?Sized + 'a> WriteLock<'a, T> {
    /// Makes a new `WriteLock` to access a sub-part of the current `WriteLock`.
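    ///
    /// # Example
    ///
    /// A sketch, assuming `buffer` is an `Arc<CpuAccessibleBuffer<[u32]>>` with at least one
    /// element:
    ///
    /// ```ignore
    /// let lock = buffer.write().unwrap();
    /// // Narrow the guard down to just the first element of the slice.
    /// let mut first = lock.map(|slice| &mut slice[0]);
    /// *first = 7;
    /// ```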
    #[inline]
    pub fn map<U: ?Sized + 'a, F>(self, f: F) -> WriteLock<'a, U>
    where
        F: FnOnce(&mut T) -> &mut U,
    {
        WriteLock {
            inner: self.inner.map(|ptr| unsafe { f(&mut *ptr) as *mut _ }),
            lock: self.lock,
        }
    }
}

impl<'a, T: ?Sized + 'a> Deref for WriteLock<'a, T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        self.inner.deref()
    }
}

impl<'a, T: ?Sized + 'a> DerefMut for WriteLock<'a, T> {
    #[inline]
    fn deref_mut(&mut self) -> &mut T {
        self.inner.deref_mut()
    }
}

/// Error when attempting to CPU-write a buffer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WriteLockError {
    /// The buffer is already locked by the CPU.
    CpuLocked,
    /// The buffer is already locked by the GPU.
    GpuLocked,
}

impl error::Error for WriteLockError {}

impl fmt::Display for WriteLockError {
    #[inline]
    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(
            fmt,
            "{}",
            match *self {
                WriteLockError::CpuLocked => "the buffer is already locked by the CPU",
                WriteLockError::GpuLocked => "the buffer is already locked by the GPU",
            }
        )
    }
}

#[cfg(test)]
mod tests {
    use crate::buffer::{BufferUsage, CpuAccessibleBuffer};

    #[test]
    fn create_empty_buffer() {
        let (device, queue) = gfx_dev_and_queue!();

        const EMPTY: [i32; 0] = [];

        let _ = CpuAccessibleBuffer::from_data(device.clone(), BufferUsage::all(), false, EMPTY);
        let _ = CpuAccessibleBuffer::from_iter(device, BufferUsage::all(), false, EMPTY.iter());
    }
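
    // An extra sanity check in the same vein as the test above (it assumes the same
    // `gfx_dev_and_queue!` harness): data written through a `WriteLock` must be observable
    // through a subsequent `ReadLock`.
    #[test]
    fn write_then_read() {
        let (device, _queue) = gfx_dev_and_queue!();

        let buffer =
            CpuAccessibleBuffer::from_data(device, BufferUsage::all(), false, 0u32).unwrap();

        {
            let mut content = buffer.write().unwrap();
            *content = 42;
        } // drop the write lock before reading

        assert_eq!(*buffer.read().unwrap(), 42);
    }
}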