// Copyright (c) 2016 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! Buffer whose content is read and written by the GPU only.
//!
//! Each access from the CPU or from the GPU locks the whole buffer for either reading or writing.
//! You can read the buffer multiple times simultaneously from multiple queues. Trying to read and
//! write simultaneously, or to write and write simultaneously, will block with a semaphore.

use crate::buffer::sys::BufferCreationError;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::buffer::BufferUsage;
use crate::device::physical::QueueFamily;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPoolAlloc;
use crate::memory::{DedicatedAlloc, MemoryRequirements};
use crate::memory::{DeviceMemoryAllocError, ExternalMemoryHandleType};
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use smallvec::SmallVec;
use std::fs::File;
use std::hash::Hash;
use std::hash::Hasher;
use std::marker::PhantomData;
use std::mem;
use std::sync::Arc;
use std::sync::Mutex;

/// Buffer whose content is in device-local memory.
///
/// This buffer type is useful in order to store intermediary data. For example, you can execute a
/// compute shader that writes to this buffer, then read the content of the buffer in a following
/// compute or graphics pipeline.
///
/// The `DeviceLocalBuffer` will be in device-local memory, unless the device doesn't provide any
/// device-local memory.
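///
/// # Examples
///
/// A minimal sketch of the typical creation pattern, assuming a `device` and a `queue` have
/// already been set up (those names are illustrative and not part of this module):
///
/// ```ignore
/// use vulkano::buffer::{BufferUsage, DeviceLocalBuffer};
///
/// // Reserve room for 1024 `u32` values in device-local memory. The contents are
/// // uninitialized until a GPU command (e.g. a compute dispatch) writes to them.
/// let buffer = DeviceLocalBuffer::<[u32]>::array(
///     device.clone(),
///     1024,
///     BufferUsage::storage_buffer(),
///     Some(queue.family()),
/// )
/// .unwrap();
/// ```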
#[derive(Debug)]
pub struct DeviceLocalBuffer<T: ?Sized, A = PotentialDedicatedAllocation<StdMemoryPoolAlloc>> {
    // Inner content.
    inner: UnsafeBuffer,

    // The memory held by the buffer.
    memory: A,

    // Queue families allowed to access this buffer.
    queue_families: SmallVec<[u32; 4]>,

    // Number of times this buffer is locked on the GPU side.
    gpu_lock: Mutex<GpuAccess>,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

#[derive(Debug, Copy, Clone)]
enum GpuAccess {
    None,
    NonExclusive { num: u32 },
    Exclusive { num: u32 },
}

impl<T> DeviceLocalBuffer<T> {
    /// Builds a new buffer. Only allowed for sized data.
    // TODO: unsafe because uninitialized data
    #[inline]
    pub fn new<'a, I>(
        device: Arc<Device>,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        unsafe {
            DeviceLocalBuffer::raw(
                device,
                mem::size_of::<T>() as DeviceSize,
                usage,
                queue_families,
            )
        }
    }
}

impl<T> DeviceLocalBuffer<[T]> {
    /// Builds a new buffer. Can be used for arrays.
    // TODO: unsafe because uninitialized data
    #[inline]
    pub fn array<'a, I>(
        device: Arc<Device>,
        len: DeviceSize,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<[T]>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        unsafe {
            DeviceLocalBuffer::raw(
                device,
                len * mem::size_of::<T>() as DeviceSize,
                usage,
                queue_families,
            )
        }
    }
}

impl<T: ?Sized> DeviceLocalBuffer<T> {
    /// Builds a new buffer without checking the size.
    ///
    /// # Safety
    ///
    /// You must ensure that the size that you pass is correct for `T`.
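    ///
    /// A sketch of a correct size computation for a sized element type (illustrative only;
    /// `device`, `usage`, and `queue_families` are assumed to be in scope, and this mirrors what
    /// [`DeviceLocalBuffer::new`] does internally):
    ///
    /// ```ignore
    /// let size = std::mem::size_of::<[f32; 4]>() as DeviceSize;
    /// let buffer = unsafe {
    ///     DeviceLocalBuffer::<[f32; 4]>::raw(device.clone(), size, usage, queue_families)
    /// }
    /// .unwrap();
    /// ```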
    pub unsafe fn raw<'a, I>(
        device: Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        let queue_families = queue_families
            .into_iter()
            .map(|f| f.id())
            .collect::<SmallVec<[u32; 4]>>();

        let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_families)?;

        let mem = MemoryPool::alloc_from_requirements(
            &Device::standard_pool(&device),
            &mem_reqs,
            AllocLayout::Linear,
            MappingRequirement::DoNotMap,
            DedicatedAlloc::Buffer(&buffer),
            |t| {
                if t.is_device_local() {
                    AllocFromRequirementsFilter::Preferred
                } else {
                    AllocFromRequirementsFilter::Allowed
                }
            },
        )?;
        debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
        buffer.bind_memory(mem.memory(), mem.offset())?;

        Ok(Arc::new(DeviceLocalBuffer {
            inner: buffer,
            memory: mem,
            queue_families,
            gpu_lock: Mutex::new(GpuAccess::None),
            marker: PhantomData,
        }))
    }

    /// Same as `raw`, but with an exportable file descriptor option for the allocated memory, on
    /// Linux.
    #[cfg(target_os = "linux")]
    pub unsafe fn raw_with_exportable_fd<'a, I>(
        device: Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        assert!(device.enabled_extensions().khr_external_memory_fd);
        assert!(device.enabled_extensions().khr_external_memory);

        let queue_families = queue_families
            .into_iter()
            .map(|f| f.id())
            .collect::<SmallVec<[u32; 4]>>();

        let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_families)?;

        let mem = MemoryPool::alloc_from_requirements_with_exportable_fd(
            &Device::standard_pool(&device),
            &mem_reqs,
            AllocLayout::Linear,
            MappingRequirement::DoNotMap,
            DedicatedAlloc::Buffer(&buffer),
            |t| {
                if t.is_device_local() {
                    AllocFromRequirementsFilter::Preferred
                } else {
                    AllocFromRequirementsFilter::Allowed
                }
            },
        )?;
        debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
        buffer.bind_memory(mem.memory(), mem.offset())?;

        Ok(Arc::new(DeviceLocalBuffer {
            inner: buffer,
            memory: mem,
            queue_families,
            gpu_lock: Mutex::new(GpuAccess::None),
            marker: PhantomData,
        }))
    }

    unsafe fn build_buffer(
        device: &Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        queue_families: &SmallVec<[u32; 4]>,
    ) -> Result<(UnsafeBuffer, MemoryRequirements), DeviceMemoryAllocError> {
        let (buffer, mem_reqs) = {
            let sharing = if queue_families.len() >= 2 {
                Sharing::Concurrent(queue_families.iter().cloned())
            } else {
                Sharing::Exclusive
            };

            match UnsafeBuffer::new(device.clone(), size, usage, sharing, None) {
                Ok(b) => b,
                Err(BufferCreationError::AllocError(err)) => return Err(err),
                // We don't use sparse binding, therefore the other errors can't happen.
                Err(_) => unreachable!(),
            }
        };
        Ok((buffer, mem_reqs))
    }

    /// Exports the POSIX file descriptor for the allocated memory.
    ///
    /// Requires the `khr_external_memory_fd` and `khr_external_memory` extensions to be enabled
    /// on the device. Only works on Linux.
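    ///
    /// A sketch of the intended use (illustrative; `buffer` is assumed to have been created with
    /// [`DeviceLocalBuffer::raw_with_exportable_fd`]):
    ///
    /// ```ignore
    /// // The returned `File` owns the exported POSIX file descriptor and can be
    /// // handed to another process or API for external-memory interop.
    /// let fd: std::fs::File = buffer.export_posix_fd().unwrap();
    /// ```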
    #[cfg(target_os = "linux")]
    pub fn export_posix_fd(&self) -> Result<File, DeviceMemoryAllocError> {
        self.memory
            .memory()
            .export_fd(ExternalMemoryHandleType::posix())
    }
}

impl<T: ?Sized, A> DeviceLocalBuffer<T, A> {
    /// Returns the queue families this buffer can be used on.
    // TODO: use a custom iterator
    #[inline]
    pub fn queue_families(&self) -> Vec<QueueFamily> {
        self.queue_families
            .iter()
            .map(|&num| {
                self.device()
                    .physical_device()
                    .queue_family_by_id(num)
                    .unwrap()
            })
            .collect()
    }
}

unsafe impl<T: ?Sized, A> DeviceOwned for DeviceLocalBuffer<T, A> {
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.inner.device()
    }
}

unsafe impl<T: ?Sized, A> BufferAccess for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        BufferInner {
            buffer: &self.inner,
            offset: 0,
        }
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.inner.size()
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        (self.inner.key(), 0)
    }

    #[inline]
    fn try_gpu_lock(&self, exclusive: bool, _: &Queue) -> Result<(), AccessError> {
        let mut lock = self.gpu_lock.lock().unwrap();
        match &mut *lock {
            // Not locked yet: grant the requested access mode.
            a @ &mut GpuAccess::None => {
                if exclusive {
                    *a = GpuAccess::Exclusive { num: 1 };
                } else {
                    *a = GpuAccess::NonExclusive { num: 1 };
                }

                Ok(())
            }
            // Already locked for reading: more readers are fine, a writer is not.
            &mut GpuAccess::NonExclusive { ref mut num } => {
                if exclusive {
                    Err(AccessError::AlreadyInUse)
                } else {
                    *num += 1;
                    Ok(())
                }
            }
            // Already locked for writing: no other access is allowed.
            &mut GpuAccess::Exclusive { .. } => Err(AccessError::AlreadyInUse),
        }
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        let mut lock = self.gpu_lock.lock().unwrap();
        match *lock {
            GpuAccess::None => panic!(),
            GpuAccess::NonExclusive { ref mut num } => {
                debug_assert!(*num >= 1);
                *num += 1;
            }
            GpuAccess::Exclusive { ref mut num } => {
                debug_assert!(*num >= 1);
                *num += 1;
            }
        }
    }

    #[inline]
    unsafe fn unlock(&self) {
        let mut lock = self.gpu_lock.lock().unwrap();

        match *lock {
            GpuAccess::None => panic!("Tried to unlock a buffer that isn't locked"),
            GpuAccess::NonExclusive { ref mut num } => {
                assert!(*num >= 1);
                *num -= 1;
                if *num >= 1 {
                    return;
                }
            }
            GpuAccess::Exclusive { ref mut num } => {
                assert!(*num >= 1);
                *num -= 1;
                if *num >= 1 {
                    return;
                }
            }
        };

        *lock = GpuAccess::None;
    }
}

unsafe impl<T: ?Sized, A> TypedBufferAccess for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    type Content = T;
}

impl<T: ?Sized, A> PartialEq for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T: ?Sized, A> Eq for DeviceLocalBuffer<T, A> where T: 'static + Send + Sync {}

impl<T: ?Sized, A> Hash for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}
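// A minimal test sketch (not from the original file). It assumes the crate's internal
// `gfx_dev_and_queue!` test macro, which other vulkano buffer modules' tests use to
// obtain a device and queue; the test names below are illustrative.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn create_array_buffer() {
        let (device, queue) = gfx_dev_and_queue!();

        // Reserve room for 128 `u32`s; the contents start out uninitialized.
        let buffer = DeviceLocalBuffer::<[u32]>::array(
            device,
            128,
            BufferUsage::all(),
            Some(queue.family()),
        )
        .unwrap();

        // 128 elements * 4 bytes each.
        assert_eq!(buffer.size(), 128 * 4);
    }
}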