// Copyright (c) 2016 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! Buffer whose content is read and written by the GPU only.
//!
//! Each access from the CPU or from the GPU locks the whole buffer for either reading or writing.
//! You can read the buffer simultaneously from multiple queues, but attempting to read and write
//! at the same time, or to write from two places at once, will block behind a semaphore.

use crate::buffer::sys::BufferCreationError;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::buffer::BufferUsage;
use crate::device::physical::QueueFamily;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPoolAlloc;
use crate::memory::{DedicatedAlloc, MemoryRequirements};
use crate::memory::{DeviceMemoryAllocError, ExternalMemoryHandleType};
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use smallvec::SmallVec;
use std::fs::File;
use std::hash::Hash;
use std::hash::Hasher;
use std::marker::PhantomData;
use std::mem;
use std::sync::Arc;
use std::sync::Mutex;

/// Buffer whose content is in device-local memory.
///
/// This buffer type is useful for storing intermediate data. For example, you can execute a
/// compute shader that writes to this buffer, then read its content in a subsequent compute or
/// graphics pipeline.
///
/// The `DeviceLocalBuffer` will be in device-local memory, unless the device doesn't provide any
/// device-local memory.
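///
/// # Example
///
/// A minimal creation sketch, assuming `device: Arc<Device>` and `queue: Arc<Queue>` have
/// already been set up; the usage flags are illustrative, and the buffer's contents start
/// out uninitialized:
///
/// ```no_run
/// # use std::sync::Arc;
/// # use vulkano::buffer::{BufferUsage, DeviceLocalBuffer};
/// # let device: Arc<vulkano::device::Device> = return;
/// # let queue: Arc<vulkano::device::Queue> = return;
/// // A storage buffer that a compute shader can fill and a later pass can read.
/// let usage = BufferUsage {
///     storage_buffer: true,
///     ..BufferUsage::none()
/// };
/// let buffer = DeviceLocalBuffer::<[f32]>::array(
///     device.clone(),
///     1024, // number of elements
///     usage,
///     Some(queue.family()),
/// )
/// .unwrap();
/// ```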
#[derive(Debug)]
pub struct DeviceLocalBuffer<T: ?Sized, A = PotentialDedicatedAllocation<StdMemoryPoolAlloc>> {
    // Inner content.
    inner: UnsafeBuffer,

    // The memory held by the buffer.
    memory: A,

    // Queue families allowed to access this buffer.
    queue_families: SmallVec<[u32; 4]>,

    // Number of times this buffer is locked on the GPU side.
    gpu_lock: Mutex<GpuAccess>,

    // Ties the buffer to the content type `T`; no `T` value is actually stored.
    marker: PhantomData<Box<T>>,
}

#[derive(Debug, Copy, Clone)]
enum GpuAccess {
    None,
    NonExclusive { num: u32 },
    Exclusive { num: u32 },
}

impl<T> DeviceLocalBuffer<T> {
    /// Builds a new buffer. Only allowed for sized data.
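    ///
    /// A minimal sketch, assuming `device` and `queue` already exist; the new buffer's
    /// contents start out uninitialized:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use vulkano::buffer::{BufferUsage, DeviceLocalBuffer};
    /// # let device: Arc<vulkano::device::Device> = return;
    /// # let queue: Arc<vulkano::device::Queue> = return;
    /// let buffer = DeviceLocalBuffer::<[f32; 4]>::new(
    ///     device.clone(),
    ///     BufferUsage::all(), // illustrative; enable only the usages you need
    ///     Some(queue.family()),
    /// )
    /// .unwrap();
    /// ```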
    // TODO: unsafe because uninitialized data
    #[inline]
    pub fn new<'a, I>(
        device: Arc<Device>,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        unsafe {
            DeviceLocalBuffer::raw(
                device,
                mem::size_of::<T>() as DeviceSize,
                usage,
                queue_families,
            )
        }
    }
}

impl<T> DeviceLocalBuffer<[T]> {
    /// Builds a new buffer. Can be used for arrays.
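    ///
    /// A minimal sketch, assuming `device` and `queue` already exist; `len` is a number of
    /// elements, not a size in bytes:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use vulkano::buffer::{BufferUsage, DeviceLocalBuffer};
    /// # let device: Arc<vulkano::device::Device> = return;
    /// # let queue: Arc<vulkano::device::Queue> = return;
    /// let buffer = DeviceLocalBuffer::<[u32]>::array(
    ///     device.clone(),
    ///     256, // 256 elements of `u32`
    ///     BufferUsage::all(), // illustrative; enable only the usages you need
    ///     Some(queue.family()),
    /// )
    /// .unwrap();
    /// ```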
    // TODO: unsafe because uninitialized data
    #[inline]
    pub fn array<'a, I>(
        device: Arc<Device>,
        len: DeviceSize,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<[T]>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        unsafe {
            DeviceLocalBuffer::raw(
                device,
                len * mem::size_of::<T>() as DeviceSize,
                usage,
                queue_families,
            )
        }
    }
}

impl<T: ?Sized> DeviceLocalBuffer<T> {
    /// Builds a new buffer without checking the size.
    ///
    /// # Safety
    ///
    /// You must ensure that the size that you pass is correct for `T`.
    ///
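    /// # Example
    ///
    /// A sketch of a call with a size that is correct for `T = [u32]`, assuming `device` and
    /// `queue` already exist:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use vulkano::buffer::{BufferUsage, DeviceLocalBuffer};
    /// # use vulkano::DeviceSize;
    /// # let device: Arc<vulkano::device::Device> = return;
    /// # let queue: Arc<vulkano::device::Queue> = return;
    /// let len: DeviceSize = 128;
    /// let buffer = unsafe {
    ///     DeviceLocalBuffer::<[u32]>::raw(
    ///         device.clone(),
    ///         len * std::mem::size_of::<u32>() as DeviceSize, // size in bytes
    ///         BufferUsage::all(), // illustrative
    ///         Some(queue.family()),
    ///     )
    /// }
    /// .unwrap();
    /// ```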
    pub unsafe fn raw<'a, I>(
        device: Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        let queue_families = queue_families
            .into_iter()
            .map(|f| f.id())
            .collect::<SmallVec<[u32; 4]>>();

        let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_families)?;

        let mem = MemoryPool::alloc_from_requirements(
            &Device::standard_pool(&device),
            &mem_reqs,
            AllocLayout::Linear,
            MappingRequirement::DoNotMap,
            DedicatedAlloc::Buffer(&buffer),
            |t| {
                if t.is_device_local() {
                    AllocFromRequirementsFilter::Preferred
                } else {
                    AllocFromRequirementsFilter::Allowed
                }
            },
        )?;
        debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
        buffer.bind_memory(mem.memory(), mem.offset())?;

        Ok(Arc::new(DeviceLocalBuffer {
            inner: buffer,
            memory: mem,
            queue_families,
            gpu_lock: Mutex::new(GpuAccess::None),
            marker: PhantomData,
        }))
    }

    /// Same as `raw`, but the allocated memory can be exported as a POSIX file descriptor
    /// (Linux only).
    #[cfg(target_os = "linux")]
    pub unsafe fn raw_with_exportable_fd<'a, I>(
        device: Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        queue_families: I,
    ) -> Result<Arc<DeviceLocalBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        assert!(device.enabled_extensions().khr_external_memory_fd);
        assert!(device.enabled_extensions().khr_external_memory);

        let queue_families = queue_families
            .into_iter()
            .map(|f| f.id())
            .collect::<SmallVec<[u32; 4]>>();

        let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_families)?;

        let mem = MemoryPool::alloc_from_requirements_with_exportable_fd(
            &Device::standard_pool(&device),
            &mem_reqs,
            AllocLayout::Linear,
            MappingRequirement::DoNotMap,
            DedicatedAlloc::Buffer(&buffer),
            |t| {
                if t.is_device_local() {
                    AllocFromRequirementsFilter::Preferred
                } else {
                    AllocFromRequirementsFilter::Allowed
                }
            },
        )?;
        debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
        buffer.bind_memory(mem.memory(), mem.offset())?;

        Ok(Arc::new(DeviceLocalBuffer {
            inner: buffer,
            memory: mem,
            queue_families,
            gpu_lock: Mutex::new(GpuAccess::None),
            marker: PhantomData,
        }))
    }

    unsafe fn build_buffer(
        device: &Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        queue_families: &SmallVec<[u32; 4]>,
    ) -> Result<(UnsafeBuffer, MemoryRequirements), DeviceMemoryAllocError> {
        let (buffer, mem_reqs) = {
            let sharing = if queue_families.len() >= 2 {
                Sharing::Concurrent(queue_families.iter().cloned())
            } else {
                Sharing::Exclusive
            };

            match UnsafeBuffer::new(device.clone(), size, usage, sharing, None) {
                Ok(b) => b,
                Err(BufferCreationError::AllocError(err)) => return Err(err),
                Err(_) => unreachable!(), // We don't use sparse binding, therefore the other
                                          // errors can't happen
            }
        };
        Ok((buffer, mem_reqs))
    }

    /// Exports a POSIX file descriptor for the allocated memory.
    ///
    /// Requires the `khr_external_memory_fd` and `khr_external_memory` extensions to be
    /// enabled on the device. Only works on Linux.
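    ///
    /// A Linux-only sketch, assuming the device was created with the `khr_external_memory`
    /// and `khr_external_memory_fd` extensions enabled:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use vulkano::buffer::{BufferUsage, DeviceLocalBuffer};
    /// # let device: Arc<vulkano::device::Device> = return;
    /// # let queue: Arc<vulkano::device::Queue> = return;
    /// let buffer = unsafe {
    ///     DeviceLocalBuffer::<[u8]>::raw_with_exportable_fd(
    ///         device.clone(),
    ///         1024, // size in bytes
    ///         BufferUsage::all(), // illustrative
    ///         Some(queue.family()),
    ///     )
    /// }
    /// .unwrap();
    /// let fd: std::fs::File = buffer.export_posix_fd().unwrap();
    /// ```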
    #[cfg(target_os = "linux")]
    pub fn export_posix_fd(&self) -> Result<File, DeviceMemoryAllocError> {
        self.memory
            .memory()
            .export_fd(ExternalMemoryHandleType::posix())
    }
}

impl<T: ?Sized, A> DeviceLocalBuffer<T, A> {
    /// Returns the queue families this buffer can be used on.
    // TODO: use a custom iterator
    #[inline]
    pub fn queue_families(&self) -> Vec<QueueFamily> {
        self.queue_families
            .iter()
            .map(|&num| {
                self.device()
                    .physical_device()
                    .queue_family_by_id(num)
                    .unwrap()
            })
            .collect()
    }
}

unsafe impl<T: ?Sized, A> DeviceOwned for DeviceLocalBuffer<T, A> {
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.inner.device()
    }
}

unsafe impl<T: ?Sized, A> BufferAccess for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        BufferInner {
            buffer: &self.inner,
            offset: 0,
        }
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.inner.size()
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        (self.inner.key(), 0)
    }

    #[inline]
    fn try_gpu_lock(&self, exclusive: bool, _: &Queue) -> Result<(), AccessError> {
        let mut lock = self.gpu_lock.lock().unwrap();
        match &mut *lock {
            // Not locked yet: take the lock in the requested mode.
            a @ &mut GpuAccess::None => {
                if exclusive {
                    *a = GpuAccess::Exclusive { num: 1 };
                } else {
                    *a = GpuAccess::NonExclusive { num: 1 };
                }

                Ok(())
            }
            // Locked for reading: more readers may join, but writers may not.
            &mut GpuAccess::NonExclusive { ref mut num } => {
                if exclusive {
                    Err(AccessError::AlreadyInUse)
                } else {
                    *num += 1;
                    Ok(())
                }
            }
            // Locked for writing: no other access is allowed.
            &mut GpuAccess::Exclusive { .. } => Err(AccessError::AlreadyInUse),
        }
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        let mut lock = self.gpu_lock.lock().unwrap();
        match *lock {
            GpuAccess::None => panic!(),
            GpuAccess::NonExclusive { ref mut num } => {
                debug_assert!(*num >= 1);
                *num += 1;
            }
            GpuAccess::Exclusive { ref mut num } => {
                debug_assert!(*num >= 1);
                *num += 1;
            }
        }
    }

    #[inline]
    unsafe fn unlock(&self) {
        let mut lock = self.gpu_lock.lock().unwrap();

        match *lock {
            GpuAccess::None => panic!("Tried to unlock a buffer that isn't locked"),
            GpuAccess::NonExclusive { ref mut num } => {
                assert!(*num >= 1);
                *num -= 1;
                if *num >= 1 {
                    return;
                }
            }
            GpuAccess::Exclusive { ref mut num } => {
                assert!(*num >= 1);
                *num -= 1;
                if *num >= 1 {
                    return;
                }
            }
        };

        // Reset to `None` only once the last lock holder has released.
        *lock = GpuAccess::None;
    }
}

unsafe impl<T: ?Sized, A> TypedBufferAccess for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    type Content = T;
}

impl<T: ?Sized, A> PartialEq for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T: ?Sized, A> Eq for DeviceLocalBuffer<T, A> where T: 'static + Send + Sync {}

impl<T: ?Sized, A> Hash for DeviceLocalBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}