/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef VK_DEVICE_H
#define VK_DEVICE_H

#include "rmv/vk_rmv_common.h"
#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"

#ifdef __cplusplus
extern "C" {
#endif

struct vk_command_buffer_ops;
struct vk_device_shader_ops;
struct vk_sync;
enum vk_queue_submit_mode {
   /** Submits happen immediately
    *
    * `vkQueueSubmit()` and `vkQueueBindSparse()` call
    * ``vk_queue::driver_submit`` directly for all submits and the last call to
    * ``vk_queue::driver_submit`` will have completed by the time
    * `vkQueueSubmit()` or `vkQueueBindSparse()` return.
    */
   VK_QUEUE_SUBMIT_MODE_IMMEDIATE,

   /** Submits may be deferred until a future `vk_queue_flush()`
    *
    * Submits are added to the queue and `vk_queue_flush()` is called.
    * However, any submits with unsatisfied dependencies will be left on the
    * queue until a future `vk_queue_flush()` call.  This is used for
    * implementing emulated timeline semaphores without threading.
    */
   VK_QUEUE_SUBMIT_MODE_DEFERRED,

   /** Submits will be added to the queue and handled later by a thread
    *
    * This places additional requirements on the vk_sync types used by the
    * driver:
    *
    * 1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` also
    *    support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads can
    *    sort out when a given submit has all its dependencies resolved.
    *
    * 2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
    *    also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset
    *    semaphores after waiting on them.
    *
    * 3. All vk_sync types used as permanent payloads of semaphores support
    *    ``vk_sync_type::move`` so that it can move the pending signal into a
    *    temporary vk_sync and reset the semaphore.
    *
    * This is required for shared timeline semaphores, where we need to
    * handle wait-before-signal by threading in the driver if we ever see an
    * unresolved dependency.  A sketch of a vk_sync_type satisfying these
    * requirements follows this enum.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED,

   /** Threaded but only if we need it to resolve dependencies
    *
    * This imposes all the same requirements on `vk_sync` types as
    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};
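
/* A minimal sketch of a `vk_sync_type` that would meet the
 * VK_QUEUE_SUBMIT_MODE_THREADED requirements above.  The field names and
 * VK_SYNC_FEATURE_* flags come from vk_sync.h; the drv_* type and callbacks
 * are hypothetical driver code, not part of this header:
 *
 *    static const struct vk_sync_type drv_bo_sync_type = {
 *       .size = sizeof(struct drv_bo_sync),
 *       .features = VK_SYNC_FEATURE_BINARY |
 *                   VK_SYNC_FEATURE_GPU_WAIT |
 *                   VK_SYNC_FEATURE_CPU_WAIT |
 *                   VK_SYNC_FEATURE_CPU_RESET |      // requirement 2
 *                   VK_SYNC_FEATURE_WAIT_PENDING,    // requirement 1
 *       .init = drv_bo_sync_init,
 *       .finish = drv_bo_sync_finish,
 *       .reset = drv_bo_sync_reset,
 *       .move = drv_bo_sync_move,                    // requirement 3
 *       .wait = drv_bo_sync_wait,
 *    };
 */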

/** Base struct for VkDevice */
struct vk_device {
   struct vk_object_base base;

   /** Allocator used to create this device
    *
    * This is used as a fall-back for when a NULL pAllocator is passed into a
    * device-level create function such as vkCreateImage().
    */
   VkAllocationCallbacks alloc;

   /** Pointer to the physical device */
   struct vk_physical_device *physical;

   /** Table of enabled extensions */
   struct vk_device_extension_table enabled_extensions;

   /** Table of enabled features */
   struct vk_features enabled_features;

   /** Device-level dispatch table */
   struct vk_device_dispatch_table dispatch_table;

   /** Command dispatch table
    *
    * This is used for emulated secondary command buffer support.  To use
    * emulated (trace/replay) secondary command buffers:
    *
    * 1. Provide your "real" command buffer dispatch table here.  Because
    *    this doesn't get populated by vk_device_init(), the driver will have
    *    to add the vk_common entrypoints to this table itself.
    *
    * 2. Add vk_enqueue_unless_primary_device_entrypoint_table to your
    *    device-level dispatch table.
    */
   const struct vk_device_dispatch_table *command_dispatch_table;
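
   /* As a sketch, a driver opting into emulated secondary command buffers
    * might set this table up at device-init time as follows.  The
    * drv_device_entrypoints table and the dev/cmd_dispatch fields are
    * hypothetical; vk_device_dispatch_table_from_entrypoints() and
    * vk_common_device_entrypoints come from vk_dispatch_table.h:
    *
    *    vk_device_dispatch_table_from_entrypoints(
    *       &dev->cmd_dispatch, &drv_device_entrypoints, true);
    *    vk_device_dispatch_table_from_entrypoints(
    *       &dev->cmd_dispatch, &vk_common_device_entrypoints, false);
    *    dev->vk.command_dispatch_table = &dev->cmd_dispatch;
    */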

   /** Command buffer vtable when using the common command pool */
   const struct vk_command_buffer_ops *command_buffer_ops;

   /** Shader vtable for VK_EXT_shader_object and common pipelines */
   const struct vk_device_shader_ops *shader_ops;

   /** Driver provided callback for capturing traces
    *
    * Triggers for this callback are:
    *    - Keyboard input (F12)
    *    - Creation of a trigger file
    *    - Reaching the trace frame
    */
   VkResult (*capture_trace)(VkQueue queue);

   uint32_t current_frame;
   bool trace_hotkey_trigger;
   simple_mtx_t trace_mtx;

   /* For VK_EXT_private_data */
   uint32_t private_data_next_index;

   struct list_head queues;

   struct {
      int lost;
      bool reported;
   } _lost;

   /** Checks the status of this device
    *
    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    * It is called before ``vk_queue::driver_submit`` and after every
    * non-trivial wait operation to ensure the device is still around.  This
    * gives the driver a hook to ask the kernel if its device is still valid.
    * If the kernel says the device has been lost, this hook MUST call
    * vk_device_set_lost().
    *
    * This function may be called from any thread at any time.
    */
   VkResult (*check_status)(struct vk_device *device);
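
   /* For illustration, a driver implementation might look like this
    * (drv_kernel_says_device_lost() is a hypothetical helper wrapping
    * whatever reset/hang query the driver's kernel interface provides):
    *
    *    static VkResult
    *    drv_check_status(struct vk_device *device)
    *    {
    *       if (drv_kernel_says_device_lost(device))
    *          return vk_device_set_lost(device, "GPU hang detected");
    *       return VK_SUCCESS;
    *    }
    */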

   /** Creates a vk_sync that wraps a memory object
    *
    * This is always a one-shot object so it need not track any additional
    * state.  Since it's intended for synchronizing between processes using
    * implicit synchronization mechanisms, no such tracking would be valid
    * anyway.
    *
    * If `signal_memory` is set, the resulting vk_sync will be used to signal
    * the memory object from a queue via ``vk_queue_submit::signals``.  The
    * common code guarantees that, by the time vkQueueSubmit() returns, the
    * signal operation has been submitted to the kernel via the driver's
    * ``vk_queue::driver_submit`` hook.  This means that any vkQueueSubmit()
    * call which needs implicit synchronization may block.
    *
    * If `signal_memory` is not set, it can be assumed that the memory object
    * already has a signal operation pending from some other process and we
    * need only wait on it.
    */
   VkResult (*create_sync_for_memory)(struct vk_device *device,
                                      VkDeviceMemory memory,
                                      bool signal_memory,
                                      struct vk_sync **sync_out);
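
   /* A sketch of the shape of this hook; the drv_* helpers and types are
    * hypothetical driver code:
    *
    *    static VkResult
    *    drv_create_sync_for_memory(struct vk_device *device,
    *                               VkDeviceMemory memory, bool signal_memory,
    *                               struct vk_sync **sync_out)
    *    {
    *       struct drv_device_memory *mem =
    *          drv_device_memory_from_handle(memory);
    *       return drv_bo_sync_create(device, mem->bo, signal_memory,
    *                                 sync_out);
    *    }
    */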

   /* Set by vk_device_set_drm_fd() */
   int drm_fd;

   /** An enum describing how timeline semaphores work */
   enum vk_device_timeline_mode {
      /** Timeline semaphores are not supported */
      VK_DEVICE_TIMELINE_MODE_NONE,

      /** Timeline semaphores are emulated with vk_timeline
       *
       * In this mode, timeline semaphores are emulated using vk_timeline
       * which is a collection of binary semaphores, one per time point.
       * These timeline semaphores cannot be shared because the data
       * structure exists entirely in userspace.  These timelines are
       * virtually invisible to the driver; all it sees are the binary
       * vk_syncs, one per time point.
       *
       * To handle wait-before-signal, we place all vk_queue_submits in the
       * queue's submit list in vkQueueSubmit() and call vk_device_flush() at
       * key points such as the end of vkQueueSubmit() and
       * vkSemaphoreSignal().  This ensures that, as soon as a given submit's
       * dependencies are fully resolvable, it gets submitted to the driver.
       */
      VK_DEVICE_TIMELINE_MODE_EMULATED,

      /** Timeline semaphores are a kernel-assisted emulation
       *
       * In this mode, timeline semaphores are still technically an emulation
       * in the sense that they don't support wait-before-signal natively.
       * Instead, all GPU-waitable objects support a CPU wait-for-pending
       * operation which lets the userspace driver wait until a given event
       * on the (possibly shared) vk_sync is pending.  The event is "pending"
       * if a job has been submitted to the kernel (possibly from a different
       * process) which will signal it.  In vkQueueSubmit(), we use this wait
       * mode to detect waits which are not yet pending and, the first time
       * we do, spawn a thread to manage the queue.  That thread waits for
       * each submit's waits to all be pending before submitting to the
       * driver queue.
       *
       * We have to be a bit more careful about a few things in this mode.
       * In particular, we can never assume that any given wait operation is
       * pending.  For instance, when we go to export a sync file from a
       * binary semaphore, we need to first wait for it to be pending.  The
       * spec guarantees that the vast majority of these waits return almost
       * immediately, but we do need to insert them for correctness.
       */
      VK_DEVICE_TIMELINE_MODE_ASSISTED,

      /** Timeline semaphores are 100% native
       *
       * In this mode, wait-before-signal is natively supported by the
       * underlying timeline implementation.  We can submit-and-forget and
       * assume that dependencies will get resolved for us by the kernel.
       * Currently, this isn't supported by any Linux primitives.
       */
      VK_DEVICE_TIMELINE_MODE_NATIVE,
   } timeline_mode;

   /** Per-device submit mode
    *
    * This represents the device-wide submit strategy which may be different
    * from the per-queue submit mode.  See vk_queue.submit.mode for more
    * details.
    */
   enum vk_queue_submit_mode submit_mode;

   struct vk_memory_trace_data memory_trace_data;

   mtx_t swapchain_private_mtx;
   struct hash_table *swapchain_private;
   mtx_t swapchain_name_mtx;
   struct hash_table *swapchain_name;
};

VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
                       VK_OBJECT_TYPE_DEVICE);

/** Initialize a vk_device
 *
 * Along with initializing the data structures in `vk_device`, this function
 * checks that every extension specified by
 * ``VkDeviceCreateInfo::ppEnabledExtensionNames`` is actually supported by
 * the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
 * unsupported extension is requested.  It also checks all the feature
 * structs chained into `pCreateInfo->pNext` against the features returned
 * by `vkGetPhysicalDeviceFeatures2` and returns
 * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
 *
 * :param device:            |out| The device to initialize
 * :param physical_device:   |in|  The physical device
 * :param dispatch_table:    |in|  Device-level dispatch table
 * :param pCreateInfo:       |in|  VkDeviceCreateInfo pointer passed to
 *                                 `vkCreateDevice()`
 * :param alloc:             |in|  Allocation callbacks passed to
 *                                 `vkCreateDevice()`
 */
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
               struct vk_physical_device *physical_device,
               const struct vk_device_dispatch_table *dispatch_table,
               const VkDeviceCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *alloc);
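
/* For illustration, a typical call sequence from a driver's vkCreateDevice
 * (the drv_* names are hypothetical;
 * vk_device_dispatch_table_from_entrypoints() and
 * vk_common_device_entrypoints come from vk_dispatch_table.h):
 *
 *    struct vk_device_dispatch_table dispatch_table;
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &drv_device_entrypoints, true);
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &vk_common_device_entrypoints, false);
 *
 *    VkResult result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table,
 *                                     pCreateInfo, pAllocator);
 *    if (result != VK_SUCCESS)
 *       return result;
 */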

static inline void
vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
{
   device->drm_fd = drm_fd;
}

/** Tears down a vk_device
 *
 * :param device: |out| The device to tear down
 */
void
vk_device_finish(struct vk_device *device);

/** Enables threaded submit on this device
 *
 * This doesn't ensure that threaded submit will be used.  It just disables
 * the deferred submit option for emulated timeline semaphores and forces
 * them to always use the threaded path.  It also does some checks that the
 * vk_sync types used by the driver work for threaded submit.
 *
 * This must be called before any queues are created.
 */
void vk_device_enable_threaded_submit(struct vk_device *device);
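
/* Illustrative placement in a driver's vkCreateDevice: after
 * vk_device_init() succeeds and before the first queue is created (the
 * condition shown is a hypothetical driver policy):
 *
 *    if (drv_wants_threaded_submit(dev))
 *       vk_device_enable_threaded_submit(&dev->vk);
 */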

static inline bool
vk_device_supports_threaded_submit(const struct vk_device *device)
{
   return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
          device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
}

VkResult vk_device_flush(struct vk_device *device);

VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
                    const char *file, int line,
                    const char *msg, ...);

#define vk_device_set_lost(device, ...) \
   _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)
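
/* Typical use, for illustration (the hang check is a hypothetical driver
 * helper; the macro's VkResult return lets the caller propagate
 * VK_ERROR_DEVICE_LOST directly):
 *
 *    if (drv_gpu_hung(device))
 *       return vk_device_set_lost(&device->vk, "GPU hang on queue %u",
 *                                 queue_idx);
 */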

void _vk_device_report_lost(struct vk_device *device);

static inline bool
vk_device_is_lost_no_report(struct vk_device *device)
{
   return p_atomic_read(&device->_lost.lost) > 0;
}

static inline bool
vk_device_is_lost(struct vk_device *device)
{
   int lost = vk_device_is_lost_no_report(device);
   if (unlikely(lost && !device->_lost.reported))
      _vk_device_report_lost(device);
   return lost;
}

static inline VkResult
vk_device_check_status(struct vk_device *device)
{
   if (vk_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (!device->check_status)
      return VK_SUCCESS;

   VkResult result = device->check_status(device);

   assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
   if (result == VK_ERROR_DEVICE_LOST)
      assert(vk_device_is_lost_no_report(device));

   return result;
}

#ifndef _WIN32

uint64_t
vk_clock_gettime(clockid_t clock_id);

static inline uint64_t
vk_time_max_deviation(uint64_t begin, uint64_t end, uint64_t max_clock_period)
{
   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock.  That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval.  Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                           s                 e
    *               w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *    Raw        -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                           g
    *               0         1         2         3
    *    GPU        -----_____-----_____-----_____-----_____
    *
    *                                   m
    *                         x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *    Monotonic            -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *    Interval                  <----------------->
    *    Deviation        <--------------------------->
    *
    *       s = read(raw)       2
    *       g = read(GPU)       1
    *       m = read(monotonic) 2
    *       e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */

   uint64_t sample_interval = end - begin + 1;

   return sample_interval + max_clock_period;
}
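
/* As a sketch, a driver implementing vkGetCalibratedTimestampsEXT might use
 * the two helpers above like this (drv_read_gpu_timestamp() and gpu_tick_ns
 * are hypothetical):
 *
 *    uint64_t begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *    for (uint32_t i = 0; i < timestampCount; i++) {
 *       switch (pTimestampInfos[i].timeDomain) {
 *       case VK_TIME_DOMAIN_DEVICE_EXT:
 *          pTimestamps[i] = drv_read_gpu_timestamp(device);
 *          break;
 *       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
 *          pTimestamps[i] = vk_clock_gettime(CLOCK_MONOTONIC);
 *          break;
 *       default:
 *          pTimestamps[i] = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *          break;
 *       }
 *    }
 *    uint64_t end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *    *pMaxDeviation = vk_time_max_deviation(begin, end, gpu_tick_ns);
 */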

#endif /* !_WIN32 */

PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
                        const char *name);

bool vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan11Properties *core);
bool vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan12Properties *core);
bool vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan13Properties *core);
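
/* Illustrative use when filling the pNext chain in
 * vkGetPhysicalDeviceProperties2 (core11/core12/core13 stand in for the
 * driver's pre-filled core property structs; vk_foreach_struct() comes from
 * vk_util.h):
 *
 *    vk_foreach_struct(ext, pProperties->pNext) {
 *       if (vk_get_physical_device_core_1_1_property_ext(ext, &core11))
 *          continue;
 *       if (vk_get_physical_device_core_1_2_property_ext(ext, &core12))
 *          continue;
 *       if (vk_get_physical_device_core_1_3_property_ext(ext, &core13))
 *          continue;
 *    }
 */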

#ifdef __cplusplus
}
#endif

#endif /* VK_DEVICE_H */