/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef VK_DEVICE_H
#define VK_DEVICE_H

#include "rmv/vk_rmv_common.h"
#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"

#ifdef __cplusplus
extern "C" {
#endif

struct vk_command_buffer_ops;
struct vk_sync;

enum vk_queue_submit_mode {
   /** Submits happen immediately
    *
    * `vkQueueSubmit()` and `vkQueueBindSparse()` call
    * `vk_queue::driver_submit` directly for all submits and the last call to
    * `vk_queue::driver_submit` will have completed by the time
    * `vkQueueSubmit()` or `vkQueueBindSparse()` return.
    */
   VK_QUEUE_SUBMIT_MODE_IMMEDIATE,

   /** Submits may be deferred until a future `vk_queue_flush()`
    *
    * Submits are added to the queue and `vk_queue_flush()` is called.
    * However, any submits with unsatisfied dependencies will be left on the
    * queue until a future `vk_queue_flush()` call. This is used for
    * implementing emulated timeline semaphores without threading.
    */
   VK_QUEUE_SUBMIT_MODE_DEFERRED,

   /** Submits will be added to the queue and handled later by a thread
    *
    * This places additional requirements on the vk_sync types used by the
    * driver:
    *
    *    1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` also
    *       support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads can
    *       sort out when a given submit has all its dependencies resolved.
    *
    *    2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
    *       also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset
    *       semaphores after waiting on them.
    *
    *    3. All vk_sync types used as permanent payloads of semaphores support
    *       `vk_sync_type::move` so that it can move the pending signal into a
    *       temporary vk_sync and reset the semaphore.
    *
    * This is required for shared timeline semaphores, where we need to
    * handle wait-before-signal by threading in the driver if we ever see an
    * unresolved dependency.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED,

   /** Threaded but only if we need it to resolve dependencies
    *
    * This imposes all the same requirements on `vk_sync` types as
    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};

/** Base struct for VkDevice */
struct vk_device {
   struct vk_object_base base;

   /** Allocator used to create this device
    *
    * This is used as a fall-back for when a NULL pAllocator is passed into a
    * device-level create function such as vkCreateImage().
    */
   VkAllocationCallbacks alloc;

   /** Pointer to the physical device */
   struct vk_physical_device *physical;

   /** Table of enabled extensions */
   struct vk_device_extension_table enabled_extensions;

   /** Table of enabled features */
   struct vk_features enabled_features;

   /** Device-level dispatch table */
   struct vk_device_dispatch_table dispatch_table;

   /** Command dispatch table
    *
    * This is used for emulated secondary command buffer support. To use
    * emulated (trace/replay) secondary command buffers:
    *
    *    1. Provide your "real" command buffer dispatch table here. Because
    *       this doesn't get populated by vk_device_init(), the driver will
    *       have to add the vk_common entrypoints to this table itself.
    *
    *    2. Add vk_enqueue_unless_primary_device_entrypoint_table to your
    *       device-level dispatch table.
    */
   const struct vk_device_dispatch_table *command_dispatch_table;

   /** Command buffer vtable when using the common command pool */
   const struct vk_command_buffer_ops *command_buffer_ops;

   /** Driver-provided callback for capturing traces
    *
    * Triggers for this callback are:
    *    - Keyboard input (F12)
    *    - Creation of a trigger file
    *    - Reaching the trace frame
    */
   VkResult (*capture_trace)(VkQueue queue);

   uint32_t current_frame;
   bool trace_hotkey_trigger;
   simple_mtx_t trace_mtx;

   /* For VK_EXT_private_data */
   uint32_t private_data_next_index;

   struct list_head queues;

   struct {
      int lost;
      bool reported;
   } _lost;

   /** Checks the status of this device
    *
    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    * It is called before vk_queue::driver_submit and after every non-trivial
    * wait operation to ensure the device is still around. This gives the
    * driver a hook to ask the kernel if its device is still valid. If the
    * kernel says the device has been lost, it MUST call vk_device_set_lost().
    *
    * This function may be called from any thread at any time.
    */
   VkResult (*check_status)(struct vk_device *device);

   /** Creates a vk_sync that wraps a memory object
    *
    * This is always a one-shot object so it need not track any additional
    * state. Since it's intended for synchronizing between processes using
    * implicit synchronization mechanisms, no such tracking would be valid
    * anyway.
    *
    * If `signal_memory` is set, the resulting vk_sync will be used to signal
    * the memory object from a queue via vk_queue_submit::signals. The common
    * code guarantees that, by the time vkQueueSubmit() returns, the signal
    * operation has been submitted to the kernel via the driver's
    * vk_queue::driver_submit hook. This means that any vkQueueSubmit() call
    * which needs implicit synchronization may block.
    *
    * If `signal_memory` is not set, it can be assumed that the memory object
    * already has a signal operation pending from some other process and we
    * need only wait on it.
    */
   VkResult (*create_sync_for_memory)(struct vk_device *device,
                                      VkDeviceMemory memory,
                                      bool signal_memory,
                                      struct vk_sync **sync_out);

   /* Set by vk_device_set_drm_fd() */
   int drm_fd;

   /** An enum describing how timeline semaphores work */
   enum vk_device_timeline_mode {
      /** Timeline semaphores are not supported */
      VK_DEVICE_TIMELINE_MODE_NONE,

      /** Timeline semaphores are emulated with vk_timeline
       *
       * In this mode, timeline semaphores are emulated using vk_timeline
       * which is a collection of binary semaphores, one per time point.
       * These timeline semaphores cannot be shared because the data structure
       * exists entirely in userspace. These timelines are virtually
       * invisible to the driver; all it sees are the binary vk_syncs, one per
       * time point.
       *
       * To handle wait-before-signal, we place all vk_queue_submits in the
       * queue's submit list in vkQueueSubmit() and call vk_device_flush() at
       * key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
       * This ensures that, as soon as a given submit's dependencies are fully
       * resolvable, it gets submitted to the driver.
       */
      VK_DEVICE_TIMELINE_MODE_EMULATED,

      /** Timeline semaphores are a kernel-assisted emulation
       *
       * In this mode, timeline semaphores are still technically an emulation
       * in the sense that they don't support wait-before-signal natively.
       * Instead, all GPU-waitable objects support a CPU wait-for-pending
       * operation which lets the userspace driver wait until a given event
       * on the (possibly shared) vk_sync is pending. The event is "pending"
       * if a job has been submitted to the kernel (possibly from a different
       * process) which will signal it. In vkQueueSubmit(), we use this wait
       * mode to detect waits which are not yet pending and, the first time we
       * do, spawn a thread to manage the queue. That thread waits for each
       * submit's waits to all be pending before submitting to the driver
       * queue.
       *
       * We have to be a bit more careful about a few things in this mode.
       * In particular, we can never assume that any given wait operation is
       * pending. For instance, when we go to export a sync file from a
       * binary semaphore, we need to first wait for it to be pending. The
       * spec guarantees that the vast majority of these waits return almost
       * immediately, but we do need to insert them for correctness.
       */
      VK_DEVICE_TIMELINE_MODE_ASSISTED,

      /** Timeline semaphores are 100% native
       *
       * In this mode, wait-before-signal is natively supported by the
       * underlying timeline implementation. We can submit-and-forget and
       * assume that dependencies will get resolved for us by the kernel.
       * Currently, this isn't supported by any Linux primitives.
       */
      VK_DEVICE_TIMELINE_MODE_NATIVE,
   } timeline_mode;

   /** Per-device submit mode
    *
    * This represents the device-wide submit strategy which may be different
    * from the per-queue submit mode. See vk_queue.submit.mode for more
    * details.
    */
   enum vk_queue_submit_mode submit_mode;

   struct vk_memory_trace_data memory_trace_data;

#ifdef ANDROID
   mtx_t swapchain_private_mtx;
   struct hash_table *swapchain_private;
#endif
};

VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
                       VK_OBJECT_TYPE_DEVICE);
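
/*
 * The macro above generates vk_device_to_handle() and
 * vk_device_from_handle(). A device-level entrypoint typically recovers the
 * base struct via the VK_FROM_HANDLE() helper from vk_object.h; a minimal
 * sketch, assuming a hypothetical drv_DestroyImage driver entrypoint:
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    drv_DestroyImage(VkDevice _device, VkImage _image,
 *                     const VkAllocationCallbacks *pAllocator)
 *    {
 *       VK_FROM_HANDLE(vk_device, device, _device);
 *       ...
 *    }
 */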

/** Initialize a vk_device
 *
 * Along with initializing the data structures in `vk_device`, this function
 * checks that every extension specified by
 * `VkDeviceCreateInfo::ppEnabledExtensionNames` is actually supported by
 * the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
 * unsupported extension is requested. It also checks all the feature structs
 * chained into the `pCreateInfo->pNext` chain against the features returned
 * by `vkGetPhysicalDeviceFeatures2` and returns
 * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
 *
 * @param[out] device           The device to initialize
 * @param[in]  physical_device  The physical device
 * @param[in]  dispatch_table   Device-level dispatch table
 * @param[in]  pCreateInfo      VkDeviceCreateInfo pointer passed to
 *                              `vkCreateDevice()`
 * @param[in]  alloc            Allocation callbacks passed to
 *                              `vkCreateDevice()`
 */
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
               struct vk_physical_device *physical_device,
               const struct vk_device_dispatch_table *dispatch_table,
               const VkDeviceCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *alloc);
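
/*
 * A minimal sketch of how a driver's vkCreateDevice() implementation might
 * call vk_device_init(); the drv_* names are hypothetical driver code, not
 * part of this API:
 *
 *    struct vk_device_dispatch_table dispatch_table;
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &drv_device_entrypoints, true);
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &vk_common_device_entrypoints, false);
 *
 *    VkResult result = vk_device_init(&device->vk, &physical_device->vk,
 *                                     &dispatch_table, pCreateInfo,
 *                                     pAllocator);
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 *    // Drivers without native timeline support that want threaded submit
 *    // must opt in before any queues are created (see
 *    // vk_device_enable_threaded_submit() below):
 *    vk_device_enable_threaded_submit(&device->vk);
 */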

static inline void
vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
{
   device->drm_fd = drm_fd;
}

/** Tears down a vk_device
 *
 * @param[out] device The device to tear down
 */
void
vk_device_finish(struct vk_device *device);

/** Enables threaded submit on this device
 *
 * This doesn't ensure that threaded submit will be used. It just disables
 * the deferred submit option for emulated timeline semaphores and forces
 * them to always use the threaded path. It also does some checks that the
 * vk_sync types used by the driver work for threaded submit.
 *
 * This must be called before any queues are created.
 */
void vk_device_enable_threaded_submit(struct vk_device *device);

static inline bool
vk_device_supports_threaded_submit(const struct vk_device *device)
{
   return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
          device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
}

VkResult vk_device_flush(struct vk_device *device);

VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
                    const char *file, int line,
                    const char *msg, ...);

#define vk_device_set_lost(device, ...) \
   _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)
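
/*
 * A sketch of typical usage when a driver detects device loss; the ioctl
 * and its failure mode are hypothetical, and the trailing arguments are a
 * printf-style message. _vk_device_set_lost() returns VK_ERROR_DEVICE_LOST,
 * so the result can be returned directly:
 *
 *    if (drmIoctl(device->vk.drm_fd, DRM_IOCTL_DRV_WAIT, &wait) < 0)
 *       return vk_device_set_lost(&device->vk, "wait ioctl failed: %m");
 */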

void _vk_device_report_lost(struct vk_device *device);

static inline bool
vk_device_is_lost_no_report(struct vk_device *device)
{
   return p_atomic_read(&device->_lost.lost) > 0;
}

static inline bool
vk_device_is_lost(struct vk_device *device)
{
   int lost = vk_device_is_lost_no_report(device);
   if (unlikely(lost && !device->_lost.reported))
      _vk_device_report_lost(device);
   return lost;
}

static inline VkResult
vk_device_check_status(struct vk_device *device)
{
   if (vk_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (!device->check_status)
      return VK_SUCCESS;

   VkResult result = device->check_status(device);

   assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
   if (result == VK_ERROR_DEVICE_LOST)
      assert(vk_device_is_lost_no_report(device));

   return result;
}
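
/*
 * A rough sketch of the driver-side vk_device::check_status hook that the
 * helper above wraps. The reset-stats ioctl and drv_device are hypothetical;
 * the key point is that the hook itself must call vk_device_set_lost() when
 * the kernel reports the device gone:
 *
 *    static VkResult
 *    drv_check_status(struct vk_device *vk_device)
 *    {
 *       struct drv_device *device =
 *          container_of(vk_device, struct drv_device, vk);
 *       struct drm_drv_reset_stats stats = {0};
 *
 *       if (drmIoctl(vk_device->drm_fd, DRM_IOCTL_DRV_RESET_STATS,
 *                    &stats) < 0 || stats.context_lost)
 *          return vk_device_set_lost(vk_device, "GPU hang detected");
 *
 *       return VK_SUCCESS;
 *    }
 */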

#ifndef _WIN32

uint64_t
vk_clock_gettime(clockid_t clock_id);

static inline uint64_t
vk_time_max_deviation(uint64_t begin, uint64_t end, uint64_t max_clock_period)
{
   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                          s                 e
    *              w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw        -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                          g
    *              0         1         2         3
    *   GPU        -----_____-----_____-----_____-----_____
    *
    *                                      m
    *                            x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic                -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval                 <----------------->
    *   Deviation        <-------------------------->
    *
    *      s = read(raw)       2
    *      g = read(GPU)       1
    *      m = read(monotonic) 2
    *      e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock
    */

   uint64_t sample_interval = end - begin + 1;

   return sample_interval + max_clock_period;
}
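
/*
 * A sketch of how the two helpers above combine in a driver's
 * vkGetCalibratedTimestampsEXT() implementation; drv_read_gpu_timestamp()
 * and max_gpu_clock_period are hypothetical driver details:
 *
 *    uint64_t begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *    for (uint32_t i = 0; i < timestampCount; i++) {
 *       switch (pTimestampInfos[i].timeDomain) {
 *       case VK_TIME_DOMAIN_DEVICE_EXT:
 *          pTimestamps[i] = drv_read_gpu_timestamp(device);
 *          break;
 *       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
 *          pTimestamps[i] = vk_clock_gettime(CLOCK_MONOTONIC);
 *          break;
 *       default:
 *          unreachable("unsupported time domain");
 *       }
 *    }
 *    uint64_t end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *
 *    *pMaxDeviation = vk_time_max_deviation(begin, end,
 *                                           max_gpu_clock_period);
 */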

#endif /* !_WIN32 */

PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
                        const char *name);

bool vk_get_physical_device_core_1_1_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan11Features *core);
bool vk_get_physical_device_core_1_2_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan12Features *core);
bool vk_get_physical_device_core_1_3_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan13Features *core);

bool vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan11Properties *core);
bool vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan12Properties *core);
bool vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan13Properties *core);

#ifdef __cplusplus
}
#endif

#endif /* VK_DEVICE_H */