/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_queue.h"

#include "nvk_cmd_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_device.h"
#include "nvk_buffer.h"
#include "nvk_image.h"
#include "nvk_device_memory.h"
#include "nvk_physical_device.h"

#include "nouveau_context.h"

#include "drm-uapi/nouveau_drm.h"

#include "vk_drm_syncobj.h"

#include <xf86drm.h>

#define NVK_PUSH_MAX_SYNCS 16
#define NVK_PUSH_MAX_BINDS 4096
#define NVK_PUSH_MAX_PUSH 1024

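/* Accumulates everything needed for one kernel submission: the sync objects
 * to wait on and signal, the pushbuf ranges for DRM_NOUVEAU_EXEC, and the
 * VM bind operations for DRM_NOUVEAU_VM_BIND.  A builder is used for either
 * an exec or a vm_bind submission, never both; is_vmbind records which.
 */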
struct push_builder {
   uint32_t max_push;
   struct drm_nouveau_sync req_wait[NVK_PUSH_MAX_SYNCS];
   struct drm_nouveau_sync req_sig[NVK_PUSH_MAX_SYNCS];
   struct drm_nouveau_exec_push req_push[NVK_PUSH_MAX_PUSH];
   struct drm_nouveau_exec req;
   struct drm_nouveau_vm_bind vmbind;
   struct drm_nouveau_vm_bind_op bind_ops[NVK_PUSH_MAX_BINDS];
   bool is_vmbind;
};

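/* Sets up both request headers up front.  The exec and vm_bind requests
 * share the same wait/signal arrays; only the one matching is_vmbind is
 * ever submitted.  For vm_bind submissions max_push is zero since no
 * pushbufs may be queued; otherwise it is capped by both our own array
 * size and the device's reported max_push.
 */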
static void
push_builder_init(struct nvk_queue *queue,
                  struct push_builder *pb,
                  bool is_vmbind)
{
   struct nvk_device *dev = nvk_queue_device(queue);

   pb->max_push = is_vmbind ? 0 :
      MIN2(NVK_PUSH_MAX_PUSH, dev->ws_dev->max_push);
   pb->req = (struct drm_nouveau_exec) {
      .channel = queue->drm.ws_ctx->channel,
      .push_count = 0,
      .wait_count = 0,
      .sig_count = 0,
      .push_ptr = (uintptr_t)&pb->req_push,
      .wait_ptr = (uintptr_t)&pb->req_wait,
      .sig_ptr = (uintptr_t)&pb->req_sig,
   };
   pb->vmbind = (struct drm_nouveau_vm_bind) {
      .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
      .op_count = 0,
      .op_ptr = (uintptr_t)&pb->bind_ops,
      .wait_count = 0,
      .sig_count = 0,
      .wait_ptr = (uintptr_t)&pb->req_wait,
      .sig_ptr = (uintptr_t)&pb->req_sig,
   };
   pb->is_vmbind = is_vmbind;
}

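/* Adds a syncobj wait to the submission.  A non-zero wait_value selects a
 * timeline syncobj wait; zero means a binary syncobj.
 */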
static void
push_add_syncobj_wait(struct push_builder *pb,
                      uint32_t syncobj,
                      uint64_t wait_value)
{
   assert(pb->req.wait_count < NVK_PUSH_MAX_SYNCS);
   pb->req_wait[pb->req.wait_count++] = (struct drm_nouveau_sync) {
      .flags = wait_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ :
                            DRM_NOUVEAU_SYNC_SYNCOBJ,
      .handle = syncobj,
      .timeline_value = wait_value,
   };
}

static void
push_add_sync_wait(struct push_builder *pb,
                   struct vk_sync_wait *wait)
{
   struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(wait->sync);
   assert(sync != NULL);
   push_add_syncobj_wait(pb, sync->syncobj, wait->wait_value);
}

static void
push_add_sync_signal(struct push_builder *pb,
                     struct vk_sync_signal *sig)
{
   struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(sig->sync);
   assert(sync != NULL);
   assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS);
   pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) {
      .flags = sig->signal_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ :
                                   DRM_NOUVEAU_SYNC_SYNCOBJ,
      .handle = sync->syncobj,
      .timeline_value = sig->signal_value,
   };
}

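/* Turns a VkSparseBufferMemoryBindInfo into VM bind ops.  Binds with a
 * memory object become MAP ops at the buffer's VA plus resourceOffset;
 * binds with VK_NULL_HANDLE memory become UNMAP ops for the same range.
 */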
static void
push_add_buffer_bind(struct push_builder *pb,
                     VkSparseBufferMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_buffer, buffer, bind_info->buffer);
   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      const VkSparseMemoryBind *bind = &bind_info->pBinds[i];
      VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

      assert(bind->resourceOffset + bind->size <= buffer->vma_size_B);
      assert(!mem || bind->memoryOffset + bind->size <= mem->vk.size);

      assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS);
      pb->bind_ops[pb->vmbind.op_count++] = (struct drm_nouveau_vm_bind_op) {
         .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                     DRM_NOUVEAU_VM_BIND_OP_UNMAP,
         .handle = mem ? mem->bo->handle : 0,
         .addr = buffer->addr + bind->resourceOffset,
         .bo_offset = bind->memoryOffset,
         .range = bind->size,
      };
   }
}

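/* Applies the portion of an opaque image bind that overlaps a single image
 * plane.  The bind range is given in image-relative bytes, so it is first
 * clamped to the plane's [*image_plane_offset_B, +vma_size_B) window and
 * then translated into a plane-relative VM bind op.  Whether or not the
 * plane is touched, *image_plane_offset_B is advanced past it so the caller
 * can walk all planes in order.
 */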
static void
push_add_image_plane_opaque_bind(struct push_builder *pb,
                                 const struct nvk_image_plane *plane,
                                 const VkSparseMemoryBind *bind,
                                 uint64_t *image_plane_offset_B)
{
   *image_plane_offset_B = align64(*image_plane_offset_B, plane->nil.align_B);

   /* The offset of the bind range within the image */
   uint64_t image_bind_offset_B = bind->resourceOffset;
   uint64_t mem_bind_offset_B = bind->memoryOffset;
   uint64_t bind_size_B = bind->size;

   /* If the bind starts before the plane, clamp from below */
   if (image_bind_offset_B < *image_plane_offset_B) {
      /* The offset of the plane within the range being bound */
      const uint64_t bind_plane_offset_B =
         *image_plane_offset_B - image_bind_offset_B;

      /* If this plane lies above the bound range, skip this bind */
      if (bind_plane_offset_B >= bind_size_B)
         goto skip;

      image_bind_offset_B += bind_plane_offset_B;
      mem_bind_offset_B += bind_plane_offset_B;
      bind_size_B -= bind_plane_offset_B;

      assert(image_bind_offset_B == *image_plane_offset_B);
   }

   /* The offset of the bind range within the plane */
   const uint64_t plane_bind_offset_B =
      image_bind_offset_B - *image_plane_offset_B;

   /* The bound range lies above the plane */
   if (plane_bind_offset_B >= plane->vma_size_B)
      goto skip;

   /* Clamp the size to fit inside the plane */
   bind_size_B = MIN2(bind_size_B, plane->vma_size_B - plane_bind_offset_B);
   assert(bind_size_B > 0);

   VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

   assert(plane_bind_offset_B + bind_size_B <= plane->vma_size_B);
   assert(!mem || mem_bind_offset_B + bind_size_B <= mem->vk.size);

   assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS);
   pb->bind_ops[pb->vmbind.op_count++] = (struct drm_nouveau_vm_bind_op) {
      .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                  DRM_NOUVEAU_VM_BIND_OP_UNMAP,
      .handle = mem ? mem->bo->handle : 0,
      .addr = plane->addr + plane_bind_offset_B,
      .bo_offset = mem_bind_offset_B,
      .range = bind_size_B,
      .flags = plane->nil.pte_kind,
   };

skip:
   assert(plane->vma_size_B == plane->nil.size_B);
   *image_plane_offset_B += plane->nil.size_B;
}

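/* Splits each opaque image bind across the image's planes (and the stencil
 * copy temporary, if the image has one), which are laid out consecutively
 * in the image's VA range.
 */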
static void
push_add_image_opaque_bind(struct push_builder *pb,
                           VkSparseImageOpaqueMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_image, image, bind_info->image);
   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      uint64_t image_plane_offset_B = 0;
      for (unsigned plane = 0; plane < image->plane_count; plane++) {
         push_add_image_plane_opaque_bind(pb, &image->planes[plane],
                                          &bind_info->pBinds[i],
                                          &image_plane_offset_B);
      }
      if (image->stencil_copy_temp.nil.size_B > 0) {
         push_add_image_plane_opaque_bind(pb, &image->stencil_copy_temp,
                                          &bind_info->pBinds[i],
                                          &image_plane_offset_B);
      }
   }
}

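/* Queues a pushbuf range for DRM_NOUVEAU_EXEC.  Addresses and sizes must be
 * dword-aligned and a single range must stay below 2^23 bytes, the hardware
 * limit on all current GPUs.
 */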
static void
push_add_push(struct push_builder *pb, uint64_t addr, uint32_t range,
              bool no_prefetch)
{
   /* This is the hardware limit on all current GPUs */
   assert((addr % 4) == 0 && (range % 4) == 0);
   assert(range < (1u << 23));

   uint32_t flags = 0;
   if (no_prefetch)
      flags |= DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

   assert(pb->req.push_count < pb->max_push);
   pb->req_push[pb->req.push_count++] = (struct drm_nouveau_exec_push) {
      .va = addr,
      .va_len = range,
      .flags = flags,
   };
}

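/* Submits the accumulated bind ops with DRM_NOUVEAU_VM_BIND.  The wait and
 * signal arrays were filled through pb->req, so their counts are copied
 * over before the ioctl.  The bind runs asynchronously on the GPU
 * (DRM_NOUVEAU_VM_BIND_RUN_ASYNC); the sync parameter is currently unused.
 */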
static VkResult
bind_submit(struct nvk_queue *queue, struct push_builder *pb, bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   int err;

   pb->vmbind.wait_count = pb->req.wait_count;
   pb->vmbind.sig_count = pb->req.sig_count;
   err = drmCommandWriteRead(dev->ws_dev->fd,
                             DRM_NOUVEAU_VM_BIND,
                             &pb->vmbind, sizeof(pb->vmbind));
   if (err) {
      return vk_errorf(queue, VK_ERROR_UNKNOWN,
                       "DRM_NOUVEAU_VM_BIND failed: %m");
   }
   return VK_SUCCESS;
}

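/* Submits the accumulated pushbuf ranges with DRM_NOUVEAU_EXEC.  When sync
 * is set, a binary syncobj signal is appended, the CPU waits for it, and an
 * empty exec is submitted afterwards purely to check for errors; any
 * failure there is treated as a lost device.
 */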
static VkResult
push_submit(struct nvk_queue *queue, struct push_builder *pb, bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);

   int err;
   if (sync) {
      assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS);
      pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) {
         .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
         .handle = queue->drm.syncobj,
         .timeline_value = 0,
      };
   }
   err = drmCommandWriteRead(dev->ws_dev->fd,
                             DRM_NOUVEAU_EXEC,
                             &pb->req, sizeof(pb->req));
   if (err) {
      VkResult result = VK_ERROR_UNKNOWN;
      if (err == -ENODEV)
         result = VK_ERROR_DEVICE_LOST;
      return vk_errorf(queue, result,
                       "DRM_NOUVEAU_EXEC failed: %m");
   }
   if (sync) {
      err = drmSyncobjWait(dev->ws_dev->fd,
                           &queue->drm.syncobj, 1, INT64_MAX,
                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                           NULL);
      if (err) {
         return vk_errorf(queue, VK_ERROR_UNKNOWN,
                          "DRM_SYNCOBJ_WAIT failed: %m");
      }

      /* Push an empty exec again, just to check for errors.  Any failure
       * here is reported as a lost device.
       */
      struct drm_nouveau_exec empty = {
         .channel = pb->req.channel,
      };
      err = drmCommandWriteRead(dev->ws_dev->fd,
                                DRM_NOUVEAU_EXEC,
                                &empty, sizeof(empty));
      if (err) {
         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
                          "DRM_NOUVEAU_EXEC failed: %m");
      }
   }
   return VK_SUCCESS;
}

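/* Creates the per-queue nouveau context (channel) with the engines implied
 * by the queue's capability flags, plus a binary syncobj used for
 * synchronous submits.
 */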
VkResult
nvk_queue_init_drm_nouveau(struct nvk_device *dev,
                           struct nvk_queue *queue,
                           VkQueueFlags queue_flags)
{
   VkResult result;
   int err;

   enum nouveau_ws_engines engines = 0;
   if (queue_flags & VK_QUEUE_GRAPHICS_BIT)
      engines |= NOUVEAU_WS_ENGINE_3D;
   if (queue_flags & VK_QUEUE_COMPUTE_BIT)
      engines |= NOUVEAU_WS_ENGINE_COMPUTE;
   if (queue_flags & VK_QUEUE_TRANSFER_BIT)
      engines |= NOUVEAU_WS_ENGINE_COPY;

   err = nouveau_ws_context_create(dev->ws_dev, engines, &queue->drm.ws_ctx);
   if (err != 0) {
      if (err == -ENOSPC)
         return vk_error(dev, VK_ERROR_TOO_MANY_OBJECTS);
      else
         return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   err = drmSyncobjCreate(dev->ws_dev->fd, 0, &queue->drm.syncobj);
   if (err < 0) {
      result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_context;
   }

   return VK_SUCCESS;

fail_context:
   nouveau_ws_context_destroy(queue->drm.ws_ctx);

   return result;
}

void
nvk_queue_finish_drm_nouveau(struct nvk_device *dev,
                             struct nvk_queue *queue)
{
   ASSERTED int err = drmSyncobjDestroy(dev->ws_dev->fd, queue->drm.syncobj);
   assert(err == 0);
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
}

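/* Synchronously executes a single pushbuf from an internal BO and waits for
 * it to complete.  The extra BO parameters are unused here, presumably
 * because residency is handled through VM_BIND rather than per-submit BO
 * lists.
 */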
VkResult
nvk_queue_submit_simple_drm_nouveau(struct nvk_queue *queue,
                                    uint32_t push_dw_count,
                                    struct nouveau_ws_bo *push_bo,
                                    uint32_t extra_bo_count,
                                    struct nouveau_ws_bo **extra_bos)
{
   struct push_builder pb;
   push_builder_init(queue, &pb, false);

   push_add_push(&pb, push_bo->offset, push_dw_count * 4, false);

   return push_submit(queue, &pb, true);
}

static void
push_add_queue_state(struct push_builder *pb, struct nvk_queue_state *qs)
{
   if (qs->push.bo)
      push_add_push(pb, qs->push.bo->offset, qs->push.dw_count * 4, false);
}

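/* Main submit entry point.  Sparse binds and command buffers never appear
 * in the same vk_queue_submit (see the asserts below), so the submit is
 * routed either to DRM_NOUVEAU_VM_BIND (buffer and opaque image binds) or
 * to DRM_NOUVEAU_EXEC (queue state plus command buffer pushes), with the
 * submit's waits and signals attached to whichever ioctl is used.
 */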
VkResult
nvk_queue_submit_drm_nouveau(struct nvk_queue *queue,
                             struct vk_queue_submit *submit,
                             bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct push_builder pb;
   VkResult result;

   uint64_t upload_time_point;
   result = nvk_upload_queue_flush(dev, &dev->upload, &upload_time_point);
   if (result != VK_SUCCESS)
      return result;

   const bool is_vmbind = submit->buffer_bind_count > 0 ||
                          submit->image_opaque_bind_count > 0;
   push_builder_init(queue, &pb, is_vmbind);

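   /* Make the exec wait on the upload queue's syncobj at the time point we
    * just flushed so anything staged through the upload queue lands before
    * this submit executes.
    */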
   if (!is_vmbind && upload_time_point > 0)
      push_add_syncobj_wait(&pb, dev->upload.drm.syncobj, upload_time_point);

   for (uint32_t i = 0; i < submit->wait_count; i++)
      push_add_sync_wait(&pb, &submit->waits[i]);

   if (is_vmbind) {
      assert(submit->command_buffer_count == 0);

      for (uint32_t i = 0; i < submit->buffer_bind_count; i++)
         push_add_buffer_bind(&pb, &submit->buffer_binds[i]);

      for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++)
         push_add_image_opaque_bind(&pb, &submit->image_opaque_binds[i]);
   } else if (submit->command_buffer_count > 0) {
      assert(submit->buffer_bind_count == 0);
      assert(submit->image_opaque_bind_count == 0);

      push_add_queue_state(&pb, &queue->state);

      for (unsigned i = 0; i < submit->command_buffer_count; i++) {
         struct nvk_cmd_buffer *cmd =
            container_of(submit->command_buffers[i], struct nvk_cmd_buffer, vk);

         util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, push) {
            if (push->range == 0)
               continue;

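            /* The push array is full; flush what we have as its own exec
             * and start a new builder for the remaining pushes.
             */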
            if (pb.req.push_count >= pb.max_push) {
               result = push_submit(queue, &pb, sync);
               if (result != VK_SUCCESS)
                  return result;

               push_builder_init(queue, &pb, is_vmbind);
            }

            push_add_push(&pb, push->addr, push->range, push->no_prefetch);
         }
      }
   }

   for (uint32_t i = 0; i < submit->signal_count; i++)
      push_add_sync_signal(&pb, &submit->signals[i]);

   if (is_vmbind)
      return bind_submit(queue, &pb, sync);
   else
      return push_submit(queue, &pb, sync);
}