/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_queue.h"

#include "nvk_cmd_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_device.h"
#include "nvk_buffer.h"
#include "nvk_image.h"
#include "nvk_device_memory.h"
#include "nvk_physical_device.h"

#include "nouveau_context.h"

#include "drm-uapi/nouveau_drm.h"

#include "vk_drm_syncobj.h"

#include <xf86drm.h>

#define NVK_PUSH_MAX_SYNCS 16
#define NVK_PUSH_MAX_BINDS 4096
#define NVK_PUSH_MAX_PUSH 1024

struct push_builder {
   uint32_t max_push;
   struct drm_nouveau_sync req_wait[NVK_PUSH_MAX_SYNCS];
   struct drm_nouveau_sync req_sig[NVK_PUSH_MAX_SYNCS];
   struct drm_nouveau_exec_push req_push[NVK_PUSH_MAX_PUSH];
   struct drm_nouveau_exec req;
   struct drm_nouveau_vm_bind vmbind;
   struct drm_nouveau_vm_bind_op bind_ops[NVK_PUSH_MAX_BINDS];
   bool is_vmbind;
};

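/* Reset a push_builder to an empty DRM_NOUVEAU_EXEC / DRM_NOUVEAU_VM_BIND
 * request targeting this queue's channel.  For VM_BIND-only submits,
 * max_push is zero so no command pushes may be added.
 */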
static void
push_builder_init(struct nvk_queue *queue,
                  struct push_builder *pb,
                  bool is_vmbind)
{
   struct nvk_device *dev = nvk_queue_device(queue);

   pb->max_push = is_vmbind ? 0 :
      MIN2(NVK_PUSH_MAX_PUSH, dev->ws_dev->max_push);
   pb->req = (struct drm_nouveau_exec) {
      .channel = queue->drm.ws_ctx->channel,
      .push_count = 0,
      .wait_count = 0,
      .sig_count = 0,
      .push_ptr = (uintptr_t)&pb->req_push,
      .wait_ptr = (uintptr_t)&pb->req_wait,
      .sig_ptr = (uintptr_t)&pb->req_sig,
   };
   pb->vmbind = (struct drm_nouveau_vm_bind) {
      .flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
      .op_count = 0,
      .op_ptr = (uintptr_t)&pb->bind_ops,
      .wait_count = 0,
      .sig_count = 0,
      .wait_ptr = (uintptr_t)&pb->req_wait,
      .sig_ptr = (uintptr_t)&pb->req_sig,
   };
   pb->is_vmbind = is_vmbind;
}

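/* Append a syncobj wait to the request.  A non-zero wait_value selects a
 * timeline syncobj; zero means a binary syncobj.
 */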
static void
push_add_syncobj_wait(struct push_builder *pb,
                      uint32_t syncobj,
                      uint64_t wait_value)
{
   assert(pb->req.wait_count < NVK_PUSH_MAX_SYNCS);
   pb->req_wait[pb->req.wait_count++] = (struct drm_nouveau_sync) {
      .flags = wait_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ :
                            DRM_NOUVEAU_SYNC_SYNCOBJ,
      .handle = syncobj,
      .timeline_value = wait_value,
   };
}

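/* Translate a vk_sync_wait into a syncobj wait; this path only handles
 * vk_sync objects backed by DRM syncobjs, as the assert below enforces.
 */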
static void
push_add_sync_wait(struct push_builder *pb,
                   struct vk_sync_wait *wait)
{
   struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(wait->sync);
   assert(sync != NULL);
   push_add_syncobj_wait(pb, sync->syncobj, wait->wait_value);
}

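/* Append a syncobj signal to the request, using a timeline syncobj when
 * signal_value is non-zero.
 */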
static void
push_add_sync_signal(struct push_builder *pb,
                     struct vk_sync_signal *sig)
{
   struct vk_drm_syncobj *sync = vk_sync_as_drm_syncobj(sig->sync);
   assert(sync);
   assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS);
   pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) {
      .flags = sig->signal_value ? DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ :
                                   DRM_NOUVEAU_SYNC_SYNCOBJ,
      .handle = sync->syncobj,
      .timeline_value = sig->signal_value,
   };
}

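/* Turn a sparse buffer bind into VM_BIND ops: a NULL memory handle unmaps
 * the range, otherwise the range is mapped at the buffer's VA plus
 * resourceOffset.
 */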
static void
push_add_buffer_bind(struct push_builder *pb,
                     VkSparseBufferMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_buffer, buffer, bind_info->buffer);
   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      const VkSparseMemoryBind *bind = &bind_info->pBinds[i];
      VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

      assert(bind->resourceOffset + bind->size <= buffer->vma_size_B);
      assert(!mem || bind->memoryOffset + bind->size <= mem->vk.size);

      assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS);
      pb->bind_ops[pb->vmbind.op_count++] = (struct drm_nouveau_vm_bind_op) {
         .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                     DRM_NOUVEAU_VM_BIND_OP_UNMAP,
         .handle = mem ? mem->bo->handle : 0,
         .addr = buffer->addr + bind->resourceOffset,
         .bo_offset = bind->memoryOffset,
         .range = bind->size,
      };
   }
}

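/* Add the portion of an opaque image bind that overlaps one image plane.
 * The bind range is expressed in whole-image bytes, so it is clamped to
 * this plane's VMA before being turned into a VM_BIND op.
 * image_plane_offset_B is advanced past this plane for the caller's next
 * iteration.
 */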
static void
push_add_image_plane_opaque_bind(struct push_builder *pb,
                                 const struct nvk_image_plane *plane,
                                 const VkSparseMemoryBind *bind,
                                 uint64_t *image_plane_offset_B)
{
   *image_plane_offset_B = align64(*image_plane_offset_B, plane->nil.align_B);

   /* The offset of the bind range within the image */
   uint64_t image_bind_offset_B = bind->resourceOffset;
   uint64_t mem_bind_offset_B = bind->memoryOffset;
   uint64_t bind_size_B = bind->size;

   /* If the bind starts before the plane, clamp from below */
   if (image_bind_offset_B < *image_plane_offset_B) {
      /* The offset of the plane within the range being bound */
      const uint64_t bind_plane_offset_B =
         *image_plane_offset_B - image_bind_offset_B;

      /* If this plane lies above the bound range, skip this bind */
      if (bind_plane_offset_B >= bind_size_B)
         goto skip;

      image_bind_offset_B += bind_plane_offset_B;
      mem_bind_offset_B += bind_plane_offset_B;
      bind_size_B -= bind_plane_offset_B;

      assert(image_bind_offset_B == *image_plane_offset_B);
   }

   /* The offset of the bind range within the plane */
   const uint64_t plane_bind_offset_B =
      image_bind_offset_B - *image_plane_offset_B;

   /* The bound range lies above the plane */
   if (plane_bind_offset_B >= plane->vma_size_B)
      goto skip;

   /* Clamp the size to fit inside the plane */
   bind_size_B = MIN2(bind_size_B, plane->vma_size_B - plane_bind_offset_B);
   assert(bind_size_B > 0);

   VK_FROM_HANDLE(nvk_device_memory, mem, bind->memory);

   assert(plane_bind_offset_B + bind_size_B <= plane->vma_size_B);
   assert(!mem || mem_bind_offset_B + bind_size_B <= mem->vk.size);

   assert(pb->vmbind.op_count < NVK_PUSH_MAX_BINDS);
   pb->bind_ops[pb->vmbind.op_count++] = (struct drm_nouveau_vm_bind_op) {
      .op = mem ? DRM_NOUVEAU_VM_BIND_OP_MAP :
                  DRM_NOUVEAU_VM_BIND_OP_UNMAP,
      .handle = mem ? mem->bo->handle : 0,
      .addr = plane->addr + plane_bind_offset_B,
      .bo_offset = mem_bind_offset_B,
      .range = bind_size_B,
      .flags = plane->nil.pte_kind,
   };

skip:
   assert(plane->vma_size_B == plane->nil.size_B);
   *image_plane_offset_B += plane->nil.size_B;
}

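/* Apply each opaque image bind to every plane of the image, including the
 * stencil copy temporary when present.
 */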
static void
push_add_image_opaque_bind(struct push_builder *pb,
                           VkSparseImageOpaqueMemoryBindInfo *bind_info)
{
   VK_FROM_HANDLE(nvk_image, image, bind_info->image);
   for (unsigned i = 0; i < bind_info->bindCount; i++) {
      uint64_t image_plane_offset_B = 0;
      for (unsigned plane = 0; plane < image->plane_count; plane++) {
         push_add_image_plane_opaque_bind(pb, &image->planes[plane],
                                          &bind_info->pBinds[i],
                                          &image_plane_offset_B);
      }
      if (image->stencil_copy_temp.nil.size_B > 0) {
         push_add_image_plane_opaque_bind(pb, &image->stencil_copy_temp,
                                          &bind_info->pBinds[i],
                                          &image_plane_offset_B);
      }
   }
}

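/* Append one pushbuf range to the EXEC request.  Addresses and sizes must
 * be dword-aligned and the size must stay below the 1 << 23 hardware limit
 * asserted below.
 */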
static void
push_add_push(struct push_builder *pb, uint64_t addr, uint32_t range,
              bool no_prefetch)
{
   /* This is the hardware limit on all current GPUs */
   assert((addr % 4) == 0 && (range % 4) == 0);
   assert(range < (1u << 23));

   uint32_t flags = 0;
   if (no_prefetch)
      flags |= DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

   assert(pb->req.push_count < pb->max_push);
   pb->req_push[pb->req.push_count++] = (struct drm_nouveau_exec_push) {
      .va = addr,
      .va_len = range,
      .flags = flags,
   };
}

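/* Submit the accumulated VM_BIND ops together with the collected waits and
 * signals.  The bind runs asynchronously on the kernel side
 * (DRM_NOUVEAU_VM_BIND_RUN_ASYNC), so completion is observed through the
 * signal syncobjs.
 */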
static VkResult
bind_submit(struct nvk_queue *queue, struct push_builder *pb, bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   int err;

   pb->vmbind.wait_count = pb->req.wait_count;
   pb->vmbind.sig_count = pb->req.sig_count;
   err = drmCommandWriteRead(dev->ws_dev->fd,
                             DRM_NOUVEAU_VM_BIND,
                             &pb->vmbind, sizeof(pb->vmbind));
   if (err) {
      return vk_errorf(queue, VK_ERROR_UNKNOWN,
                       "DRM_NOUVEAU_VM_BIND failed: %m");
   }
   return VK_SUCCESS;
}

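/* Submit the accumulated pushes with DRM_NOUVEAU_EXEC.  When sync is set,
 * an extra binary syncobj is signaled and waited on so the call does not
 * return until the GPU work has finished, and a second, empty EXEC is
 * issued to surface any channel errors.
 */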
static VkResult
push_submit(struct nvk_queue *queue, struct push_builder *pb, bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);

   int err;
   if (sync) {
      assert(pb->req.sig_count < NVK_PUSH_MAX_SYNCS);
      pb->req_sig[pb->req.sig_count++] = (struct drm_nouveau_sync) {
         .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
         .handle = queue->drm.syncobj,
         .timeline_value = 0,
      };
   }
   err = drmCommandWriteRead(dev->ws_dev->fd,
                             DRM_NOUVEAU_EXEC,
                             &pb->req, sizeof(pb->req));
   if (err) {
      VkResult result = VK_ERROR_UNKNOWN;
      if (err == -ENODEV)
         result = VK_ERROR_DEVICE_LOST;
      return vk_errorf(queue, result,
                       "DRM_NOUVEAU_EXEC failed: %m");
   }
   if (sync) {
      err = drmSyncobjWait(dev->ws_dev->fd,
                           &queue->drm.syncobj, 1, INT64_MAX,
                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                           NULL);
      if (err) {
         return vk_errorf(queue, VK_ERROR_UNKNOWN,
                          "DRM_SYNCOBJ_WAIT failed: %m");
      }

      /* Submit an empty push again, just to check for errors */
      struct drm_nouveau_exec empty = {
         .channel = pb->req.channel,
      };
      err = drmCommandWriteRead(dev->ws_dev->fd,
                                DRM_NOUVEAU_EXEC,
                                &empty, sizeof(empty));
      if (err) {
         return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
                          "DRM_NOUVEAU_EXEC failed: %m");
      }
   }
   return VK_SUCCESS;
}

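/* Create the nouveau channel (context) for this queue with the engines
 * implied by the queue flags, plus the binary syncobj used for synchronous
 * submits.
 */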
VkResult
nvk_queue_init_drm_nouveau(struct nvk_device *dev,
                           struct nvk_queue *queue,
                           VkQueueFlags queue_flags)
{
   VkResult result;
   int err;

   enum nouveau_ws_engines engines = 0;
   if (queue_flags & VK_QUEUE_GRAPHICS_BIT)
      engines |= NOUVEAU_WS_ENGINE_3D;
   if (queue_flags & VK_QUEUE_COMPUTE_BIT)
      engines |= NOUVEAU_WS_ENGINE_COMPUTE;
   if (queue_flags & VK_QUEUE_TRANSFER_BIT)
      engines |= NOUVEAU_WS_ENGINE_COPY;

   err = nouveau_ws_context_create(dev->ws_dev, engines, &queue->drm.ws_ctx);
   if (err != 0) {
      if (err == -ENOSPC)
         return vk_error(dev, VK_ERROR_TOO_MANY_OBJECTS);
      else
         return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   err = drmSyncobjCreate(dev->ws_dev->fd, 0, &queue->drm.syncobj);
   if (err < 0) {
      result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_context;
   }

   return VK_SUCCESS;

fail_context:
   nouveau_ws_context_destroy(queue->drm.ws_ctx);

   return result;
}

void
nvk_queue_finish_drm_nouveau(struct nvk_device *dev,
                             struct nvk_queue *queue)
{
   ASSERTED int err = drmSyncobjDestroy(dev->ws_dev->fd, queue->drm.syncobj);
   assert(err == 0);
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
}

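/* Synchronously execute a small, internally-generated push buffer.  The
 * extra_bo parameters are unused on this path, presumably because the
 * VM_BIND uAPI does not need per-submit BO lists.
 */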
VkResult
nvk_queue_submit_simple_drm_nouveau(struct nvk_queue *queue,
                                    uint32_t push_dw_count,
                                    struct nouveau_ws_bo *push_bo,
                                    uint32_t extra_bo_count,
                                    struct nouveau_ws_bo **extra_bos)
{
   struct push_builder pb;
   push_builder_init(queue, &pb, false);

   push_add_push(&pb, push_bo->offset, push_dw_count * 4, false);

   return push_submit(queue, &pb, true);
}

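/* Add the queue-state push, if any, ahead of the command-buffer pushes. */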
static void
push_add_queue_state(struct push_builder *pb, struct nvk_queue_state *qs)
{
   if (qs->push.bo)
      push_add_push(pb, qs->push.bo->offset, qs->push.dw_count * 4, false);
}

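/* Main submit entry point for the drm_nouveau backend.  Sparse binds and
 * command buffers are mutually exclusive within a single vk_queue_submit:
 * binds go through DRM_NOUVEAU_VM_BIND and everything else through
 * DRM_NOUVEAU_EXEC, split into multiple EXEC calls when the push count
 * exceeds the per-submit limit.
 */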
VkResult
nvk_queue_submit_drm_nouveau(struct nvk_queue *queue,
                             struct vk_queue_submit *submit,
                             bool sync)
{
   struct nvk_device *dev = nvk_queue_device(queue);
   struct push_builder pb;
   VkResult result;

   uint64_t upload_time_point;
   result = nvk_upload_queue_flush(dev, &dev->upload, &upload_time_point);
   if (result != VK_SUCCESS)
      return result;

   const bool is_vmbind = submit->buffer_bind_count > 0 ||
                          submit->image_opaque_bind_count > 0;
   push_builder_init(queue, &pb, is_vmbind);

   if (!is_vmbind && upload_time_point > 0)
      push_add_syncobj_wait(&pb, dev->upload.drm.syncobj, upload_time_point);

   for (uint32_t i = 0; i < submit->wait_count; i++)
      push_add_sync_wait(&pb, &submit->waits[i]);

   if (is_vmbind) {
      assert(submit->command_buffer_count == 0);

      for (uint32_t i = 0; i < submit->buffer_bind_count; i++)
         push_add_buffer_bind(&pb, &submit->buffer_binds[i]);

      for (uint32_t i = 0; i < submit->image_opaque_bind_count; i++)
         push_add_image_opaque_bind(&pb, &submit->image_opaque_binds[i]);
   } else if (submit->command_buffer_count > 0) {
      assert(submit->buffer_bind_count == 0);
      assert(submit->image_opaque_bind_count == 0);

      push_add_queue_state(&pb, &queue->state);

      for (unsigned i = 0; i < submit->command_buffer_count; i++) {
         struct nvk_cmd_buffer *cmd =
            container_of(submit->command_buffers[i], struct nvk_cmd_buffer, vk);

         util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, push) {
            if (push->range == 0)
               continue;

            if (pb.req.push_count >= pb.max_push) {
               result = push_submit(queue, &pb, sync);
               if (result != VK_SUCCESS)
                  return result;

               push_builder_init(queue, &pb, is_vmbind);
            }

            push_add_push(&pb, push->addr, push->range, push->no_prefetch);
         }
      }
   }

   for (uint32_t i = 0; i < submit->signal_count; i++)
      push_add_sync_signal(&pb, &submit->signals[i]);

   if (is_vmbind)
      return bind_submit(queue, &pb, sync);
   else
      return push_submit(queue, &pb, sync);
}