/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "nvk_upload_queue.h"

#include "nvk_device.h"
#include "vk_alloc.h"

#include <xf86drm.h>
#include "nouveau_context.h"
#include "drm-uapi/nouveau_drm.h"

#include "nv_push.h"
#include "nvk_cl90b5.h"

#define NVK_UPLOAD_BO_SIZE (64*1024)
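
/* Pushbuf commands are packed into the bottom of each upload BO
 * (bo_push_start..bo_push_end) while the data those commands copy is packed
 * downward from the top (bo_data_start), so a single BO holds both the
 * commands and their source data.
 */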

struct nvk_upload_bo {
   struct nouveau_ws_bo *bo;
   void *map;

   /** Link in nvk_upload_queue::bos */
   struct list_head link;

   /** Time point at which this BO will be idle */
   uint64_t idle_time_point;
};

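/* Allocates a CPU-mapped upload BO in system memory (GART).  These BOs are
 * internal to the driver and never shared, hence NOUVEAU_WS_BO_NO_SHARE.
 */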
static VkResult
nvk_upload_bo_create(struct nvk_device *dev,
                     struct nvk_upload_bo **bo_out)
{
   struct nvk_upload_bo *bo;

   bo = vk_zalloc(&dev->vk.alloc, sizeof(*bo), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (bo == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP |
                    NOUVEAU_WS_BO_NO_SHARE;
   bo->bo = nouveau_ws_bo_new_mapped(dev->ws_dev, NVK_UPLOAD_BO_SIZE, 0,
                                     flags, NOUVEAU_WS_BO_WR, &bo->map);
   if (bo->bo == NULL) {
      vk_free(&dev->vk.alloc, bo);
      return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *bo_out = bo;

   return VK_SUCCESS;
}

static void
nvk_upload_bo_destroy(struct nvk_device *dev,
                      struct nvk_upload_bo *bo)
{
   nouveau_ws_bo_unmap(bo->bo, bo->map);
   nouveau_ws_bo_destroy(bo->bo);
   vk_free(&dev->vk.alloc, bo);
}

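/* Sets up a dedicated copy-engine context for uploads plus a timeline
 * syncobj used to track when upload BOs go idle.
 */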
VkResult
nvk_upload_queue_init(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   memset(queue, 0, sizeof(*queue));

   simple_mtx_init(&queue->mutex, mtx_plain);

   int err = nouveau_ws_context_create(dev->ws_dev, NOUVEAU_WS_ENGINE_COPY,
                                       &queue->drm.ws_ctx);
   if (err != 0) {
      if (err == -ENOSPC)
         result = vk_error(dev, VK_ERROR_TOO_MANY_OBJECTS);
      else
         result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_mutex;
   }

   err = drmSyncobjCreate(dev->ws_dev->fd, 0, &queue->drm.syncobj);
   if (err < 0) {
      result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_context;
   }

   list_inithead(&queue->bos);

   return VK_SUCCESS;

fail_context:
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
fail_mutex:
   simple_mtx_destroy(&queue->mutex);

   return result;
}

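/* Frees every upload BO (including the current one), the timeline syncobj,
 * and the copy context.  No sync is performed here, so any outstanding
 * uploads must already be complete or the device lost.
 */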
void
nvk_upload_queue_finish(struct nvk_device *dev,
                        struct nvk_upload_queue *queue)
{
   list_for_each_entry_safe(struct nvk_upload_bo, bo, &queue->bos, link)
      nvk_upload_bo_destroy(dev, bo);

   if (queue->bo != NULL)
      nvk_upload_bo_destroy(dev, queue->bo);

   drmSyncobjDestroy(dev->ws_dev->fd, queue->drm.syncobj);
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
   simple_mtx_destroy(&queue->mutex);
}

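/* Submits any pending commands in the current BO to the copy engine and
 * signals the next point on the queue's timeline when they complete.
 */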
static VkResult
nvk_upload_queue_flush_locked(struct nvk_device *dev,
                              struct nvk_upload_queue *queue,
                              uint64_t *time_point_out)
{
   if (queue->bo == NULL || queue->bo_push_start == queue->bo_push_end) {
      if (time_point_out != NULL)
         *time_point_out = queue->last_time_point;
      return VK_SUCCESS;
   }

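   /* Time points increase monotonically, one per flush.  Hitting UINT64_MAX
    * would take on the order of 2^64 flushes, so treat overflow as
    * unreachable and simply abort if it ever happens.
    */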
   uint64_t time_point = queue->last_time_point + 1;
   if (time_point == UINT64_MAX)
      abort();

   struct drm_nouveau_exec_push push = {
      .va = queue->bo->bo->offset + queue->bo_push_start,
      .va_len = queue->bo_push_end - queue->bo_push_start,
   };

   struct drm_nouveau_sync sig = {
      .flags = DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ,
      .handle = queue->drm.syncobj,
      .timeline_value = time_point,
   };

   struct drm_nouveau_exec req = {
      .channel = queue->drm.ws_ctx->channel,
      .push_count = 1,
      .sig_count = 1,
      .push_ptr = (uintptr_t)&push,
      .sig_ptr = (uintptr_t)&sig,
   };

   int err = drmCommandWriteRead(dev->ws_dev->fd, DRM_NOUVEAU_EXEC,
                                 &req, sizeof(req));
   if (err != 0)
      return vk_device_set_lost(&dev->vk, "DRM_NOUVEAU_EXEC failed: %m");

   /* Wait until now to update last_time_point so that, if we do fail and lose
    * the device, nvk_upload_queue_sync won't wait forever on a time point
    * that will never signal.
    */
   queue->last_time_point = time_point;

   queue->bo->idle_time_point = time_point;
   queue->bo_push_start = queue->bo_push_end;

   if (time_point_out != NULL)
      *time_point_out = time_point;

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_flush(struct nvk_device *dev,
                       struct nvk_upload_queue *queue,
                       uint64_t *time_point_out)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_flush_locked(dev, queue, time_point_out);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

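/* Flushes any pending commands, then blocks until the last submitted time
 * point signals.  WAIT_FOR_SUBMIT makes the kernel wait for the point to
 * materialize instead of failing if nothing has been submitted against it
 * yet.
 */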
static VkResult
nvk_upload_queue_sync_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue)
{
   VkResult result;

   result = nvk_upload_queue_flush_locked(dev, queue, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (queue->last_time_point == 0)
      return VK_SUCCESS;

   int err = drmSyncobjTimelineWait(dev->ws_dev->fd, &queue->drm.syncobj,
                                    &queue->last_time_point, 1, INT64_MAX,
                                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                                    NULL);
   if (err != 0)
      return vk_device_set_lost(&dev->vk, "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_sync(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_sync_locked(dev, queue);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

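/* Ensures the current BO has at least min_bo_size bytes free between the
 * end of the command area and the start of the data area, flushing and
 * swapping in a recycled or freshly allocated BO if it does not.
 */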
static VkResult
nvk_upload_queue_reserve(struct nvk_device *dev,
                         struct nvk_upload_queue *queue,
                         uint32_t min_bo_size)
{
   VkResult result;

   assert(min_bo_size <= NVK_UPLOAD_BO_SIZE);
   assert(queue->bo_push_end <= queue->bo_data_start);

   if (queue->bo != NULL) {
      if (queue->bo_data_start - queue->bo_push_end >= min_bo_size)
         return VK_SUCCESS;

      /* Not enough room in the BO.  Flush and add it to the recycle list */
      result = nvk_upload_queue_flush_locked(dev, queue, NULL);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->bo_push_start == queue->bo_push_end);
      list_addtail(&queue->bo->link, &queue->bos);
      queue->bo = NULL;
   }

   assert(queue->bo == NULL);
   queue->bo_push_start = queue->bo_push_end = 0;
   queue->bo_data_start = NVK_UPLOAD_BO_SIZE;

   /* Try to pop an idle BO off the recycle list */
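   /* BOs were appended in submission order, so the head of the list is
    * always the first to go idle.  If even the head is still busy, fall
    * through and allocate a fresh BO rather than stall.
    */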
   if (!list_is_empty(&queue->bos)) {
      uint64_t time_point_passed = 0;
      int err = drmSyncobjQuery(dev->ws_dev->fd, &queue->drm.syncobj,
                                &time_point_passed, 1);
      if (err) {
         return vk_device_set_lost(&dev->vk,
                                   "DRM_IOCTL_SYNCOBJ_QUERY failed: %m");
      }

      struct nvk_upload_bo *bo =
         list_first_entry(&queue->bos, struct nvk_upload_bo, link);
      if (time_point_passed >= bo->idle_time_point) {
         list_del(&bo->link);
         queue->bo = bo;
         return VK_SUCCESS;
      }
   }

   return nvk_upload_bo_create(dev, &queue->bo);
}

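/* Copies size bytes from src to the GPU virtual address dst_addr by staging
 * chunks in upload BOs and emitting one copy-engine transfer per chunk.
 */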
static VkResult
nvk_upload_queue_upload_locked(struct nvk_device *dev,
                               struct nvk_upload_queue *queue,
                               uint64_t dst_addr,
                               const void *src, size_t size)
{
   VkResult result;

   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 12;
      const uint32_t cmd_size = cmd_size_dw * 4;

      /* Don't split the upload for small stuff.  If it's under 1KB and we
       * can't fit it in the current buffer, just get another.
       */
      const uint32_t min_size = cmd_size + MIN2(size, 1024);
      result = nvk_upload_queue_reserve(dev, queue, min_size);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->bo != NULL);
      assert(queue->bo_data_start > queue->bo_push_end);
      const uint32_t avail = queue->bo_data_start - queue->bo_push_end;
      assert(avail >= min_size);

      const uint32_t data_size = MIN2(size, avail - cmd_size);

      const uint32_t data_bo_offset = queue->bo_data_start - data_size;
      assert(queue->bo_push_end + cmd_size <= data_bo_offset);
      const uint64_t data_addr = queue->bo->bo->offset + data_bo_offset;
      memcpy(queue->bo->map + data_bo_offset, src, data_size);
      queue->bo_data_start = data_bo_offset;

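      /* Emit a single-line, pitch-linear 90B5 (copy engine) transfer from
       * the staged data to the destination address.
       */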
      struct nv_push p;
      nv_push_init(&p, queue->bo->map + queue->bo_push_end, cmd_size_dw);

      assert(data_size <= (1 << 17));

      P_MTHD(&p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(&p, data_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(&p, data_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, data_size);
      P_NV90B5_PITCH_OUT(&p, data_size);
      P_NV90B5_LINE_LENGTH_IN(&p, data_size);
      P_NV90B5_LINE_COUNT(&p, 1);

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_FALSE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
      });

      queue->bo_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += data_size;
      src += data_size;
      size -= data_size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_upload(struct nvk_device *dev,
                        struct nvk_upload_queue *queue,
                        uint64_t dst_addr,
                        const void *src, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_upload_locked(dev, queue, dst_addr, src, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}