/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "nvk_upload_queue.h"

#include "nvk_device.h"
#include "vk_alloc.h"

#include <xf86drm.h>
#include "nouveau_context.h"
#include "drm-uapi/nouveau_drm.h"

#include "nv_push.h"
#include "nvk_cl90b5.h"

#define NVK_UPLOAD_BO_SIZE (64*1024)

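/* A single staging BO.  Each BO holds both upload data and the pushbuf
 * commands that copy it out.  Once flushed, a BO is parked on
 * nvk_upload_queue::bos until its idle_time_point passes, after which it
 * can be reused.
 */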
struct nvk_upload_bo {
   struct nouveau_ws_bo *bo;
   void *map;

   /** Link in nvk_upload_queue::bos */
   struct list_head link;

   /** Time point at which this BO will be idle */
   uint64_t idle_time_point;
};

static VkResult
nvk_upload_bo_create(struct nvk_device *dev,
                     struct nvk_upload_bo **bo_out)
{
   struct nvk_upload_bo *bo;

   bo = vk_zalloc(&dev->vk.alloc, sizeof(*bo), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (bo == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP |
                    NOUVEAU_WS_BO_NO_SHARE;
   bo->bo = nouveau_ws_bo_new_mapped(dev->ws_dev, NVK_UPLOAD_BO_SIZE, 0,
                                     flags, NOUVEAU_WS_BO_WR, &bo->map);
   if (bo->bo == NULL) {
      vk_free(&dev->vk.alloc, bo);
      return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *bo_out = bo;

   return VK_SUCCESS;
}

static void
nvk_upload_bo_destroy(struct nvk_device *dev,
                      struct nvk_upload_bo *bo)
{
   nouveau_ws_bo_unmap(bo->bo, bo->map);
   nouveau_ws_bo_destroy(bo->bo);
   vk_free(&dev->vk.alloc, bo);
}

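/* Initializes the upload queue: a dedicated copy-engine context for the DMA
 * work plus a timeline syncobj whose time points track which uploads have
 * completed on the GPU.
 */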
VkResult
nvk_upload_queue_init(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   memset(queue, 0, sizeof(*queue));

   simple_mtx_init(&queue->mutex, mtx_plain);

   int err = nouveau_ws_context_create(dev->ws_dev, NOUVEAU_WS_ENGINE_COPY,
                                       &queue->drm.ws_ctx);
   if (err != 0) {
      if (err == -ENOSPC)
         result = vk_error(dev, VK_ERROR_TOO_MANY_OBJECTS);
      else
         result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_mutex;
   }

   err = drmSyncobjCreate(dev->ws_dev->fd, 0, &queue->drm.syncobj);
   if (err < 0) {
      result = vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_context;
   }

   list_inithead(&queue->bos);

   return VK_SUCCESS;

fail_context:
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
fail_mutex:
   simple_mtx_destroy(&queue->mutex);

   return result;
}

void
nvk_upload_queue_finish(struct nvk_device *dev,
                        struct nvk_upload_queue *queue)
{
   list_for_each_entry_safe(struct nvk_upload_bo, bo, &queue->bos, link)
      nvk_upload_bo_destroy(dev, bo);

   if (queue->bo != NULL)
      nvk_upload_bo_destroy(dev, queue->bo);

   drmSyncobjDestroy(dev->ws_dev->fd, queue->drm.syncobj);
   nouveau_ws_context_destroy(queue->drm.ws_ctx);
   simple_mtx_destroy(&queue->mutex);
}

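/* Submits everything recorded since the last flush: the command range
 * [bo_push_start, bo_push_end) of the current BO goes to the copy channel
 * as a single push, and the timeline syncobj is signaled at a fresh time
 * point when it completes.  The BO's idle_time_point is updated so that
 * nvk_upload_queue_reserve() knows when the BO can be recycled.
 */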
static VkResult
nvk_upload_queue_flush_locked(struct nvk_device *dev,
                              struct nvk_upload_queue *queue,
                              uint64_t *time_point_out)
{
   if (queue->bo == NULL || queue->bo_push_start == queue->bo_push_end) {
      if (time_point_out != NULL)
         *time_point_out = queue->last_time_point;
      return VK_SUCCESS;
   }

   uint64_t time_point = queue->last_time_point + 1;
   if (time_point == UINT64_MAX)
      abort();

   struct drm_nouveau_exec_push push = {
      .va = queue->bo->bo->offset + queue->bo_push_start,
      .va_len = queue->bo_push_end - queue->bo_push_start,
   };

   struct drm_nouveau_sync sig = {
      .flags = DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ,
      .handle = queue->drm.syncobj,
      .timeline_value = time_point,
   };

   struct drm_nouveau_exec req = {
      .channel = queue->drm.ws_ctx->channel,
      .push_count = 1,
      .sig_count = 1,
      .push_ptr = (uintptr_t)&push,
      .sig_ptr = (uintptr_t)&sig,
   };

   int err = drmCommandWriteRead(dev->ws_dev->fd, DRM_NOUVEAU_EXEC,
                                 &req, sizeof(req));
   if (err != 0)
      return vk_device_set_lost(&dev->vk, "DRM_NOUVEAU_EXEC failed: %m");

   /* Wait until now to update last_time_point so that, if we do fail and lose
    * the device, nvk_upload_queue_sync won't wait forever on a time point
    * that will never signal.
    */
   queue->last_time_point = time_point;

   queue->bo->idle_time_point = time_point;
   queue->bo_push_start = queue->bo_push_end;

   if (time_point_out != NULL)
      *time_point_out = time_point;

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_flush(struct nvk_device *dev,
                       struct nvk_upload_queue *queue,
                       uint64_t *time_point_out)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_flush_locked(dev, queue, time_point_out);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

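/* Flushes any pending commands and then blocks until the timeline syncobj
 * reaches last_time_point, i.e. until every upload submitted so far has
 * completed on the GPU.
 */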
static VkResult
nvk_upload_queue_sync_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue)
{
   VkResult result;

   result = nvk_upload_queue_flush_locked(dev, queue, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (queue->last_time_point == 0)
      return VK_SUCCESS;

   int err = drmSyncobjTimelineWait(dev->ws_dev->fd, &queue->drm.syncobj,
                                    &queue->last_time_point, 1, INT64_MAX,
                                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                                    NULL);
   if (err != 0)
      return vk_device_set_lost(&dev->vk,
                                "DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT failed: %m");

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_sync(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_sync_locked(dev, queue);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

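/* Ensures the current BO has at least min_bo_size contiguous bytes free.
 * Each BO is used from both ends: pushbuf commands grow upward from the
 * start (bo_push_end) while payload data is packed downward from the end
 * (bo_data_start).  When the two would collide, the BO is flushed and moved
 * to the recycle list, then we reuse the oldest recycled BO if the GPU is
 * done with it (its idle_time_point has passed) or allocate a new one.
 */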
static VkResult
nvk_upload_queue_reserve(struct nvk_device *dev,
                         struct nvk_upload_queue *queue,
                         uint32_t min_bo_size)
{
   VkResult result;

   assert(min_bo_size <= NVK_UPLOAD_BO_SIZE);
   assert(queue->bo_push_end <= queue->bo_data_start);

   if (queue->bo != NULL) {
      if (queue->bo_data_start - queue->bo_push_end >= min_bo_size)
         return VK_SUCCESS;

      /* Not enough room in the BO.  Flush and add it to the recycle list */
      result = nvk_upload_queue_flush_locked(dev, queue, NULL);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->bo_push_start == queue->bo_push_end);
      list_addtail(&queue->bo->link, &queue->bos);
      queue->bo = NULL;
   }

   assert(queue->bo == NULL);
   queue->bo_push_start = queue->bo_push_end = 0;
   queue->bo_data_start = NVK_UPLOAD_BO_SIZE;

   /* Try to pop an idle BO off the recycle list */
   if (!list_is_empty(&queue->bos)) {
      uint64_t time_point_passed = 0;
      int err = drmSyncobjQuery(dev->ws_dev->fd, &queue->drm.syncobj,
                                &time_point_passed, 1);
      if (err) {
         return vk_device_set_lost(&dev->vk,
                                   "DRM_IOCTL_SYNCOBJ_QUERY failed: %m");
      }

      struct nvk_upload_bo *bo =
         list_first_entry(&queue->bos, struct nvk_upload_bo, link);
      if (time_point_passed >= bo->idle_time_point) {
         list_del(&bo->link);
         queue->bo = bo;
         return VK_SUCCESS;
      }
   }

   return nvk_upload_bo_create(dev, &queue->bo);
}

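/* Copies size bytes (a multiple of 4) from src to GPU address dst_addr.
 * Data is staged at the tail of the upload BO and copied out with a 90B5
 * copy-engine command written at the head.  If the payload doesn't fit in
 * what's left of the current BO, the upload is split into chunks, each with
 * its own staging range and copy command.
 */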
static VkResult
nvk_upload_queue_upload_locked(struct nvk_device *dev,
                               struct nvk_upload_queue *queue,
                               uint64_t dst_addr,
                               const void *src, size_t size)
{
   VkResult result;

   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 12;
      const uint32_t cmd_size = cmd_size_dw * 4;

      /* Don't split the upload for small stuff.  If it's under 1KB and we
       * can't fit it in the current buffer, just get another.
       */
      const uint32_t min_size = cmd_size + MIN2(size, 1024);
      result = nvk_upload_queue_reserve(dev, queue, min_size);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->bo != NULL);
      assert(queue->bo_data_start > queue->bo_push_end);
      const uint32_t avail = queue->bo_data_start - queue->bo_push_end;
      assert(avail >= min_size);

      const uint32_t data_size = MIN2(size, avail - cmd_size);

      const uint32_t data_bo_offset = queue->bo_data_start - data_size;
      assert(queue->bo_push_end + cmd_size <= data_bo_offset);
      const uint64_t data_addr = queue->bo->bo->offset + data_bo_offset;
      memcpy(queue->bo->map + data_bo_offset, src, data_size);
      queue->bo_data_start = data_bo_offset;

      struct nv_push p;
      nv_push_init(&p, queue->bo->map + queue->bo_push_end, cmd_size_dw);

      assert(data_size <= (1 << 17));

      P_MTHD(&p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(&p, data_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(&p, data_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, data_size);
      P_NV90B5_PITCH_OUT(&p, data_size);
      P_NV90B5_LINE_LENGTH_IN(&p, data_size);
      P_NV90B5_LINE_COUNT(&p, 1);

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_FALSE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
      });

      queue->bo_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += data_size;
      src += data_size;
      size -= data_size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_upload(struct nvk_device *dev,
                        struct nvk_upload_queue *queue,
                        uint64_t dst_addr,
                        const void *src, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_upload_locked(dev, queue, dst_addr, src, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}