/*
 * Copyright © 2021 Collabora Ltd.
 * Copyright © 2024 Arm Ltd.
 *
 * Derived from tu_image.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "vk_cmd_enqueue_entrypoints.h"
#include "vk_common_entrypoints.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_macros.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "panvk_queue.h"
#include "panvk_utrace.h"
#include "panvk_utrace_perfetto.h"

#include "genxml/decode.h"
#include "genxml/gen_macros.h"

#include "kmod/pan_kmod.h"
#include "pan_props.h"
#include "pan_samples.h"

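/* Thin wrappers routing pan_kmod's host allocations through the Vulkan
 * allocation callbacks attached to the device.
 */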
static void *
panvk_kmod_zalloc(const struct pan_kmod_allocator *allocator, size_t size,
                  bool transient)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   void *obj = vk_zalloc(vkalloc, size, 8,
                         transient ? VK_SYSTEM_ALLOCATION_SCOPE_COMMAND
                                   : VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   /* We force errno to -ENOMEM on host allocation failures so we can properly
    * report it back as VK_ERROR_OUT_OF_HOST_MEMORY. */
   if (!obj)
      errno = -ENOMEM;

   return obj;
}

static void
panvk_kmod_free(const struct pan_kmod_allocator *allocator, void *data)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   return vk_free(vkalloc, data);
}

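/* Device-level memory pools: a GPU-cached RW pool, an RW pool that is only
 * actually GPU-uncached on v10+ (presumably for data where GPU cache
 * maintenance would otherwise be required), and an executable pool for
 * shader binaries.
 */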
static void
panvk_device_init_mempools(struct panvk_device *dev)
{
   struct panvk_pool_properties rw_pool_props = {
      .create_flags = 0,
      .slab_size = 16 * 1024,
      .label = "Device RW cached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw, dev, NULL, &rw_pool_props);

   struct panvk_pool_properties rw_nc_pool_props = {
      .create_flags = PAN_ARCH <= 9 ? 0 : PAN_KMOD_BO_FLAG_GPU_UNCACHED,
      .slab_size = 16 * 1024,
      .label = "Device RW uncached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, &rw_nc_pool_props);

   struct panvk_pool_properties exec_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "Device executable memory pool (shaders)",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.exec, dev, NULL, &exec_pool_props);
}

static void
panvk_device_cleanup_mempools(struct panvk_device *dev)
{
   panvk_pool_cleanup(&dev->mempools.rw);
   panvk_pool_cleanup(&dev->mempools.rw_nc);
   panvk_pool_cleanup(&dev->mempools.exec);
}

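/* vk_meta callback used to back small meta buffers: the storage is
 * sub-allocated from the command buffer's descriptor memory, so it follows
 * the lifetime of the command buffer recording the meta operation.
 */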
static VkResult
panvk_meta_cmd_bind_map_buffer(struct vk_command_buffer *cmd,
                               struct vk_meta_device *meta, VkBuffer buf,
                               void **map_out)
{
   VK_FROM_HANDLE(panvk_buffer, buffer, buf);
   struct panvk_cmd_buffer *cmdbuf =
      container_of(cmd, struct panvk_cmd_buffer, vk);
   struct panfrost_ptr mem =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, buffer->vk.size, 64);

   if (!mem.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer->dev_addr = mem.gpu;
   *map_out = mem.cpu;
   return VK_SUCCESS;
}

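/* Set up the common vk_meta device used for copies, blits and clears, and
 * plug in our bind-map-buffer callback so meta can stage small buffers
 * through command-buffer memory.
 */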
static VkResult
panvk_meta_init(struct panvk_device *device)
{
   const struct vk_physical_device *pdev = device->vk.physical;

   VkResult result = vk_meta_device_init(&device->vk, &device->meta);
   if (result != VK_SUCCESS)
      return result;

   device->meta.use_stencil_export = true;
   device->meta.use_rect_list_pipeline = true;
   device->meta.max_bind_map_buffer_size_B = 64 * 1024;
   device->meta.cmd_bind_map_buffer = panvk_meta_cmd_bind_map_buffer;

   /* Assume a maximum of 1024 bytes per workgroup and choose the workgroup
    * size accordingly. */
   for (uint32_t i = 0;
        i < ARRAY_SIZE(device->meta.buffer_access.optimal_wg_size); i++) {
      device->meta.buffer_access.optimal_wg_size[i] =
         MIN2(1024 >> i, pdev->properties.maxComputeWorkGroupSize[0]);
   }

   return VK_SUCCESS;
}

static void
panvk_meta_cleanup(struct panvk_device *device)
{
   vk_meta_device_finish(&device->vk, &device->meta);
}

/* Always reserve the lower 32MB. */
#define PANVK_VA_RESERVE_BOTTOM 0x2000000ull

static enum pan_kmod_group_allow_priority_flags
global_priority_to_group_allow_priority_flag(
   VkQueueGlobalPriorityKHR priority)
{
   switch (priority) {
   case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW;
   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM;
   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME;
   default:
      unreachable("Invalid global priority");
   }
}

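/* Per VK_KHR_global_priority, a queue create request asking for a priority
 * the kernel won't grant must fail with VK_ERROR_NOT_PERMITTED_KHR; when no
 * priority struct is chained, the default is MEDIUM.
 */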
static VkResult
check_global_priority(const struct panvk_physical_device *phys_dev,
                      const VkDeviceQueueCreateInfo *create_info)
{
   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
      vk_find_struct_const(create_info->pNext,
                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
   const VkQueueGlobalPriorityKHR priority =
      priority_info ? priority_info->globalPriority
                    : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;

   enum pan_kmod_group_allow_priority_flags requested_prio =
      global_priority_to_group_allow_priority_flag(priority);
   enum pan_kmod_group_allow_priority_flags allowed_prio_mask =
      phys_dev->kmod.props.allowed_group_priorities_mask;

   if (requested_prio & allowed_prio_mask)
      return VK_SUCCESS;

   return VK_ERROR_NOT_PERMITTED_KHR;
}

VkResult
panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
                              const VkDeviceCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator,
                              VkDevice *pDevice)
{
   struct panvk_instance *instance =
      to_panvk_instance(physical_device->vk.instance);
   VkResult result;
   struct panvk_device *device;

   device = vk_zalloc2(&instance->vk.alloc, pAllocator, sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return panvk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_device_dispatch_table dispatch_table;

   if (PAN_ARCH <= 9) {
      /* For secondary command buffer support, overwrite any command
       * entrypoints in the main device-level dispatch table with
       * vk_cmd_enqueue_unless_primary_Cmd*.
       */
      vk_device_dispatch_table_from_entrypoints(
         &dispatch_table, &vk_cmd_enqueue_unless_primary_device_entrypoints,
         true);

      /* Populate our primary cmd_dispatch table. */
      vk_device_dispatch_table_from_entrypoints(
         &device->cmd_dispatch, &panvk_per_arch(device_entrypoints), true);
      vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
                                                &panvk_device_entrypoints,
                                                false);
      vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
                                                &vk_common_device_entrypoints,
                                                false);
   }

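   /* Fill the main dispatch table with the per-arch entrypoints. On v10+ the
    * overwrite flag is true because the table hasn't been initialized yet;
    * on v9 and earlier it must stay false so the enqueue-unless-primary
    * wrappers installed above are preserved.
    */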
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &panvk_per_arch(device_entrypoints), PAN_ARCH > 9);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &panvk_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
                           pCreateInfo, pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_dev;

   /* Must be done after vk_device_init() because vk_device_init() memsets
    * the whole struct to 0.
    */
   device->vk.command_dispatch_table = &device->cmd_dispatch;
   device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops);
   device->vk.shader_ops = &panvk_per_arch(device_shader_ops);
#if PAN_ARCH >= 10
   device->vk.check_status = panvk_per_arch(device_check_status);
#endif

   device->kmod.allocator = (struct pan_kmod_allocator){
      .zalloc = panvk_kmod_zalloc,
      .free = panvk_kmod_free,
      .priv = &device->vk.alloc,
   };
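   /* Duplicate the DRM fd so the logical device owns its own reference to
    * the kernel device, independent of the physical device's fd lifetime.
    */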
   device->kmod.dev =
      pan_kmod_dev_create(dup(physical_device->kmod.dev->fd),
                          PAN_KMOD_DEV_FLAG_OWNS_FD, &device->kmod.allocator);

   if (!device->kmod.dev) {
      result = panvk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                            "cannot create device");
      goto err_finish_dev;
   }

   if (instance->debug_flags &
       (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC | PANVK_DEBUG_DUMP))
      device->debug.decode_ctx = pandecode_create_context(false);

   /* 32bit address space, with the lower 32MB reserved. We clamp
    * things so it matches kmod VA range limitations.
    */
   uint64_t user_va_start = panfrost_clamp_to_usable_va_range(
      device->kmod.dev, PANVK_VA_RESERVE_BOTTOM);
   uint64_t user_va_end =
      panfrost_clamp_to_usable_va_range(device->kmod.dev, 1ull << 32);
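   /* With PAN_KMOD_VM_FLAG_AUTO_VA (v7 and earlier), kmod picks GPU VAs on
    * its own; otherwise allocations are placed explicitly out of the VMA
    * heap set up below.
    */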
   uint32_t vm_flags = PAN_ARCH <= 7 ? PAN_KMOD_VM_FLAG_AUTO_VA : 0;

   simple_mtx_init(&device->as.lock, mtx_plain);
   util_vma_heap_init(&device->as.heap, user_va_start,
                      user_va_end - user_va_start);

   device->kmod.vm =
      pan_kmod_vm_create(device->kmod.dev, vm_flags,
                         user_va_start, user_va_end - user_va_start);

   if (!device->kmod.vm) {
      result = panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_destroy_kdev;
   }

   panvk_device_init_mempools(device);

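   /* v9 and earlier use one device-wide tiler heap. With
    * PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT it is never
    * CPU-mapped and pages are only committed when the GPU faults on them, so
    * the 128 MB is mostly address space rather than resident memory.
    */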
#if PAN_ARCH <= 9
   result = panvk_priv_bo_create(
      device, 128 * 1024 * 1024,
      PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &device->tiler_heap);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;
#endif

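   /* Static sample-position table, uploaded once and shared by every command
    * buffer on this device.
    */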
   result = panvk_priv_bo_create(
      device, panfrost_sample_positions_buffer_size(), 0,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &device->sample_positions);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;

   panfrost_upload_sample_positions(device->sample_positions->addr.host);

#if PAN_ARCH >= 10
   result = panvk_per_arch(init_tiler_oom)(device);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;
#endif

   vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);

   result = panvk_meta_init(device);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;

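   /* Queues are allocated per family. Each VkDeviceQueueCreateInfo is first
    * checked against the priorities the kernel allows, so an unsupported
    * global priority fails with VK_ERROR_NOT_PERMITTED_KHR before any queue
    * in that family is brought up.
    */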
   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create =
         &pCreateInfo->pQueueCreateInfos[i];

      result = check_global_priority(physical_device, queue_create);
      if (result != VK_SUCCESS)
         goto err_finish_queues;

      uint32_t qfi = queue_create->queueFamilyIndex;
      device->queues[qfi] =
         vk_zalloc(&device->vk.alloc,
                   queue_create->queueCount * sizeof(struct panvk_queue), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto err_finish_queues;
      }

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = panvk_per_arch(queue_init)(device, &device->queues[qfi][q],
                                             q, queue_create);
         if (result != VK_SUCCESS)
            goto err_finish_queues;

         device->queue_count[qfi]++;
      }
   }

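   /* utrace gets one context per device; the perfetto bridge is told how
    * many subqueues each queue exposes (PANVK_SUBQUEUE_COUNT on CSF GPUs,
    * two on older GPUs -- presumably the vertex/tiler and fragment job
    * chains).
    */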
   panvk_per_arch(utrace_context_init)(device);
#if PAN_ARCH >= 10
   panvk_utrace_perfetto_init(device, PANVK_SUBQUEUE_COUNT);
#else
   panvk_utrace_perfetto_init(device, 2);
#endif

   *pDevice = panvk_device_to_handle(device);
   return VK_SUCCESS;

err_finish_queues:
   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queues[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   panvk_meta_cleanup(device);

err_free_priv_bos:
   panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);
   simple_mtx_destroy(&device->as.lock);

err_destroy_kdev:
   pan_kmod_dev_destroy(device->kmod.dev);

err_finish_dev:
   vk_device_finish(&device->vk);

err_free_dev:
   vk_free(&device->vk.alloc, device);
   return result;
}

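/* Device teardown mirrors create_device() in reverse, with the utrace
 * context finalized first (presumably to flush any pending traces before the
 * queues and memory backing them go away).
 */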
void
panvk_per_arch(destroy_device)(struct panvk_device *device,
                               const VkAllocationCallbacks *pAllocator)
{
   if (!device)
      return;

   panvk_per_arch(utrace_context_fini)(device);

   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   panvk_meta_cleanup(device);
   panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);
   simple_mtx_destroy(&device->as.lock);

   if (device->debug.decode_ctx)
      pandecode_destroy_context(device->debug.decode_ctx);

   pan_kmod_dev_destroy(device->kmod.dev);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

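/* Both the render-pass and dynamic-rendering granularity queries report
 * 32x32, which presumably matches the maximum bin size used by the tiler.
 */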
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderAreaGranularity)(VkDevice device,
                                         VkRenderPass renderPass,
                                         VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderingAreaGranularityKHR)(
   VkDevice _device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
   VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}