/*
 * Copyright © 2021 Collabora Ltd.
 * Copyright © 2024 Arm Ltd.
 *
 * Derived from tu_image.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "vk_cmd_enqueue_entrypoints.h"
#include "vk_common_entrypoints.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_macros.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "panvk_queue.h"
#include "panvk_utrace.h"
#include "panvk_utrace_perfetto.h"

#include "genxml/decode.h"
#include "genxml/gen_macros.h"

#include "kmod/pan_kmod.h"
#include "pan_props.h"
#include "pan_samples.h"

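/* Host allocation callbacks handed to pan_kmod. They forward to the Vulkan
 * allocation callbacks stashed in allocator->priv. */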
static void *
panvk_kmod_zalloc(const struct pan_kmod_allocator *allocator, size_t size,
                  bool transient)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   void *obj = vk_zalloc(vkalloc, size, 8,
                         transient ? VK_SYSTEM_ALLOCATION_SCOPE_COMMAND
                                   : VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   /* We force errno to -ENOMEM on host allocation failures so we can properly
    * report it back as VK_ERROR_OUT_OF_HOST_MEMORY. */
   if (!obj)
      errno = -ENOMEM;

   return obj;
}

static void
panvk_kmod_free(const struct pan_kmod_allocator *allocator, void *data)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   return vk_free(vkalloc, data);
}

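/* Internal device-wide memory pools: a cached RW pool, an uncached RW pool
 * (GPU-uncached only on v10+), and an executable pool for shader code. */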
static void
panvk_device_init_mempools(struct panvk_device *dev)
{
   struct panvk_pool_properties rw_pool_props = {
      .create_flags = 0,
      .slab_size = 16 * 1024,
      .label = "Device RW cached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw, dev, NULL, &rw_pool_props);

   struct panvk_pool_properties rw_nc_pool_props = {
      .create_flags = PAN_ARCH <= 9 ? 0 : PAN_KMOD_BO_FLAG_GPU_UNCACHED,
      .slab_size = 16 * 1024,
      .label = "Device RW uncached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, &rw_nc_pool_props);

   struct panvk_pool_properties exec_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "Device executable memory pool (shaders)",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.exec, dev, NULL, &exec_pool_props);
}

static void
panvk_device_cleanup_mempools(struct panvk_device *dev)
{
   panvk_pool_cleanup(&dev->mempools.rw);
   panvk_pool_cleanup(&dev->mempools.rw_nc);
   panvk_pool_cleanup(&dev->mempools.exec);
}

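/* vk_meta hook: back a transient meta buffer with command-buffer-local device
 * memory and return a CPU mapping of it. */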
static VkResult
panvk_meta_cmd_bind_map_buffer(struct vk_command_buffer *cmd,
                               struct vk_meta_device *meta, VkBuffer buf,
                               void **map_out)
{
   VK_FROM_HANDLE(panvk_buffer, buffer, buf);
   struct panvk_cmd_buffer *cmdbuf =
      container_of(cmd, struct panvk_cmd_buffer, vk);
   struct panfrost_ptr mem =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, buffer->vk.size, 64);

   if (!mem.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer->dev_addr = mem.gpu;
   *map_out = mem.cpu;
   return VK_SUCCESS;
}

static VkResult
panvk_meta_init(struct panvk_device *device)
{
   const struct vk_physical_device *pdev = device->vk.physical;

   VkResult result = vk_meta_device_init(&device->vk, &device->meta);
   if (result != VK_SUCCESS)
      return result;

   device->meta.use_stencil_export = true;
   device->meta.use_rect_list_pipeline = true;
   device->meta.max_bind_map_buffer_size_B = 64 * 1024;
   device->meta.cmd_bind_map_buffer = panvk_meta_cmd_bind_map_buffer;

   /* Assume a maximum of 1024 bytes per workgroup and choose the workgroup
    * size accordingly. */
   for (uint32_t i = 0;
        i < ARRAY_SIZE(device->meta.buffer_access.optimal_wg_size); i++) {
      device->meta.buffer_access.optimal_wg_size[i] =
         MIN2(1024 >> i, pdev->properties.maxComputeWorkGroupSize[0]);
   }

   return VK_SUCCESS;
}

static void
panvk_meta_cleanup(struct panvk_device *device)
{
   vk_meta_device_finish(&device->vk, &device->meta);
}

/* Always reserve the lower 32MB. */
#define PANVK_VA_RESERVE_BOTTOM 0x2000000ull

static enum pan_kmod_group_allow_priority_flags
global_priority_to_group_allow_priority_flag(
   VkQueueGlobalPriorityKHR priority)
{
   switch (priority) {
   case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_LOW;
   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_MEDIUM;
   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_HIGH;
   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
      return PAN_KMOD_GROUP_ALLOW_PRIORITY_REALTIME;
   default:
      unreachable("Invalid global priority");
   }
}

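/* Reject queue creation with VK_ERROR_NOT_PERMITTED_KHR when the requested
 * global priority isn't in the kernel's allowed-priority mask. */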
static VkResult
check_global_priority(const struct panvk_physical_device *phys_dev,
                      const VkDeviceQueueCreateInfo *create_info)
{
   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
      vk_find_struct_const(create_info->pNext,
                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
   const VkQueueGlobalPriorityKHR priority =
      priority_info ? priority_info->globalPriority
                    : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;

   enum pan_kmod_group_allow_priority_flags requested_prio =
      global_priority_to_group_allow_priority_flag(priority);
   enum pan_kmod_group_allow_priority_flags allowed_prio_mask =
      phys_dev->kmod.props.allowed_group_priorities_mask;

   if (requested_prio & allowed_prio_mask)
      return VK_SUCCESS;

   return VK_ERROR_NOT_PERMITTED_KHR;
}

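/* Device creation: build the dispatch tables, open a per-device kmod handle,
 * carve out the user VA space, set up internal pools and BOs, initialize
 * vk_meta, then bring up the requested queues. */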
VkResult
panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
                              const VkDeviceCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator,
                              VkDevice *pDevice)
{
   struct panvk_instance *instance =
      to_panvk_instance(physical_device->vk.instance);
   VkResult result;
   struct panvk_device *device;

   device = vk_zalloc2(&instance->vk.alloc, pAllocator, sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return panvk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_device_dispatch_table dispatch_table;

   if (PAN_ARCH <= 9) {
      /* For secondary command buffer support, overwrite any command entrypoints
       * in the main device-level dispatch table with
       * vk_cmd_enqueue_unless_primary_Cmd*.
       */
      vk_device_dispatch_table_from_entrypoints(
         &dispatch_table, &vk_cmd_enqueue_unless_primary_device_entrypoints, true);

      /* Populate our primary cmd_dispatch table. */
      vk_device_dispatch_table_from_entrypoints(
         &device->cmd_dispatch, &panvk_per_arch(device_entrypoints), true);
      vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
                                                &panvk_device_entrypoints,
                                                false);
      vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
                                                &vk_common_device_entrypoints,
                                                false);
   }

   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &panvk_per_arch(device_entrypoints), PAN_ARCH > 9);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &panvk_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
                           pCreateInfo, pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_dev;

   /* Must be done after vk_device_init() because vk_device_init() zeroes the
    * whole struct.
    */
   device->vk.command_dispatch_table = &device->cmd_dispatch;
   device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops);
   device->vk.shader_ops = &panvk_per_arch(device_shader_ops);
#if PAN_ARCH >= 10
   device->vk.check_status = panvk_per_arch(device_check_status);
#endif

   device->kmod.allocator = (struct pan_kmod_allocator){
      .zalloc = panvk_kmod_zalloc,
      .free = panvk_kmod_free,
      .priv = &device->vk.alloc,
   };
   device->kmod.dev =
      pan_kmod_dev_create(dup(physical_device->kmod.dev->fd),
                          PAN_KMOD_DEV_FLAG_OWNS_FD, &device->kmod.allocator);

   if (!device->kmod.dev) {
      result = panvk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                            "cannot create device");
      goto err_finish_dev;
   }

   if (instance->debug_flags &
       (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC | PANVK_DEBUG_DUMP))
      device->debug.decode_ctx = pandecode_create_context(false);

   /* 32-bit address space, with the lower 32MB reserved. We clamp things so
    * they match kmod VA range limitations.
    */
   uint64_t user_va_start = panfrost_clamp_to_usable_va_range(
      device->kmod.dev, PANVK_VA_RESERVE_BOTTOM);
   uint64_t user_va_end =
      panfrost_clamp_to_usable_va_range(device->kmod.dev, 1ull << 32);
   uint32_t vm_flags = PAN_ARCH <= 7 ? PAN_KMOD_VM_FLAG_AUTO_VA : 0;

   simple_mtx_init(&device->as.lock, mtx_plain);
   util_vma_heap_init(&device->as.heap, user_va_start,
                      user_va_end - user_va_start);

   device->kmod.vm =
      pan_kmod_vm_create(device->kmod.dev, vm_flags,
                         user_va_start, user_va_end - user_va_start);

   if (!device->kmod.vm) {
      result = panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_destroy_kdev;
   }

   panvk_device_init_mempools(device);

#if PAN_ARCH <= 9
   result = panvk_priv_bo_create(
      device, 128 * 1024 * 1024,
      PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &device->tiler_heap);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;
#endif

   result = panvk_priv_bo_create(
      device, panfrost_sample_positions_buffer_size(), 0,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &device->sample_positions);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;

   panfrost_upload_sample_positions(device->sample_positions->addr.host);

#if PAN_ARCH >= 10
   result = panvk_per_arch(init_tiler_oom)(device);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;
#endif

   vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);

   result = panvk_meta_init(device);
   if (result != VK_SUCCESS)
      goto err_free_priv_bos;

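   /* Create the queues requested by the application, grouped per queue
    * family. */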
   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create =
         &pCreateInfo->pQueueCreateInfos[i];

      result = check_global_priority(physical_device, queue_create);
      if (result != VK_SUCCESS)
         goto err_finish_queues;

      uint32_t qfi = queue_create->queueFamilyIndex;
      device->queues[qfi] =
         vk_zalloc(&device->vk.alloc,
                   queue_create->queueCount * sizeof(struct panvk_queue), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto err_finish_queues;
      }

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = panvk_per_arch(queue_init)(device, &device->queues[qfi][q], q,
                                             queue_create);
         if (result != VK_SUCCESS)
            goto err_finish_queues;

         device->queue_count[qfi]++;
      }
   }

   panvk_per_arch(utrace_context_init)(device);
#if PAN_ARCH >= 10
   panvk_utrace_perfetto_init(device, PANVK_SUBQUEUE_COUNT);
#else
   panvk_utrace_perfetto_init(device, 2);
#endif

   *pDevice = panvk_device_to_handle(device);
   return VK_SUCCESS;

err_finish_queues:
   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queues[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   panvk_meta_cleanup(device);

err_free_priv_bos:
   panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);
   simple_mtx_destroy(&device->as.lock);

err_destroy_kdev:
   pan_kmod_dev_destroy(device->kmod.dev);

err_finish_dev:
   vk_device_finish(&device->vk);

err_free_dev:
   vk_free(&device->vk.alloc, device);
   return result;
}

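/* Tear down, roughly in reverse order, everything create_device set up. */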
void
panvk_per_arch(destroy_device)(struct panvk_device *device,
                               const VkAllocationCallbacks *pAllocator)
{
   if (!device)
      return;

   panvk_per_arch(utrace_context_fini)(device);

   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   panvk_meta_cleanup(device);
   panvk_priv_bo_unref(device->tiler_oom.handlers_bo);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);
   simple_mtx_destroy(&device->as.lock);

   if (device->debug.decode_ctx)
      pandecode_destroy_context(device->debug.decode_ctx);

   pan_kmod_dev_destroy(device->kmod.dev);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

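/* Both render area granularity queries report a fixed 32x32 granularity. */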
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderAreaGranularity)(VkDevice device,
                                         VkRenderPass renderPass,
                                         VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderingAreaGranularityKHR)(
   VkDevice _device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
   VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}