/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "hwdef/rogue_hw_utils.h"
#include "pipe/p_defines.h"
#include "pvr_bo.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_nop_usc.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "rogue/rogue_compiler.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
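
/* Illustrative arithmetic only: 12.5% of the 2 MiB initial free list size
 * would be 256 KiB; rounding the percentage up to the integer 13 moves that
 * point to ~266 KiB. How the winsys consumes this threshold is outside this
 * file.
 */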

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM
#endif

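/* The driver advertises Vulkan 1.0; the patch component comes from
 * VK_HEADER_VERSION, i.e. it tracks the Vulkan headers the driver was built
 * against.
 */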
#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)

#define DEF_DRIVER(str_name)                        \
   {                                                \
      .name = str_name, .len = sizeof(str_name) - 1 \
   }
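
/* Note: sizeof() on a string literal includes the terminating NUL, so the
 * "- 1" stores the string length proper, as strlen() would report it.
 */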

struct pvr_drm_device_info {
   const char *name;
   size_t len;
};

/* This is the list of supported DRM display drivers. */
static const struct pvr_drm_device_info pvr_display_devices[] = {
   DEF_DRIVER("mediatek-drm"),
   DEF_DRIVER("ti,am65x-dss"),
};

/* This is the list of supported DRM render drivers. */
static const struct pvr_drm_device_info pvr_render_devices[] = {
   DEF_DRIVER("mediatek,mt8173-gpu"),
   DEF_DRIVER("ti,am62-gpu"),
};

#undef DEF_DRIVER

static const struct vk_instance_extension_table pvr_instance_extensions = {
#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
   .KHR_display = true,
#endif
   .KHR_external_memory_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
#if defined(PVR_USE_WSI_PLATFORM)
   .KHR_surface = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   const struct pvr_physical_device *pdevice,
   struct vk_device_extension_table *extensions)
{
   /* clang-format off */
   *extensions = (struct vk_device_extension_table){
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
#if defined(PVR_USE_WSI_PLATFORM)
      .KHR_swapchain = true,
#endif
      .EXT_external_memory_dma_buf = true,
      .EXT_private_data = true,
   };
   /* clang-format on */
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   pvr_process_debug_variable();

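   /* -1 is a sentinel meaning "not enumerated yet"; the actual enumeration is
    * deferred to pvr_EnumeratePhysicalDevices(), which checks for a negative
    * count.
    */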
   instance->physical_devices_count = -1;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

static void pvr_physical_device_finish(struct pvr_physical_device *pdevice)
{
   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->compiler)
      rogue_compiler_destroy(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   free(pdevice->name);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   if (pdevice->master_fd >= 0) {
      vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
      close(pdevice->master_fd);
   }

   if (pdevice->render_fd >= 0) {
      vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
      close(pdevice->render_fd);
   }
   vk_physical_device_finish(&pdevice->vk);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   pvr_physical_device_finish(&instance->physical_device);

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(pdevice,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(pdevice,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Build-id too short. It needs to be a SHA");
   }

   bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
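   /* A SHA-1 digest is 20 bytes while VK_UUID_SIZE is 16, so the pipeline
    * cache UUID is the truncated hash of the driver build-id plus the packed
    * BVNC GPU identifier.
    */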
   memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total ram from the system */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much ram with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}
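
/* Worked example: a system with 4 GiB of RAM reports a 2 GiB heap, while an
 * 8 GiB system reports 6 GiB (3/4 of total). A failed query reports a
 * zero-sized heap.
 */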

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_primary_device)
{
   const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
   struct vk_device_extension_table supported_extensions;
   struct vk_physical_device_dispatch_table dispatch_table;
   const char *primary_path;
   VkResult result;
   int ret;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   pvr_physical_device_get_supported_extensions(pdevice, &supported_extensions);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      return result;

   pdevice->instance = instance;

   pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC);
   if (pdevice->render_fd < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Failed to open device %s",
                         path);
      goto err_vk_physical_device_finish;
   }

   pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc,
                                    path,
                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice->render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_close_render_fd;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];

      pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
   } else {
      pdevice->master_fd = -1;
   }

   if (pdevice->master_fd >= 0) {
      pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc,
                                       primary_path,
                                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!pdevice->master_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_close_master_fd;
      }
   } else {
      pdevice->master_path = NULL;
   }

   pdevice->ws = pvr_winsys_create(pdevice->master_fd,
                                   pdevice->render_fd,
                                   &pdevice->vk.instance->alloc);
   if (!pdevice->ws) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_vk_free_master_path;
   }

   pdevice->vk.supported_sync_types = pdevice->ws->sync_types;

   ret = pdevice->ws->ops->device_info_init(pdevice->ws,
                                            &pdevice->dev_info,
                                            &pdevice->dev_runtime_info);
   if (ret) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_pvr_winsys_destroy;
   }

   result = pvr_physical_device_init_uuids(pdevice);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   if (asprintf(&pdevice->name,
                "Imagination PowerVR %s %s",
                pdevice->dev_info.ident.series_name,
                pdevice->dev_info.ident.public_name) < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_OUT_OF_HOST_MEMORY,
                         "Unable to allocate memory to store device name");
      goto err_pvr_winsys_destroy;
   }

   /* Setup available memory heaps and types */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_free_name;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_free_name:
   free(pdevice->name);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(pdevice->ws);

err_vk_free_master_path:
   vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);

err_close_master_fd:
   if (pdevice->master_fd >= 0)
      close(pdevice->master_fd);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);

err_close_render_fd:
   close(pdevice->render_fd);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

   return result;
}

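/* Note: the strncmp() calls below, bounded by each table entry's length,
 * perform a prefix match: they check whether the device-tree "compatible"
 * string starts with the listed name, e.g. "ti,am62-gpu" matches any
 * compatible string beginning with it.
 */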
static bool pvr_drm_device_is_supported(drmDevicePtr drm_dev, int node_type)
{
   char **compat = drm_dev->deviceinfo.platform->compatible;

   if (!(drm_dev->available_nodes & BITFIELD_BIT(node_type))) {
      assert(node_type == DRM_NODE_RENDER || node_type == DRM_NODE_PRIMARY);
      return false;
   }

   if (node_type == DRM_NODE_RENDER) {
      while (*compat) {
         for (size_t i = 0U; i < ARRAY_SIZE(pvr_render_devices); i++) {
            const char *const name = pvr_render_devices[i].name;
            const size_t len = pvr_render_devices[i].len;

            if (strncmp(*compat, name, len) == 0)
               return true;
         }

         compat++;
      }

      return false;
   } else if (node_type == DRM_NODE_PRIMARY) {
      while (*compat) {
         for (size_t i = 0U; i < ARRAY_SIZE(pvr_display_devices); i++) {
            const char *const name = pvr_display_devices[i].name;
            const size_t len = pvr_display_devices[i].len;

            if (strncmp(*compat, name, len) == 0)
               return true;
         }

         compat++;
      }

      return false;
   }

   unreachable("Incorrect node_type.");
}

static VkResult pvr_enumerate_devices(struct pvr_instance *instance)
{
   /* FIXME: It should be possible to query the number of devices via
    * drmGetDevices2 by passing in NULL for the 'devices' parameter. However,
    * this was broken by libdrm commit
    * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in
    * upstream, hard-code the maximum number of devices.
    */
   drmDevicePtr drm_primary_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   drmDevicePtr drm_devices[8];
   int max_drm_devices;
   VkResult result;

   instance->physical_devices_count = 0;

   max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices));
   if (max_drm_devices < 1)
      return VK_SUCCESS;

   for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) {
      if (drm_devices[i]->bustype != DRM_BUS_PLATFORM)
         continue;

      if (pvr_drm_device_is_supported(drm_devices[i], DRM_NODE_RENDER)) {
         drm_render_device = drm_devices[i];

         mesa_logd("Found compatible render device '%s'.",
                   drm_render_device->nodes[DRM_NODE_RENDER]);
      } else if (pvr_drm_device_is_supported(drm_devices[i],
                                             DRM_NODE_PRIMARY)) {
         drm_primary_device = drm_devices[i];

         mesa_logd("Found compatible primary device '%s'.",
                   drm_primary_device->nodes[DRM_NODE_PRIMARY]);
      }
   }

   if (drm_render_device && drm_primary_device) {
      result = pvr_physical_device_init(&instance->physical_device,
                                        instance,
                                        drm_render_device,
                                        drm_primary_device);
      if (result == VK_SUCCESS)
         instance->physical_devices_count = 1;
      else if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;
   } else {
      result = VK_SUCCESS;
   }

   drmFreeDevices(drm_devices, max_drm_devices);

   return result;
}

VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance,
                                      uint32_t *pPhysicalDeviceCount,
                                      VkPhysicalDevice *pPhysicalDevices)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice,
                          out,
                          pPhysicalDevices,
                          pPhysicalDeviceCount);
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   VkResult result;

   if (instance->physical_devices_count < 0) {
      result = pvr_enumerate_devices(instance);
      if (result != VK_SUCCESS)
         return result;
   }

   if (instance->physical_devices_count == 0)
      return VK_SUCCESS;

   assert(instance->physical_devices_count == 1);
   vk_outarray_append_typed (VkPhysicalDevice, &out, p) {
      *p = pvr_physical_device_to_handle(&instance->physical_device);
   }

   return vk_outarray_status(&out);
}

void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                    VkPhysicalDeviceFeatures2 *pFeatures)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pFeatures->features = (VkPhysicalDeviceFeatures){
      .robustBufferAccess =
         PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access),
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = false,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc),
      .textureCompressionBC = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,
   };

   vk_foreach_struct (ext, pFeatures->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

/* TODO: See if this function can be improved once fully implemented. */
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_physical_device *pdevice,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &pdevice->dev_info;
   uint32_t max_tiles_in_flight;
   uint32_t num_allocs;

   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
   } else {
      uint32_t min_cluster_per_phantom = 0;

      if (dev_runtime_info->num_phantoms > 1) {
         pvr_finishme("Unimplemented path!!");
      } else {
         min_cluster_per_phantom =
            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
      }

      if (dev_runtime_info->num_phantoms > 1)
         pvr_finishme("Unimplemented path!!");

      if (dev_runtime_info->num_phantoms > 2)
         pvr_finishme("Unimplemented path!!");

      if (dev_runtime_info->num_phantoms > 3)
         pvr_finishme("Unimplemented path!!");

      if (min_cluster_per_phantom >= 4)
         num_allocs = 1;
      else if (min_cluster_per_phantom == 2)
         num_allocs = 2;
      else
         num_allocs = 4;
   }

   max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);

   if (fs_common_size == UINT_MAX) {
      const struct pvr_device_runtime_info *dev_runtime_info =
         &pdevice->dev_runtime_info;
      uint32_t max_common_size;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      max_common_size =
         dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   } else if (fs_common_size == 0) {
      return max_tiles_in_flight;
   }

   pvr_finishme("Unimplemented path!!");

   return 0;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid range Rogue XE cores. */
      CS2048, /* Low end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

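   /* Each row's field order matches struct pvr_descriptor_limits: resources,
    * samplers, uniform buffers, storage buffers, sampled images, storage
    * images and input attachments, all per shader stage.
    */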
   static const struct pvr_descriptor_limits descriptor_limits[] = {
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1526) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                      VkPhysicalDeviceProperties2 *pProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(pdevice);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format)
         ? 4U
         : 8U;

   const uint32_t max_render_size =
      rogue_get_render_size_max(&pdevice->dev_info);

   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
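   /* E.g. max_multisample = 4 yields (4 << 1) - 1 = 0x7, i.e. the
    * VK_SAMPLE_COUNT_1_BIT, _2_BIT and _4_BIT flags all set.
    */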

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a compute
    * barrier - each slot has a fixed number of invocations, the whole workgroup
    * may need to span multiple slots. As each slot will WAIT at the barrier
    * until the last invocation completes, all have to be schedulable at the
    * same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    *    .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info,
                            max_instances_per_pds_task,
                            32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
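   /* With the conservative defaults above: 14 slots * 32 instances = 448,
    * which is below 512, so such cores advertise 384 invocations.
    */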

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = 2U * 1024U,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = 2U * 1024U,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,

      /* Maximum number of descriptor sets that can be bound at the same time.
       */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,

      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,

      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic = 8U,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic = 8U,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = 64U,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,
   };

   pProperties->properties = (VkPhysicalDeviceProperties){
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = pdevice->dev_info.ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   snprintf(pProperties->properties.deviceName,
            sizeof(pProperties->properties.deviceName),
            "%s",
            pdevice->name);

   memcpy(pProperties->properties.pipelineCacheUUID,
          pdevice->pipeline_cache_uuid,
          VK_UUID_SIZE);

   vk_foreach_struct (ext, pProperties->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

const static VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

void pvr_GetPhysicalDeviceQueueFamilyProperties(
   VkPhysicalDevice physicalDevice,
   uint32_t *pCount,
   VkQueueFamilyProperties *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties,
                          out,
                          pQueueFamilyProperties,
                          pCount);

   vk_outarray_append_typed (VkQueueFamilyProperties, &out, p) {
      *p = pvr_queue_family_properties;
   }
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         pvr_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

/* With version 4+ of the loader interface the ICD should expose
 * vk_icdGetPhysicalDeviceProcAddr().
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   struct pvr_pds_compute_shader_program program = { 0U };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
                 ARRAY_SIZE(program.work_group_input_regs));
   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
                 ARRAY_SIZE(program.global_input_regs));

   /* Initialize PDS structure. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) {
      program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
      program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
      program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
   }

   program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;

   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_set_sizes_compute_shader(&program, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      program.data_size * sizeof(uint32_t) + program.code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

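   /* Staging buffer layout: the data segment occupies the first
    * program.data_size dwords; the data segment generator returns a pointer
    * just past what it wrote, which is then used as the destination for the
    * code segment.
    */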
1161    data_buffer = staging_buffer;
1162    code_buffer = pvr_pds_generate_compute_shader_data_segment(&program,
1163                                                               data_buffer,
1164                                                               dev_info);
1165    pvr_pds_generate_compute_shader_code_segment(&program,
1166                                                 code_buffer,
1167                                                 dev_info);
1168    result = pvr_gpu_upload_pds(device,
1169                                data_buffer,
1170                                program.data_size,
1171                                PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
1172                                code_buffer,
1173                                program.code_size / sizeof(uint32_t),
1174                                PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
1175                                cache_line_size,
1176                                &device->pds_compute_fence_program);
1177 
1178    vk_free(&device->vk.alloc, staging_buffer);
1179 
1180    return result;
1181 }
1182 
pvr_pds_idfwdf_programs_create_and_upload(struct pvr_device * device,pvr_dev_addr_t usc_addr,uint32_t shareds,uint32_t temps,pvr_dev_addr_t shareds_buffer_addr,struct pvr_pds_upload * const upload_out,struct pvr_pds_upload * const sw_compute_barrier_upload_out)1183 static VkResult pvr_pds_idfwdf_programs_create_and_upload(
1184    struct pvr_device *device,
1185    pvr_dev_addr_t usc_addr,
1186    uint32_t shareds,
1187    uint32_t temps,
1188    pvr_dev_addr_t shareds_buffer_addr,
1189    struct pvr_pds_upload *const upload_out,
1190    struct pvr_pds_upload *const sw_compute_barrier_upload_out)
1191 {
1192    const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
1193    struct pvr_pds_vertex_shader_sa_program program = {
1194       .kick_usc = true,
1195       .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
1196    };
1197    size_t staging_buffer_size;
1198    uint32_t *staging_buffer;
1199    VkResult result;
1200 
1201    /* We'll need to DMA the shareds into the USC's Common Store. */
1202    program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
1203                                                     program.dma_address,
1204                                                     0,
1205                                                     shareds,
1206                                                     shareds_buffer_addr.addr,
1207                                                     dev_info);
1208 
1209    /* DMA temp regs. */
1210    pvr_pds_setup_doutu(&program.usc_task_control,
1211                        usc_addr.addr,
1212                        temps,
1213                        PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
1214                        false);
1215 
1216    pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);
1217 
1218    staging_buffer_size =
1219       (program.code_size + program.data_size) * sizeof(*staging_buffer);
1220 
1221    staging_buffer = vk_alloc(&device->vk.alloc,
1222                              staging_buffer_size,
1223                              8,
1224                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1225    if (!staging_buffer)
1226       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1227 
1228    /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
1229    pvr_pds_vertex_shader_sa(&program,
1230                             staging_buffer,
1231                             PDS_GENERATE_DATA_SEGMENT,
1232                             dev_info);
1233    pvr_pds_vertex_shader_sa(&program,
1234                             &staging_buffer[program.data_size],
1235                             PDS_GENERATE_CODE_SEGMENT,
1236                             dev_info);
1237 
1238    /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
1239     * is bigger so we handle it first (if needed) and realloc() for a smaller
1240     * size.
1241     */
1242    if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
1243       /* FIXME: Figure out the define for alignment of 16. */
1244       result = pvr_gpu_upload_pds(device,
1245                                   &staging_buffer[0],
1246                                   program.data_size,
1247                                   16,
1248                                   &staging_buffer[program.data_size],
1249                                   program.code_size,
1250                                   16,
1251                                   16,
1252                                   sw_compute_barrier_upload_out);
1253       if (result != VK_SUCCESS) {
1254          vk_free(&device->vk.alloc, staging_buffer);
1255          return result;
1256       }
1257 
1258       program.clear_pds_barrier = false;
1259 
1260       pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);
1261 
1262       staging_buffer_size =
1263          (program.code_size + program.data_size) * sizeof(*staging_buffer);
1264 
1265       staging_buffer = vk_realloc(&device->vk.alloc,
1266                                   staging_buffer,
1267                                   staging_buffer_size,
1268                                   8,
1269                                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1270       if (!staging_buffer) {
1271          pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo);
1272 
1273          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1274       }
1275 
1276       /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
1277       pvr_pds_vertex_shader_sa(&program,
1278                                staging_buffer,
1279                                PDS_GENERATE_DATA_SEGMENT,
1280                                dev_info);
1281       pvr_pds_vertex_shader_sa(&program,
1282                                &staging_buffer[program.data_size],
1283                                PDS_GENERATE_CODE_SEGMENT,
1284                                dev_info);
1285    } else {
1286       *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
1287          .pvr_bo = NULL,
1288       };
1289    }
1290 
1291    /* FIXME: Figure out the define for alignment of 16. */
1292    result = pvr_gpu_upload_pds(device,
1293                                &staging_buffer[0],
1294                                program.data_size,
1295                                16,
1296                                &staging_buffer[program.data_size],
1297                                program.code_size,
1298                                16,
1299                                16,
1300                                upload_out);
1301    if (result != VK_SUCCESS) {
1302       vk_free(&device->vk.alloc, staging_buffer);
1303       pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo);
1304 
1305       return result;
1306    }
1307 
1308    vk_free(&device->vk.alloc, staging_buffer);
1309 
1310    return VK_SUCCESS;
1311 }
1312 
pvr_device_init_compute_idfwdf_state(struct pvr_device * device)1313 static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
1314 {
1315    uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
1316    uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
1317    const struct rogue_shader_binary *usc_program;
1318    struct pvr_texture_state_info tex_info;
1319    uint32_t *dword_ptr;
1320    uint32_t usc_shareds;
1321    uint32_t usc_temps;
1322    VkResult result;
1323 
1324    pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
1325                                     &usc_program,
1326                                     &usc_shareds,
1327                                     &usc_temps);
1328 
1329    device->idfwdf_state.usc_shareds = usc_shareds;
1330 
1331    /* FIXME: Figure out the define for alignment of 16. */
1332    result = pvr_gpu_upload_usc(device,
1333                                usc_program->data,
1334                                usc_program->size,
1335                                16,
1336                                &device->idfwdf_state.usc);
1337    if (result != VK_SUCCESS)
1338       return result;
1339 
1340    /* TODO: Get the store buffer size from the compiler? */
1341    /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      /* TODO: Is this correct? Is it 2D, 3D, or 2D_ARRAY? */
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))

   /* TODO: Should we use compiler info to setup the shareds data instead of
    * assuming there's always 12 and this is how they should be setup?
    */
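   /* Shareds layout written below (12 dwords in total):
    *    [0..1]   store buffer device address (high, low)
    *    [2..3]   padding for 128-bit alignment of the state words
    *    [4..7]   image state words 0 and 1 (low, high each)
    *    [8..11]  sampler state words 0 and 1 (low, high each)
    */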

   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sample state words are 128 bit aligned. */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->vma->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_free(device, device->idfwdf_state.usc);

   return result;
}

static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_free(device, device->idfwdf_state.pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_free(device, device->idfwdf_state.usc);
}

/* FIXME: We should be calculating the size when we upload the code in
 * pvr_srv_setup_static_pixel_event_program().
 */
static void pvr_device_get_pixel_event_pds_program_data_size(
   const struct pvr_device_info *dev_info,
   uint32_t *const data_size_in_dwords_out)
{
   struct pvr_pds_event_program program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   *data_size_in_dwords_out = program.data_size;
}

static VkResult pvr_device_init_nop_program(struct pvr_device *device)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_pds_kickusc_program program = { 0 };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   result = pvr_gpu_upload_usc(device,
                               pvr_nop_usc_code,
                               sizeof(pvr_nop_usc_code),
                               cache_line_size,
                               &device->nop_program.usc);
   if (result != VK_SUCCESS)
      return result;

   /* Setup a PDS program that kicks the static USC program. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       device->nop_program.usc->vma->dev_addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_set_sizes_pixel_shader(&program);

   staging_buffer_size =
      (program.code_size + program.data_size) * sizeof(*staging_buffer);
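   /* The staging buffer holds the PDS data segment followed by the code
    * segment; pvr_gpu_upload_pds() below consumes them from the same
    * allocation at offsets 0 and program.data_size respectively.
    */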

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_free(device, device->nop_program.usc);

   return result;
}

static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      sampler.non_normalized_coords = true;
   }
}

VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   device = vk_alloc2(&pdevice->vk.instance->alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC);
   if (device->render_fd < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to open device %s",
                         pdevice->render_path);
      goto err_vk_device_finish;
   }

   if (pdevice->master_path)
      device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC);
   else
      device->master_fd = -1;

   vk_device_set_drm_fd(&device->vk, device->render_fd);

   device->instance = instance;
   device->pdevice = pdevice;

   device->ws = pvr_winsys_create(device->master_fd,
                                  device->render_fd,
                                  &device->vk.alloc);
   if (!device->ws) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_close_master_fd;
   }

   device->ws->ops->get_heaps_info(device->ws, &device->heaps);

   result = pvr_free_list_create(device,
                                 PVR_GLOBAL_FREE_LIST_INITIAL_SIZE,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   pvr_device_init_default_sampler_state(device);

   if (pCreateInfo->pEnabledFeatures)
      memcpy(&device->features,
             pCreateInfo->pEnabledFeatures,
             sizeof(device->features));

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_queue_job_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_free_compute_fence:
   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_free(device, device->nop_program.pds.pvr_bo);
   pvr_bo_free(device, device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(device->ws);

err_close_master_fd:
   if (device->master_fd >= 0)
      close(device->master_fd);

   close(device->render_fd);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

   return result;
}

void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   pvr_queues_destroy(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);
   pvr_bo_free(device, device->nop_program.pds.pvr_bo);
   pvr_bo_free(device, device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   pvr_winsys_destroy(device->ws);
   close(device->render_fd);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (!pProperties) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      default:
         pvr_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            pAllocateInfo->allocationSize,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
}

VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types having
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
       * dma-buf should be imported using cacheable memory types,
       * given exporter's mmap will always map it as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->pdevice->memory.memoryTypeCount) - 1;
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);

   if (!mem)
      return;

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
   void *map;

   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *    assert(size != 0);
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */

   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Check if already mapped */
   if (mem->bo->map) {
      *ppData = mem->bo->map + offset;
      return VK_SUCCESS;
   }
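   /* The mapping is created once and cached on the bo: subsequent map calls
    * re-derive the pointer from the cached mapping until pvr_UnmapMemory()
    * releases it.
    */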

   /* Map it all at once */
   map = device->ws->ops->buffer_map(mem->bo);
   if (!map)
      return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);

   *ppData = map + offset;

   return VK_SUCCESS;
}

void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (!mem || !mem->bo->map)
      return;

   device->ws->ops->buffer_unmap(mem->bo);
}

VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
   VkDeviceSize virt_size =
      size + (offset & (device->heaps.general_heap->page_size - 1));
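   /* E.g. with a 4KiB heap page size, binding size 0x2000 at offset 0x1800
    * gives virt_size = 0x2000 + 0x800 = 0x2800, so the VA reservation also
    * covers the bound region's sub-page offset within its first page.
    */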
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;

   /* Valid usage:
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetBufferMemoryRequirements with buffer"
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   vma = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                     virt_size,
                                     alignment);
   if (!vma)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size);
   if (!dev_addr.addr) {
      device->ws->ops->heap_free(vma);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
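         /* Unwind: unbind everything bound so far so a failed batch leaves
          * no partially-bound buffers behind.
          */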
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
            pvr_unbind_memory(device, buffer->vma);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   assert(!"Unimplemented");
}

VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

/* Buffer functions. */

VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t alignment = 4096;
   struct pvr_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
   if (pCreateInfo->size >= ULONG_MAX - alignment)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer =
      vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (!buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->alignment = alignment;

   *pBuffer = pvr_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (!buffer)
      return;

   if (buffer->vma)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_bo **const pvr_bo_out)
{
   struct pvr_bo *pvr_bo = NULL;
   VkResult result;

   assert(size > 0);

   result = pvr_bo_alloc(device,
                         heap,
                         size,
                         alignment,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map, data, size);
   pvr_bo_cpu_unmap(device, pvr_bo);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_bo **const pvr_bo_out)
{
   struct pvr_bo *pvr_bo = NULL;
   VkResult result;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over allocate by 1
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_alloc(device,
                         device->heaps.usc_heap,
                         code_size + ROGUE_MAX_INSTR_BYTES,
                         code_alignment,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map, code, code_size);
   pvr_bo_cpu_unmap(device, pvr_bo);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * \param[in] device            Logical device pointer.
 * \param[in] data              Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords  Size of PDS data segment in dwords.
 * \param[in] data_alignment    Required alignment of the PDS data segment in
 *                              bytes. Must be a power of two.
 * \param[in] code              Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords  Size of PDS code segment in dwords.
 * \param[in] code_alignment    Required alignment of the PDS code segment in
 *                              bytes. Must be a power of two.
 * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
 *                              program in bytes.
 * \param[out] pds_upload_out   On success will be initialized based on the
 *                              uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
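/* Resulting bo layout when both segments are supplied (a data-only upload
 * simply omits the code part):
 *
 *    +------------------------+ <- bo start, aligned to
 *    | data segment           |    MAX2(min_alignment, data_alignment)
 *    +------------------------+
 *    | pad to code_alignment  |
 *    +------------------------+ <- bo start + code_offset
 *    | code segment           |
 *    +------------------------+
 */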
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignment and sizes below are in bytes. */
   const size_t data_size = data_size_dwords * sizeof(*data);
   const size_t code_size = code_size_dwords * sizeof(*code);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
                                     : data_aligned_size;
   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED |
                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
   VkResult result;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));

   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         bo_size,
                         bo_alignment,
                         bo_flags,
                         &pds_upload_out->pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   if (data) {
      memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size);

      pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr -
                                    device->heaps.pds_heap->base_addr.addr;

      /* Store data size in dwords. */
      assert(data_aligned_size % 4 == 0);
      pds_upload_out->data_size = data_aligned_size / 4;
   } else {
      pds_upload_out->data_offset = 0;
      pds_upload_out->data_size = 0;
   }

   if (code) {
      memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset,
             code,
             code_size);

      pds_upload_out->code_offset =
         (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) -
         device->heaps.pds_heap->base_addr.addr;

      /* Store code size in dwords. */
      assert(code_aligned_size % 4 == 0);
      pds_upload_out->code_size = code_aligned_size / 4;
   } else {
      pds_upload_out->code_offset = 0;
      pds_upload_out->code_size = 0;
   }

   pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo);

   return VK_SUCCESS;
}

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
         1;
   }
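   /* clip_right/clip_bottom are in terminate-block units, inclusive. E.g. a
    * 1920 pixel wide framebuffer with a (hypothetical) 16 pixel block size
    * gives clip_right = DIV_ROUND_UP(1920, 16) - 1 = 119; the real block
    * sizes come from the csbgen headers.
    */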

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}

static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t i;

   for (i = 0; i < render_targets_count; i++) {
      if (pthread_mutex_init(&render_targets[i].mutex, NULL))
         goto err_mutex_destroy;
   }

   return true;

err_mutex_destroy:
   while (i--)
      pthread_mutex_destroy(&render_targets[i].mutex);

   return false;
}

static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t i = 0; i < render_targets_count; i++) {
      if (render_targets[i].valid) {
         pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
         render_targets[i].valid = false;
      }

      pthread_mutex_destroy(&render_targets[i].mutex);
   }
}

VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_free_ppp_state_bo:
   pvr_bo_free(device, framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);

   if (!framebuffer)
      return;

   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);
   pvr_bo_free(device, framebuffer->ppp_state_bo);
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it
    *         is linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    */
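   /* E.g. a loader advertising v5 is clamped to v4 below, while a loader
    * advertising v3 keeps v3.
    */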
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}

static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return PVRX(TEXSTATE_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return PVRX(TEXSTATE_FILTER_LINEAR);
   default:
      unreachable("Unknown filter type.");
   }
}

static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_REPEAT);
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_FLIP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
   default:
      unreachable("Invalid sampler address mode.");
   }
}

VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_sampler *sampler;
   float lod_rounding_bias;
   VkFilter min_filter;
   VkFilter mag_filter;
   float min_lod;
   float max_lod;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler = vk_object_alloc(&device->vk,
                             pAllocator,
                             sizeof(*sampler),
                             VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   mag_filter = pCreateInfo->magFilter;
   min_filter = pCreateInfo->minFilter;

   if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
      /* The min/mag filters may need adjustment here: the GPU decides which
       * of the two filters to use based on the clamped LOD value. LOD <= 0
       * implies magnification, while LOD > 0 implies minification.
       *
       * As a workaround, we override magFilter with minFilter if we know that
       * the magnification filter will never be used due to clamping anyway
       * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
       * if maxLod <= 0.
       */
      if (pCreateInfo->minLod > 0.0f) {
         /* The clamped LOD will always be positive => always minify. */
         mag_filter = pCreateInfo->minFilter;
      }

      if (pCreateInfo->maxLod <= 0.0f) {
         /* The clamped LOD will always be negative or zero => always
          * magnify.
          */
         min_filter = pCreateInfo->magFilter;
      }
   }

   if (pCreateInfo->compareEnable) {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
   } else {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
   }

   sampler->descriptor.data.word3 = 0;
   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
      const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
                                  (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
      const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x40000000;

      if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x20000000;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
      word.dadjust =
         PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
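      /* util_signed_fixed() converts the clamped bias to signed fixed point
       * with TEXSTATE_DADJUST_FRACTIONAL_BITS fractional bits. E.g. with a
       * (hypothetical) 8 fractional bits, a bias of +0.5 encodes as 128
       * above TEXSTATE_DADJUST_ZERO_UINT; the real bit count comes from the
       * csbgen headers.
       */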

      /* Anisotropy is not supported for now. */
      word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);

      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
         /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
          * selected by adding 0.5 and then truncating the input LOD value.
          * This hardware adds the 0.5 bias before clamping against
          * lodmin/lodmax, while Vulkan specifies the bias to be added after
          * clamping. We compensate for this difference by adding the 0.5
          * bias to the LOD bounds, too.
          */
         lod_rounding_bias = 0.5f;
      } else {
         lod_rounding_bias = 0.0f;
      }
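      /* E.g. on affected hardware a sampler with minLod = 0.0 and
       * maxLod = 1.0 is programmed with clamps of 0.5 and 1.5 below,
       * cancelling out the pre-clamp +0.5 rounding bias.
       */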

      min_lod = pCreateInfo->minLod + lod_rounding_bias;
      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      word.bordercolor_index = pCreateInfo->borderColor;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_object_free(&device->vk, pAllocator, sampler);
}

void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    */
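   /* E.g. memoryTypeCount == 3 yields memoryTypeBits == 0b111, i.e. the
    * buffer may be bound to memory of any of the three types.
    */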
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
   pMemoryRequirements->memoryRequirements.size =
      ALIGN_POT(buffer->vk.size, buffer->alignment);
}

void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    */
   const uint32_t memory_types =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
    * in GetImageMemoryRequirements()), but this should be known at image
    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      ALIGN(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}