1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #ifdef MAJOR_IN_MKDEV
29 #include <sys/mkdev.h>
30 #endif
31 #ifdef MAJOR_IN_SYSMACROS
32 #include <sys/sysmacros.h>
33 #endif
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <unistd.h>
37 #include <fcntl.h>
38 #include "drm-uapi/drm_fourcc.h"
39 #include "drm-uapi/drm.h"
40 #include <xf86drm.h>
41 
42 #include "anv_private.h"
43 #include "anv_measure.h"
44 #include "util/debug.h"
45 #include "util/build_id.h"
46 #include "util/disk_cache.h"
47 #include "util/mesa-sha1.h"
48 #include "util/os_file.h"
49 #include "util/os_misc.h"
50 #include "util/u_atomic.h"
51 #include "util/u_string.h"
52 #include "util/driconf.h"
53 #include "git_sha1.h"
54 #include "vk_util.h"
55 #include "vk_deferred_operation.h"
56 #include "vk_drm_syncobj.h"
57 #include "common/intel_aux_map.h"
58 #include "common/intel_defines.h"
59 #include "common/intel_uuid.h"
60 #include "perf/intel_perf.h"
61 
62 #include "genxml/gen7_pack.h"
63 #include "genxml/genX_bits.h"
64 
65 static const driOptionDescription anv_dri_options[] = {
66    DRI_CONF_SECTION_PERFORMANCE
67       DRI_CONF_ADAPTIVE_SYNC(true)
68       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
69       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
70       DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
71       DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
72       DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
73    DRI_CONF_SECTION_END
74 
75    DRI_CONF_SECTION_DEBUG
76       DRI_CONF_ALWAYS_FLUSH_CACHE(false)
77       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
78       DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
79    DRI_CONF_SECTION_END
80 };
81 
82 /* This is probably far too big, but it reflects the max size used for messages
83  * in OpenGL's KHR_debug.
84  */
85 #define MAX_DEBUG_MESSAGE_LENGTH    4096
86 
87 /* Render engine timestamp register */
88 #define TIMESTAMP 0x2358
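/* Probed below via anv_gem_reg_read(TIMESTAMP | I915_REG_READ_8B_WA) to decide
 * whether the GPU timestamp can be read from the CPU (has_reg_timestamp).
 */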
89 
90 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
91 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
92 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
93 #endif
94 
95 static void
96 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
97 {
98    char str[MAX_DEBUG_MESSAGE_LENGTH];
99    struct anv_device *device = (struct anv_device *)data;
100    UNUSED struct anv_instance *instance = device->physical->instance;
101 
102    va_list args;
103    va_start(args, fmt);
104    (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
105    va_end(args);
106 
107    //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
108 }
109 
110 static void
111 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
112 {
113    va_list args;
114    va_start(args, fmt);
115 
116    if (INTEL_DEBUG(DEBUG_PERF))
117       mesa_logd_v(fmt, args);
118 
119    va_end(args);
120 }
121 
122 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
123     defined(VK_USE_PLATFORM_XCB_KHR) || \
124     defined(VK_USE_PLATFORM_XLIB_KHR) || \
125     defined(VK_USE_PLATFORM_DISPLAY_KHR)
126 #define ANV_USE_WSI_PLATFORM
127 #endif
128 
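/* Maximum instance API version advertised by anv_EnumerateInstanceVersion()
 * below: Vulkan 1.1 on Android, Vulkan 1.3 elsewhere, with the current header
 * patch version.
 */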
129 #ifdef ANDROID
130 #define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
131 #else
132 #define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
133 #endif
134 
135 VkResult anv_EnumerateInstanceVersion(
136     uint32_t*                                   pApiVersion)
137 {
138     *pApiVersion = ANV_API_VERSION;
139     return VK_SUCCESS;
140 }
141 
142 static const struct vk_instance_extension_table instance_extensions = {
143    .KHR_device_group_creation                = true,
144    .KHR_external_fence_capabilities          = true,
145    .KHR_external_memory_capabilities         = true,
146    .KHR_external_semaphore_capabilities      = true,
147    .KHR_get_physical_device_properties2      = true,
148    .EXT_debug_report                         = true,
149    .EXT_debug_utils                          = true,
150 
151 #ifdef ANV_USE_WSI_PLATFORM
152    .KHR_get_surface_capabilities2            = true,
153    .KHR_surface                              = true,
154    .KHR_surface_protected_capabilities       = true,
155 #endif
156 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
157    .KHR_wayland_surface                      = true,
158 #endif
159 #ifdef VK_USE_PLATFORM_XCB_KHR
160    .KHR_xcb_surface                          = true,
161 #endif
162 #ifdef VK_USE_PLATFORM_XLIB_KHR
163    .KHR_xlib_surface                         = true,
164 #endif
165 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
166    .EXT_acquire_xlib_display                 = true,
167 #endif
168 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
169    .KHR_display                              = true,
170    .KHR_get_display_properties2              = true,
171    .EXT_direct_mode_display                  = true,
172    .EXT_display_surface_counter              = true,
173    .EXT_acquire_drm_display                  = true,
174 #endif
175 };
176 
177 static void
178 get_device_extensions(const struct anv_physical_device *device,
179                       struct vk_device_extension_table *ext)
180 {
181    const bool has_syncobj_wait =
182       (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
183 
184    const bool nv_mesh_shading_enabled =
185       env_var_as_boolean("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
186 
187    *ext = (struct vk_device_extension_table) {
188       .KHR_8bit_storage                      = device->info.ver >= 8,
189       .KHR_16bit_storage                     = device->info.ver >= 8,
190       .KHR_bind_memory2                      = true,
191       .KHR_buffer_device_address             = device->has_a64_buffer_access,
192       .KHR_copy_commands2                    = true,
193       .KHR_create_renderpass2                = true,
194       .KHR_dedicated_allocation              = true,
195       .KHR_deferred_host_operations          = true,
196       .KHR_depth_stencil_resolve             = true,
197       .KHR_descriptor_update_template        = true,
198       .KHR_device_group                      = true,
199       .KHR_draw_indirect_count               = true,
200       .KHR_driver_properties                 = true,
201       .KHR_dynamic_rendering                 = true,
202       .KHR_external_fence                    = has_syncobj_wait,
203       .KHR_external_fence_fd                 = has_syncobj_wait,
204       .KHR_external_memory                   = true,
205       .KHR_external_memory_fd                = true,
206       .KHR_external_semaphore                = true,
207       .KHR_external_semaphore_fd             = true,
208       .KHR_format_feature_flags2             = true,
209       .KHR_fragment_shading_rate             = device->info.ver >= 11,
210       .KHR_get_memory_requirements2          = true,
211       .KHR_image_format_list                 = true,
212       .KHR_imageless_framebuffer             = true,
213 #ifdef ANV_USE_WSI_PLATFORM
214       .KHR_incremental_present               = true,
215 #endif
216       .KHR_maintenance1                      = true,
217       .KHR_maintenance2                      = true,
218       .KHR_maintenance3                      = true,
219       .KHR_maintenance4                      = true,
220       .KHR_multiview                         = true,
221       .KHR_performance_query =
222          !anv_use_relocations(device) && device->perf &&
223          (device->perf->i915_perf_version >= 3 ||
224           INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
225          device->use_call_secondary,
226       .KHR_pipeline_executable_properties    = true,
227       .KHR_push_descriptor                   = true,
228       .KHR_ray_query                         = device->info.has_ray_tracing,
229       .KHR_relaxed_block_layout              = true,
230       .KHR_sampler_mirror_clamp_to_edge      = true,
231       .KHR_sampler_ycbcr_conversion          = true,
232       .KHR_separate_depth_stencil_layouts    = true,
233       .KHR_shader_atomic_int64               = device->info.ver >= 9,
234       .KHR_shader_clock                      = true,
235       .KHR_shader_draw_parameters            = true,
236       .KHR_shader_float16_int8               = device->info.ver >= 8,
237       .KHR_shader_float_controls             = device->info.ver >= 8,
238       .KHR_shader_integer_dot_product        = true,
239       .KHR_shader_non_semantic_info          = true,
240       .KHR_shader_subgroup_extended_types    = device->info.ver >= 8,
241       .KHR_shader_subgroup_uniform_control_flow = true,
242       .KHR_shader_terminate_invocation       = true,
243       .KHR_spirv_1_4                         = true,
244       .KHR_storage_buffer_storage_class      = true,
245 #ifdef ANV_USE_WSI_PLATFORM
246       .KHR_swapchain                         = true,
247       .KHR_swapchain_mutable_format          = true,
248 #endif
249       .KHR_synchronization2                  = true,
250       .KHR_timeline_semaphore                = true,
251       .KHR_uniform_buffer_standard_layout    = true,
252       .KHR_variable_pointers                 = true,
253       .KHR_vulkan_memory_model               = true,
254       .KHR_workgroup_memory_explicit_layout  = true,
255       .KHR_zero_initialize_workgroup_memory  = true,
256       .EXT_4444_formats                      = true,
257       .EXT_border_color_swizzle              = device->info.ver >= 8,
258       .EXT_buffer_device_address             = device->has_a64_buffer_access,
259       .EXT_calibrated_timestamps             = device->has_reg_timestamp,
260       .EXT_color_write_enable                = true,
261       .EXT_conditional_rendering             = device->info.verx10 >= 75,
262       .EXT_conservative_rasterization        = device->info.ver >= 9,
263       .EXT_custom_border_color               = device->info.ver >= 8,
264       .EXT_depth_clip_control                = true,
265       .EXT_depth_clip_enable                 = true,
266       .EXT_descriptor_indexing               = device->has_a64_buffer_access &&
267                                                device->has_bindless_images,
268 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
269       .EXT_display_control                   = true,
270 #endif
271       .EXT_extended_dynamic_state            = true,
272       .EXT_extended_dynamic_state2           = true,
273       .EXT_external_memory_dma_buf           = true,
274       .EXT_external_memory_host              = true,
275       .EXT_fragment_shader_interlock         = device->info.ver >= 9,
276       .EXT_global_priority                   = device->max_context_priority >=
277                                                INTEL_CONTEXT_MEDIUM_PRIORITY,
278       .EXT_global_priority_query             = device->max_context_priority >=
279                                                INTEL_CONTEXT_MEDIUM_PRIORITY,
280       .EXT_host_query_reset                  = true,
281       .EXT_image_2d_view_of_3d               = true,
282       .EXT_image_robustness                  = true,
283       .EXT_image_drm_format_modifier         = true,
284       .EXT_image_view_min_lod                = true,
285       .EXT_index_type_uint8                  = true,
286       .EXT_inline_uniform_block              = true,
287       .EXT_line_rasterization                = true,
288       /* Enable the extension only if we have support on both the local &
289        * system memory
290        */
291       .EXT_memory_budget                     = (!device->info.has_local_mem ||
292                                                 device->vram_mappable.available > 0) &&
293                                                device->sys.available,
294       .EXT_non_seamless_cube_map             = true,
295       .EXT_pci_bus_info                      = true,
296       .EXT_physical_device_drm               = true,
297       .EXT_pipeline_creation_cache_control   = true,
298       .EXT_pipeline_creation_feedback        = true,
299       .EXT_post_depth_coverage               = device->info.ver >= 9,
300       .EXT_primitives_generated_query        = true,
301       .EXT_primitive_topology_list_restart   = true,
302       .EXT_private_data                      = true,
303       .EXT_provoking_vertex                  = true,
304       .EXT_queue_family_foreign              = true,
305       .EXT_robustness2                       = true,
306       .EXT_sample_locations                  = true,
307       .EXT_sampler_filter_minmax             = device->info.ver >= 9,
308       .EXT_scalar_block_layout               = true,
309       .EXT_separate_stencil_usage            = true,
310       .EXT_shader_atomic_float               = true,
311       .EXT_shader_atomic_float2              = device->info.ver >= 9,
312       .EXT_shader_demote_to_helper_invocation = true,
313       .EXT_shader_module_identifier          = true,
314       .EXT_shader_stencil_export             = device->info.ver >= 9,
315       .EXT_shader_subgroup_ballot            = true,
316       .EXT_shader_subgroup_vote              = true,
317       .EXT_shader_viewport_index_layer       = true,
318       .EXT_subgroup_size_control             = true,
319       .EXT_texel_buffer_alignment            = true,
320       .EXT_tooling_info                      = true,
321       .EXT_transform_feedback                = true,
322       .EXT_vertex_attribute_divisor          = true,
323       .EXT_ycbcr_image_arrays                = true,
324 #ifdef ANDROID
325       .ANDROID_external_memory_android_hardware_buffer = true,
326       .ANDROID_native_buffer                 = true,
327 #endif
328       .GOOGLE_decorate_string                = true,
329       .GOOGLE_hlsl_functionality1            = true,
330       .GOOGLE_user_type                      = true,
331       .INTEL_performance_query               = device->perf &&
332                                                device->perf->i915_perf_version >= 3,
333       .INTEL_shader_integer_functions2       = device->info.ver >= 8,
334       .EXT_multi_draw                        = true,
335       .NV_compute_shader_derivatives         = true,
336       .NV_mesh_shader                        = device->info.has_mesh_shading &&
337                                                nv_mesh_shading_enabled,
338       .VALVE_mutable_descriptor_type         = true,
339    };
340 }
341 
342 static uint64_t
343 anv_compute_sys_heap_size(struct anv_physical_device *device,
344                           uint64_t total_ram)
345 {
346    /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
347     * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
348     */
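   /* For example, 16 GiB of total RAM yields a 12 GiB heap here, which may
    * still be clamped to 3/4 of the GTT size below.
    */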
349    uint64_t available_ram;
350    if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
351       available_ram = total_ram / 2;
352    else
353       available_ram = total_ram * 3 / 4;
354 
355    /* We also want to leave some padding for things we allocate in the driver,
356     * so don't go over 3/4 of the GTT either.
357     */
358    available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
359 
360    if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
361       /* When running with an overridden PCI ID, we may get a GTT size from
362        * the kernel that is greater than 2 GiB but the execbuf check for 48bit
363        * address support can still fail.  Just clamp the address space size to
364        * 2 GiB if we don't have 48-bit support.
365        */
366       mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
367                 "no support for 48-bit addresses",
368                 __FILE__, __LINE__);
369       available_ram = 2ull << 30;
370    }
371 
372    return available_ram;
373 }
374 
375 static VkResult MUST_CHECK
376 anv_init_meminfo(struct anv_physical_device *device, int fd)
377 {
378    const struct intel_device_info *devinfo = &device->info;
379 
380    device->sys.region.memory_class = devinfo->mem.sram.mem_class;
381    device->sys.region.memory_instance = devinfo->mem.sram.mem_instance;
382    device->sys.size =
383       anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
384    device->sys.available = devinfo->mem.sram.mappable.free;
385 
386    device->vram_mappable.region.memory_class = devinfo->mem.vram.mem_class;
387    device->vram_mappable.region.memory_instance =
388       devinfo->mem.vram.mem_instance;
389    device->vram_mappable.size = devinfo->mem.vram.mappable.size;
390    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
391 
392    device->vram_non_mappable.region.memory_class =
393       devinfo->mem.vram.mem_class;
394    device->vram_non_mappable.region.memory_instance =
395       devinfo->mem.vram.mem_instance;
396    device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
397    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
398 
399    return VK_SUCCESS;
400 }
401 
402 static void
403 anv_update_meminfo(struct anv_physical_device *device, int fd)
404 {
405    if (!intel_device_info_update_memory_info(&device->info, fd))
406       return;
407 
408    const struct intel_device_info *devinfo = &device->info;
409    device->sys.available = devinfo->mem.sram.mappable.free;
410    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
411    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
412 }
413 
414 
415 static VkResult
416 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
417 {
418    VkResult result = anv_init_meminfo(device, fd);
419    if (result != VK_SUCCESS)
420       return result;
421 
422    assert(device->sys.size != 0);
423 
424    if (anv_physical_device_has_vram(device)) {
425       /* We can create 2 or 3 different heaps when we have local memory
426        * support: the first heap has the local memory size, the second the
427        * system memory size, and a third heap is added only if part of the
428        * vram is not mappable to the host.
429        */
430       device->memory.heap_count = 2;
431       device->memory.heaps[0] = (struct anv_memory_heap) {
432          /* If there is a vram_non_mappable, use that for the device only
433           * heap. Otherwise use the vram_mappable.
434           */
435          .size = device->vram_non_mappable.size != 0 ?
436                  device->vram_non_mappable.size : device->vram_mappable.size,
437          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
438          .is_local_mem = true,
439       };
440       device->memory.heaps[1] = (struct anv_memory_heap) {
441          .size = device->sys.size,
442          .flags = 0,
443          .is_local_mem = false,
444       };
445       /* Add an additional smaller vram mappable heap if we can't map all the
446        * vram to the host.
447        */
448       if (device->vram_non_mappable.size > 0) {
449          device->memory.heap_count++;
450          device->memory.heaps[2] = (struct anv_memory_heap) {
451             .size = device->vram_mappable.size,
452             .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
453             .is_local_mem = true,
454          };
455       }
456 
457       device->memory.type_count = 3;
458       device->memory.types[0] = (struct anv_memory_type) {
459          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
460          .heapIndex = 0,
461       };
462       device->memory.types[1] = (struct anv_memory_type) {
463          .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
464                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
465                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
466          .heapIndex = 1,
467       };
468       device->memory.types[2] = (struct anv_memory_type) {
469          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
470                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
471                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
472          /* This memory type either comes from heaps[0] if there is only a
473           * mappable vram region, or from heaps[2] if there are both mappable
474           * and non-mappable vram regions.
475           */
476          .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
477       };
478    } else if (device->info.has_llc) {
479       device->memory.heap_count = 1;
480       device->memory.heaps[0] = (struct anv_memory_heap) {
481          .size = device->sys.size,
482          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
483          .is_local_mem = false,
484       };
485 
486       /* Big core GPUs share LLC with the CPU and thus one memory type can be
487        * both cached and coherent at the same time.
488        */
489       device->memory.type_count = 1;
490       device->memory.types[0] = (struct anv_memory_type) {
491          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
492                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
493                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
494                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
495          .heapIndex = 0,
496       };
497    } else {
498       device->memory.heap_count = 1;
499       device->memory.heaps[0] = (struct anv_memory_heap) {
500          .size = device->sys.size,
501          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
502          .is_local_mem = false,
503       };
504 
505       /* The spec requires that we expose a host-visible, coherent memory
506        * type, but Atom GPUs don't share LLC. Thus we offer two memory types
507        * to give the application a choice between cached but not coherent,
508        * and coherent but uncached (write-combined).
509        */
510       device->memory.type_count = 2;
511       device->memory.types[0] = (struct anv_memory_type) {
512          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
513                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
514                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
515          .heapIndex = 0,
516       };
517       device->memory.types[1] = (struct anv_memory_type) {
518          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
519                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
520                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
521          .heapIndex = 0,
522       };
523    }
524 
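   /* If any exposed memory type is host-visible but not host-coherent, the
    * driver has to flush CPU caches (clflush) for it manually.
    */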
525    device->memory.need_clflush = false;
526    for (unsigned i = 0; i < device->memory.type_count; i++) {
527       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
528       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
529           !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
530          device->memory.need_clflush = true;
531    }
532 
533    return VK_SUCCESS;
534 }
535 
536 static VkResult
537 anv_physical_device_init_uuids(struct anv_physical_device *device)
538 {
539    const struct build_id_note *note =
540       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
541    if (!note) {
542       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
543                        "Failed to find build-id");
544    }
545 
546    unsigned build_id_len = build_id_length(note);
547    if (build_id_len < 20) {
548       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
549                        "build-id too short.  It needs to be a SHA");
550    }
551 
552    memcpy(device->driver_build_sha1, build_id_data(note), 20);
553 
554    struct mesa_sha1 sha1_ctx;
555    uint8_t sha1[20];
556    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
557 
558    /* The pipeline cache UUID is used for determining when a pipeline cache is
559     * invalid.  It needs both a driver build and the PCI ID of the device.
560     */
561    _mesa_sha1_init(&sha1_ctx);
562    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
563    _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
564                      sizeof(device->info.pci_device_id));
565    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
566                      sizeof(device->always_use_bindless));
567    _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
568                      sizeof(device->has_a64_buffer_access));
569    _mesa_sha1_update(&sha1_ctx, &device->has_bindless_images,
570                      sizeof(device->has_bindless_images));
571    _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
572                      sizeof(device->has_bindless_samplers));
573    _mesa_sha1_final(&sha1_ctx, sha1);
574    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
575 
576    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
577    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
578 
579    return VK_SUCCESS;
580 }
581 
582 static void
583 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
584 {
585 #ifdef ENABLE_SHADER_CACHE
586    char renderer[10];
587    ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
588                                device->info.pci_device_id);
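   /* "anv_" plus a 4-hex-digit PCI device ID is 8 characters, so the 10-byte
    * buffer leaves room for the terminating NUL.
    */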
589    assert(len == sizeof(renderer) - 2);
590 
591    char timestamp[41];
592    _mesa_sha1_format(timestamp, device->driver_build_sha1);
593 
594    const uint64_t driver_flags =
595       brw_get_compiler_config_value(device->compiler);
596    device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
597 #endif
598 }
599 
600 static void
601 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
602 {
603 #ifdef ENABLE_SHADER_CACHE
604    if (device->vk.disk_cache) {
605       disk_cache_destroy(device->vk.disk_cache);
606       device->vk.disk_cache = NULL;
607    }
608 #else
609    assert(device->vk.disk_cache == NULL);
610 #endif
611 }
612 
613 /* The ANV_QUEUE_OVERRIDE environment variable is a comma-separated list of
614  * queue overrides.
615  *
616  * To override the number of queues:
617  *  * "gc" is for graphics queues with compute support
618  *  * "g" is for graphics queues with no compute support
619  *  * "c" is for compute queues with no graphics support
620  *
621  * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
622  * advertised queues to be 2 queues with graphics+compute support, and 1 queue
623  * with compute-only support.
624  *
625  * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
626  * include 1 queue with compute-only support, but it will not change the
627  * number of graphics+compute queues.
628  *
629  * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
630  * to include 1 queue with compute-only support, and it would override the
631  * number of graphics+compute queues to be 0.
632  */
633 static void
634 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
635 {
636    int gc_override = -1;
637    int g_override = -1;
638    int c_override = -1;
639    char *env = getenv("ANV_QUEUE_OVERRIDE");
640 
641    if (env == NULL)
642       return;
643 
644    env = strdup(env);
645    char *save = NULL;
646    char *next = strtok_r(env, ",", &save);
647    while (next != NULL) {
648       if (strncmp(next, "gc=", 3) == 0) {
649          gc_override = strtol(next + 3, NULL, 0);
650       } else if (strncmp(next, "g=", 2) == 0) {
651          g_override = strtol(next + 2, NULL, 0);
652       } else if (strncmp(next, "c=", 2) == 0) {
653          c_override = strtol(next + 2, NULL, 0);
654       } else {
655          mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
656       }
657       next = strtok_r(NULL, ",", &save);
658    }
659    free(env);
660    if (gc_override >= 0)
661       *gc_count = gc_override;
662    if (g_override >= 0)
663       *g_count = g_override;
664    if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
665       mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
666                 "Vulkan specification");
667    if (c_override >= 0)
668       *c_count = c_override;
669 }
670 
671 static void
672 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
673 {
674    uint32_t family_count = 0;
675 
676    if (pdevice->engine_info) {
677       int gc_count =
678          intel_gem_count_engines(pdevice->engine_info,
679                                  I915_ENGINE_CLASS_RENDER);
680       int g_count = 0;
681       int c_count = 0;
682       if (env_var_as_boolean("INTEL_COMPUTE_CLASS", false))
683          c_count = intel_gem_count_engines(pdevice->engine_info,
684                                            I915_ENGINE_CLASS_COMPUTE);
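      /* If no dedicated compute engines are exposed (or INTEL_COMPUTE_CLASS is
       * not set), any compute-only queues requested below land on the render
       * engine instead.
       */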
685       enum drm_i915_gem_engine_class compute_class =
686          c_count < 1 ? I915_ENGINE_CLASS_RENDER : I915_ENGINE_CLASS_COMPUTE;
687 
688       anv_override_engine_counts(&gc_count, &g_count, &c_count);
689 
690       if (gc_count > 0) {
691          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
692             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
693                           VK_QUEUE_COMPUTE_BIT |
694                           VK_QUEUE_TRANSFER_BIT,
695             .queueCount = gc_count,
696             .engine_class = I915_ENGINE_CLASS_RENDER,
697          };
698       }
699       if (g_count > 0) {
700          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
701             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
702                           VK_QUEUE_TRANSFER_BIT,
703             .queueCount = g_count,
704             .engine_class = I915_ENGINE_CLASS_RENDER,
705          };
706       }
707       if (c_count > 0) {
708          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
709             .queueFlags = VK_QUEUE_COMPUTE_BIT |
710                           VK_QUEUE_TRANSFER_BIT,
711             .queueCount = c_count,
712             .engine_class = compute_class,
713          };
714       }
715       /* When more queue families are added, bump the count in the assert
716        * below as a reminder to also increase the ANV_MAX_QUEUE_FAMILIES value.
717        */
718       STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
719    } else {
720       /* Default to a single render queue */
721       pdevice->queue.families[family_count++] = (struct anv_queue_family) {
722          .queueFlags = VK_QUEUE_GRAPHICS_BIT |
723                        VK_QUEUE_COMPUTE_BIT |
724                        VK_QUEUE_TRANSFER_BIT,
725          .queueCount = 1,
726          .engine_class = I915_ENGINE_CLASS_RENDER,
727       };
728       family_count = 1;
729    }
730    assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
731    pdevice->queue.family_count = family_count;
732 }
733 
734 static VkResult
735 anv_physical_device_try_create(struct anv_instance *instance,
736                                drmDevicePtr drm_device,
737                                struct anv_physical_device **device_out)
738 {
739    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
740    const char *path = drm_device->nodes[DRM_NODE_RENDER];
741    VkResult result;
742    int fd;
743    int master_fd = -1;
744 
745    brw_process_intel_debug_variable();
746 
747    fd = open(path, O_RDWR | O_CLOEXEC);
748    if (fd < 0) {
749       if (errno == ENOMEM) {
750          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
751                           "Unable to open device %s: out of memory", path);
752       }
753       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
754                        "Unable to open device %s: %m", path);
755    }
756 
757    struct intel_device_info devinfo;
758    if (!intel_get_device_info_from_fd(fd, &devinfo)) {
759       result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
760       goto fail_fd;
761    }
762 
763    bool is_alpha = true;
764    if (devinfo.platform == INTEL_PLATFORM_HSW) {
765       mesa_logw("Haswell Vulkan support is incomplete");
766    } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
767       mesa_logw("Ivy Bridge Vulkan support is incomplete");
768    } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
769       mesa_logw("Bay Trail Vulkan support is incomplete");
770    } else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
771       /* Gfx8-12 fully supported */
772       is_alpha = false;
773    } else {
774       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
775                          "Vulkan not yet supported on %s", devinfo.name);
776       goto fail_fd;
777    }
778 
779    struct anv_physical_device *device =
780       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
781                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
782    if (device == NULL) {
783       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
784       goto fail_fd;
785    }
786 
787    struct vk_physical_device_dispatch_table dispatch_table;
788    vk_physical_device_dispatch_table_from_entrypoints(
789       &dispatch_table, &anv_physical_device_entrypoints, true);
790    vk_physical_device_dispatch_table_from_entrypoints(
791       &dispatch_table, &wsi_physical_device_entrypoints, false);
792 
793    result = vk_physical_device_init(&device->vk, &instance->vk,
794                                     NULL, /* We set up extensions later */
795                                     &dispatch_table);
796    if (result != VK_SUCCESS) {
797       vk_error(instance, result);
798       goto fail_alloc;
799    }
800    device->instance = instance;
801 
802    assert(strlen(path) < ARRAY_SIZE(device->path));
803    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
804 
805    device->info = devinfo;
806    device->is_alpha = is_alpha;
807 
808    device->cmd_parser_version = -1;
809    if (device->info.ver == 7) {
810       device->cmd_parser_version =
811          anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
812       if (device->cmd_parser_version == -1) {
813          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
814                             "failed to get command parser version");
815          goto fail_base;
816       }
817    }
818 
819    if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
820       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
821                          "kernel missing gem wait");
822       goto fail_base;
823    }
824 
825    if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
826       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
827                          "kernel missing execbuf2");
828       goto fail_base;
829    }
830 
831    if (!device->info.has_llc &&
832        anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
833       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
834                          "kernel missing wc mmap");
835       goto fail_base;
836    }
837 
838    device->use_relocations = device->info.ver < 8 ||
839                              device->info.platform == INTEL_PLATFORM_CHV;
840 
841    if (!device->use_relocations &&
842        !anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)) {
843       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
844                          "kernel missing softpin");
845       goto fail_base;
846    }
847 
848    if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY)) {
849       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
850                          "kernel missing syncobj support");
851       goto fail_base;
852    }
853 
854    device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
855    device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
856 
857    /* Start with medium; sorted low to high */
858    const int priorities[] = {
859       INTEL_CONTEXT_MEDIUM_PRIORITY,
860       INTEL_CONTEXT_HIGH_PRIORITY,
861       INTEL_CONTEXT_REALTIME_PRIORITY,
862    };
863    device->max_context_priority = INT_MIN;
864    for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
865       if (!anv_gem_has_context_priority(fd, priorities[i]))
866          break;
867       device->max_context_priority = priorities[i];
868    }
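   /* max_context_priority now holds the highest priority the kernel accepted,
    * or INT_MIN if not even medium priority is supported.
    */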
869 
870    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
871                                               device->info.aperture_bytes;
872 
873    /* We only allow 48-bit addresses with softpin because knowing the actual
874     * address is required for the vertex cache flush workaround.
875     */
876    device->supports_48bit_addresses = (device->info.ver >= 8) &&
877                                       device->gtt_size > (4ULL << 30 /* GiB */);
878 
879    /* Initialize memory regions struct to 0. */
880    memset(&device->vram_non_mappable, 0, sizeof(device->vram_non_mappable));
881    memset(&device->vram_mappable, 0, sizeof(device->vram_mappable));
882    memset(&device->sys, 0, sizeof(device->sys));
883 
884    result = anv_physical_device_init_heaps(device, fd);
885    if (result != VK_SUCCESS)
886       goto fail_base;
887 
888    assert(device->supports_48bit_addresses == !device->use_relocations);
889    device->use_softpin = !device->use_relocations;
890 
891    device->has_context_isolation =
892       anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);
893 
894    device->has_exec_timeline =
895       anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES);
896    if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
897       device->has_exec_timeline = false;
898 
899    unsigned st_idx = 0;
900 
901    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
902    if (!device->has_exec_timeline)
903       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
904    device->sync_types[st_idx++] = &device->sync_syncobj_type;
905 
906    if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
907       device->sync_types[st_idx++] = &anv_bo_sync_type;
908 
909    if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
910       device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
911       device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
912    }
913 
914    device->sync_types[st_idx++] = NULL;
915    assert(st_idx <= ARRAY_SIZE(device->sync_types));
916    device->vk.supported_sync_types = device->sync_types;
917 
918    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
919 
920    device->always_use_bindless =
921       env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
922 
923    device->use_call_secondary =
924       device->use_softpin &&
925       !env_var_as_boolean("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
926 
927    /* We first got the A64 messages on Broadwell, and we can only use them if
928     * we can pass addresses directly into the shader, which requires softpin.
929     */
930    device->has_a64_buffer_access = device->info.ver >= 8 &&
931                                    device->use_softpin;
932 
933    /* We first get bindless image access on Skylake.
934     */
935    device->has_bindless_images = device->info.ver >= 9;
936 
937    /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
938     * because it's just a matter of setting the sampler address in the sample
939     * message header.  However, we've not bothered to wire it up for vec4 so
940     * we leave it disabled on gfx7.
941     */
942    device->has_bindless_samplers = device->info.ver >= 8;
943 
944    device->has_implicit_ccs = device->info.has_aux_map ||
945                               device->info.verx10 >= 125;
946 
947    /* Check if we can read the GPU timestamp register from the CPU */
948    uint64_t u64_ignore;
949    device->has_reg_timestamp = anv_gem_reg_read(fd, TIMESTAMP | I915_REG_READ_8B_WA,
950                                                 &u64_ignore) == 0;
951 
952    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
953       driQueryOptionb(&instance->dri_options, "always_flush_cache");
954 
955    device->has_mmap_offset =
956       anv_gem_get_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;
957 
958    device->has_userptr_probe =
959       anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE);
960 
961    device->compiler = brw_compiler_create(NULL, &device->info);
962    if (device->compiler == NULL) {
963       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
964       goto fail_base;
965    }
966    device->compiler->shader_debug_log = compiler_debug_log;
967    device->compiler->shader_perf_log = compiler_perf_log;
968    device->compiler->constant_buffer_0_is_relative =
969       device->info.ver < 8 || !device->has_context_isolation;
970    device->compiler->supports_shader_constants = true;
971    device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
972 
973    isl_device_init(&device->isl_dev, &device->info);
974 
975    result = anv_physical_device_init_uuids(device);
976    if (result != VK_SUCCESS)
977       goto fail_compiler;
978 
979    anv_physical_device_init_disk_cache(device);
980 
981    if (instance->vk.enabled_extensions.KHR_display) {
982       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
983       if (master_fd >= 0) {
984          /* prod the device with a GETPARAM call which will fail if
985           * we don't have permission to even render on this device
986           */
987          if (anv_gem_get_param(master_fd, I915_PARAM_CHIPSET_ID) == 0) {
988             close(master_fd);
989             master_fd = -1;
990          }
991       }
992    }
993    device->master_fd = master_fd;
994 
995    device->engine_info = anv_gem_get_engine_info(fd);
996    anv_physical_device_init_queue_families(device);
997 
998    device->local_fd = fd;
999 
1000    anv_physical_device_init_perf(device, fd);
1001 
1002    get_device_extensions(device, &device->vk.supported_extensions);
1003 
1004    result = anv_init_wsi(device);
1005    if (result != VK_SUCCESS)
1006       goto fail_perf;
1007 
1008    anv_measure_device_init(device);
1009 
1010    anv_genX(&device->info, init_physical_device_state)(device);
1011 
1012    *device_out = device;
1013 
1014    struct stat st;
1015 
1016    if (stat(primary_path, &st) == 0) {
1017       device->has_master = true;
1018       device->master_major = major(st.st_rdev);
1019       device->master_minor = minor(st.st_rdev);
1020    } else {
1021       device->has_master = false;
1022       device->master_major = 0;
1023       device->master_minor = 0;
1024    }
1025 
1026    if (stat(path, &st) == 0) {
1027       device->has_local = true;
1028       device->local_major = major(st.st_rdev);
1029       device->local_minor = minor(st.st_rdev);
1030    } else {
1031       device->has_local = false;
1032       device->local_major = 0;
1033       device->local_minor = 0;
1034    }
1035 
1036    return VK_SUCCESS;
1037 
1038 fail_perf:
1039    ralloc_free(device->perf);
1040    free(device->engine_info);
1041    anv_physical_device_free_disk_cache(device);
1042 fail_compiler:
1043    ralloc_free(device->compiler);
1044 fail_base:
1045    vk_physical_device_finish(&device->vk);
1046 fail_alloc:
1047    vk_free(&instance->vk.alloc, device);
1048 fail_fd:
1049    close(fd);
1050    if (master_fd != -1)
1051       close(master_fd);
1052    return result;
1053 }
1054 
1055 static void
1056 anv_physical_device_destroy(struct anv_physical_device *device)
1057 {
1058    anv_finish_wsi(device);
1059    anv_measure_device_destroy(device);
1060    free(device->engine_info);
1061    anv_physical_device_free_disk_cache(device);
1062    ralloc_free(device->compiler);
1063    ralloc_free(device->perf);
1064    close(device->local_fd);
1065    if (device->master_fd >= 0)
1066       close(device->master_fd);
1067    vk_physical_device_finish(&device->vk);
1068    vk_free(&device->instance->vk.alloc, device);
1069 }
1070 
1071 VkResult anv_EnumerateInstanceExtensionProperties(
1072     const char*                                 pLayerName,
1073     uint32_t*                                   pPropertyCount,
1074     VkExtensionProperties*                      pProperties)
1075 {
1076    if (pLayerName)
1077       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1078 
1079    return vk_enumerate_instance_extension_properties(
1080       &instance_extensions, pPropertyCount, pProperties);
1081 }
1082 
1083 static void
1084 anv_init_dri_options(struct anv_instance *instance)
1085 {
1086    driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1087                       ARRAY_SIZE(anv_dri_options));
1088    driParseConfigFiles(&instance->dri_options,
1089                        &instance->available_dri_options, 0, "anv", NULL, NULL,
1090                        instance->vk.app_info.app_name,
1091                        instance->vk.app_info.app_version,
1092                        instance->vk.app_info.engine_name,
1093                        instance->vk.app_info.engine_version);
1094 
1095    instance->assume_full_subgroups =
1096       driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
1097    instance->limit_trig_input_range =
1098       driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1099    instance->sample_mask_out_opengl_behaviour =
1100       driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1101 }
1102 
1103 VkResult anv_CreateInstance(
1104     const VkInstanceCreateInfo*                 pCreateInfo,
1105     const VkAllocationCallbacks*                pAllocator,
1106     VkInstance*                                 pInstance)
1107 {
1108    struct anv_instance *instance;
1109    VkResult result;
1110 
1111    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1112 
1113    if (pAllocator == NULL)
1114       pAllocator = vk_default_allocator();
1115 
1116    instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1117                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1118    if (!instance)
1119       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1120 
1121    struct vk_instance_dispatch_table dispatch_table;
1122    vk_instance_dispatch_table_from_entrypoints(
1123       &dispatch_table, &anv_instance_entrypoints, true);
1124    vk_instance_dispatch_table_from_entrypoints(
1125       &dispatch_table, &wsi_instance_entrypoints, false);
1126 
1127    result = vk_instance_init(&instance->vk, &instance_extensions,
1128                              &dispatch_table, pCreateInfo, pAllocator);
1129    if (result != VK_SUCCESS) {
1130       vk_free(pAllocator, instance);
1131       return vk_error(NULL, result);
1132    }
1133 
1134    instance->physical_devices_enumerated = false;
1135    list_inithead(&instance->physical_devices);
1136 
1137    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1138 
1139    anv_init_dri_options(instance);
1140 
1141    intel_driver_ds_init();
1142 
1143    *pInstance = anv_instance_to_handle(instance);
1144 
1145    return VK_SUCCESS;
1146 }
1147 
1148 void anv_DestroyInstance(
1149     VkInstance                                  _instance,
1150     const VkAllocationCallbacks*                pAllocator)
1151 {
1152    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1153 
1154    if (!instance)
1155       return;
1156 
1157    list_for_each_entry_safe(struct anv_physical_device, pdevice,
1158                             &instance->physical_devices, link)
1159       anv_physical_device_destroy(pdevice);
1160 
1161    VG(VALGRIND_DESTROY_MEMPOOL(instance));
1162 
1163    driDestroyOptionCache(&instance->dri_options);
1164    driDestroyOptionInfo(&instance->available_dri_options);
1165 
1166    vk_instance_finish(&instance->vk);
1167    vk_free(&instance->vk.alloc, instance);
1168 }
1169 
1170 static VkResult
1171 anv_enumerate_physical_devices(struct anv_instance *instance)
1172 {
1173    if (instance->physical_devices_enumerated)
1174       return VK_SUCCESS;
1175 
1176    instance->physical_devices_enumerated = true;
1177 
1178    /* TODO: Check for more devices ? */
1179    drmDevicePtr devices[8];
1180    int max_devices;
1181 
1182    max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
1183    if (max_devices < 1)
1184       return VK_SUCCESS;
1185 
1186    VkResult result = VK_SUCCESS;
1187    for (unsigned i = 0; i < (unsigned)max_devices; i++) {
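      /* Only consider render-capable PCI devices with the Intel vendor ID
       * (0x8086).
       */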
1188       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
1189           devices[i]->bustype == DRM_BUS_PCI &&
1190           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
1191 
1192          struct anv_physical_device *pdevice;
1193          result = anv_physical_device_try_create(instance, devices[i],
1194                                                  &pdevice);
1195          /* Incompatible DRM device, skip. */
1196          if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
1197             result = VK_SUCCESS;
1198             continue;
1199          }
1200 
1201          /* Error creating the physical device, report the error. */
1202          if (result != VK_SUCCESS)
1203             break;
1204 
1205          list_addtail(&pdevice->link, &instance->physical_devices);
1206       }
1207    }
1208    drmFreeDevices(devices, max_devices);
1209 
1210    /* If we successfully enumerated any devices, call it success */
1211    return result;
1212 }
1213 
1214 VkResult anv_EnumeratePhysicalDevices(
1215     VkInstance                                  _instance,
1216     uint32_t*                                   pPhysicalDeviceCount,
1217     VkPhysicalDevice*                           pPhysicalDevices)
1218 {
1219    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1220    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
1221                           pPhysicalDevices, pPhysicalDeviceCount);
1222 
1223    VkResult result = anv_enumerate_physical_devices(instance);
1224    if (result != VK_SUCCESS)
1225       return result;
1226 
1227    list_for_each_entry(struct anv_physical_device, pdevice,
1228                        &instance->physical_devices, link) {
1229       vk_outarray_append_typed(VkPhysicalDevice, &out, i) {
1230          *i = anv_physical_device_to_handle(pdevice);
1231       }
1232    }
1233 
1234    return vk_outarray_status(&out);
1235 }
1236 
1237 VkResult anv_EnumeratePhysicalDeviceGroups(
1238     VkInstance                                  _instance,
1239     uint32_t*                                   pPhysicalDeviceGroupCount,
1240     VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
1241 {
1242    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1243    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
1244                           pPhysicalDeviceGroupProperties,
1245                           pPhysicalDeviceGroupCount);
1246 
1247    VkResult result = anv_enumerate_physical_devices(instance);
1248    if (result != VK_SUCCESS)
1249       return result;
1250 
1251    list_for_each_entry(struct anv_physical_device, pdevice,
1252                        &instance->physical_devices, link) {
1253       vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
1254          p->physicalDeviceCount = 1;
1255          memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
1256          p->physicalDevices[0] = anv_physical_device_to_handle(pdevice);
1257          p->subsetAllocation = false;
1258 
1259          vk_foreach_struct(ext, p->pNext)
1260             anv_debug_ignored_stype(ext->sType);
1261       }
1262    }
1263 
1264    return vk_outarray_status(&out);
1265 }
1266 
1267 void anv_GetPhysicalDeviceFeatures(
1268     VkPhysicalDevice                            physicalDevice,
1269     VkPhysicalDeviceFeatures*                   pFeatures)
1270 {
1271    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1272 
1273    /* Just pick one; they're all the same */
1274    const bool has_astc_ldr =
1275       isl_format_supports_sampling(&pdevice->info,
1276                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
1277 
1278    *pFeatures = (VkPhysicalDeviceFeatures) {
1279       .robustBufferAccess                       = true,
1280       .fullDrawIndexUint32                      = true,
1281       .imageCubeArray                           = true,
1282       .independentBlend                         = true,
1283       .geometryShader                           = true,
1284       .tessellationShader                       = true,
1285       .sampleRateShading                        = true,
1286       .dualSrcBlend                             = true,
1287       .logicOp                                  = true,
1288       .multiDrawIndirect                        = true,
1289       .drawIndirectFirstInstance                = true,
1290       .depthClamp                               = true,
1291       .depthBiasClamp                           = true,
1292       .fillModeNonSolid                         = true,
1293       .depthBounds                              = pdevice->info.ver >= 12,
1294       .wideLines                                = true,
1295       .largePoints                              = true,
1296       .alphaToOne                               = true,
1297       .multiViewport                            = true,
1298       .samplerAnisotropy                        = true,
1299       .textureCompressionETC2                   = pdevice->info.ver >= 8 ||
1300                                                   pdevice->info.platform == INTEL_PLATFORM_BYT,
1301       .textureCompressionASTC_LDR               = has_astc_ldr,
1302       .textureCompressionBC                     = true,
1303       .occlusionQueryPrecise                    = true,
1304       .pipelineStatisticsQuery                  = true,
1305       .fragmentStoresAndAtomics                 = true,
1306       .shaderTessellationAndGeometryPointSize   = true,
1307       .shaderImageGatherExtended                = true,
1308       .shaderStorageImageExtendedFormats        = true,
1309       .shaderStorageImageMultisample            = false,
1310       .shaderStorageImageReadWithoutFormat      = false,
1311       .shaderStorageImageWriteWithoutFormat     = true,
1312       .shaderUniformBufferArrayDynamicIndexing  = true,
1313       .shaderSampledImageArrayDynamicIndexing   = true,
1314       .shaderStorageBufferArrayDynamicIndexing  = true,
1315       .shaderStorageImageArrayDynamicIndexing   = true,
1316       .shaderClipDistance                       = true,
1317       .shaderCullDistance                       = true,
1318       .shaderFloat64                            = pdevice->info.ver >= 8 &&
1319                                                   pdevice->info.has_64bit_float,
1320       .shaderInt64                              = pdevice->info.ver >= 8,
1321       .shaderInt16                              = pdevice->info.ver >= 8,
1322       .shaderResourceMinLod                     = pdevice->info.ver >= 9,
1323       .variableMultisampleRate                  = true,
1324       .inheritedQueries                         = true,
1325    };
1326 
1327    /* We can't do image stores in vec4 shaders */
1328    pFeatures->vertexPipelineStoresAndAtomics =
1329       pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
1330       pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
1331 
1332    struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
1333 
1334    /* The new DOOM and Wolfenstein games require depthBounds without
1335     * checking for it.  They seem to run fine without it so just claim it's
1336     * there and accept the consequences.
1337     */
1338    if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
1339       pFeatures->depthBounds = true;
1340 }
1341 
1342 static void
1343 anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
1344                                      VkPhysicalDeviceVulkan11Features *f)
1345 {
1346    assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
1347 
1348    f->storageBuffer16BitAccess            = pdevice->info.ver >= 8;
1349    f->uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8;
1350    f->storagePushConstant16               = pdevice->info.ver >= 8;
1351    f->storageInputOutput16                = false;
1352    f->multiview                           = true;
1353    f->multiviewGeometryShader             = true;
1354    f->multiviewTessellationShader         = true;
1355    f->variablePointersStorageBuffer       = true;
1356    f->variablePointers                    = true;
1357    f->protectedMemory                     = false;
1358    f->samplerYcbcrConversion              = true;
1359    f->shaderDrawParameters                = true;
1360 }
1361 
1362 static void
1363 anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
1364                                      VkPhysicalDeviceVulkan12Features *f)
1365 {
1366    assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
1367 
1368    f->samplerMirrorClampToEdge            = true;
1369    f->drawIndirectCount                   = true;
1370    f->storageBuffer8BitAccess             = pdevice->info.ver >= 8;
1371    f->uniformAndStorageBuffer8BitAccess   = pdevice->info.ver >= 8;
1372    f->storagePushConstant8                = pdevice->info.ver >= 8;
1373    f->shaderBufferInt64Atomics            = pdevice->info.ver >= 9;
1374    f->shaderSharedInt64Atomics            = false;
1375    f->shaderFloat16                       = pdevice->info.ver >= 8;
1376    f->shaderInt8                          = pdevice->info.ver >= 8;
1377 
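   /* Full descriptor indexing is only advertised when the device has both
    * A64 (global address) buffer access and bindless image handles;
    * update-after-bind and non-uniform indexing rely on bindless descriptors
    * rather than the fixed binding tables.
    */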
1378    bool descIndexing = pdevice->has_a64_buffer_access &&
1379                        pdevice->has_bindless_images;
1380    f->descriptorIndexing                                 = descIndexing;
1381    f->shaderInputAttachmentArrayDynamicIndexing          = false;
1382    f->shaderUniformTexelBufferArrayDynamicIndexing       = descIndexing;
1383    f->shaderStorageTexelBufferArrayDynamicIndexing       = descIndexing;
1384    f->shaderUniformBufferArrayNonUniformIndexing         = false;
1385    f->shaderSampledImageArrayNonUniformIndexing          = descIndexing;
1386    f->shaderStorageBufferArrayNonUniformIndexing         = descIndexing;
1387    f->shaderStorageImageArrayNonUniformIndexing          = descIndexing;
1388    f->shaderInputAttachmentArrayNonUniformIndexing       = false;
1389    f->shaderUniformTexelBufferArrayNonUniformIndexing    = descIndexing;
1390    f->shaderStorageTexelBufferArrayNonUniformIndexing    = descIndexing;
1391    f->descriptorBindingUniformBufferUpdateAfterBind      = descIndexing;
1392    f->descriptorBindingSampledImageUpdateAfterBind       = descIndexing;
1393    f->descriptorBindingStorageImageUpdateAfterBind       = descIndexing;
1394    f->descriptorBindingStorageBufferUpdateAfterBind      = descIndexing;
1395    f->descriptorBindingUniformTexelBufferUpdateAfterBind = descIndexing;
1396    f->descriptorBindingStorageTexelBufferUpdateAfterBind = descIndexing;
1397    f->descriptorBindingUpdateUnusedWhilePending          = descIndexing;
1398    f->descriptorBindingPartiallyBound                    = descIndexing;
1399    f->descriptorBindingVariableDescriptorCount           = descIndexing;
1400    f->runtimeDescriptorArray                             = descIndexing;
1401 
1402    f->samplerFilterMinmax                 = pdevice->info.ver >= 9;
1403    f->scalarBlockLayout                   = true;
1404    f->imagelessFramebuffer                = true;
1405    f->uniformBufferStandardLayout         = true;
1406    f->shaderSubgroupExtendedTypes         = true;
1407    f->separateDepthStencilLayouts         = true;
1408    f->hostQueryReset                      = true;
1409    f->timelineSemaphore                   = true;
1410    f->bufferDeviceAddress                 = pdevice->has_a64_buffer_access;
1411    f->bufferDeviceAddressCaptureReplay    = pdevice->has_a64_buffer_access;
1412    f->bufferDeviceAddressMultiDevice      = false;
1413    f->vulkanMemoryModel                   = true;
1414    f->vulkanMemoryModelDeviceScope        = true;
1415    f->vulkanMemoryModelAvailabilityVisibilityChains = true;
1416    f->shaderOutputViewportIndex           = true;
1417    f->shaderOutputLayer                   = true;
1418    f->subgroupBroadcastDynamicId          = true;
1419 }
1420 
1421 static void
1422 anv_get_physical_device_features_1_3(struct anv_physical_device *pdevice,
1423                                      VkPhysicalDeviceVulkan13Features *f)
1424 {
1425    assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);
1426 
1427    f->robustImageAccess = true;
1428    f->inlineUniformBlock = true;
1429    f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
1430    f->pipelineCreationCacheControl = true;
1431    f->privateData = true;
1432    f->shaderDemoteToHelperInvocation = true;
1433    f->shaderTerminateInvocation = true;
1434    f->subgroupSizeControl = true;
1435    f->computeFullSubgroups = true;
1436    f->synchronization2 = true;
1437    f->textureCompressionASTC_HDR = false;
1438    f->shaderZeroInitializeWorkgroupMemory = true;
1439    f->dynamicRendering = true;
1440    f->shaderIntegerDotProduct = true;
1441    f->maintenance4 = true;
1442 }
1443 
1444 void anv_GetPhysicalDeviceFeatures2(
1445     VkPhysicalDevice                            physicalDevice,
1446     VkPhysicalDeviceFeatures2*                  pFeatures)
1447 {
1448    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1449    anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
1450 
1451    VkPhysicalDeviceVulkan11Features core_1_1 = {
1452       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
1453    };
1454    anv_get_physical_device_features_1_1(pdevice, &core_1_1);
1455 
1456    VkPhysicalDeviceVulkan12Features core_1_2 = {
1457       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
1458    };
1459    anv_get_physical_device_features_1_2(pdevice, &core_1_2);
1460 
1461    VkPhysicalDeviceVulkan13Features core_1_3 = {
1462       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
1463    };
1464    anv_get_physical_device_features_1_3(pdevice, &core_1_3);
1465 
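   /* Walk the pNext chain: extension structs that alias core 1.1/1.2/1.3
    * features are filled from the structs above by the
    * vk_get_physical_device_core_*_feature_ext() helpers; everything else is
    * handled in the switch below.
    */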
1466    vk_foreach_struct(ext, pFeatures->pNext) {
1467       if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
1468          continue;
1469       if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
1470          continue;
1471       if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
1472          continue;
1473 
1474       switch (ext->sType) {
1475       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
1476          VkPhysicalDevice4444FormatsFeaturesEXT *features =
1477             (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
1478          features->formatA4R4G4B4 = true;
1479          features->formatA4B4G4R4 = false;
1480          break;
1481       }
1482 
1483       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
1484          VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
1485          features->accelerationStructure = false;
1486          features->accelerationStructureCaptureReplay = false;
1487          features->accelerationStructureIndirectBuild = false;
1488          features->accelerationStructureHostCommands = false;
1489          features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
1490          break;
1491       }
1492 
1493       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
1494          VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
1495          features->bufferDeviceAddress = pdevice->has_a64_buffer_access;
1496          features->bufferDeviceAddressCaptureReplay = false;
1497          features->bufferDeviceAddressMultiDevice = false;
1498          break;
1499       }
1500 
1501       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
1502          VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features =
1503             (VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *)ext;
1504          features->borderColorSwizzle = true;
1505          features->borderColorSwizzleFromImage = true;
1506          break;
1507       }
1508 
1509       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
1510          VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
1511             (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
1512          features->colorWriteEnable = true;
1513          break;
1514       }
1515 
1516       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
1517          VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
1518             (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
1519          features->image2DViewOf3D = true;
1520          features->sampler2DViewOf3D = pdevice->info.ver >= 9;
1521          break;
1522       }
1523 
1524       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
1525          VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
1526             (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
1527          features->computeDerivativeGroupQuads = true;
1528          features->computeDerivativeGroupLinear = true;
1529          break;
1530       }
1531 
1532       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
1533          VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
1534             (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
1535          features->conditionalRendering = pdevice->info.verx10 >= 75;
1536          features->inheritedConditionalRendering = pdevice->info.verx10 >= 75;
1537          break;
1538       }
1539 
1540       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1541          VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
1542             (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
1543          features->customBorderColors = pdevice->info.ver >= 8;
1544          features->customBorderColorWithoutFormat = pdevice->info.ver >= 8;
1545          break;
1546       }
1547 
1548       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
1549          VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
1550             (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
1551          features->depthClipEnable = true;
1552          break;
1553       }
1554 
1555       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
1556          VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
1557             (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
1558          features->fragmentShaderSampleInterlock = pdevice->info.ver >= 9;
1559          features->fragmentShaderPixelInterlock = pdevice->info.ver >= 9;
1560          features->fragmentShaderShadingRateInterlock = false;
1561          break;
1562       }
1563 
1564       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR: {
1565          VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *features =
1566             (VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *)ext;
1567          features->globalPriorityQuery = true;
1568          break;
1569       }
1570 
1571       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
1572          VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
1573             (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
1574          features->attachmentFragmentShadingRate = false;
1575          features->pipelineFragmentShadingRate = true;
1576          features->primitiveFragmentShadingRate =
1577             pdevice->info.has_coarse_pixel_primitive_and_cb;
1578          features->attachmentFragmentShadingRate =
1579             pdevice->info.has_coarse_pixel_primitive_and_cb;
1580          break;
1581       }
1582 
1583       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
1584          VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
1585             (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
1586          features->minLod = true;
1587          break;
1588       }
1589 
1590       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
1591          VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
1592             (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
1593          features->indexTypeUint8 = true;
1594          break;
1595       }
1596 
1597       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
1598          VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
1599             (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
1600          /* Rectangular lines must use the strict algorithm, which is not
1601           * supported for wide lines prior to ICL.  See rasterization_mode for
1602           * details and how the HW states are programmed.
1603           */
1604          features->rectangularLines = pdevice->info.ver >= 10;
1605          features->bresenhamLines = true;
1606          /* Support for Smooth lines with MSAA was removed on gfx11.  From the
1607           * BSpec section "Multisample ModesState" table for "AA Line Support
1608           * Requirements":
1609           *
1610           *    GFX10:BUG:######## 	NUM_MULTISAMPLES == 1
1611           *
1612           * Fortunately, this isn't a case most people care about.
1613           */
1614          features->smoothLines = pdevice->info.ver < 10;
1615          features->stippledRectangularLines = false;
1616          features->stippledBresenhamLines = true;
1617          features->stippledSmoothLines = false;
1618          break;
1619       }
1620 
1621       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
1622          VkPhysicalDeviceMeshShaderFeaturesNV *features =
1623             (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
1624          features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
1625          features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
1626          break;
1627       }
1628 
1629       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
1630          VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
1631             (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
1632          features->mutableDescriptorType = true;
1633          break;
1634       }
1635 
1636       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
1637          VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
1638             (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
1639          feature->performanceCounterQueryPools = true;
1640          /* HW only supports a single configuration at a time. */
1641          feature->performanceCounterMultipleQueryPools = false;
1642          break;
1643       }
1644 
1645       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
1646          VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
1647             (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
1648          features->pipelineExecutableInfo = true;
1649          break;
1650       }
1651 
1652       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
1653          VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
1654             (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
1655          features->primitivesGeneratedQuery = true;
1656          features->primitivesGeneratedQueryWithRasterizerDiscard = false;
1657          features->primitivesGeneratedQueryWithNonZeroStreams = false;
1658          break;
1659       }
1660 
1661       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
1662          VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
1663             (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
1664          features->provokingVertexLast = true;
1665          features->transformFeedbackPreservesProvokingVertex = true;
1666          break;
1667       }
1668 
1669       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
1670          VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
1671          features->rayQuery = pdevice->info.has_ray_tracing;
1672          break;
1673       }
1674 
1675       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1676          VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
1677          features->robustBufferAccess2 = true;
1678          features->robustImageAccess2 = true;
1679          features->nullDescriptor = true;
1680          break;
1681       }
1682 
1683       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
1684          VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
1685          features->shaderBufferFloat32Atomics =    true;
1686          features->shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc;
1687          features->shaderBufferFloat64Atomics =
1688             pdevice->info.has_64bit_float && pdevice->info.has_lsc;
1689          features->shaderBufferFloat64AtomicAdd =  false;
1690          features->shaderSharedFloat32Atomics =    true;
1691          features->shaderSharedFloat32AtomicAdd =  false;
1692          features->shaderSharedFloat64Atomics =    false;
1693          features->shaderSharedFloat64AtomicAdd =  false;
1694          features->shaderImageFloat32Atomics =     true;
1695          features->shaderImageFloat32AtomicAdd =   false;
1696          features->sparseImageFloat32Atomics =     false;
1697          features->sparseImageFloat32AtomicAdd =   false;
1698          break;
1699       }
1700 
1701       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
1702          VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (void *)ext;
1703          features->shaderBufferFloat16Atomics      = false;
1704          features->shaderBufferFloat16AtomicAdd    = false;
1705          features->shaderBufferFloat16AtomicMinMax = false;
1706          features->shaderBufferFloat32AtomicMinMax = pdevice->info.ver >= 9;
1707          features->shaderBufferFloat64AtomicMinMax =
1708             pdevice->info.has_64bit_float && pdevice->info.has_lsc;
1709          features->shaderSharedFloat16Atomics      = false;
1710          features->shaderSharedFloat16AtomicAdd    = false;
1711          features->shaderSharedFloat16AtomicMinMax = false;
1712          features->shaderSharedFloat32AtomicMinMax = pdevice->info.ver >= 9;
1713          features->shaderSharedFloat64AtomicMinMax = false;
1714          features->shaderImageFloat32AtomicMinMax  = false;
1715          features->sparseImageFloat32AtomicMinMax  = false;
1716          break;
1717       }
1718 
1719       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
1720          VkPhysicalDeviceShaderClockFeaturesKHR *features =
1721             (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
1722          features->shaderSubgroupClock = true;
1723          features->shaderDeviceClock = false;
1724          break;
1725       }
1726 
1727       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
1728          VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
1729             (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
1730          features->shaderIntegerFunctions2 = true;
1731          break;
1732       }
1733 
1734       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
1735          VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
1736             (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
1737          features->shaderModuleIdentifier = true;
1738          break;
1739       }
1740 
1741       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1742          VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1743             (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1744          features->shaderSubgroupUniformControlFlow = true;
1745          break;
1746       }
1747 
1748       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
1749          VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
1750             (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
1751          features->texelBufferAlignment = true;
1752          break;
1753       }
1754 
1755       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
1756          VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
1757             (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
1758          features->transformFeedback = true;
1759          features->geometryStreams = true;
1760          break;
1761       }
1762 
1763       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
1764          VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
1765             (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
1766          features->vertexAttributeInstanceRateDivisor = true;
1767          features->vertexAttributeInstanceRateZeroDivisor = true;
1768          break;
1769       }
1770 
1771       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1772          VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1773             (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1774          features->workgroupMemoryExplicitLayout = true;
1775          features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1776          features->workgroupMemoryExplicitLayout8BitAccess = true;
1777          features->workgroupMemoryExplicitLayout16BitAccess = true;
1778          break;
1779       }
1780 
1781       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
1782          VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
1783             (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
1784          features->ycbcrImageArrays = true;
1785          break;
1786       }
1787 
1788       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1789          VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1790             (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1791          features->extendedDynamicState = true;
1792          break;
1793       }
1794 
1795       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1796          VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1797             (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1798          features->extendedDynamicState2 = true;
1799          features->extendedDynamicState2LogicOp = true;
1800          features->extendedDynamicState2PatchControlPoints = false;
1801          break;
1802       }
1803 
1804       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1805          VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1806          features->multiDraw = true;
1807          break;
1808       }
1809 
1810       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT : {
1811          VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *features =
1812             (VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *)ext;
1813          features->nonSeamlessCubeMap = true;
1814          break;
1815       }
1816 
1817       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
1818          VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
1819             (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
1820          features->primitiveTopologyListRestart = true;
1821          features->primitiveTopologyPatchListRestart = true;
1822          break;
1823       }
1824 
1825       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
1826          VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
1827             (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
1828          features->depthClipControl = true;
1829          break;
1830       }
1831 
1832       default:
1833          anv_debug_ignored_stype(ext->sType);
1834          break;
1835       }
1836    }
1837 
1838 }
1839 
1840 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
1841 
1842 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1843 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
1844 
1845 #define MAX_CUSTOM_BORDER_COLORS                   4096
1846 
1847 void anv_GetPhysicalDeviceProperties(
1848     VkPhysicalDevice                            physicalDevice,
1849     VkPhysicalDeviceProperties*                 pProperties)
1850 {
1851    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1852    const struct intel_device_info *devinfo = &pdevice->info;
1853 
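   /* Without bindless access these limits reflect what the fixed binding
    * and sampler-state tables can address; with bindless we can advertise a
    * much larger (64k) count instead.
    */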
1854    const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
1855    const uint32_t max_textures =
1856       pdevice->has_bindless_images ? UINT16_MAX : 128;
1857    const uint32_t max_samplers =
1858       pdevice->has_bindless_samplers ? UINT16_MAX :
1859       (devinfo->verx10 >= 75) ? 128 : 16;
1860    const uint32_t max_images =
1861       pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;
1862 
1863    /* If we can use bindless for everything, claim a high per-stage limit,
1864     * otherwise use the binding table size, minus the slots reserved for
1865     * render targets and one slot for the descriptor buffer. */
1866    const uint32_t max_per_stage =
1867       pdevice->has_bindless_images && pdevice->has_a64_buffer_access
1868       ? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
1869 
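   /* A compute hardware thread executes at most 32 invocations (SIMD32), so
    * the workgroup limit is 32 * max_cs_workgroup_threads, capped at 1024.
    */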
1870    const uint32_t max_workgroup_size =
1871       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1872 
1873    VkSampleCountFlags sample_counts =
1874       isl_device_get_sample_counts(&pdevice->isl_dev);
1875 
1876 
1877    VkPhysicalDeviceLimits limits = {
1878       .maxImageDimension1D                      = (1 << 14),
1879       .maxImageDimension2D                      = (1 << 14),
1880       .maxImageDimension3D                      = (1 << 11),
1881       .maxImageDimensionCube                    = (1 << 14),
1882       .maxImageArrayLayers                      = (1 << 11),
1883       .maxTexelBufferElements                   = 128 * 1024 * 1024,
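      /* When indirect UBO pulls go through the sampler, the addressable
       * range appears to be limited to 2^27 bytes (128 MiB); otherwise a
       * 1 GiB range is advertised.
       */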
1884       .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1885       .maxStorageBufferRange                    = pdevice->isl_dev.max_buffer_size,
1886       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1887       .maxMemoryAllocationCount                 = UINT32_MAX,
1888       .maxSamplerAllocationCount                = 64 * 1024,
1889       .bufferImageGranularity                   = 1,
1890       .sparseAddressSpaceSize                   = 0,
1891       .maxBoundDescriptorSets                   = MAX_SETS,
1892       .maxPerStageDescriptorSamplers            = max_samplers,
1893       .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1894       .maxPerStageDescriptorStorageBuffers      = max_ssbos,
1895       .maxPerStageDescriptorSampledImages       = max_textures,
1896       .maxPerStageDescriptorStorageImages       = max_images,
1897       .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1898       .maxPerStageResources                     = max_per_stage,
1899       .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1900       .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
1901       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1902       .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
1903       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1904       .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1905       .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
1906       .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1907       .maxVertexInputAttributes                 = MAX_VES,
1908       .maxVertexInputBindings                   = MAX_VBS,
1909       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1910        *
1911        * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1912        */
1913       .maxVertexInputAttributeOffset            = 2047,
1914       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1915        *
1916        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
1917        *
1918        * Skylake PRMs: Volume 2d: Command Reference: Structures:
1919        *
1920        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1921        */
1922       .maxVertexInputBindingStride              = devinfo->ver < 9 ? 2048 : 4095,
1923       .maxVertexOutputComponents                = 128,
1924       .maxTessellationGenerationLevel           = 64,
1925       .maxTessellationPatchSize                 = 32,
1926       .maxTessellationControlPerVertexInputComponents = 128,
1927       .maxTessellationControlPerVertexOutputComponents = 128,
1928       .maxTessellationControlPerPatchOutputComponents = 128,
1929       .maxTessellationControlTotalOutputComponents = 2048,
1930       .maxTessellationEvaluationInputComponents = 128,
1931       .maxTessellationEvaluationOutputComponents = 128,
1932       .maxGeometryShaderInvocations             = 32,
1933       .maxGeometryInputComponents               = devinfo->ver >= 8 ? 128 : 64,
1934       .maxGeometryOutputComponents              = 128,
1935       .maxGeometryOutputVertices                = 256,
1936       .maxGeometryTotalOutputComponents         = 1024,
1937       .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1938       .maxFragmentOutputAttachments             = 8,
1939       .maxFragmentDualSrcAttachments            = 1,
1940       .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
1941       .maxComputeSharedMemorySize               = 64 * 1024,
1942       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1943       .maxComputeWorkGroupInvocations           = max_workgroup_size,
1944       .maxComputeWorkGroupSize = {
1945          max_workgroup_size,
1946          max_workgroup_size,
1947          max_workgroup_size,
1948       },
1949       .subPixelPrecisionBits                    = 8,
1950       .subTexelPrecisionBits                    = 8,
1951       .mipmapPrecisionBits                      = 8,
1952       .maxDrawIndexedIndexValue                 = UINT32_MAX,
1953       .maxDrawIndirectCount                     = UINT32_MAX,
1954       .maxSamplerLodBias                        = 16,
1955       .maxSamplerAnisotropy                     = 16,
1956       .maxViewports                             = MAX_VIEWPORTS,
1957       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1958       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1959       .viewportSubPixelBits                     = 13, /* We take a float? */
1960       .minMemoryMapAlignment                    = 4096, /* A page */
1961       /* The dataport requires texel alignment so we need to assume a worst
1962        * case of R32G32B32A32 which is 16 bytes.
1963        */
1964       .minTexelBufferOffsetAlignment            = 16,
1965       .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
1966       .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
1967       .minTexelOffset                           = -8,
1968       .maxTexelOffset                           = 7,
1969       .minTexelGatherOffset                     = -32,
1970       .maxTexelGatherOffset                     = 31,
1971       .minInterpolationOffset                   = -0.5,
1972       .maxInterpolationOffset                   = 0.4375,
1973       .subPixelInterpolationOffsetBits          = 4,
1974       .maxFramebufferWidth                      = (1 << 14),
1975       .maxFramebufferHeight                     = (1 << 14),
1976       .maxFramebufferLayers                     = (1 << 11),
1977       .framebufferColorSampleCounts             = sample_counts,
1978       .framebufferDepthSampleCounts             = sample_counts,
1979       .framebufferStencilSampleCounts           = sample_counts,
1980       .framebufferNoAttachmentsSampleCounts     = sample_counts,
1981       .maxColorAttachments                      = MAX_RTS,
1982       .sampledImageColorSampleCounts            = sample_counts,
1983       .sampledImageIntegerSampleCounts          = sample_counts,
1984       .sampledImageDepthSampleCounts            = sample_counts,
1985       .sampledImageStencilSampleCounts          = sample_counts,
1986       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
1987       .maxSampleMaskWords                       = 1,
1988       .timestampComputeAndGraphics              = true,
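      /* Nanoseconds per timestamp tick, derived from the device's timestamp
       * frequency; e.g. a 12 MHz timestamp clock would give a period of
       * ~83.3 ns.
       */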
1989       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
1990       .maxClipDistances                         = 8,
1991       .maxCullDistances                         = 8,
1992       .maxCombinedClipAndCullDistances          = 8,
1993       .discreteQueuePriorities                  = 2,
1994       .pointSizeRange                           = { 0.125, 255.875 },
1995       /* While SKL and up support much wider lines than we are setting here,
1996        * in practice we run into conformance issues if we go past this limit.
1997        * Since the Windows driver does the same, it's probably fair to assume
1998        * that no one needs more than this.
1999        */
2000       .lineWidthRange                           = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
2001       .pointSizeGranularity                     = (1.0 / 8.0),
2002       .lineWidthGranularity                     = (1.0 / 128.0),
2003       .strictLines                              = false,
2004       .standardSampleLocations                  = true,
2005       .optimalBufferCopyOffsetAlignment         = 128,
2006       .optimalBufferCopyRowPitchAlignment       = 128,
2007       .nonCoherentAtomSize                      = 64,
2008    };
2009 
2010    *pProperties = (VkPhysicalDeviceProperties) {
2011       .apiVersion = ANV_API_VERSION,
2012       .driverVersion = vk_get_driver_version(),
2013       .vendorID = 0x8086,
2014       .deviceID = pdevice->info.pci_device_id,
2015       .deviceType = pdevice->info.has_local_mem ?
2016                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
2017                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
2018       .limits = limits,
2019       .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
2020    };
2021 
2022    snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
2023             "%s", pdevice->info.name);
2024    memcpy(pProperties->pipelineCacheUUID,
2025           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
2026 }
2027 
2028 static void
2029 anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
2030                                        VkPhysicalDeviceVulkan11Properties *p)
2031 {
2032    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
2033 
2034    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
2035    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
2036    memset(p->deviceLUID, 0, VK_LUID_SIZE);
2037    p->deviceNodeMask = 0;
2038    p->deviceLUIDValid = false;
2039 
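   /* BRW_SUBGROUP_SIZE is the compiler's maximum SIMD width (32), reported
    * here as the fixed Vulkan 1.1 subgroup size.
    */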
2040    p->subgroupSize = BRW_SUBGROUP_SIZE;
2041    VkShaderStageFlags scalar_stages = 0;
2042    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
2043       if (pdevice->compiler->scalar_stage[stage])
2044          scalar_stages |= mesa_to_vk_shader_stage(stage);
2045    }
2046    if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
2047       scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2048                        VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
2049                        VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
2050                        VK_SHADER_STAGE_MISS_BIT_KHR |
2051                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
2052                        VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2053    }
2054    if (pdevice->vk.supported_extensions.NV_mesh_shader) {
2055       scalar_stages |= VK_SHADER_STAGE_TASK_BIT_NV |
2056                        VK_SHADER_STAGE_MESH_BIT_NV;
2057    }
2058    p->subgroupSupportedStages = scalar_stages;
2059    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
2060                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
2061                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
2062                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
2063                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
2064                                     VK_SUBGROUP_FEATURE_QUAD_BIT;
2065    if (pdevice->info.ver >= 8) {
2066       /* TODO: There's no technical reason why these can't be made to
2067        * work on gfx7 but they don't at the moment, so it's better to leave
2068        * the feature disabled than enabled and broken.
2069        */
2070       p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
2071                                         VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
2072    }
2073    p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;
2074 
2075    p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
2076    p->maxMultiviewViewCount      = 16;
2077    p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
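   /* Presumably UINT32_MAX / maxMultiviewViewCount so that the expanded
    * instance count still fits in 32 bits when multiview is lowered to
    * instanced drawing.
    */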
2078    p->protectedNoFault           = false;
2079    /* This value doesn't matter for us today as our per-stage descriptors are
2080     * the real limit.
2081     */
2082    p->maxPerSetDescriptors       = 1024;
2083    p->maxMemoryAllocationSize    = MAX_MEMORY_ALLOCATION_SIZE;
2084 }
2085 
2086 static void
2087 anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
2088                                        VkPhysicalDeviceVulkan12Properties *p)
2089 {
2090    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
2091 
2092    p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
2093    memset(p->driverName, 0, sizeof(p->driverName));
2094    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
2095             "Intel open-source Mesa driver");
2096    memset(p->driverInfo, 0, sizeof(p->driverInfo));
2097    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
2098             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
2099 
2100    /* Don't advertise conformance with a particular version if the hardware's
2101     * support is incomplete/alpha.
2102     */
2103    if (pdevice->is_alpha) {
2104       p->conformanceVersion = (VkConformanceVersion) {
2105          .major = 0,
2106          .minor = 0,
2107          .subminor = 0,
2108          .patch = 0,
2109       };
2110    }
2111    else {
2112       p->conformanceVersion = (VkConformanceVersion) {
2113          .major = 1,
2114          .minor = 3,
2115          .subminor = 0,
2116          .patch = 0,
2117       };
2118    }
2119 
2120    p->denormBehaviorIndependence =
2121       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
2122    p->roundingModeIndependence =
2123       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
2124 
2125    /* Broadwell does not support HF denorms and there are restrictions on
2126     * other gens. According to Kabylake's PRM:
2127     *
2128     * "math - Extended Math Function
2129     * [...]
2130     * Restriction : Half-float denorms are always retained."
2131     */
2132    p->shaderDenormFlushToZeroFloat16         = false;
2133    p->shaderDenormPreserveFloat16            = pdevice->info.ver > 8;
2134    p->shaderRoundingModeRTEFloat16           = true;
2135    p->shaderRoundingModeRTZFloat16           = true;
2136    p->shaderSignedZeroInfNanPreserveFloat16  = true;
2137 
2138    p->shaderDenormFlushToZeroFloat32         = true;
2139    p->shaderDenormPreserveFloat32            = true;
2140    p->shaderRoundingModeRTEFloat32           = true;
2141    p->shaderRoundingModeRTZFloat32           = true;
2142    p->shaderSignedZeroInfNanPreserveFloat32  = true;
2143 
2144    p->shaderDenormFlushToZeroFloat64         = true;
2145    p->shaderDenormPreserveFloat64            = true;
2146    p->shaderRoundingModeRTEFloat64           = true;
2147    p->shaderRoundingModeRTZFloat64           = true;
2148    p->shaderSignedZeroInfNanPreserveFloat64  = true;
2149 
2150    /* It's a bit hard to exactly map our implementation to the limits
2151     * described by Vulkan.  The bindless surface handle in the extended
2152     * message descriptors is 20 bits and it's an index into the table of
2153     * RENDER_SURFACE_STATE structs that starts at bindless surface base
2154     * address.  This means that we can have at most 1M surface states
2155     * allocated at any given time.  Since most image views take two
2156     * descriptors, this means we have a limit of about 500K image views.
2157     *
2158     * However, since we allocate surface states at vkCreateImageView time,
2159     * this means our limit is actually something on the order of 500K image
2160     * views allocated at any time.  The actual limit described by Vulkan, on
2161     * the other hand, is a limit of how many you can have in a descriptor set.
2162     * Assuming anyone using 1M descriptors will be using the same image view
2163     * twice a bunch of times (or a bunch of null descriptors), we can safely
2164     * advertise a larger limit here.
2165     */
2166    const unsigned max_bindless_views = 1 << 20;
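   /* 1 << 20 == 1,048,576, one entry per possible 20-bit bindless surface
    * handle described above.
    */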
2167    p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
2168    p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
2169    p->shaderSampledImageArrayNonUniformIndexingNative    = false;
2170    p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
2171    p->shaderStorageImageArrayNonUniformIndexingNative    = false;
2172    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
2173    p->robustBufferAccessUpdateAfterBind                  = true;
2174    p->quadDivergentImplicitLod                           = false;
2175    p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
2176    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2177    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
2178    p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
2179    p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
2180    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
2181    p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
2182    p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
2183    p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2184    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2185    p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
2186    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2187    p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
2188    p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
2189    p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
2190 
2191    /* We support all of the depth resolve modes */
2192    p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2193                                       VK_RESOLVE_MODE_AVERAGE_BIT |
2194                                       VK_RESOLVE_MODE_MIN_BIT |
2195                                       VK_RESOLVE_MODE_MAX_BIT;
2196    /* Average doesn't make sense for stencil so we don't support that */
2197    p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
2198    if (pdevice->info.ver >= 8) {
2199       /* The advanced stencil resolve modes currently require stencil
2200        * sampling be supported by the hardware.
2201        */
2202       p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
2203                                          VK_RESOLVE_MODE_MAX_BIT;
2204    }
2205    p->independentResolveNone  = true;
2206    p->independentResolve      = true;
2207 
2208    p->filterMinmaxSingleComponentFormats  = pdevice->info.ver >= 9;
2209    p->filterMinmaxImageComponentMapping   = pdevice->info.ver >= 9;
2210 
2211    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
2212 
2213    p->framebufferIntegerColorSampleCounts =
2214       isl_device_get_sample_counts(&pdevice->isl_dev);
2215 }
2216 
2217 static void
2218 anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
2219                                        VkPhysicalDeviceVulkan13Properties *p)
2220 {
2221    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
2222 
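   /* The back-end compiler can emit SIMD8, SIMD16 or SIMD32 code for
    * compute-style stages, hence the 8..32 range below.
    */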
2223    p->minSubgroupSize = 8;
2224    p->maxSubgroupSize = 32;
2225    p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
2226    p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
2227                                    VK_SHADER_STAGE_TASK_BIT_NV |
2228                                    VK_SHADER_STAGE_MESH_BIT_NV;
2229 
2230    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2231    p->maxPerStageDescriptorInlineUniformBlocks =
2232       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2233    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
2234       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2235    p->maxDescriptorSetInlineUniformBlocks =
2236       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2237    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
2238       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2239    p->maxInlineUniformTotalSize = UINT16_MAX;
2240 
2241    p->integerDotProduct8BitUnsignedAccelerated = false;
2242    p->integerDotProduct8BitSignedAccelerated = false;
2243    p->integerDotProduct8BitMixedSignednessAccelerated = false;
2244    p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
2245    p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
2246    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
2247    p->integerDotProduct16BitUnsignedAccelerated = false;
2248    p->integerDotProduct16BitSignedAccelerated = false;
2249    p->integerDotProduct16BitMixedSignednessAccelerated = false;
2250    p->integerDotProduct32BitUnsignedAccelerated = false;
2251    p->integerDotProduct32BitSignedAccelerated = false;
2252    p->integerDotProduct32BitMixedSignednessAccelerated = false;
2253    p->integerDotProduct64BitUnsignedAccelerated = false;
2254    p->integerDotProduct64BitSignedAccelerated = false;
2255    p->integerDotProduct64BitMixedSignednessAccelerated = false;
2256    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
2257    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
2258    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
2259    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
2260    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
2261    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
2262    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
2263    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
2264    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
2265    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
2266    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
2267    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
2268    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
2269    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
2270    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
2271 
2272    /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
2273     * Base Address:
2274     *
2275     *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
2276     *    specifies the base address of the first element of the surface,
2277     *    computed in software by adding the surface base address to the
2278     *    byte offset of the element in the buffer. The base address must
2279     *    be aligned to element size."
2280     *
2281     * The typed dataport messages require that things be texel aligned.
2282     * Otherwise, we may just load/store the wrong data or, in the worst
2283     * case, there may be hangs.
2284     */
2285    p->storageTexelBufferOffsetAlignmentBytes = 16;
2286    p->storageTexelBufferOffsetSingleTexelAlignment = true;
2287 
2288    /* The sampler, however, is much more forgiving and it can handle
2289     * arbitrary byte alignment for linear and buffer surfaces.  It's
2290     * hard to find a good PRM citation for this but years of empirical
2291     * experience demonstrate that this is true.
2292     */
2293    p->uniformTexelBufferOffsetAlignmentBytes = 1;
2294    p->uniformTexelBufferOffsetSingleTexelAlignment = false;
2295 
2296    p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2297 }
2298 
2299 void anv_GetPhysicalDeviceProperties2(
2300     VkPhysicalDevice                            physicalDevice,
2301     VkPhysicalDeviceProperties2*                pProperties)
2302 {
2303    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2304 
2305    anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
2306 
2307    VkPhysicalDeviceVulkan11Properties core_1_1 = {
2308       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
2309    };
2310    anv_get_physical_device_properties_1_1(pdevice, &core_1_1);
2311 
2312    VkPhysicalDeviceVulkan12Properties core_1_2 = {
2313       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
2314    };
2315    anv_get_physical_device_properties_1_2(pdevice, &core_1_2);
2316 
2317    VkPhysicalDeviceVulkan13Properties core_1_3 = {
2318       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
2319    };
2320    anv_get_physical_device_properties_1_3(pdevice, &core_1_3);
2321 
2322    vk_foreach_struct(ext, pProperties->pNext) {
2323       if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
2324          continue;
2325       if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
2326          continue;
2327       if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
2328          continue;
2329 
2330       switch (ext->sType) {
2331       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2332          VkPhysicalDeviceAccelerationStructurePropertiesKHR *props = (void *)ext;
2333          props->maxGeometryCount = (1u << 24) - 1;
2334          props->maxInstanceCount = (1u << 24) - 1;
2335          props->maxPrimitiveCount = (1u << 29) - 1;
2336          props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
2337          props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
2338          props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
2339          props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
2340          props->minAccelerationStructureScratchOffsetAlignment = 64;
2341          break;
2342       }
2343 
2344       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2345          /* TODO: Real limits */
2346          VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2347             (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2348          /* There's nothing in the public docs about this value as far as I
2349           * can tell.  However, this is the value the Windows driver reports
2350           * and there's a comment on a rejected HW feature in the internal
2351           * docs that says:
2352           *
2353           *    "This is similar to conservative rasterization, except the
2354           *    primitive area is not extended by 1/512 and..."
2355           *
2356           * That's a bit of an obtuse reference but it's the best we've got
2357           * for now.
2358           */
2359          properties->primitiveOverestimationSize = 1.0f / 512.0f;
2360          properties->maxExtraPrimitiveOverestimationSize = 0.0f;
2361          properties->extraPrimitiveOverestimationSizeGranularity = 0.0f;
2362          properties->primitiveUnderestimation = false;
2363          properties->conservativePointAndLineRasterization = false;
2364          properties->degenerateTrianglesRasterized = true;
2365          properties->degenerateLinesRasterized = false;
2366          properties->fullyCoveredFragmentShaderInputVariable = false;
2367          properties->conservativeRasterizationPostDepthCoverage = true;
2368          break;
2369       }
2370 
2371       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2372          VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
2373             (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2374          properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
2375          break;
2376       }
2377 
2378       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2379          VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2380             (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2381          props->primitiveFragmentShadingRateWithMultipleViewports =
2382             pdevice->info.has_coarse_pixel_primitive_and_cb;
2383          props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb;
2384          props->fragmentShadingRateNonTrivialCombinerOps =
2385             pdevice->info.has_coarse_pixel_primitive_and_cb;
2386          props->maxFragmentSize = (VkExtent2D) { 4, 4 };
2387          props->maxFragmentSizeAspectRatio =
2388             pdevice->info.has_coarse_pixel_primitive_and_cb ?
2389             2 : 4;
2390          props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
2391             (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
2392          props->maxFragmentShadingRateRasterizationSamples =
2393             pdevice->info.has_coarse_pixel_primitive_and_cb ?
2394             VK_SAMPLE_COUNT_4_BIT :  VK_SAMPLE_COUNT_16_BIT;
2395          props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2396          props->fragmentShadingRateWithSampleMask = true;
2397          props->fragmentShadingRateWithShaderSampleMask = false;
2398          props->fragmentShadingRateWithConservativeRasterization = true;
2399          props->fragmentShadingRateWithFragmentShaderInterlock = true;
2400          props->fragmentShadingRateWithCustomSampleLocations = true;
2401 
2402          /* Fixed in DG2_G10_C0 and DG2_G11_B0. Consider any other SKU as
2403           * having the fix.
2404           */
2405          props->fragmentShadingRateStrictMultiplyCombiner =
2406             pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ?
2407             pdevice->info.revision >= 8 :
2408             pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ?
2409             pdevice->info.revision >= 4 : true;
2410 
2411          if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
2412             props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2413             props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2414             props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2415          } else {
2416             /* Those must be 0 if attachmentFragmentShadingRate is not
2417              * supported.
2418              */
2419             props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2420             props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2421             props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
2422          }
2423          break;
2424       }
2425 
2426       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2427          VkPhysicalDeviceDrmPropertiesEXT *props =
2428             (VkPhysicalDeviceDrmPropertiesEXT *)ext;
2429 
2430          props->hasPrimary = pdevice->has_master;
2431          props->primaryMajor = pdevice->master_major;
2432          props->primaryMinor = pdevice->master_minor;
2433 
2434          props->hasRender = pdevice->has_local;
2435          props->renderMajor = pdevice->local_major;
2436          props->renderMinor = pdevice->local_minor;
2437 
2438          break;
2439       }
2440 
2441       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2442          VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
2443             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
2444          /* Userptr needs page-aligned memory. */
2445          props->minImportedHostPointerAlignment = 4096;
2446          break;
2447       }
2448 
2449       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2450          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2451             (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2452          /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
2453           * Sampling Rules - Legacy Mode", it says the following:
2454           *
2455           *    "Note that the device divides a pixel into a 16x16 array of
2456           *    subpixels, referenced by their upper left corners."
2457           *
2458           * This is the only known reference in the PRMs to the subpixel
2459           * precision of line rasterization and a "16x16 array of subpixels"
2460           * implies 4 subpixel precision bits.  Empirical testing has shown
2461           * that 4 subpixel precision bits applies to all line rasterization
2462           * types.
2463           */
2464          props->lineSubPixelPrecisionBits = 4;
2465          break;
2466       }
2467 
2468       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
2469          VkPhysicalDeviceMaintenance4Properties *properties =
2470             (VkPhysicalDeviceMaintenance4Properties *)ext;
2471          properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2472          break;
2473       }
2474 
2475       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
2476          VkPhysicalDeviceMeshShaderPropertiesNV *props =
2477             (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
2478 
2479          /* Bounded by the maximum representable size in
2480           * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
2481           */
2482          const uint32_t max_slm_size = 64 * 1024;
2483 
2484          /* Bounded by the maximum representable size in
2485           * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
2486           */
2487          const uint32_t max_workgroup_size = 1 << 10;
2488 
2489          /* Bounded by the maximum representable count in
2490           * 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
2491           */
2492          const uint32_t max_primitives = 1024;
2493 
2494          /* TODO(mesh): Multiview. */
2495          const uint32_t max_view_count = 1;
2496 
2497          props->maxDrawMeshTasksCount = UINT32_MAX;
2498 
2499          /* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
2500           * mapping them to/from the single value that HW provides us
2501           * (currently used for X).
2502           */
2503 
2504          props->maxTaskWorkGroupInvocations = max_workgroup_size;
2505          props->maxTaskWorkGroupSize[0] = max_workgroup_size;
2506          props->maxTaskWorkGroupSize[1] = 1;
2507          props->maxTaskWorkGroupSize[2] = 1;
2508          props->maxTaskTotalMemorySize = max_slm_size;
2509          props->maxTaskOutputCount = UINT16_MAX;
2510 
2511          props->maxMeshWorkGroupInvocations = max_workgroup_size;
2512          props->maxMeshWorkGroupSize[0] = max_workgroup_size;
2513          props->maxMeshWorkGroupSize[1] = 1;
2514          props->maxMeshWorkGroupSize[2] = 1;
2515          props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
2516          props->maxMeshOutputPrimitives = max_primitives / max_view_count;
2517          props->maxMeshMultiviewViewCount = max_view_count;
2518 
2519          /* Depends on what indices can be represented with IndexFormat.  For
2520           * now we always use U32, so this is bounded by the maximum unique
2521           * vertices we need for the maximum primitives.
2522           *
2523           * TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
2524           * support for others.
2525           */
2526          props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
2527 
2528 
2529          props->meshOutputPerVertexGranularity = 32;
2530          props->meshOutputPerPrimitiveGranularity = 32;
2531 
2532          break;
2533       }
2534 
2535       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2536          VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2537             (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2538          properties->pciDomain = pdevice->info.pci_domain;
2539          properties->pciBus = pdevice->info.pci_bus;
2540          properties->pciDevice = pdevice->info.pci_dev;
2541          properties->pciFunction = pdevice->info.pci_func;
2542          break;
2543       }
2544 
2545       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
2546          VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
2547             (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
2548          /* We could support this by spawning a shader to do the equation
2549           * normalization.
2550           */
2551          properties->allowCommandBufferQueryCopies = false;
2552          break;
2553       }
2554 
2555 #pragma GCC diagnostic push
2556 #pragma GCC diagnostic ignored "-Wswitch"
2557       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
2558          VkPhysicalDevicePresentationPropertiesANDROID *props =
2559             (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
2560          props->sharedImage = VK_FALSE;
2561          break;
2562       }
2563 #pragma GCC diagnostic pop
2564 
2565       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2566          VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
2567             (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2568          properties->provokingVertexModePerPipeline = true;
2569          properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
2570          break;
2571       }
2572 
2573       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2574          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2575             (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
2576          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2577          break;
2578       }
2579 
2580       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2581          VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
2582          properties->robustStorageBufferAccessSizeAlignment =
2583             ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
2584          properties->robustUniformBufferAccessSizeAlignment =
2585             ANV_UBO_ALIGNMENT;
2586          break;
2587       }
2588 
2589       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2590          VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
2591             (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2592 
2593          props->sampleLocationSampleCounts =
2594             isl_device_get_sample_counts(&pdevice->isl_dev);
2595 
2596          /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
2597          props->maxSampleLocationGridSize.width = 1;
2598          props->maxSampleLocationGridSize.height = 1;
2599 
2600          props->sampleLocationCoordinateRange[0] = 0;
2601          props->sampleLocationCoordinateRange[1] = 0.9375;
2602          props->sampleLocationSubPixelBits = 4;
2603 
2604          props->variableSampleLocations = true;
2605          break;
2606       }
2607 
2608       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
2609          VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
2610             (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
2611          STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
2612                        sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2613          memcpy(props->shaderModuleIdentifierAlgorithmUUID,
2614                 vk_shaderModuleIdentifierAlgorithmUUID,
2615                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2616          break;
2617       }
2618 
2619       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2620          VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2621             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2622 
2623          props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2624          props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2625          props->maxTransformFeedbackBufferSize = (1ull << 32);
2626          props->maxTransformFeedbackStreamDataSize = 128 * 4;
2627          props->maxTransformFeedbackBufferDataSize = 128 * 4;
2628          props->maxTransformFeedbackBufferDataStride = 2048;
2629          props->transformFeedbackQueries = true;
2630          props->transformFeedbackStreamsLinesTriangles = false;
2631          props->transformFeedbackRasterizationStreamSelect = false;
2632          /* This requires MI_MATH */
2633          props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
2634          break;
2635       }
2636 
2637       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2638          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2639             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2640          /* We have to restrict this a bit for multiview */
2641          props->maxVertexAttribDivisor = UINT32_MAX / 16;
2642          break;
2643       }
2644 
2645       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2646          VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2647          props->maxMultiDrawCount = 2048;
2648          break;
2649       }
2650 
2651       default:
2652          anv_debug_ignored_stype(ext->sType);
2653          break;
2654       }
2655    }
2656 }
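/* A sketch (not driver code) of how an application typically drives the pNext
 * walk above: it chains the property structs it cares about into
 * VkPhysicalDeviceProperties2 and the loop fills whichever ones it recognizes.
 *
 *    VkPhysicalDeviceVulkan12Properties vk12 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &vk12,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 *    // props2.properties and vk12 are now filled by the code above.
 */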
2657 
2658 static int
2659 vk_priority_to_gen(int priority)
2660 {
2661    switch (priority) {
2662    case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
2663       return INTEL_CONTEXT_LOW_PRIORITY;
2664    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
2665       return INTEL_CONTEXT_MEDIUM_PRIORITY;
2666    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
2667       return INTEL_CONTEXT_HIGH_PRIORITY;
2668    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
2669       return INTEL_CONTEXT_REALTIME_PRIORITY;
2670    default:
2671       unreachable("Invalid priority");
2672    }
2673 }
2674 
2675 static const VkQueueFamilyProperties
2676 anv_queue_family_properties_template = {
2677    .timestampValidBits = 36, /* XXX: Real value here */
2678    .minImageTransferGranularity = { 1, 1, 1 },
2679 };
2680 
2681 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2682     VkPhysicalDevice                            physicalDevice,
2683     uint32_t*                                   pQueueFamilyPropertyCount,
2684     VkQueueFamilyProperties2*                   pQueueFamilyProperties)
2685 {
2686    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2687    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2688                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
2689 
2690    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2691       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2692       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2693          p->queueFamilyProperties = anv_queue_family_properties_template;
2694          p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2695          p->queueFamilyProperties.queueCount = queue_family->queueCount;
2696 
2697          vk_foreach_struct(ext, p->pNext) {
2698             switch (ext->sType) {
2699             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2700                VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2701                   (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2702 
2703                /* Deliberately sorted low to high */
2704                VkQueueGlobalPriorityKHR all_priorities[] = {
2705                   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2706                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2707                   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2708                   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2709                };
2710 
2711                uint32_t count = 0;
2712                for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2713                   if (vk_priority_to_gen(all_priorities[i]) >
2714                       pdevice->max_context_priority)
2715                      break;
2716 
2717                   properties->priorities[count++] = all_priorities[i];
2718                }
2719                properties->priorityCount = count;
2720                break;
2721             }
2722 
2723             default:
2724                anv_debug_ignored_stype(ext->sType);
2725             }
2726          }
2727       }
2728    }
2729 }
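/* A sketch (not driver code) of the standard two-call idiom an application
 * uses against the VK_OUTARRAY handling above: query the count first, then
 * allocate and fill.  Every element's sType must be set before the second
 * call.
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceQueueFamilyProperties2(pdev, &count, NULL);
 *    VkQueueFamilyProperties2 *families = calloc(count, sizeof(*families));
 *    for (uint32_t i = 0; i < count; i++)
 *       families[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2;
 *    vkGetPhysicalDeviceQueueFamilyProperties2(pdev, &count, families);
 */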
2730 
2731 void anv_GetPhysicalDeviceMemoryProperties(
2732     VkPhysicalDevice                            physicalDevice,
2733     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
2734 {
2735    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2736 
2737    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2738    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2739       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2740          .propertyFlags = physical_device->memory.types[i].propertyFlags,
2741          .heapIndex     = physical_device->memory.types[i].heapIndex,
2742       };
2743    }
2744 
2745    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2746    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2747       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2748          .size    = physical_device->memory.heaps[i].size,
2749          .flags   = physical_device->memory.heaps[i].flags,
2750       };
2751    }
2752 }
2753 
2754 static void
2755 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2756                       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2757 {
2758    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2759 
2760    if (!device->vk.supported_extensions.EXT_memory_budget)
2761       return;
2762 
2763    anv_update_meminfo(device, device->local_fd);
2764 
2765    VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2766    for (size_t i = 0; i < device->memory.heap_count; i++) {
2767       if (device->memory.heaps[i].is_local_mem) {
2768          total_vram_heaps_size += device->memory.heaps[i].size;
2769       } else {
2770          total_sys_heaps_size += device->memory.heaps[i].size;
2771       }
2772    }
2773 
2774    for (size_t i = 0; i < device->memory.heap_count; i++) {
2775       VkDeviceSize heap_size = device->memory.heaps[i].size;
2776       VkDeviceSize heap_used = device->memory.heaps[i].used;
2777       VkDeviceSize heap_budget, total_heaps_size;
2778       uint64_t mem_available = 0;
2779 
2780       if (device->memory.heaps[i].is_local_mem) {
2781          total_heaps_size = total_vram_heaps_size;
2782          if (device->vram_non_mappable.size > 0 && i == 0) {
2783             mem_available = device->vram_non_mappable.available;
2784          } else {
2785             mem_available = device->vram_mappable.available;
2786          }
2787       } else {
2788          total_heaps_size = total_sys_heaps_size;
2789          mem_available = device->sys.available;
2790       }
2791 
2792       double heap_proportion = (double) heap_size / total_heaps_size;
2793       VkDeviceSize available_prop = mem_available * heap_proportion;
2794 
2795       /*
2796        * Let's not incite the app to starve the system: report at most 90% of
2797        * the available heap memory.
2798        */
2799       uint64_t heap_available = available_prop * 9 / 10;
2800       heap_budget = MIN2(heap_size, heap_used + heap_available);
2801 
2802       /*
2803        * Round down to the nearest MB
2804        */
2805       heap_budget &= ~((1ull << 20) - 1);
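      /* Worked example of the budget math above with assumed numbers (not
       * measured values): a single 16 GiB system heap (heap_proportion = 1.0)
       * with 8 GiB reported available and 2 GiB already used gives
       * heap_available = 8 GiB * 9/10 = 7.2 GiB, so heap_budget =
       * MIN2(16 GiB, 2 GiB + 7.2 GiB) = 9.2 GiB, rounded down to a whole MiB.
       */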
2806 
2807       /*
2808        * The heapBudget value must be non-zero for array elements less than
2809        * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2810        * value must be less than or equal to VkMemoryHeap::size for each heap.
2811        */
2812       assert(0 < heap_budget && heap_budget <= heap_size);
2813 
2814       memoryBudget->heapUsage[i] = heap_used;
2815       memoryBudget->heapBudget[i] = heap_budget;
2816    }
2817 
2818    /* The heapBudget and heapUsage values must be zero for array elements
2819     * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2820     */
2821    for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2822       memoryBudget->heapBudget[i] = 0;
2823       memoryBudget->heapUsage[i] = 0;
2824    }
2825 }
2826 
2827 void anv_GetPhysicalDeviceMemoryProperties2(
2828     VkPhysicalDevice                            physicalDevice,
2829     VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
2830 {
2831    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2832                                          &pMemoryProperties->memoryProperties);
2833 
2834    vk_foreach_struct(ext, pMemoryProperties->pNext) {
2835       switch (ext->sType) {
2836       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2837          anv_get_memory_budget(physicalDevice, (void*)ext);
2838          break;
2839       default:
2840          anv_debug_ignored_stype(ext->sType);
2841          break;
2842       }
2843    }
2844 }
2845 
2846 void
2847 anv_GetDeviceGroupPeerMemoryFeatures(
2848     VkDevice                                    device,
2849     uint32_t                                    heapIndex,
2850     uint32_t                                    localDeviceIndex,
2851     uint32_t                                    remoteDeviceIndex,
2852     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
2853 {
2854    assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
2855    *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2856                           VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2857                           VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2858                           VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2859 }
2860 
2861 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2862     VkInstance                                  _instance,
2863     const char*                                 pName)
2864 {
2865    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2866    return vk_instance_get_proc_addr(&instance->vk,
2867                                     &anv_instance_entrypoints,
2868                                     pName);
2869 }
2870 
2871 /* With version 1+ of the loader interface the ICD should expose
2872  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2873  */
2874 PUBLIC
2875 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2876     VkInstance                                  instance,
2877     const char*                                 pName);
2878 
2879 PUBLIC
2880 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2881     VkInstance                                  instance,
2882     const char*                                 pName)
2883 {
2884    return anv_GetInstanceProcAddr(instance, pName);
2885 }
2886 
2887 /* With version 4+ of the loader interface the ICD should expose
2888  * vk_icdGetPhysicalDeviceProcAddr()
2889  */
2890 PUBLIC
2891 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
2892     VkInstance  _instance,
2893     const char* pName);
2894 
2895 PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(
2896     VkInstance  _instance,
2897     const char* pName)
2898 {
2899    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2900    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2901 }
2902 
2903 static struct anv_state
2904 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2905 {
2906    struct anv_state state;
2907 
2908    state = anv_state_pool_alloc(pool, size, align);
2909    memcpy(state.map, p, size);
2910 
2911    return state;
2912 }
2913 
2914 static void
2915 anv_device_init_border_colors(struct anv_device *device)
2916 {
2917    if (device->info.platform == INTEL_PLATFORM_HSW) {
2918       static const struct hsw_border_color border_colors[] = {
2919          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2920          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2921          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2922          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2923          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2924          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2925       };
2926 
2927       device->border_colors =
2928          anv_state_pool_emit_data(&device->dynamic_state_pool,
2929                                   sizeof(border_colors), 512, border_colors);
2930    } else {
2931       static const struct gfx8_border_color border_colors[] = {
2932          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2933          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2934          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2935          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2936          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2937          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2938       };
2939 
2940       device->border_colors =
2941          anv_state_pool_emit_data(&device->dynamic_state_pool,
2942                                   sizeof(border_colors), 64, border_colors);
2943    }
2944 }
2945 
2946 static VkResult
2947 anv_device_init_trivial_batch(struct anv_device *device)
2948 {
2949    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2950                                          ANV_BO_ALLOC_MAPPED,
2951                                          0 /* explicit_address */,
2952                                          &device->trivial_batch_bo);
2953    if (result != VK_SUCCESS)
2954       return result;
2955 
2956    struct anv_batch batch = {
2957       .start = device->trivial_batch_bo->map,
2958       .next = device->trivial_batch_bo->map,
2959       .end = device->trivial_batch_bo->map + 4096,
2960    };
2961 
2962    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2963    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2964 
2965    if (device->physical->memory.need_clflush)
2966       intel_clflush_range(batch.start, batch.next - batch.start);
2967 
2968    return VK_SUCCESS;
2969 }
2970 
2971 static bool
2972 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2973                  struct anv_block_pool *pool,
2974                  uint64_t address)
2975 {
2976    anv_block_pool_foreach_bo(bo, pool) {
2977       uint64_t bo_address = intel_48b_address(bo->offset);
2978       if (address >= bo_address && address < (bo_address + bo->size)) {
2979          *ret = (struct intel_batch_decode_bo) {
2980             .addr = bo_address,
2981             .size = bo->size,
2982             .map = bo->map,
2983          };
2984          return true;
2985       }
2986    }
2987    return false;
2988 }
2989 
2990 /* Finding a buffer for batch decoding */
2991 static struct intel_batch_decode_bo
2992 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2993 {
2994    struct anv_device *device = v_batch;
2995    struct intel_batch_decode_bo ret_bo = {};
2996 
2997    assert(ppgtt);
2998 
2999    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
3000       return ret_bo;
3001    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
3002       return ret_bo;
3003    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
3004       return ret_bo;
3005    if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
3006       return ret_bo;
3007 
3008    if (!device->cmd_buffer_being_decoded)
3009       return (struct intel_batch_decode_bo) { };
3010 
3011    struct anv_batch_bo **bo;
3012 
3013    u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
3014       /* The decoder zeroes out the top 16 bits, so we need to as well */
3015       uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
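      /* For reference, ~0ull >> 16 == 0x0000ffffffffffff, so an offset such
       * as 0xffff800012345000 masks down to 0x0000800012345000, matching the
       * 48-bit addresses produced by the decoder.
       */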
3016 
3017       if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
3018          return (struct intel_batch_decode_bo) {
3019             .addr = bo_address,
3020             .size = (*bo)->bo->size,
3021             .map = (*bo)->bo->map,
3022          };
3023       }
3024    }
3025 
3026    return (struct intel_batch_decode_bo) { };
3027 }
3028 
3029 struct intel_aux_map_buffer {
3030    struct intel_buffer base;
3031    struct anv_state state;
3032 };
3033 
3034 static struct intel_buffer *
3035 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
3036 {
3037    struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
3038    if (!buf)
3039       return NULL;
3040 
3041    struct anv_device *device = (struct anv_device*)driver_ctx;
3042    assert(device->physical->supports_48bit_addresses &&
3043           device->physical->use_softpin);
3044 
3045    struct anv_state_pool *pool = &device->dynamic_state_pool;
3046    buf->state = anv_state_pool_alloc(pool, size, size);
3047 
3048    buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
3049    buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
3050    buf->base.map = buf->state.map;
3051    buf->base.driver_bo = &buf->state;
3052    return &buf->base;
3053 }
3054 
3055 static void
3056 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
3057 {
3058    struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
3059    struct anv_device *device = (struct anv_device*)driver_ctx;
3060    struct anv_state_pool *pool = &device->dynamic_state_pool;
3061    anv_state_pool_free(pool, buf->state);
3062    free(buf);
3063 }
3064 
3065 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
3066    .alloc = intel_aux_map_buffer_alloc,
3067    .free = intel_aux_map_buffer_free,
3068 };
3069 
3070 static VkResult anv_device_check_status(struct vk_device *vk_device);
3071 
3072 VkResult anv_CreateDevice(
3073     VkPhysicalDevice                            physicalDevice,
3074     const VkDeviceCreateInfo*                   pCreateInfo,
3075     const VkAllocationCallbacks*                pAllocator,
3076     VkDevice*                                   pDevice)
3077 {
3078    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3079    VkResult result;
3080    struct anv_device *device;
3081 
3082    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
3083 
3084    /* Check enabled features */
3085    bool robust_buffer_access = false;
3086    if (pCreateInfo->pEnabledFeatures) {
3087       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3088          robust_buffer_access = true;
3089    }
3090 
3091    vk_foreach_struct_const(ext, pCreateInfo->pNext) {
3092       switch (ext->sType) {
3093       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3094          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3095          if (features->features.robustBufferAccess)
3096             robust_buffer_access = true;
3097          break;
3098       }
3099 
3100       default:
3101          /* Don't warn */
3102          break;
3103       }
3104    }
3105 
3106    /* Check the requested queues and fail if we are asked to create any
3107     * queues with flags we don't support.
3108     */
3109    assert(pCreateInfo->queueCreateInfoCount > 0);
3110    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3111       if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
3112          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
3113    }
3114 
3115    /* Check if client specified queue priority. */
3116    const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
3117       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
3118                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
3119 
3120    VkQueueGlobalPriorityKHR priority =
3121       queue_priority ? queue_priority->globalPriority :
3122          VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
3123 
3124    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
3125                        sizeof(*device), 8,
3126                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3127    if (!device)
3128       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
3129 
3130    struct vk_device_dispatch_table dispatch_table;
3131    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3132       anv_genX(&physical_device->info, device_entrypoints), true);
3133    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3134       &anv_device_entrypoints, false);
3135    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3136       &wsi_device_entrypoints, false);
3137 
3138    result = vk_device_init(&device->vk, &physical_device->vk,
3139                            &dispatch_table, pCreateInfo, pAllocator);
3140    if (result != VK_SUCCESS)
3141       goto fail_alloc;
3142 
3143    if (INTEL_DEBUG(DEBUG_BATCH)) {
3144       const unsigned decode_flags =
3145          INTEL_BATCH_DECODE_FULL |
3146          (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
3147          INTEL_BATCH_DECODE_OFFSETS |
3148          INTEL_BATCH_DECODE_FLOATS;
3149 
3150       intel_batch_decode_ctx_init(&device->decoder_ctx,
3151                                   &physical_device->compiler->isa,
3152                                   &physical_device->info,
3153                                   stderr, decode_flags, NULL,
3154                                   decode_get_bo, NULL, device);
3155 
3156       device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
3157       device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
3158       device->decoder_ctx.instruction_base =
3159          INSTRUCTION_STATE_POOL_MIN_ADDRESS;
3160    }
3161 
3162    device->physical = physical_device;
3163 
3164    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
3165    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
3166    if (device->fd == -1) {
3167       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3168       goto fail_device;
3169    }
3170 
3171    device->vk.check_status = anv_device_check_status;
3172    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
3173    vk_device_set_drm_fd(&device->vk, device->fd);
3174 
3175    uint32_t num_queues = 0;
3176    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
3177       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
3178 
3179    if (device->physical->engine_info) {
3180       /* The kernel API supports at most 64 engines */
3181       assert(num_queues <= 64);
3182       uint16_t engine_classes[64];
3183       int engine_count = 0;
3184       for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3185          const VkDeviceQueueCreateInfo *queueCreateInfo =
3186             &pCreateInfo->pQueueCreateInfos[i];
3187 
3188          assert(queueCreateInfo->queueFamilyIndex <
3189                 physical_device->queue.family_count);
3190          struct anv_queue_family *queue_family =
3191             &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
3192 
3193          for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
3194             engine_classes[engine_count++] = queue_family->engine_class;
3195       }
3196       device->context_id =
3197          intel_gem_create_context_engines(device->fd,
3198                                           physical_device->engine_info,
3199                                           engine_count, engine_classes);
3200    } else {
3201       assert(num_queues == 1);
3202       device->context_id = anv_gem_create_context(device);
3203    }
3204    if (device->context_id == -1) {
3205       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3206       goto fail_fd;
3207    }
3208 
3209    /* Here we tell the kernel not to attempt to recover our context but
3210     * immediately (on the next batchbuffer submission) report that the
3211     * context is lost, and we will do the recovery ourselves.  In the case
3212     * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
3213     * the client clean up the pieces.
3214     */
3215    anv_gem_set_context_param(device->fd, device->context_id,
3216                              I915_CONTEXT_PARAM_RECOVERABLE, false);
3217 
3218    device->queues =
3219       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3220                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3221    if (device->queues == NULL) {
3222       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3223       goto fail_context_id;
3224    }
3225 
3226    device->queue_count = 0;
3227    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3228       const VkDeviceQueueCreateInfo *queueCreateInfo =
3229          &pCreateInfo->pQueueCreateInfos[i];
3230 
3231       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3232          /* When using legacy contexts, we use I915_EXEC_RENDER but, with
3233           * engine-based contexts, the bottom 6 bits of exec_flags are used
3234           * for the engine ID.
3235           */
3236          uint32_t exec_flags = device->physical->engine_info ?
3237                                device->queue_count : I915_EXEC_RENDER;
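         /* A concrete reading (illustration, not new behavior): with an
          * engine-based context and, say, three queues in total, the queues
          * end up submitting with exec_flags 0, 1 and 2, selecting the
          * corresponding engine created above; a legacy context always
          * submits with I915_EXEC_RENDER.
          */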
3238 
3239          result = anv_queue_init(device, &device->queues[device->queue_count],
3240                                  exec_flags, queueCreateInfo, j);
3241          if (result != VK_SUCCESS)
3242             goto fail_queues;
3243 
3244          device->queue_count++;
3245       }
3246    }
3247 
3248    if (!anv_use_relocations(physical_device)) {
3249       if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3250          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3251          goto fail_queues;
3252       }
3253 
3254       /* keep the page with address zero out of the allocator */
3255       util_vma_heap_init(&device->vma_lo,
3256                          LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3257 
3258       util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3259                          CLIENT_VISIBLE_HEAP_SIZE);
3260 
3261       /* Leave the last 4GiB out of the high vma range, so that no state
3262        * base address + size can overflow 48 bits. For more information see
3263        * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3264        */
3265       util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3266                          physical_device->gtt_size - (1ull << 32) -
3267                          HIGH_HEAP_MIN_ADDRESS);
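      /* A hedged reading of the size above: vma_hi then spans
       * [HIGH_HEAP_MIN_ADDRESS, gtt_size - 4GiB), so any base address it
       * hands out plus a state size of up to 4GiB stays below gtt_size (and
       * therefore below 2^48), which is exactly what the workaround above
       * requires.
       */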
3268    }
3269 
3270    list_inithead(&device->memory_objects);
3271 
3272    /* As per spec, the driver implementation may deny requests to acquire
3273     * a priority above the default priority (MEDIUM) if the caller does not
3274     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
3275     * is returned.
3276     */
3277    if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
3278       int err = anv_gem_set_context_param(device->fd, device->context_id,
3279                                           I915_CONTEXT_PARAM_PRIORITY,
3280                                           vk_priority_to_gen(priority));
3281       if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
3282          result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
3283          goto fail_vmas;
3284       }
3285    }
3286 
3287    device->info = physical_device->info;
3288    device->isl_dev = physical_device->isl_dev;
3289 
3290    /* On Broadwell and later, we can use batch chaining to more efficiently
3291     * implement growing command buffers.  Prior to Haswell, the kernel
3292     * command parser gets in the way and we have to fall back to growing
3293     * the batch.
3294     */
3295    device->can_chain_batches = device->info.ver >= 8;
3296 
3297    device->robust_buffer_access = robust_buffer_access;
3298 
3299    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3300       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3301       goto fail_queues;
3302    }
3303 
3304    pthread_condattr_t condattr;
3305    if (pthread_condattr_init(&condattr) != 0) {
3306       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3307       goto fail_mutex;
3308    }
3309    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3310       pthread_condattr_destroy(&condattr);
3311       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3312       goto fail_mutex;
3313    }
3314    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3315       pthread_condattr_destroy(&condattr);
3316       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3317       goto fail_mutex;
3318    }
3319    pthread_condattr_destroy(&condattr);
3320 
3321    result = anv_bo_cache_init(&device->bo_cache, device);
3322    if (result != VK_SUCCESS)
3323       goto fail_queue_cond;
3324 
3325    anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
3326 
3327    /* Because scratch is also relative to General State Base Address, we leave
3328     * the base address 0 and start the pool memory at an offset.  This way we
3329     * get the correct offsets in the anv_states that get allocated from it.
3330     */
3331    result = anv_state_pool_init(&device->general_state_pool, device,
3332                                 "general pool",
3333                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
3334    if (result != VK_SUCCESS)
3335       goto fail_batch_bo_pool;
3336 
3337    result = anv_state_pool_init(&device->dynamic_state_pool, device,
3338                                 "dynamic pool",
3339                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
3340    if (result != VK_SUCCESS)
3341       goto fail_general_state_pool;
3342 
3343    if (device->info.ver >= 8) {
3344       /* The border color pointer is limited to 24 bits, so we need to make
3345        * sure that any such color used at any point in the program doesn't
3346        * exceed that limit.
3347        * We achieve that by reserving all the custom border colors we support
3348        * right off the bat, so they are close to the base address.
3349        */
3350       anv_state_reserved_pool_init(&device->custom_border_colors,
3351                                    &device->dynamic_state_pool,
3352                                    MAX_CUSTOM_BORDER_COLORS,
3353                                    sizeof(struct gfx8_border_color), 64);
3354    }
3355 
3356    result = anv_state_pool_init(&device->instruction_state_pool, device,
3357                                 "instruction pool",
3358                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
3359    if (result != VK_SUCCESS)
3360       goto fail_dynamic_state_pool;
3361 
3362    result = anv_state_pool_init(&device->surface_state_pool, device,
3363                                 "surface state pool",
3364                                 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3365    if (result != VK_SUCCESS)
3366       goto fail_instruction_state_pool;
3367 
3368    if (device->info.verx10 >= 125) {
3369       /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
3370        * table its own base address separately from surface state base.
3371        */
3372       result = anv_state_pool_init(&device->binding_table_pool, device,
3373                                    "binding table pool",
3374                                    BINDING_TABLE_POOL_MIN_ADDRESS, 0,
3375                                    BINDING_TABLE_POOL_BLOCK_SIZE);
3376    } else if (!anv_use_relocations(physical_device)) {
3377       int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3378                                (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
3379       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3380       result = anv_state_pool_init(&device->binding_table_pool, device,
3381                                    "binding table pool",
3382                                    SURFACE_STATE_POOL_MIN_ADDRESS,
3383                                    bt_pool_offset,
3384                                    BINDING_TABLE_POOL_BLOCK_SIZE);
3385    }
3386    if (result != VK_SUCCESS)
3387       goto fail_surface_state_pool;
3388 
3389    if (device->info.has_aux_map) {
3390       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3391                                                &physical_device->info);
3392       if (!device->aux_map_ctx)
3393          goto fail_binding_table_pool;
3394    }
3395 
3396    result = anv_device_alloc_bo(device, "workaround", 4096,
3397                                 ANV_BO_ALLOC_CAPTURE |
3398                                 ANV_BO_ALLOC_MAPPED |
3399                                 ANV_BO_ALLOC_LOCAL_MEM,
3400                                 0 /* explicit_address */,
3401                                 &device->workaround_bo);
3402    if (result != VK_SUCCESS)
3403       goto fail_surface_aux_map_pool;
3404 
3405    device->workaround_address = (struct anv_address) {
3406       .bo = device->workaround_bo,
3407       .offset = align_u32(
3408          intel_debug_write_identifiers(device->workaround_bo->map,
3409                                        device->workaround_bo->size,
3410                                        "Anv") + 8, 8),
3411    };
3412 
3413    device->debug_frame_desc =
3414       intel_debug_get_identifier_block(device->workaround_bo->map,
3415                                        device->workaround_bo->size,
3416                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
3417 
3418    if (device->vk.enabled_extensions.KHR_ray_query) {
3419       uint32_t ray_queries_size =
3420          align_u32(brw_rt_ray_queries_hw_stacks_size(&device->info), 4096);
3421 
3422       result = anv_device_alloc_bo(device, "ray queries",
3423                                    ray_queries_size,
3424                                    ANV_BO_ALLOC_LOCAL_MEM,
3425                                    0 /* explicit_address */,
3426                                    &device->ray_query_bo);
3427       if (result != VK_SUCCESS)
3428          goto fail_workaround_bo;
3429    }
3430 
3431    result = anv_device_init_trivial_batch(device);
3432    if (result != VK_SUCCESS)
3433       goto fail_ray_query_bo;
3434 
3435    if (device->info.ver >= 12 &&
3436        device->vk.enabled_extensions.KHR_fragment_shading_rate) {
3437       uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
3438 
3439       if (device->info.has_coarse_pixel_primitive_and_cb)
3440          n_cps_states *= 5 * 5; /* 5 combiner ops for each of the 2 combiners */
3441 
3442       n_cps_states += 1; /* Disable CPS */
3443 
3444       /* Each combination must be replicated across all viewports */
3445       n_cps_states *= MAX_VIEWPORTS;
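      /* Worked count, assuming MAX_VIEWPORTS is 16: with coarse-pixel
       * primitive/CB support this is (3*3 * 5*5 + 1) * 16 = 3616 states,
       * without it (3*3 + 1) * 16 = 160 states.
       */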
3446 
3447       device->cps_states =
3448          anv_state_pool_alloc(&device->dynamic_state_pool,
3449                               n_cps_states * CPS_STATE_length(&device->info) * 4,
3450                               32);
3451       if (device->cps_states.map == NULL)
3452          goto fail_trivial_batch;
3453 
3454       anv_genX(&device->info, init_cps_device_state)(device);
3455    }
3456 
3457    /* Allocate a null surface state at surface state offset 0.  This makes
3458     * NULL descriptor handling trivial because we can just memset structures
3459     * to zero and they have a valid descriptor.
3460     */
3461    device->null_surface_state =
3462       anv_state_pool_alloc(&device->surface_state_pool,
3463                            device->isl_dev.ss.size,
3464                            device->isl_dev.ss.align);
3465    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3466                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3467    assert(device->null_surface_state.offset == 0);
3468 
3469    anv_scratch_pool_init(device, &device->scratch_pool);
3470 
3471    /* TODO(RT): Do we want some sort of data structure for this? */
3472    memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3473 
3474    result = anv_genX(&device->info, init_device_state)(device);
3475    if (result != VK_SUCCESS)
3476       goto fail_trivial_batch_bo_and_scratch_pool;
3477 
3478    struct vk_pipeline_cache_create_info pcc_info = { };
3479    device->default_pipeline_cache =
3480       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3481    if (!device->default_pipeline_cache) {
3482       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3483       goto fail_trivial_batch_bo_and_scratch_pool;
3484    }
3485 
3486    /* Internal shaders need their own pipeline cache because, unlike the rest
3487     * of ANV, they won't work at all without a cache: they depend on it to
3488     * keep shaders resident while they run. Therefore, we need a special
3489     * cache just for BLORP/RT that is forced to always be enabled.
3490     */
3491    pcc_info.force_enable = true;
3492    device->internal_cache =
3493       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3494    if (device->internal_cache == NULL) {
3495       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3496       goto fail_default_pipeline_cache;
3497    }
3498 
3499    result = anv_device_init_rt_shaders(device);
3500    if (result != VK_SUCCESS) {
3501       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3502       goto fail_internal_cache;
3503    }
3504 
3505    anv_device_init_blorp(device);
3506 
3507    anv_device_init_border_colors(device);
3508 
3509    anv_device_perf_init(device);
3510 
3511    anv_device_utrace_init(device);
3512 
3513    *pDevice = anv_device_to_handle(device);
3514 
3515    return VK_SUCCESS;
3516 
3517  fail_internal_cache:
3518    vk_pipeline_cache_destroy(device->internal_cache, NULL);
3519  fail_default_pipeline_cache:
3520    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3521  fail_trivial_batch_bo_and_scratch_pool:
3522    anv_scratch_pool_finish(device, &device->scratch_pool);
3523  fail_trivial_batch:
3524    anv_device_release_bo(device, device->trivial_batch_bo);
3525  fail_ray_query_bo:
3526    if (device->ray_query_bo)
3527       anv_device_release_bo(device, device->ray_query_bo);
3528  fail_workaround_bo:
3529    anv_device_release_bo(device, device->workaround_bo);
3530  fail_surface_aux_map_pool:
3531    if (device->info.has_aux_map) {
3532       intel_aux_map_finish(device->aux_map_ctx);
3533       device->aux_map_ctx = NULL;
3534    }
3535  fail_binding_table_pool:
3536    if (!anv_use_relocations(physical_device))
3537       anv_state_pool_finish(&device->binding_table_pool);
3538  fail_surface_state_pool:
3539    anv_state_pool_finish(&device->surface_state_pool);
3540  fail_instruction_state_pool:
3541    anv_state_pool_finish(&device->instruction_state_pool);
3542  fail_dynamic_state_pool:
3543    if (device->info.ver >= 8)
3544       anv_state_reserved_pool_finish(&device->custom_border_colors);
3545    anv_state_pool_finish(&device->dynamic_state_pool);
3546  fail_general_state_pool:
3547    anv_state_pool_finish(&device->general_state_pool);
3548  fail_batch_bo_pool:
3549    anv_bo_pool_finish(&device->batch_bo_pool);
3550    anv_bo_cache_finish(&device->bo_cache);
3551  fail_queue_cond:
3552    pthread_cond_destroy(&device->queue_submit);
3553  fail_mutex:
3554    pthread_mutex_destroy(&device->mutex);
3555  fail_vmas:
3556    if (!anv_use_relocations(physical_device)) {
3557       util_vma_heap_finish(&device->vma_hi);
3558       util_vma_heap_finish(&device->vma_cva);
3559       util_vma_heap_finish(&device->vma_lo);
3560    }
3561  fail_queues:
3562    for (uint32_t i = 0; i < device->queue_count; i++)
3563       anv_queue_finish(&device->queues[i]);
3564    vk_free(&device->vk.alloc, device->queues);
3565  fail_context_id:
3566    anv_gem_destroy_context(device, device->context_id);
3567  fail_fd:
3568    close(device->fd);
3569  fail_device:
3570    vk_device_finish(&device->vk);
3571  fail_alloc:
3572    vk_free(&device->vk.alloc, device);
3573 
3574    return result;
3575 }
3576 
3577 void anv_DestroyDevice(
3578     VkDevice                                    _device,
3579     const VkAllocationCallbacks*                pAllocator)
3580 {
3581    ANV_FROM_HANDLE(anv_device, device, _device);
3582 
3583    if (!device)
3584       return;
3585 
3586    anv_device_utrace_finish(device);
3587 
3588    anv_device_finish_blorp(device);
3589 
3590    anv_device_finish_rt_shaders(device);
3591 
3592    vk_pipeline_cache_destroy(device->internal_cache, NULL);
3593    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3594 
3595 #ifdef HAVE_VALGRIND
3596    /* We only need to free these to prevent valgrind errors.  The backing
3597     * BO will go away in a couple of lines so we don't actually leak.
3598     */
3599    if (device->info.ver >= 8)
3600       anv_state_reserved_pool_finish(&device->custom_border_colors);
3601    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3602    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3603    anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
3604 #endif
3605 
3606    for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3607       if (device->rt_scratch_bos[i] != NULL)
3608          anv_device_release_bo(device, device->rt_scratch_bos[i]);
3609    }
3610 
3611    anv_scratch_pool_finish(device, &device->scratch_pool);
3612 
3613    if (device->vk.enabled_extensions.KHR_ray_query) {
3614       for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
3615          if (device->ray_query_shadow_bos[i] != NULL)
3616             anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
3617       }
3618       anv_device_release_bo(device, device->ray_query_bo);
3619    }
3620    anv_device_release_bo(device, device->workaround_bo);
3621    anv_device_release_bo(device, device->trivial_batch_bo);
3622 
3623    if (device->info.has_aux_map) {
3624       intel_aux_map_finish(device->aux_map_ctx);
3625       device->aux_map_ctx = NULL;
3626    }
3627 
3628    if (!anv_use_relocations(device->physical))
3629       anv_state_pool_finish(&device->binding_table_pool);
3630    anv_state_pool_finish(&device->surface_state_pool);
3631    anv_state_pool_finish(&device->instruction_state_pool);
3632    anv_state_pool_finish(&device->dynamic_state_pool);
3633    anv_state_pool_finish(&device->general_state_pool);
3634 
3635    anv_bo_pool_finish(&device->batch_bo_pool);
3636 
3637    anv_bo_cache_finish(&device->bo_cache);
3638 
3639    if (!anv_use_relocations(device->physical)) {
3640       util_vma_heap_finish(&device->vma_hi);
3641       util_vma_heap_finish(&device->vma_cva);
3642       util_vma_heap_finish(&device->vma_lo);
3643    }
3644 
3645    pthread_cond_destroy(&device->queue_submit);
3646    pthread_mutex_destroy(&device->mutex);
3647 
3648    for (uint32_t i = 0; i < device->queue_count; i++)
3649       anv_queue_finish(&device->queues[i]);
3650    vk_free(&device->vk.alloc, device->queues);
3651 
3652    anv_gem_destroy_context(device, device->context_id);
3653 
3654    if (INTEL_DEBUG(DEBUG_BATCH))
3655       intel_batch_decode_ctx_finish(&device->decoder_ctx);
3656 
3657    close(device->fd);
3658 
3659    vk_device_finish(&device->vk);
3660    vk_free(&device->vk.alloc, device);
3661 }
3662 
3663 VkResult anv_EnumerateInstanceLayerProperties(
3664     uint32_t*                                   pPropertyCount,
3665     VkLayerProperties*                          pProperties)
3666 {
3667    if (pProperties == NULL) {
3668       *pPropertyCount = 0;
3669       return VK_SUCCESS;
3670    }
3671 
3672    /* None supported at this time */
3673    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3674 }
3675 
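/* Poll the kernel's per-context reset statistics and mark the device lost if
 * a GPU hang was attributed to one of our command buffers ("active") or a
 * hang happened while our commands were in flight ("pending").
 */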
3676 static VkResult
3677 anv_device_check_status(struct vk_device *vk_device)
3678 {
3679    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
3680 
3681    uint32_t active, pending;
3682    int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
3683                                              &active, &pending);
3684    if (ret == -1) {
3685       /* We don't know the real error. */
3686       return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
3687    }
3688 
3689    if (active) {
3690       return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
3691    } else if (pending) {
3692       return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
3693    }
3694 
3695    return VK_SUCCESS;
3696 }
3697 
3698 VkResult
3699 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3700                 int64_t timeout)
3701 {
3702    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3703    if (ret == -1 && errno == ETIME) {
3704       return VK_TIMEOUT;
3705    } else if (ret == -1) {
3706       /* We don't know the real error. */
3707       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
3708    } else {
3709       return VK_SUCCESS;
3710    }
3711 }
3712 
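/* Allocate a GPU virtual address range for a BO when the driver manages the
 * address space (softpin).  Client-visible allocations (used for buffer
 * device address capture/replay) are served only from the CVA heap;
 * everything else prefers the high heap and falls back to the low heap,
 * which is also where 32-bit-address allocations must land.  The returned
 * address is in canonical (sign-extended) form; 0 means allocation failed.
 */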
3713 uint64_t
3714 anv_vma_alloc(struct anv_device *device,
3715               uint64_t size, uint64_t align,
3716               enum anv_bo_alloc_flags alloc_flags,
3717               uint64_t client_address)
3718 {
3719    pthread_mutex_lock(&device->vma_mutex);
3720 
3721    uint64_t addr = 0;
3722 
3723    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3724       if (client_address) {
3725          if (util_vma_heap_alloc_addr(&device->vma_cva,
3726                                       client_address, size)) {
3727             addr = client_address;
3728          }
3729       } else {
3730          addr = util_vma_heap_alloc(&device->vma_cva, size, align);
3731       }
3732       /* We don't want to fall back to other heaps */
3733       goto done;
3734    }
3735 
3736    assert(client_address == 0);
3737 
3738    if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3739       addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3740 
3741    if (addr == 0)
3742       addr = util_vma_heap_alloc(&device->vma_lo, size, align);
3743 
3744 done:
3745    pthread_mutex_unlock(&device->vma_mutex);
3746 
3747    assert(addr == intel_48b_address(addr));
3748    return intel_canonical_address(addr);
3749 }
3750 
3751 void
3752 anv_vma_free(struct anv_device *device,
3753              uint64_t address, uint64_t size)
3754 {
3755    const uint64_t addr_48b = intel_48b_address(address);
3756 
3757    pthread_mutex_lock(&device->vma_mutex);
3758 
3759    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3760        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3761       util_vma_heap_free(&device->vma_lo, addr_48b, size);
3762    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3763               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3764       util_vma_heap_free(&device->vma_cva, addr_48b, size);
3765    } else {
3766       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3767       util_vma_heap_free(&device->vma_hi, addr_48b, size);
3768    }
3769 
3770    pthread_mutex_unlock(&device->vma_mutex);
3771 }
3772 
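/* vkAllocateMemory: depending on the pNext chain, this either imports an
 * AHardwareBuffer, an opaque/dma-buf file descriptor, or a host pointer, or
 * allocates a fresh BO.  All successful paths funnel through the "success"
 * label, where the heap usage counter is charged and the allocation is
 * rejected if it would overflow the heap.
 */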
3773 VkResult anv_AllocateMemory(
3774     VkDevice                                    _device,
3775     const VkMemoryAllocateInfo*                 pAllocateInfo,
3776     const VkAllocationCallbacks*                pAllocator,
3777     VkDeviceMemory*                             pMem)
3778 {
3779    ANV_FROM_HANDLE(anv_device, device, _device);
3780    struct anv_physical_device *pdevice = device->physical;
3781    struct anv_device_memory *mem;
3782    VkResult result = VK_SUCCESS;
3783 
3784    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3785 
3786    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3787    assert(pAllocateInfo->allocationSize > 0);
3788 
3789    VkDeviceSize aligned_alloc_size =
3790       align_u64(pAllocateInfo->allocationSize, 4096);
3791 
3792    if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3793       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3794 
3795    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3796    struct anv_memory_type *mem_type =
3797       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3798    assert(mem_type->heapIndex < pdevice->memory.heap_count);
3799    struct anv_memory_heap *mem_heap =
3800       &pdevice->memory.heaps[mem_type->heapIndex];
3801 
3802    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3803    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3804       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3805 
3806    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3807                          VK_OBJECT_TYPE_DEVICE_MEMORY);
3808    if (mem == NULL)
3809       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3810 
3811    mem->type = mem_type;
3812    mem->map = NULL;
3813    mem->map_size = 0;
3814    mem->map_delta = 0;
3815    mem->ahw = NULL;
3816    mem->host_ptr = NULL;
3817 
3818    enum anv_bo_alloc_flags alloc_flags = 0;
3819 
3820    const VkExportMemoryAllocateInfo *export_info = NULL;
3821    const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3822    const VkImportMemoryFdInfoKHR *fd_info = NULL;
3823    const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3824    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3825    VkMemoryAllocateFlags vk_flags = 0;
3826    uint64_t client_address = 0;
3827 
3828    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3829       switch (ext->sType) {
3830       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3831          export_info = (void *)ext;
3832          break;
3833 
3834       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3835          ahw_import_info = (void *)ext;
3836          break;
3837 
3838       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3839          fd_info = (void *)ext;
3840          break;
3841 
3842       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3843          host_ptr_info = (void *)ext;
3844          break;
3845 
3846       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3847          const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3848          vk_flags = flags_info->flags;
3849          break;
3850       }
3851 
3852       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3853          dedicated_info = (void *)ext;
3854          break;
3855 
3856       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3857          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3858             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3859          client_address = addr_info->opaqueCaptureAddress;
3860          break;
3861       }
3862 
3863       default:
3864          if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3865             /* this isn't a real enum value,
3866              * so use a conditional to avoid a compiler warning
3867              */
3868             anv_debug_ignored_stype(ext->sType);
3869          break;
3870       }
3871    }
3872 
3873    /* By default, we want all VkDeviceMemory objects to support CCS */
3874    if (device->physical->has_implicit_ccs && device->info.has_aux_map)
3875       alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
3876 
3877    /* If i915 reported mappable and non-mappable vram regions and the
3878     * application wants lmem to be mappable, then we need to use the
3879     * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
3880     */
3881    if (pdevice->vram_mappable.size > 0 &&
3882        pdevice->vram_non_mappable.size > 0 &&
3883        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
3884        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
3885       alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
3886 
3887    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3888       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3889 
3890    if ((export_info && export_info->handleTypes) ||
3891        (fd_info && fd_info->handleType) ||
3892        (host_ptr_info && host_ptr_info->handleType)) {
3893       /* Anything imported or exported is EXTERNAL */
3894       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3895    }
3896 
3897    /* Check if we need to support Android HW buffer export. If so,
3898     * create AHardwareBuffer and import memory from it.
3899     */
3900    bool android_export = false;
3901    if (export_info && export_info->handleTypes &
3902        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3903       android_export = true;
3904 
3905    if (ahw_import_info) {
3906       result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3907       if (result != VK_SUCCESS)
3908          goto fail;
3909 
3910       goto success;
3911    } else if (android_export) {
3912       result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3913       if (result != VK_SUCCESS)
3914          goto fail;
3915 
3916       goto success;
3917    }
3918 
3919    /* The Vulkan spec permits handleType to be 0, in which case the struct is
3920     * ignored.
3921     */
3922    if (fd_info && fd_info->handleType) {
3923       /* At the moment, we support only the below handle types. */
3924       assert(fd_info->handleType ==
3925                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3926              fd_info->handleType ==
3927                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3928 
3929       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3930                                     client_address, &mem->bo);
3931       if (result != VK_SUCCESS)
3932          goto fail;
3933 
3934       /* For security purposes, we reject importing the bo if it's smaller
3935        * than the requested allocation size.  This prevents a malicious client
3936        * from passing a buffer to a trusted client, lying about the size, and
3937        * telling the trusted client to try and texture from an image that goes
3938        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
3939        * in the trusted client.  The trusted client can protect itself against
3940        * this sort of attack but only if it can trust the buffer size.
3941        */
3942       if (mem->bo->size < aligned_alloc_size) {
3943          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3944                             "aligned allocationSize too large for "
3945                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3946                             "%"PRIu64"B > %"PRIu64"B",
3947                             aligned_alloc_size, mem->bo->size);
3948          anv_device_release_bo(device, mem->bo);
3949          goto fail;
3950       }
3951 
3952       /* From the Vulkan spec:
3953        *
3954        *    "Importing memory from a file descriptor transfers ownership of
3955        *    the file descriptor from the application to the Vulkan
3956        *    implementation. The application must not perform any operations on
3957        *    the file descriptor after a successful import."
3958        *
3959        * If the import fails, we leave the file descriptor open.
3960        */
3961       close(fd_info->fd);
3962       goto success;
3963    }
3964 
3965    if (host_ptr_info && host_ptr_info->handleType) {
3966       if (host_ptr_info->handleType ==
3967           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3968          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3969          goto fail;
3970       }
3971 
3972       assert(host_ptr_info->handleType ==
3973              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3974 
3975       result = anv_device_import_bo_from_host_ptr(device,
3976                                                   host_ptr_info->pHostPointer,
3977                                                   pAllocateInfo->allocationSize,
3978                                                   alloc_flags,
3979                                                   client_address,
3980                                                   &mem->bo);
3981       if (result != VK_SUCCESS)
3982          goto fail;
3983 
3984       mem->host_ptr = host_ptr_info->pHostPointer;
3985       goto success;
3986    }
3987 
3988    /* Set the ALLOC_LOCAL_MEM flag if the requested memory type has the
3989     * DEVICE_LOCAL_BIT property set.
3990     */
3991    if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
3992       alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM;
3993 
3994    /* Regular allocate (not importing memory). */
3995 
3996    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3997                                 alloc_flags, client_address, &mem->bo);
3998    if (result != VK_SUCCESS)
3999       goto fail;
4000 
4001    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
4002       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
4003 
4004       /* Some legacy (non-modifiers) consumers need the tiling to be set on
4005        * the BO.  In this case, we have a dedicated allocation.
4006        */
4007       if (image->vk.wsi_legacy_scanout) {
4008          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
4009          result = anv_device_set_bo_tiling(device, mem->bo,
4010                                            surf->row_pitch_B,
4011                                            surf->tiling);
4012          if (result != VK_SUCCESS) {
4013             anv_device_release_bo(device, mem->bo);
4014             goto fail;
4015          }
4016       }
4017    }
4018 
4019  success:
4020    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
4021    if (mem_heap_used > mem_heap->size) {
4022       p_atomic_add(&mem_heap->used, -mem->bo->size);
4023       anv_device_release_bo(device, mem->bo);
4024       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
4025                          "Out of heap memory");
4026       goto fail;
4027    }
4028 
4029    pthread_mutex_lock(&device->mutex);
4030    list_addtail(&mem->link, &device->memory_objects);
4031    pthread_mutex_unlock(&device->mutex);
4032 
4033    *pMem = anv_device_memory_to_handle(mem);
4034 
4035    return VK_SUCCESS;
4036 
4037  fail:
4038    vk_object_free(&device->vk, pAllocator, mem);
4039 
4040    return result;
4041 }
4042 
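/* Illustrative application-side export flow (a sketch, not driver code; the
 * "size" and "type_index" values below are placeholders):
 *
 *    VkExportMemoryAllocateInfo export_info = {
 *       .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
 *       .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &export_info,
 *       .allocationSize = size,
 *       .memoryTypeIndex = type_index,
 *    };
 *    VkDeviceMemory memory;
 *    vkAllocateMemory(device, &alloc_info, NULL, &memory);
 *
 *    VkMemoryGetFdInfoKHR get_fd_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 *    int fd = -1;
 *    vkGetMemoryFdKHR(device, &get_fd_info, &fd);
 *
 * The returned fd is owned by the caller and can later be imported through
 * VkImportMemoryFdInfoKHR (handled in anv_AllocateMemory above).
 */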
4043 VkResult anv_GetMemoryFdKHR(
4044     VkDevice                                    device_h,
4045     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
4046     int*                                        pFd)
4047 {
4048    ANV_FROM_HANDLE(anv_device, dev, device_h);
4049    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
4050 
4051    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4052 
4053    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4054           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4055 
4056    return anv_device_export_bo(dev, mem->bo, pFd);
4057 }
4058 
4059 VkResult anv_GetMemoryFdPropertiesKHR(
4060     VkDevice                                    _device,
4061     VkExternalMemoryHandleTypeFlagBits          handleType,
4062     int                                         fd,
4063     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
4064 {
4065    ANV_FROM_HANDLE(anv_device, device, _device);
4066 
4067    switch (handleType) {
4068    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4069       /* dma-buf can be imported as any memory type */
4070       pMemoryFdProperties->memoryTypeBits =
4071          (1 << device->physical->memory.type_count) - 1;
4072       return VK_SUCCESS;
4073 
4074    default:
4075       /* The valid usage section for this function says:
4076        *
4077        *    "handleType must not be one of the handle types defined as
4078        *    opaque."
4079        *
4080        * So opaque handle types fall into the default "unsupported" case.
4081        */
4082       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4083    }
4084 }
4085 
4086 VkResult anv_GetMemoryHostPointerPropertiesEXT(
4087    VkDevice                                    _device,
4088    VkExternalMemoryHandleTypeFlagBits          handleType,
4089    const void*                                 pHostPointer,
4090    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
4091 {
4092    ANV_FROM_HANDLE(anv_device, device, _device);
4093 
4094    assert(pMemoryHostPointerProperties->sType ==
4095           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
4096 
4097    switch (handleType) {
4098    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
4099       /* Host memory can be imported as any memory type. */
4100       pMemoryHostPointerProperties->memoryTypeBits =
4101          (1ull << device->physical->memory.type_count) - 1;
4102 
4103       return VK_SUCCESS;
4104 
4105    default:
4106       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
4107    }
4108 }
4109 
4110 void anv_FreeMemory(
4111     VkDevice                                    _device,
4112     VkDeviceMemory                              _mem,
4113     const VkAllocationCallbacks*                pAllocator)
4114 {
4115    ANV_FROM_HANDLE(anv_device, device, _device);
4116    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
4117 
4118    if (mem == NULL)
4119       return;
4120 
4121    pthread_mutex_lock(&device->mutex);
4122    list_del(&mem->link);
4123    pthread_mutex_unlock(&device->mutex);
4124 
4125    if (mem->map)
4126       anv_UnmapMemory(_device, _mem);
4127 
4128    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
4129                 -mem->bo->size);
4130 
4131    anv_device_release_bo(device, mem->bo);
4132 
4133 #if defined(ANDROID) && ANDROID_API_LEVEL >= 26
4134    if (mem->ahw)
4135       AHardwareBuffer_release(mem->ahw);
4136 #endif
4137 
4138    vk_object_free(&device->vk, pAllocator, mem);
4139 }
4140 
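/* vkMapMemory: host-pointer imports simply return the client's pointer plus
 * the requested offset.  Otherwise the BO is mapped through GEM, using a
 * write-combining mapping on non-LLC parts for HOST_COHERENT memory types.
 * When mmap offsets are not supported, the map offset is rounded down to a
 * 4 KiB boundary and map_delta records the sub-page remainder, so the pointer
 * handed back to the application still points at the requested offset.
 */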
4141 VkResult anv_MapMemory(
4142     VkDevice                                    _device,
4143     VkDeviceMemory                              _memory,
4144     VkDeviceSize                                offset,
4145     VkDeviceSize                                size,
4146     VkMemoryMapFlags                            flags,
4147     void**                                      ppData)
4148 {
4149    ANV_FROM_HANDLE(anv_device, device, _device);
4150    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4151 
4152    if (mem == NULL) {
4153       *ppData = NULL;
4154       return VK_SUCCESS;
4155    }
4156 
4157    if (mem->host_ptr) {
4158       *ppData = mem->host_ptr + offset;
4159       return VK_SUCCESS;
4160    }
4161 
4162    if (size == VK_WHOLE_SIZE)
4163       size = mem->bo->size - offset;
4164 
4165    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
4166     *
4167     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
4169     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
4170     *    equal to the size of the memory minus offset
4171     */
4172    assert(size > 0);
4173    assert(offset + size <= mem->bo->size);
4174 
4175    if (size != (size_t)size) {
4176       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4177                        "requested size 0x%"PRIx64" does not fit in %u bits",
4178                        size, (unsigned)(sizeof(size_t) * 8));
4179    }
4180 
4181    /* From the Vulkan 1.2.194 spec:
4182     *
4183     *    "memory must not be currently host mapped"
4184     */
4185    if (mem->map != NULL) {
4186       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4187                        "Memory object already mapped.");
4188    }
4189 
4190    uint32_t gem_flags = 0;
4191 
4192    if (!device->info.has_llc &&
4193        (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
4194       gem_flags |= I915_MMAP_WC;
4195 
4196    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
4197    uint64_t map_offset;
4198    if (!device->physical->has_mmap_offset)
4199       map_offset = offset & ~4095ull;
4200    else
4201       map_offset = 0;
4202    assert(offset >= map_offset);
4203    uint64_t map_size = (offset + size) - map_offset;
4204 
4205    /* Let's map whole pages */
4206    map_size = align_u64(map_size, 4096);
4207 
4208    void *map;
4209    VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
4210                                        map_size, gem_flags, &map);
4211    if (result != VK_SUCCESS)
4212       return result;
4213 
4214    mem->map = map;
4215    mem->map_size = map_size;
4216    mem->map_delta = (offset - map_offset);
4217    *ppData = mem->map + mem->map_delta;
4218 
4219    return VK_SUCCESS;
4220 }
4221 
4222 void anv_UnmapMemory(
4223     VkDevice                                    _device,
4224     VkDeviceMemory                              _memory)
4225 {
4226    ANV_FROM_HANDLE(anv_device, device, _device);
4227    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4228 
4229    if (mem == NULL || mem->host_ptr)
4230       return;
4231 
4232    anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
4233 
4234    mem->map = NULL;
4235    mem->map_size = 0;
4236    mem->map_delta = 0;
4237 }
4238 
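/* On platforms without coherent CPU/GPU caching (need_clflush), flushing a
 * mapped range means fencing the CPU writes and then clflushing the affected
 * cache lines; invalidation clflushes first and fences afterwards.  Ranges
 * are clamped to the portion of the BO that is actually mapped, using
 * map_delta to translate memory offsets into map offsets.
 */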
4239 VkResult anv_FlushMappedMemoryRanges(
4240     VkDevice                                    _device,
4241     uint32_t                                    memoryRangeCount,
4242     const VkMappedMemoryRange*                  pMemoryRanges)
4243 {
4244    ANV_FROM_HANDLE(anv_device, device, _device);
4245 
4246    if (!device->physical->memory.need_clflush)
4247       return VK_SUCCESS;
4248 
4249    /* Make sure the writes we're flushing have landed. */
4250    __builtin_ia32_mfence();
4251 
4252    for (uint32_t i = 0; i < memoryRangeCount; i++) {
4253       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4254       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4255          continue;
4256 
4257       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4258       if (map_offset >= mem->map_size)
4259          continue;
4260 
4261       intel_clflush_range(mem->map + map_offset,
4262                           MIN2(pMemoryRanges[i].size,
4263                                mem->map_size - map_offset));
4264    }
4265 
4266    return VK_SUCCESS;
4267 }
4268 
4269 VkResult anv_InvalidateMappedMemoryRanges(
4270     VkDevice                                    _device,
4271     uint32_t                                    memoryRangeCount,
4272     const VkMappedMemoryRange*                  pMemoryRanges)
4273 {
4274    ANV_FROM_HANDLE(anv_device, device, _device);
4275 
4276    if (!device->physical->memory.need_clflush)
4277       return VK_SUCCESS;
4278 
4279    for (uint32_t i = 0; i < memoryRangeCount; i++) {
4280       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4281       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4282          continue;
4283 
4284       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4285       if (map_offset >= mem->map_size)
4286          continue;
4287 
4288       intel_invalidate_range(mem->map + map_offset,
4289                              MIN2(pMemoryRanges[i].size,
4290                                   mem->map_size - map_offset));
4291    }
4292 
4293    /* Make sure no reads get moved up above the invalidate. */
4294    __builtin_ia32_mfence();
4295 
4296    return VK_SUCCESS;
4297 }
4298 
4299 void anv_GetDeviceMemoryCommitment(
4300     VkDevice                                    device,
4301     VkDeviceMemory                              memory,
4302     VkDeviceSize*                               pCommittedMemoryInBytes)
4303 {
4304    *pCommittedMemoryInBytes = 0;
4305 }
4306 
4307 static void
4308 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
4309 {
4310    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
4311    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
4312 
4313    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
4314 
4315    if (mem) {
4316       assert(pBindInfo->memoryOffset < mem->bo->size);
4317       assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
4318       buffer->address = (struct anv_address) {
4319          .bo = mem->bo,
4320          .offset = pBindInfo->memoryOffset,
4321       };
4322    } else {
4323       buffer->address = ANV_NULL_ADDRESS;
4324    }
4325 }
4326 
4327 VkResult anv_BindBufferMemory2(
4328     VkDevice                                    device,
4329     uint32_t                                    bindInfoCount,
4330     const VkBindBufferMemoryInfo*               pBindInfos)
4331 {
4332    for (uint32_t i = 0; i < bindInfoCount; i++)
4333       anv_bind_buffer_memory(&pBindInfos[i]);
4334 
4335    return VK_SUCCESS;
4336 }
4337 
4338 VkResult anv_QueueBindSparse(
4339     VkQueue                                     _queue,
4340     uint32_t                                    bindInfoCount,
4341     const VkBindSparseInfo*                     pBindInfo,
4342     VkFence                                     fence)
4343 {
4344    ANV_FROM_HANDLE(anv_queue, queue, _queue);
4345    if (vk_device_is_lost(&queue->device->vk))
4346       return VK_ERROR_DEVICE_LOST;
4347 
4348    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
4349 }
4350 
4351 // Event functions
4352 
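/* A VkEvent is backed by a single 64-bit word in the dynamic state pool.
 * The host-side entry points below just write VK_EVENT_SET / VK_EVENT_RESET
 * into that word (or read it back for vkGetEventStatus); the GPU-side
 * set/reset/wait operations are emitted from the command buffer code.
 */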
4353 VkResult anv_CreateEvent(
4354     VkDevice                                    _device,
4355     const VkEventCreateInfo*                    pCreateInfo,
4356     const VkAllocationCallbacks*                pAllocator,
4357     VkEvent*                                    pEvent)
4358 {
4359    ANV_FROM_HANDLE(anv_device, device, _device);
4360    struct anv_event *event;
4361 
4362    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
4363 
4364    event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
4365                            VK_OBJECT_TYPE_EVENT);
4366    if (event == NULL)
4367       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4368 
4369    event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
4370                                        sizeof(uint64_t), 8);
4371    *(uint64_t *)event->state.map = VK_EVENT_RESET;
4372 
4373    *pEvent = anv_event_to_handle(event);
4374 
4375    return VK_SUCCESS;
4376 }
4377 
4378 void anv_DestroyEvent(
4379     VkDevice                                    _device,
4380     VkEvent                                     _event,
4381     const VkAllocationCallbacks*                pAllocator)
4382 {
4383    ANV_FROM_HANDLE(anv_device, device, _device);
4384    ANV_FROM_HANDLE(anv_event, event, _event);
4385 
4386    if (!event)
4387       return;
4388 
4389    anv_state_pool_free(&device->dynamic_state_pool, event->state);
4390 
4391    vk_object_free(&device->vk, pAllocator, event);
4392 }
4393 
4394 VkResult anv_GetEventStatus(
4395     VkDevice                                    _device,
4396     VkEvent                                     _event)
4397 {
4398    ANV_FROM_HANDLE(anv_device, device, _device);
4399    ANV_FROM_HANDLE(anv_event, event, _event);
4400 
4401    if (vk_device_is_lost(&device->vk))
4402       return VK_ERROR_DEVICE_LOST;
4403 
4404    return *(uint64_t *)event->state.map;
4405 }
4406 
4407 VkResult anv_SetEvent(
4408     VkDevice                                    _device,
4409     VkEvent                                     _event)
4410 {
4411    ANV_FROM_HANDLE(anv_event, event, _event);
4412 
4413    *(uint64_t *)event->state.map = VK_EVENT_SET;
4414 
4415    return VK_SUCCESS;
4416 }
4417 
4418 VkResult anv_ResetEvent(
4419     VkDevice                                    _device,
4420     VkEvent                                     _event)
4421 {
4422    ANV_FROM_HANDLE(anv_event, event, _event);
4423 
4424    *(uint64_t *)event->state.map = VK_EVENT_RESET;
4425 
4426    return VK_SUCCESS;
4427 }
4428 
4429 // Buffer functions
4430 
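/* Buffers are lightweight in this driver: creation only validates the size
 * against the GTT, the GPU address is filled in at vkBindBufferMemory time,
 * and dedicated allocations are never required or preferred.
 */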
4431 static void
4432 anv_get_buffer_memory_requirements(struct anv_device *device,
4433                                    VkDeviceSize size,
4434                                    VkBufferUsageFlags usage,
4435                                    VkMemoryRequirements2* pMemoryRequirements)
4436 {
4437    /* The Vulkan spec (git aaed022) says:
4438     *
4439     *    memoryTypeBits is a bitfield and contains one bit set for every
4440     *    supported memory type for the resource. The bit `1<<i` is set if and
4441     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
4442     *    structure for the physical device is supported.
4443     */
4444    uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
4445 
4446    /* Base alignment requirement of a cache line */
4447    uint32_t alignment = 16;
4448 
4449    if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
4450       alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
4451 
4452    pMemoryRequirements->memoryRequirements.size = size;
4453    pMemoryRequirements->memoryRequirements.alignment = alignment;
4454 
4455    /* Storage and Uniform buffers should have their size aligned to
4456     * 32 bits to avoid boundary checks when the last DWord is not complete.
4457     * This ensures that no internal padding is needed for
4458     * 16-bit types.
4459     */
4460    if (device->robust_buffer_access &&
4461        (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
4462         usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
4463       pMemoryRequirements->memoryRequirements.size = align_u64(size, 4);
4464 
4465    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
4466 
4467    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
4468       switch (ext->sType) {
4469       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
4470          VkMemoryDedicatedRequirements *requirements = (void *)ext;
4471          requirements->prefersDedicatedAllocation = false;
4472          requirements->requiresDedicatedAllocation = false;
4473          break;
4474       }
4475 
4476       default:
4477          anv_debug_ignored_stype(ext->sType);
4478          break;
4479       }
4480    }
4481 }
4482 
4483 void anv_GetBufferMemoryRequirements2(
4484     VkDevice                                    _device,
4485     const VkBufferMemoryRequirementsInfo2*      pInfo,
4486     VkMemoryRequirements2*                      pMemoryRequirements)
4487 {
4488    ANV_FROM_HANDLE(anv_device, device, _device);
4489    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4490 
4491    anv_get_buffer_memory_requirements(device,
4492                                       buffer->vk.size,
4493                                       buffer->vk.usage,
4494                                       pMemoryRequirements);
4495 }
4496 
4497 void anv_GetDeviceBufferMemoryRequirementsKHR(
4498     VkDevice                                    _device,
4499     const VkDeviceBufferMemoryRequirements*     pInfo,
4500     VkMemoryRequirements2*                      pMemoryRequirements)
4501 {
4502    ANV_FROM_HANDLE(anv_device, device, _device);
4503 
4504    anv_get_buffer_memory_requirements(device,
4505                                       pInfo->pCreateInfo->size,
4506                                       pInfo->pCreateInfo->usage,
4507                                       pMemoryRequirements);
4508 }
4509 
4510 VkResult anv_CreateBuffer(
4511     VkDevice                                    _device,
4512     const VkBufferCreateInfo*                   pCreateInfo,
4513     const VkAllocationCallbacks*                pAllocator,
4514     VkBuffer*                                   pBuffer)
4515 {
4516    ANV_FROM_HANDLE(anv_device, device, _device);
4517    struct anv_buffer *buffer;
4518 
4519    /* Don't allow creating buffers bigger than our address space.  The real
4520     * issue here is that we may align up the buffer size and we don't want
4521     * doing so to cause roll-over.  However, no one has any business
4522     * allocating a buffer larger than our GTT size.
4523     */
4524    if (pCreateInfo->size > device->physical->gtt_size)
4525       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4526 
4527    buffer = vk_buffer_create(&device->vk, pCreateInfo,
4528                              pAllocator, sizeof(*buffer));
4529    if (buffer == NULL)
4530       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4531 
4532    buffer->address = ANV_NULL_ADDRESS;
4533 
4534    *pBuffer = anv_buffer_to_handle(buffer);
4535 
4536    return VK_SUCCESS;
4537 }
4538 
4539 void anv_DestroyBuffer(
4540     VkDevice                                    _device,
4541     VkBuffer                                    _buffer,
4542     const VkAllocationCallbacks*                pAllocator)
4543 {
4544    ANV_FROM_HANDLE(anv_device, device, _device);
4545    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
4546 
4547    if (!buffer)
4548       return;
4549 
4550    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
4551 }
4552 
4553 VkDeviceAddress anv_GetBufferDeviceAddress(
4554     VkDevice                                    device,
4555     const VkBufferDeviceAddressInfo*            pInfo)
4556 {
4557    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4558 
4559    assert(!anv_address_is_null(buffer->address));
4560    assert(anv_bo_is_pinned(buffer->address.bo));
4561 
4562    return anv_address_physical(buffer->address);
4563 }
4564 
4565 uint64_t anv_GetBufferOpaqueCaptureAddress(
4566     VkDevice                                    device,
4567     const VkBufferDeviceAddressInfo*            pInfo)
4568 {
4569    return 0;
4570 }
4571 
4572 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
4573     VkDevice                                    device,
4574     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
4575 {
4576    ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
4577 
4578    assert(anv_bo_is_pinned(memory->bo));
4579    assert(memory->bo->has_client_visible_address);
4580 
4581    return intel_48b_address(memory->bo->offset);
4582 }
4583 
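/* Fill out a buffer surface state through ISL: the physical address, a MOCS
 * value chosen for the usage (and for whether the BO is external), the range,
 * format, swizzle, and stride all come straight from the caller.
 */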
4584 void
4585 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
4586                               enum isl_format format,
4587                               struct isl_swizzle swizzle,
4588                               isl_surf_usage_flags_t usage,
4589                               struct anv_address address,
4590                               uint32_t range, uint32_t stride)
4591 {
4592    isl_buffer_fill_state(&device->isl_dev, state.map,
4593                          .address = anv_address_physical(address),
4594                          .mocs = isl_mocs(&device->isl_dev, usage,
4595                                           address.bo && address.bo->is_external),
4596                          .size_B = range,
4597                          .format = format,
4598                          .swizzle = swizzle,
4599                          .stride_B = stride);
4600 }
4601 
4602 void anv_DestroySampler(
4603     VkDevice                                    _device,
4604     VkSampler                                   _sampler,
4605     const VkAllocationCallbacks*                pAllocator)
4606 {
4607    ANV_FROM_HANDLE(anv_device, device, _device);
4608    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
4609 
4610    if (!sampler)
4611       return;
4612 
4613    if (sampler->bindless_state.map) {
4614       anv_state_pool_free(&device->dynamic_state_pool,
4615                           sampler->bindless_state);
4616    }
4617 
4618    if (sampler->custom_border_color.map) {
4619       anv_state_reserved_pool_free(&device->custom_border_colors,
4620                                    sampler->custom_border_color);
4621    }
4622 
4623    vk_object_free(&device->vk, pAllocator, sampler);
4624 }
4625 
4626 static const VkTimeDomainEXT anv_time_domains[] = {
4627    VK_TIME_DOMAIN_DEVICE_EXT,
4628    VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
4629 #ifdef CLOCK_MONOTONIC_RAW
4630    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
4631 #endif
4632 };
4633 
4634 VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
4635    VkPhysicalDevice                             physicalDevice,
4636    uint32_t                                     *pTimeDomainCount,
4637    VkTimeDomainEXT                              *pTimeDomains)
4638 {
4639    int d;
4640    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
4641 
4642    for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
4643       vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
4644          *i = anv_time_domains[d];
4645       }
4646    }
4647 
4648    return vk_outarray_status(&out);
4649 }
4650 
4651 static uint64_t
4652 anv_clock_gettime(clockid_t clock_id)
4653 {
4654    struct timespec current;
4655    int ret;
4656 
4657    ret = clock_gettime(clock_id, &current);
4658 #ifdef CLOCK_MONOTONIC_RAW
4659    if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
4660       ret = clock_gettime(CLOCK_MONOTONIC, &current);
4661 #endif
4662    if (ret < 0)
4663       return 0;
4664 
4665    return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
4666 }
4667 
4668 VkResult anv_GetCalibratedTimestampsEXT(
4669    VkDevice                                     _device,
4670    uint32_t                                     timestampCount,
4671    const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
4672    uint64_t                                     *pTimestamps,
4673    uint64_t                                     *pMaxDeviation)
4674 {
4675    ANV_FROM_HANDLE(anv_device, device, _device);
4676    uint64_t timestamp_frequency = device->info.timestamp_frequency;
4677    int  ret;
4678    int d;
4679    uint64_t begin, end;
4680    uint64_t max_clock_period = 0;
4681 
4682 #ifdef CLOCK_MONOTONIC_RAW
4683    begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
4684 #else
4685    begin = anv_clock_gettime(CLOCK_MONOTONIC);
4686 #endif
4687 
4688    for (d = 0; d < timestampCount; d++) {
4689       switch (pTimestampInfos[d].timeDomain) {
4690       case VK_TIME_DOMAIN_DEVICE_EXT:
4691          ret = anv_gem_reg_read(device->fd, TIMESTAMP | I915_REG_READ_8B_WA,
4692                                 &pTimestamps[d]);
4693 
4694          if (ret != 0) {
4695             return vk_device_set_lost(&device->vk, "Failed to read the "
4696                                       "TIMESTAMP register: %m");
4697          }
4698          uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
4699          max_clock_period = MAX2(max_clock_period, device_period);
4700          break;
4701       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
4702          pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
4703          max_clock_period = MAX2(max_clock_period, 1);
4704          break;
4705 
4706 #ifdef CLOCK_MONOTONIC_RAW
4707       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
4708          pTimestamps[d] = begin;
4709          break;
4710 #endif
4711       default:
4712          pTimestamps[d] = 0;
4713          break;
4714       }
4715    }
4716 
4717 #ifdef CLOCK_MONOTONIC_RAW
4718    end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
4719 #else
4720    end = anv_clock_gettime(CLOCK_MONOTONIC);
4721 #endif
4722 
4723     /*
4724      * The maximum deviation is the sum of the interval over which we
4725      * perform the sampling and the maximum period of any sampled
4726      * clock. That's because the maximum skew between any two sampled
4727      * clock edges is when the sampled clock with the largest period is
4728      * sampled at the end of that period but right at the beginning of the
4729      * sampling interval and some other clock is sampled right at the
4730      * beginning of its sampling period and right at the end of the
4731      * sampling interval. Let's assume the GPU has the longest clock
4732      * period and that the application is sampling GPU and monotonic:
4733      *
4734      *                               s                 e
4735      *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
4736      *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
4737      *
4738      *                               g
4739      *		  0         1         2         3
4740      *	GPU       -----_____-----_____-----_____-----_____
4741      *
4742      *                                                m
4743      *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
4744      *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
4745      *
4746      *	Interval                     <----------------->
4747      *	Deviation           <-------------------------->
4748      *
4749      *		s  = read(raw)       2
4750      *		g  = read(GPU)       1
4751      *		m  = read(monotonic) 2
4752      *		e  = read(raw)       b
4753      *
4754      * We round the sample interval up by one tick to cover sampling error
4755      * in the interval clock
4756      */
4757 
4758    uint64_t sample_interval = end - begin + 1;
4759 
4760    *pMaxDeviation = sample_interval + max_clock_period;
4761 
4762    return VK_SUCCESS;
4763 }
4764 
4765 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
4766     VkPhysicalDevice                            physicalDevice,
4767     VkSampleCountFlagBits                       samples,
4768     VkMultisamplePropertiesEXT*                 pMultisampleProperties)
4769 {
4770    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4771 
4772    assert(pMultisampleProperties->sType ==
4773           VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
4774 
4775    VkExtent2D grid_size;
4776    if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
4777       grid_size.width = 1;
4778       grid_size.height = 1;
4779    } else {
4780       grid_size.width = 0;
4781       grid_size.height = 0;
4782    }
4783    pMultisampleProperties->maxSampleLocationGridSize = grid_size;
4784 
4785    vk_foreach_struct(ext, pMultisampleProperties->pNext)
4786       anv_debug_ignored_stype(ext->sType);
4787 }
4788 
4789 /* vk_icd.h does not declare this function, so we declare it here to
4790  * suppress -Wmissing-prototypes.
4791  */
4792 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4793 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
4794 
4795 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4796 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
4797 {
4798    /* For the full details on loader interface versioning, see
4799     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4800     * What follows is a condensed summary, to help you navigate the large and
4801     * confusing official doc.
4802     *
4803     *   - Loader interface v0 is incompatible with later versions. We don't
4804     *     support it.
4805     *
4806     *   - In loader interface v1:
4807     *       - The first ICD entrypoint called by the loader is
4808     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4809     *         entrypoint.
4810     *       - The ICD must statically expose no other Vulkan symbol unless it is
4811     *         linked with -Bsymbolic.
4812     *       - Each dispatchable Vulkan handle created by the ICD must be
4813     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
4814     *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4815     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4816     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
4817     *         such loader-managed surfaces.
4818     *
4819     *    - Loader interface v2 differs from v1 in:
4820     *       - The first ICD entrypoint called by the loader is
4821     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4822     *         statically expose this entrypoint.
4823     *
4824     *    - Loader interface v3 differs from v2 in:
4825     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4826     *          vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
4827     *          because the loader no longer does so.
4828     *
4829     *    - Loader interface v4 differs from v3 in:
4830     *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
4831     *
4832     *    - Loader interface v5 differs from v4 in:
4833     *        - The ICD must support Vulkan API version 1.1 and must not return
4834     *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
4835     *          Vulkan Loader with interface v4 or smaller is being used and the
4836     *          application provides an API version that is greater than 1.0.
4837     */
4838    *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
4839    return VK_SUCCESS;
4840 }
4841 
4842 VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
4843     VkPhysicalDevice                            physicalDevice,
4844     uint32_t*                                   pFragmentShadingRateCount,
4845     VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
4846 {
4847    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4848    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
4849                           pFragmentShadingRates, pFragmentShadingRateCount);
4850 
4851 #define append_rate(_samples, _width, _height)                                      \
4852    do {                                                                             \
4853       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
4854          __r->sampleCounts = _samples;                                              \
4855          __r->fragmentSize = (VkExtent2D) {                                         \
4856             .width = _width,                                                        \
4857             .height = _height,                                                      \
4858          };                                                                         \
4859       }                                                                             \
4860    } while (0)
4861 
4862    VkSampleCountFlags sample_counts =
4863       isl_device_get_sample_counts(&physical_device->isl_dev);
4864 
4865    /* BSpec 47003: There are a number of restrictions on the sample count
4866     * based on the coarse pixel size.
4867     */
4868    static const VkSampleCountFlags cp_size_sample_limits[] = {
4869       [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
4870              ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4871       [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4872       [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4873       [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4874       [16] = ISL_SAMPLE_COUNT_1_BIT,
4875    };
4876 
4877    for (uint32_t x = 4; x >= 1; x /= 2) {
4878        for (uint32_t y = 4; y >= 1; y /= 2) {
4879           if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
4880              /* BSpec 47003:
4881               *   "CPsize 1x4 and 4x1 are not supported"
4882               */
4883              if ((x == 1 && y == 4) || (x == 4 && y == 1))
4884                 continue;
4885 
4886              /* For size {1, 1}, the sample count must be ~0
4887               *
4888               * 4x2 is also a special case.
4889               */
4890              if (x == 1 && y == 1)
4891                 append_rate(~0, x, y);
4892              else if (x == 4 && y == 2)
4893                 append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
4894              else
4895                 append_rate(cp_size_sample_limits[x * y], x, y);
4896           } else {
4897              /* For size {1, 1}, the sample count must be ~0 */
4898              if (x == 1 && y == 1)
4899                 append_rate(~0, x, y);
4900              else
4901                 append_rate(sample_counts, x, y);
4902           }
4903        }
4904    }
4905 
4906 #undef append_rate
4907 
4908    return vk_outarray_status(&out);
4909 }
4910