• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright © 2024 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #include "anv_private.h"
6 #include "anv_api_version.h"
7 #include "anv_measure.h"
8 
9 #include "i915/anv_device.h"
10 #include "xe/anv_device.h"
11 
12 #include "common/intel_common.h"
13 #include "common/intel_uuid.h"
14 
15 #include "perf/intel_perf.h"
16 
17 #include "git_sha1.h"
18 
19 #include "util/disk_cache.h"
20 #include "util/mesa-sha1.h"
21 
22 #include <xf86drm.h>
23 #include <fcntl.h>
24 #ifdef MAJOR_IN_SYSMACROS
25 #include <sys/sysmacros.h>
26 #endif
27 
28 /* This is probably far to big but it reflects the max size used for messages
29  * in OpenGLs KHR_debug.
30  */
31 #define MAX_DEBUG_MESSAGE_LENGTH    4096
32 
33 static void
compiler_debug_log(void * data,UNUSED unsigned * id,const char * fmt,...)34 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
35 {
36    char str[MAX_DEBUG_MESSAGE_LENGTH];
37    struct anv_device *device = (struct anv_device *)data;
38    UNUSED struct anv_instance *instance = device->physical->instance;
39 
40    va_list args;
41    va_start(args, fmt);
42    (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
43    va_end(args);
44 
45    //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
46 }
47 
48 static void
compiler_perf_log(UNUSED void * data,UNUSED unsigned * id,const char * fmt,...)49 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
50 {
51    va_list args;
52    va_start(args, fmt);
53 
54    if (INTEL_DEBUG(DEBUG_PERF))
55       mesa_logd_v(fmt, args);
56 
57    va_end(args);
58 }
59 
/* Per-type descriptor count limits derived from descriptor heap sizes; see
 * get_device_descriptor_limits() for how these are computed.
 */
struct anv_descriptor_limits {
   uint32_t max_ubos;      /* max uniform-buffer descriptors */
   uint32_t max_ssbos;     /* max storage-buffer descriptors */
   uint32_t max_samplers;  /* max sampler descriptors */
   uint32_t max_images;    /* max image descriptors */
   uint32_t max_resources; /* min of the above: overall per-stage resource cap */
};
67 
68 static void
get_device_descriptor_limits(const struct anv_physical_device * device,struct anv_descriptor_limits * limits)69 get_device_descriptor_limits(const struct anv_physical_device *device,
70                              struct anv_descriptor_limits *limits)
71 {
72    memset(limits, 0, sizeof(*limits));
73 
74    /* It's a bit hard to exactly map our implementation to the limits
75     * described by Vulkan. The bindless surface handle in the extended message
76     * descriptors is 20 bits on <= Gfx12.0, 26 bits on >= Gfx12.5 and it's an
77     * index into the table of RENDER_SURFACE_STATE structs that starts at
78     * bindless surface base address. On <= Gfx12.0, this means that we can
79     * have at must 1M surface states allocated at any given time. Since most
80     * image views take two descriptors, this means we have a limit of about
81     * 500K image views. On >= Gfx12.5, we do not need 2 surfaces per
82     * descriptors and we have 33M+ descriptors (we have a 2GB limit, due to
83     * overlapping heaps for workarounds, but HW can do 4GB).
84     *
85     * However, on <= Gfx12.0, since we allocate surface states at
86     * vkCreateImageView time, this means our limit is actually something on
87     * the order of 500K image views allocated at any time. The actual limit
88     * describe by Vulkan, on the other hand, is a limit of how many you can
89     * have in a descriptor set. Assuming anyone using 1M descriptors will be
90     * using the same image view twice a bunch of times (or a bunch of null
91     * descriptors), we can safely advertise a larger limit here.
92     *
93     * Here we use the size of the heap in which the descriptors are stored and
94     * divide by the size of the descriptor to get a limit value.
95     */
96    const uint64_t descriptor_heap_size =
97       device->indirect_descriptors ?
98       device->va.indirect_descriptor_pool.size :
99       device->va.bindless_surface_state_pool.size;;
100 
101    const uint32_t buffer_descriptor_size =
102       device->indirect_descriptors ?
103       sizeof(struct anv_address_range_descriptor) :
104       ANV_SURFACE_STATE_SIZE;
105    const uint32_t image_descriptor_size =
106       device->indirect_descriptors ?
107       sizeof(struct anv_address_range_descriptor) :
108       ANV_SURFACE_STATE_SIZE;
109    const uint32_t sampler_descriptor_size =
110       device->indirect_descriptors ?
111       sizeof(struct anv_sampled_image_descriptor) :
112       ANV_SAMPLER_STATE_SIZE;
113 
114    limits->max_ubos = descriptor_heap_size / buffer_descriptor_size;
115    limits->max_ssbos = descriptor_heap_size / buffer_descriptor_size;
116    limits->max_images = descriptor_heap_size / image_descriptor_size;
117    limits->max_samplers = descriptor_heap_size / sampler_descriptor_size;
118 
119    limits->max_resources = UINT32_MAX;
120    limits->max_resources = MIN2(limits->max_resources, limits->max_ubos);
121    limits->max_resources = MIN2(limits->max_resources, limits->max_ssbos);
122    limits->max_resources = MIN2(limits->max_resources, limits->max_images);
123    limits->max_resources = MIN2(limits->max_resources, limits->max_samplers);
124 }
125 
/* Fill *ext with the device-extension table for this physical device.
 * Most entries are unconditionally supported; the rest are gated on HW
 * generation (device->info), kernel/syncobj capabilities, build-time
 * options (ANV_SUPPORT_RT, VIDEO_CODEC_*), or driconf/instance settings.
 */
static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   /* CPU-waitable syncobjs are required for external fence support. */
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   /* Ray tracing needs both build-time support and HW capability. */
   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = true,
      .KHR_16bit_storage                     = !device->instance->no_16bit,
      .KHR_acceleration_structure            = rt_enabled,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = true,
      .KHR_calibrated_timestamps             = device->has_reg_timestamp,
      .KHR_compute_shader_derivatives        = true,
      .KHR_cooperative_matrix                = anv_has_cooperative_matrix(device),
      .KHR_copy_commands2                    = true,
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_dynamic_rendering                 = true,
      .KHR_dynamic_rendering_local_read      = true,
      .KHR_external_fence                    = has_syncobj_wait,
      .KHR_external_fence_fd                 = has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_fragment_shading_rate             = device->info.ver >= 11,
      .KHR_get_memory_requirements2          = true,
      .KHR_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_index_type_uint8                  = true,
      .KHR_line_rasterization                = true,
      .KHR_load_store_op_none                = true,
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_maintenance5                      = true,
      .KHR_maintenance6                      = true,
      .KHR_maintenance7                      = true,
      .KHR_map_memory2                       = true,
      .KHR_multiview                         = true,
      .KHR_performance_query =
         device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      .KHR_pipeline_library                  = true,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor                   = true,
      .KHR_ray_query                         = rt_enabled,
      .KHR_ray_tracing_maintenance1          = rt_enabled,
      .KHR_ray_tracing_pipeline              = rt_enabled,
      .KHR_ray_tracing_position_fetch        = rt_enabled,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_atomic_int64               = true,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_expect_assume              = true,
      .KHR_shader_float16_int8               = !device->instance->no_16bit,
      .KHR_shader_float_controls             = true,
      .KHR_shader_float_controls2            = true,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_maximal_reconvergence      = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_quad_control               = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types    = true,
      .KHR_shader_subgroup_rotate            = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vertex_attribute_divisor          = true,
      .KHR_video_queue                       = device->video_decode_enabled || device->video_encode_enabled,
      .KHR_video_decode_queue                = device->video_decode_enabled,
      .KHR_video_decode_h264                 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
      .KHR_video_decode_h265                 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
      .KHR_video_decode_av1                  = device->info.ver >= 12 && VIDEO_CODEC_AV1DEC && device->video_decode_enabled,
      .KHR_video_encode_queue                = device->video_encode_enabled,
      .KHR_video_encode_h264                 = VIDEO_CODEC_H264ENC && device->video_encode_enabled,
      .KHR_video_encode_h265                 = device->info.ver >= 12 && VIDEO_CODEC_H265ENC && device->video_encode_enabled,
      .KHR_video_maintenance1                = (device->video_decode_enabled &&
                                               (VIDEO_CODEC_H264DEC || VIDEO_CODEC_H265DEC)) ||
                                               (device->video_encode_enabled &&
                                               (VIDEO_CODEC_H264ENC || VIDEO_CODEC_H265ENC)),
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_attachment_feedback_loop_layout   = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_border_color_swizzle              = true,
      .EXT_buffer_device_address             = true,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = true,
      .EXT_conservative_rasterization        = true,
      .EXT_custom_border_color               = true,
      .EXT_depth_bias_control                = true,
      .EXT_depth_clamp_control               = true,
      .EXT_depth_clamp_zero_one              = true,
      .EXT_depth_clip_control                = true,
      .EXT_depth_clip_enable                 = true,
      .EXT_depth_range_unrestricted          = device->info.ver >= 20,
      .EXT_descriptor_buffer                 = true,
      .EXT_descriptor_indexing               = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_extended_dynamic_state3           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_fragment_shader_interlock         = true,
      .EXT_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query             = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_graphics_pipeline_library         = !debug_get_bool_option("ANV_NO_GPL", false),
      .EXT_host_image_copy                   = !device->emu_astc_ldr,
      .EXT_host_query_reset                  = true,
      .EXT_image_2d_view_of_3d               = true,
      /* Because of Xe2 PAT selected compression and the Vulkan spec
       * requirement to always return the same memory types for Images with
       * same properties we can't support EXT_image_compression_control on Xe2+
       */
      .EXT_image_compression_control         = device->instance->compression_control_enabled &&
                                               device->info.ver < 20,
      .EXT_image_drm_format_modifier         = true,
      .EXT_image_robustness                  = true,
      .EXT_image_sliced_view_of_3d           = true,
      .EXT_image_view_min_lod                = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_legacy_dithering                  = true,
      .EXT_legacy_vertex_attributes          = true,
      .EXT_line_rasterization                = true,
      .EXT_load_store_op_none                = true,
      .EXT_map_memory_placed                 = device->info.has_mmap_offset,
      /* Enable the extension only if we have support on both the local &
       * system memory
       */
      .EXT_memory_budget                     = (!device->info.has_local_mem ||
                                                device->vram_mappable.available > 0) &&
                                               device->sys.available,
      .EXT_mesh_shader                       = device->info.has_mesh_shading,
      .EXT_multi_draw                        = true,
      .EXT_mutable_descriptor_type           = true,
      .EXT_nested_command_buffer             = true,
      .EXT_non_seamless_cube_map             = true,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_pipeline_library_group_handles    = rt_enabled,
      .EXT_pipeline_protected_access         = device->has_protected_contexts,
      .EXT_pipeline_robustness               = true,
      .EXT_post_depth_coverage               = true,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_primitives_generated_query        = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_sampler_filter_minmax             = true,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_atomic_float2              = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier          = true,
      .EXT_shader_replicated_composites      = true,
      .EXT_shader_stencil_export             = true,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1            = true,
#endif
      .EXT_texel_buffer_alignment            = true,
      .EXT_tooling_info                      = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_vertex_input_dynamic_state        = true,
      .EXT_ycbcr_2plane_444_formats          = true,
      .EXT_ycbcr_image_arrays                = true,
      .AMD_buffer_marker                     = true,
      .AMD_texture_gather_bias_lod           = device->info.ver >= 20,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      .INTEL_performance_query               = device->perf &&
                                               intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2       = true,
      .MESA_image_alignment_control          = true,
      .NV_compute_shader_derivatives         = true,
      .VALVE_mutable_descriptor_type         = true,
   };
}
371 
372 static void
get_features(const struct anv_physical_device * pdevice,struct vk_features * features)373 get_features(const struct anv_physical_device *pdevice,
374              struct vk_features *features)
375 {
376    struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
377 
378    const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
379 
380    const bool mesh_shader =
381       pdevice->vk.supported_extensions.EXT_mesh_shader;
382 
383    const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
384 
385    *features = (struct vk_features) {
386       /* Vulkan 1.0 */
387       .robustBufferAccess                       = true,
388       .fullDrawIndexUint32                      = true,
389       .imageCubeArray                           = true,
390       .independentBlend                         = true,
391       .geometryShader                           = true,
392       .tessellationShader                       = true,
393       .sampleRateShading                        = true,
394       .dualSrcBlend                             = true,
395       .logicOp                                  = true,
396       .multiDrawIndirect                        = true,
397       .drawIndirectFirstInstance                = true,
398       .depthClamp                               = true,
399       .depthBiasClamp                           = true,
400       .fillModeNonSolid                         = true,
401       .depthBounds                              = pdevice->info.ver >= 12,
402       .wideLines                                = true,
403       .largePoints                              = true,
404       .alphaToOne                               = true,
405       .multiViewport                            = true,
406       .samplerAnisotropy                        = true,
407       .textureCompressionETC2                   = true,
408       .textureCompressionASTC_LDR               = pdevice->has_astc_ldr ||
409                                                   pdevice->emu_astc_ldr,
410       .textureCompressionBC                     = true,
411       .occlusionQueryPrecise                    = true,
412       .pipelineStatisticsQuery                  = true,
413       .vertexPipelineStoresAndAtomics           = true,
414       .fragmentStoresAndAtomics                 = true,
415       .shaderTessellationAndGeometryPointSize   = true,
416       .shaderImageGatherExtended                = true,
417       .shaderStorageImageExtendedFormats        = true,
418       .shaderStorageImageMultisample            = false,
419       /* Gfx12.5 has all the required format supported in HW for typed
420        * read/writes
421        */
422       .shaderStorageImageReadWithoutFormat      = pdevice->info.verx10 >= 125,
423       .shaderStorageImageWriteWithoutFormat     = true,
424       .shaderUniformBufferArrayDynamicIndexing  = true,
425       .shaderSampledImageArrayDynamicIndexing   = true,
426       .shaderStorageBufferArrayDynamicIndexing  = true,
427       .shaderStorageImageArrayDynamicIndexing   = true,
428       .shaderClipDistance                       = true,
429       .shaderCullDistance                       = true,
430       .shaderFloat64                            = pdevice->info.has_64bit_float ||
431                                                   pdevice->instance->fp64_workaround_enabled,
432       .shaderInt64                              = true,
433       .shaderInt16                              = true,
434       .shaderResourceMinLod                     = true,
435       .shaderResourceResidency                  = has_sparse_or_fake,
436       .sparseBinding                            = has_sparse_or_fake,
437       .sparseResidencyAliased                   = has_sparse_or_fake,
438       .sparseResidencyBuffer                    = has_sparse_or_fake,
439       .sparseResidencyImage2D                   = has_sparse_or_fake,
440       .sparseResidencyImage3D                   = has_sparse_or_fake,
441       .sparseResidency2Samples                  = has_sparse_or_fake,
442       .sparseResidency4Samples                  = has_sparse_or_fake,
443       .sparseResidency8Samples                  = has_sparse_or_fake &&
444                                                   pdevice->info.verx10 != 125,
445       .sparseResidency16Samples                 = has_sparse_or_fake &&
446                                                   pdevice->info.verx10 != 125,
447       .variableMultisampleRate                  = true,
448       .inheritedQueries                         = true,
449 
450       /* Vulkan 1.1 */
451       .storageBuffer16BitAccess            = !pdevice->instance->no_16bit,
452       .uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit,
453       .storagePushConstant16               = true,
454       .storageInputOutput16                = false,
455       .multiview                           = true,
456       .multiviewGeometryShader             = true,
457       .multiviewTessellationShader         = true,
458       .variablePointersStorageBuffer       = true,
459       .variablePointers                    = true,
460       .protectedMemory                     = pdevice->has_protected_contexts,
461       .samplerYcbcrConversion              = true,
462       .shaderDrawParameters                = true,
463 
464       /* Vulkan 1.2 */
465       .samplerMirrorClampToEdge            = true,
466       .drawIndirectCount                   = true,
467       .storageBuffer8BitAccess             = true,
468       .uniformAndStorageBuffer8BitAccess   = true,
469       .storagePushConstant8                = true,
470       .shaderBufferInt64Atomics            = true,
471       .shaderSharedInt64Atomics            = false,
472       .shaderFloat16                       = !pdevice->instance->no_16bit,
473       .shaderInt8                          = !pdevice->instance->no_16bit,
474 
475       .descriptorIndexing                                 = true,
476       .shaderInputAttachmentArrayDynamicIndexing          = false,
477       .shaderUniformTexelBufferArrayDynamicIndexing       = true,
478       .shaderStorageTexelBufferArrayDynamicIndexing       = true,
479       .shaderUniformBufferArrayNonUniformIndexing         = true,
480       .shaderSampledImageArrayNonUniformIndexing          = true,
481       .shaderStorageBufferArrayNonUniformIndexing         = true,
482       .shaderStorageImageArrayNonUniformIndexing          = true,
483       .shaderInputAttachmentArrayNonUniformIndexing       = false,
484       .shaderUniformTexelBufferArrayNonUniformIndexing    = true,
485       .shaderStorageTexelBufferArrayNonUniformIndexing    = true,
486       .descriptorBindingUniformBufferUpdateAfterBind      = true,
487       .descriptorBindingSampledImageUpdateAfterBind       = true,
488       .descriptorBindingStorageImageUpdateAfterBind       = true,
489       .descriptorBindingStorageBufferUpdateAfterBind      = true,
490       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
491       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
492       .descriptorBindingUpdateUnusedWhilePending          = true,
493       .descriptorBindingPartiallyBound                    = true,
494       .descriptorBindingVariableDescriptorCount           = true,
495       .runtimeDescriptorArray                             = true,
496 
497       .samplerFilterMinmax                 = true,
498       .scalarBlockLayout                   = true,
499       .imagelessFramebuffer                = true,
500       .uniformBufferStandardLayout         = true,
501       .shaderSubgroupExtendedTypes         = true,
502       .separateDepthStencilLayouts         = true,
503       .hostQueryReset                      = true,
504       .timelineSemaphore                   = true,
505       .bufferDeviceAddress                 = true,
506       .bufferDeviceAddressCaptureReplay    = true,
507       .bufferDeviceAddressMultiDevice      = false,
508       .vulkanMemoryModel                   = true,
509       .vulkanMemoryModelDeviceScope        = true,
510       .vulkanMemoryModelAvailabilityVisibilityChains = true,
511       .shaderOutputViewportIndex           = true,
512       .shaderOutputLayer                   = true,
513       .subgroupBroadcastDynamicId          = true,
514 
515       /* Vulkan 1.3 */
516       .robustImageAccess = true,
517       .inlineUniformBlock = true,
518       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
519       .pipelineCreationCacheControl = true,
520       .privateData = true,
521       .shaderDemoteToHelperInvocation = true,
522       .shaderTerminateInvocation = true,
523       .subgroupSizeControl = true,
524       .computeFullSubgroups = true,
525       .synchronization2 = true,
526       .textureCompressionASTC_HDR = false,
527       .shaderZeroInitializeWorkgroupMemory = true,
528       .dynamicRendering = true,
529       .shaderIntegerDotProduct = true,
530       .maintenance4 = true,
531 
532       /* Vulkan 1.4 */
533       .pushDescriptor = true,
534 
535       /* VK_EXT_4444_formats */
536       .formatA4R4G4B4 = true,
537       .formatA4B4G4R4 = false,
538 
539       /* VK_KHR_acceleration_structure */
540       .accelerationStructure = rt_enabled,
541       .accelerationStructureCaptureReplay = false, /* TODO */
542       .accelerationStructureIndirectBuild = false, /* TODO */
543       .accelerationStructureHostCommands = false,
544       .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,
545 
546       /* VK_EXT_border_color_swizzle */
547       .borderColorSwizzle = true,
548       .borderColorSwizzleFromImage = true,
549 
550       /* VK_EXT_color_write_enable */
551       .colorWriteEnable = true,
552 
553       /* VK_EXT_image_2d_view_of_3d  */
554       .image2DViewOf3D = true,
555       .sampler2DViewOf3D = true,
556 
557       /* VK_EXT_image_sliced_view_of_3d */
558       .imageSlicedViewOf3D = true,
559 
560       /* VK_KHR_compute_shader_derivatives */
561       .computeDerivativeGroupQuads = true,
562       .computeDerivativeGroupLinear = true,
563 
564       /* VK_EXT_conditional_rendering */
565       .conditionalRendering = true,
566       .inheritedConditionalRendering = true,
567 
568       /* VK_EXT_custom_border_color */
569       .customBorderColors = true,
570       .customBorderColorWithoutFormat =
571          pdevice->instance->custom_border_colors_without_format,
572 
573       /* VK_EXT_depth_clamp_zero_one */
574       .depthClampZeroOne = true,
575 
576       /* VK_EXT_depth_clip_enable */
577       .depthClipEnable = true,
578 
579       /* VK_EXT_fragment_shader_interlock */
580       .fragmentShaderSampleInterlock = true,
581       .fragmentShaderPixelInterlock = true,
582       .fragmentShaderShadingRateInterlock = false,
583 
584       /* VK_EXT_global_priority_query */
585       .globalPriorityQuery = true,
586 
587       /* VK_EXT_graphics_pipeline_library */
588       .graphicsPipelineLibrary =
589          pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,
590 
591       /* VK_KHR_fragment_shading_rate */
592       .pipelineFragmentShadingRate = true,
593       .primitiveFragmentShadingRate =
594          pdevice->info.has_coarse_pixel_primitive_and_cb,
595       .attachmentFragmentShadingRate =
596          pdevice->info.has_coarse_pixel_primitive_and_cb,
597 
598       /* VK_EXT_image_view_min_lod */
599       .minLod = true,
600 
601       /* VK_EXT_index_type_uint8 */
602       .indexTypeUint8 = true,
603 
604       /* VK_EXT_line_rasterization */
605       /* Rectangular lines must use the strict algorithm, which is not
606        * supported for wide lines prior to ICL.  See rasterization_mode for
607        * details and how the HW states are programmed.
608        */
609       .rectangularLines = pdevice->info.ver >= 10,
610       .bresenhamLines = true,
611       /* Support for Smooth lines with MSAA was removed on gfx11.  From the
612        * BSpec section "Multisample ModesState" table for "AA Line Support
613        * Requirements":
614        *
615        *    GFX10:BUG:######## 	NUM_MULTISAMPLES == 1
616        *
617        * Fortunately, this isn't a case most people care about.
618        */
619       .smoothLines = pdevice->info.ver < 10,
620       .stippledRectangularLines = false,
621       .stippledBresenhamLines = true,
622       .stippledSmoothLines = false,
623 
624       /* VK_NV_mesh_shader */
625       .taskShaderNV = false,
626       .meshShaderNV = false,
627 
628       /* VK_EXT_mesh_shader */
629       .taskShader = mesh_shader,
630       .meshShader = mesh_shader,
631       .multiviewMeshShader = false,
632       .primitiveFragmentShadingRateMeshShader = mesh_shader,
633       .meshShaderQueries = mesh_shader,
634 
635       /* VK_EXT_mutable_descriptor_type */
636       .mutableDescriptorType = true,
637 
638       /* VK_KHR_performance_query */
639       .performanceCounterQueryPools = true,
640       /* HW only supports a single configuration at a time. */
641       .performanceCounterMultipleQueryPools = false,
642 
643       /* VK_KHR_pipeline_executable_properties */
644       .pipelineExecutableInfo = true,
645 
646       /* VK_EXT_primitives_generated_query */
647       .primitivesGeneratedQuery = true,
648       .primitivesGeneratedQueryWithRasterizerDiscard = false,
649       .primitivesGeneratedQueryWithNonZeroStreams = false,
650 
651       /* VK_EXT_pipeline_library_group_handles */
652       .pipelineLibraryGroupHandles = true,
653 
654       /* VK_EXT_provoking_vertex */
655       .provokingVertexLast = true,
656       .transformFeedbackPreservesProvokingVertex = true,
657 
658       /* VK_KHR_ray_query */
659       .rayQuery = rt_enabled,
660 
661       /* VK_KHR_ray_tracing_maintenance1 */
662       .rayTracingMaintenance1 = rt_enabled,
663       .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,
664 
665       /* VK_KHR_ray_tracing_pipeline */
666       .rayTracingPipeline = rt_enabled,
667       .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
668       .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
669       .rayTracingPipelineTraceRaysIndirect = rt_enabled,
670       .rayTraversalPrimitiveCulling = rt_enabled,
671 
672       /* VK_EXT_robustness2 */
673       .robustBufferAccess2 = true,
674       .robustImageAccess2 = true,
675       .nullDescriptor = true,
676 
677       /* VK_EXT_shader_replicated_composites */
678       .shaderReplicatedComposites = true,
679 
680       /* VK_EXT_shader_atomic_float */
681       .shaderBufferFloat32Atomics =    true,
682       .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
683       .shaderBufferFloat64Atomics =
684          pdevice->info.has_64bit_float && pdevice->info.has_lsc,
685       .shaderBufferFloat64AtomicAdd =  pdevice->info.ver >= 20,
686       .shaderSharedFloat32Atomics =    true,
687       .shaderSharedFloat32AtomicAdd =  false,
688       .shaderSharedFloat64Atomics =    false,
689       .shaderSharedFloat64AtomicAdd =  false,
690       .shaderImageFloat32Atomics =     true,
691       .shaderImageFloat32AtomicAdd =   pdevice->info.ver >= 20,
692       .sparseImageFloat32Atomics =     false,
693       .sparseImageFloat32AtomicAdd =   false,
694 
695       /* VK_EXT_shader_atomic_float2 */
696       .shaderBufferFloat16Atomics      = pdevice->info.has_lsc,
697       .shaderBufferFloat16AtomicAdd    = false,
698       .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
699       .shaderBufferFloat32AtomicMinMax = true,
700       .shaderBufferFloat64AtomicMinMax =
701          pdevice->info.has_64bit_float && pdevice->info.has_lsc &&
702          pdevice->info.ver < 20,
703       .shaderSharedFloat16Atomics      = pdevice->info.has_lsc,
704       .shaderSharedFloat16AtomicAdd    = false,
705       .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
706       .shaderSharedFloat32AtomicMinMax = true,
707       .shaderSharedFloat64AtomicMinMax = false,
708       .shaderImageFloat32AtomicMinMax  = false,
709       .sparseImageFloat32AtomicMinMax  = false,
710 
711       /* VK_KHR_shader_clock */
712       .shaderSubgroupClock = true,
713       .shaderDeviceClock = false,
714 
715       /* VK_INTEL_shader_integer_functions2 */
716       .shaderIntegerFunctions2 = true,
717 
718       /* VK_EXT_shader_module_identifier */
719       .shaderModuleIdentifier = true,
720 
721       /* VK_KHR_shader_subgroup_uniform_control_flow */
722       .shaderSubgroupUniformControlFlow = true,
723 
724       /* VK_EXT_texel_buffer_alignment */
725       .texelBufferAlignment = true,
726 
727       /* VK_EXT_transform_feedback */
728       .transformFeedback = true,
729       .geometryStreams = true,
730 
731       /* VK_KHR_vertex_attribute_divisor */
732       .vertexAttributeInstanceRateDivisor = true,
733       .vertexAttributeInstanceRateZeroDivisor = true,
734 
735       /* VK_KHR_workgroup_memory_explicit_layout */
736       .workgroupMemoryExplicitLayout = true,
737       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
738       .workgroupMemoryExplicitLayout8BitAccess = true,
739       .workgroupMemoryExplicitLayout16BitAccess = true,
740 
741       /* VK_EXT_ycbcr_image_arrays */
742       .ycbcrImageArrays = true,
743 
744       /* VK_EXT_ycbcr_2plane_444_formats */
745       .ycbcr2plane444Formats = true,
746 
747       /* VK_EXT_extended_dynamic_state */
748       .extendedDynamicState = true,
749 
750       /* VK_EXT_extended_dynamic_state2 */
751       .extendedDynamicState2 = true,
752       .extendedDynamicState2LogicOp = true,
753       .extendedDynamicState2PatchControlPoints = true,
754 
755       /* VK_EXT_extended_dynamic_state3 */
756       .extendedDynamicState3PolygonMode = true,
757       .extendedDynamicState3TessellationDomainOrigin = true,
758       .extendedDynamicState3RasterizationStream = true,
759       .extendedDynamicState3LineStippleEnable = true,
760       .extendedDynamicState3LineRasterizationMode = true,
761       .extendedDynamicState3LogicOpEnable = true,
762       .extendedDynamicState3AlphaToOneEnable = true,
763       .extendedDynamicState3DepthClipEnable = true,
764       .extendedDynamicState3DepthClampEnable = true,
765       .extendedDynamicState3DepthClipNegativeOneToOne = true,
766       .extendedDynamicState3ProvokingVertexMode = true,
767       .extendedDynamicState3ColorBlendEnable = true,
768       .extendedDynamicState3ColorWriteMask = true,
769       .extendedDynamicState3ColorBlendEquation = true,
770       .extendedDynamicState3SampleLocationsEnable = true,
771       .extendedDynamicState3SampleMask = true,
772       .extendedDynamicState3ConservativeRasterizationMode = true,
773       .extendedDynamicState3AlphaToCoverageEnable = true,
774       .extendedDynamicState3RasterizationSamples = true,
775 
776       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
777       .extendedDynamicState3ViewportWScalingEnable = false,
778       .extendedDynamicState3ViewportSwizzle = false,
779       .extendedDynamicState3ShadingRateImageEnable = false,
780       .extendedDynamicState3CoverageToColorEnable = false,
781       .extendedDynamicState3CoverageToColorLocation = false,
782       .extendedDynamicState3CoverageModulationMode = false,
783       .extendedDynamicState3CoverageModulationTableEnable = false,
784       .extendedDynamicState3CoverageModulationTable = false,
785       .extendedDynamicState3CoverageReductionMode = false,
786       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
787       .extendedDynamicState3ColorBlendAdvanced = false,
788 
789       /* VK_EXT_multi_draw */
790       .multiDraw = true,
791 
792       /* VK_EXT_non_seamless_cube_map */
793       .nonSeamlessCubeMap = true,
794 
795       /* VK_EXT_primitive_topology_list_restart */
796       .primitiveTopologyListRestart = true,
797       .primitiveTopologyPatchListRestart = true,
798 
799       /* VK_EXT_depth_clamp_control */
800       .depthClampControl = true,
801 
802       /* VK_EXT_depth_clip_control */
803       .depthClipControl = true,
804 
805       /* VK_KHR_present_id */
806       .presentId = pdevice->vk.supported_extensions.KHR_present_id,
807 
808       /* VK_KHR_present_wait */
809       .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
810 
811       /* VK_EXT_vertex_input_dynamic_state */
812       .vertexInputDynamicState = true,
813 
814       /* VK_KHR_ray_tracing_position_fetch */
815       .rayTracingPositionFetch = rt_enabled,
816 
817       /* VK_EXT_dynamic_rendering_unused_attachments */
818       .dynamicRenderingUnusedAttachments = true,
819 
820       /* VK_EXT_depth_bias_control */
821       .depthBiasControl = true,
822       .floatRepresentation = true,
823       .leastRepresentableValueForceUnormRepresentation = false,
824       .depthBiasExact = true,
825 
826       /* VK_EXT_pipeline_robustness */
827       .pipelineRobustness = true,
828 
829       /* VK_KHR_maintenance5 */
830       .maintenance5 = true,
831 
832       /* VK_KHR_maintenance6 */
833       .maintenance6 = true,
834 
835       /* VK_EXT_nested_command_buffer */
836       .nestedCommandBuffer = true,
837       .nestedCommandBufferRendering = true,
838       .nestedCommandBufferSimultaneousUse = false,
839 
840       /* VK_KHR_cooperative_matrix */
841       .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),
842 
843       /* VK_KHR_shader_maximal_reconvergence */
844       .shaderMaximalReconvergence = true,
845 
846       /* VK_KHR_shader_subgroup_rotate */
847       .shaderSubgroupRotate = true,
848       .shaderSubgroupRotateClustered = true,
849 
850       /* VK_EXT_attachment_feedback_loop_layout */
851       .attachmentFeedbackLoopLayout = true,
852 
853       /* VK_EXT_attachment_feedback_loop_dynamic_state */
854       .attachmentFeedbackLoopDynamicState = true,
855 
856       /* VK_KHR_shader_expect_assume */
857       .shaderExpectAssume = true,
858 
859       /* VK_EXT_descriptor_buffer */
860       .descriptorBuffer = true,
861       .descriptorBufferCaptureReplay = true,
862       .descriptorBufferImageLayoutIgnored = false,
863       .descriptorBufferPushDescriptors = true,
864 
865       /* VK_EXT_map_memory_placed */
866       .memoryMapPlaced = true,
867       .memoryMapRangePlaced = false,
868       .memoryUnmapReserve = true,
869 
870       /* VK_KHR_shader_quad_control */
871       .shaderQuadControl = true,
872 
873 #ifdef ANV_USE_WSI_PLATFORM
874       /* VK_EXT_swapchain_maintenance1 */
875       .swapchainMaintenance1 = true,
876 #endif
877 
878       /* VK_KHR_video_maintenance1 */
879       .videoMaintenance1 = true,
880 
881       /* VK_EXT_image_compression_control */
882       .imageCompressionControl = true,
883 
884       /* VK_KHR_shader_float_controls2 */
885       .shaderFloatControls2 = true,
886 
887       /* VK_EXT_legacy_vertex_attributes */
888       .legacyVertexAttributes = true,
889 
890       /* VK_EXT_legacy_dithering */
891       .legacyDithering = true,
892 
893       /* VK_MESA_image_alignment_control */
894       .imageAlignmentControl = true,
895 
896       /* VK_KHR_maintenance7 */
897       .maintenance7 = true,
898 
899       /* VK_KHR_shader_relaxed_extended_instruction */
900       .shaderRelaxedExtendedInstruction = true,
901 
902       /* VK_KHR_dynamic_rendering_local_read */
903       .dynamicRenderingLocalRead = true,
904 
905       /* VK_EXT_pipeline_protected_access */
906       .pipelineProtectedAccess = pdevice->has_protected_contexts,
907 
908       /* VK_EXT_host_image_copy */
909       .hostImageCopy = true,
910    };
911 
912    /* The new DOOM and Wolfenstein games require depthBounds without
913     * checking for it.  They seem to run fine without it so just claim it's
914     * there and accept the consequences.
915     */
916    if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
917       features->depthBounds = true;
918 }
919 
920 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
921 
922 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
923 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
924 
925 static VkDeviceSize
anx_get_physical_device_max_heap_size(const struct anv_physical_device * pdevice)926 anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
927 {
928    VkDeviceSize ret = 0;
929 
930    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
931       if (pdevice->memory.heaps[i].size > ret)
932          ret = pdevice->memory.heaps[i].size;
933    }
934 
935    return ret;
936 }
937 
938 static void
get_properties_1_1(const struct anv_physical_device * pdevice,struct vk_properties * p)939 get_properties_1_1(const struct anv_physical_device *pdevice,
940                    struct vk_properties *p)
941 {
942    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
943    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
944    memset(p->deviceLUID, 0, VK_LUID_SIZE);
945    p->deviceNodeMask = 0;
946    p->deviceLUIDValid = false;
947 
948    p->subgroupSize = BRW_SUBGROUP_SIZE;
949    VkShaderStageFlags scalar_stages = 0;
950    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
951       scalar_stages |= mesa_to_vk_shader_stage(stage);
952    }
953    if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
954       scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
955                        VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
956                        VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
957                        VK_SHADER_STAGE_MISS_BIT_KHR |
958                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
959                        VK_SHADER_STAGE_CALLABLE_BIT_KHR;
960    }
961    if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
962       scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
963                        VK_SHADER_STAGE_MESH_BIT_EXT;
964    }
965    p->subgroupSupportedStages = scalar_stages;
966    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
967                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
968                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
969                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
970                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
971                                     VK_SUBGROUP_FEATURE_QUAD_BIT |
972                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
973                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
974                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
975                                     VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
976    p->subgroupQuadOperationsInAllStages = true;
977 
978    p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
979    p->maxMultiviewViewCount      = 16;
980    p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
981    /* Our protected implementation is a memory encryption mechanism, it
982     * shouldn't page fault, but it hangs the HW so in terms of user visibility
983     * it's similar to a fault.
984     */
985    p->protectedNoFault           = false;
986    /* This value doesn't matter for us today as our per-stage descriptors are
987     * the real limit.
988     */
989    p->maxPerSetDescriptors       = 1024;
990 
991    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
992       p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
993                                         pdevice->memory.heaps[i].size);
994    }
995 }
996 
/* Fill in the Vulkan 1.2 core properties: driver identification, float
 * controls, update-after-bind descriptor limits and resolve-mode support.
 */
static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   /* Driver identification strings reported to applications.  The buffers
    * are zeroed first so the bytes past the NUL terminator are
    * deterministic.
    */
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 4,
      .subminor = 0,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = true;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = true;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* Update-after-bind descriptor limits all derive from the per-device
    * descriptor limits computed by get_device_descriptor_limits().
    */
   struct anv_descriptor_limits desc_limits;
   get_device_descriptor_limits(pdevice, &desc_limits);

   p->maxUpdateAfterBindDescriptorsInAllPools            = desc_limits.max_resources;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = desc_limits.max_samplers;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = desc_limits.max_ubos;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = desc_limits.max_ssbos;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = desc_limits.max_resources;
   p->maxDescriptorSetUpdateAfterBindSamplers            = desc_limits.max_samplers;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = desc_limits.max_ubos;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = desc_limits.max_ssbos;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_AVERAGE_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}
1093 
1094 static void
get_properties_1_3(const struct anv_physical_device * pdevice,struct vk_properties * p)1095 get_properties_1_3(const struct anv_physical_device *pdevice,
1096                    struct vk_properties *p)
1097 {
1098    if (pdevice->info.ver >= 20)
1099       p->minSubgroupSize = 16;
1100    else
1101       p->minSubgroupSize = 8;
1102    p->maxSubgroupSize = 32;
1103    p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
1104    p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
1105                                    VK_SHADER_STAGE_TASK_BIT_EXT |
1106                                    VK_SHADER_STAGE_MESH_BIT_EXT;
1107 
1108    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1109    p->maxPerStageDescriptorInlineUniformBlocks =
1110       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1111    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
1112       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1113    p->maxDescriptorSetInlineUniformBlocks =
1114       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1115    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
1116       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1117    p->maxInlineUniformTotalSize = UINT16_MAX;
1118 
1119    p->integerDotProduct8BitUnsignedAccelerated = false;
1120    p->integerDotProduct8BitSignedAccelerated = false;
1121    p->integerDotProduct8BitMixedSignednessAccelerated = false;
1122    p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
1123    p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
1124    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
1125    p->integerDotProduct16BitUnsignedAccelerated = false;
1126    p->integerDotProduct16BitSignedAccelerated = false;
1127    p->integerDotProduct16BitMixedSignednessAccelerated = false;
1128    p->integerDotProduct32BitUnsignedAccelerated = false;
1129    p->integerDotProduct32BitSignedAccelerated = false;
1130    p->integerDotProduct32BitMixedSignednessAccelerated = false;
1131    p->integerDotProduct64BitUnsignedAccelerated = false;
1132    p->integerDotProduct64BitSignedAccelerated = false;
1133    p->integerDotProduct64BitMixedSignednessAccelerated = false;
1134    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
1135    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
1136    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
1137    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
1138    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
1139    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
1140    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
1141    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
1142    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1143    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1144    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1145    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1146    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1147    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1148    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1149 
1150    /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
1151     * Base Address:
1152     *
1153     *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
1154     *    specifies the base address of the first element of the surface,
1155     *    computed in software by adding the surface base address to the
1156     *    byte offset of the element in the buffer. The base address must
1157     *    be aligned to element size."
1158     *
1159     * The typed dataport messages require that things be texel aligned.
1160     * Otherwise, we may just load/store the wrong data or, in the worst
1161     * case, there may be hangs.
1162     */
1163    p->storageTexelBufferOffsetAlignmentBytes = 16;
1164    p->storageTexelBufferOffsetSingleTexelAlignment = true;
1165 
1166    /* The sampler, however, is much more forgiving and it can handle
1167     * arbitrary byte alignment for linear and buffer surfaces.  It's
1168     * hard to find a good PRM citation for this but years of empirical
1169     * experience demonstrate that this is true.
1170     */
1171    p->uniformTexelBufferOffsetAlignmentBytes = 1;
1172    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1173 
1174    p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
1175 }
1176 
1177 static void
get_properties(const struct anv_physical_device * pdevice,struct vk_properties * props)1178 get_properties(const struct anv_physical_device *pdevice,
1179                struct vk_properties *props)
1180 {
1181 
1182       const struct intel_device_info *devinfo = &pdevice->info;
1183 
1184    const VkDeviceSize max_heap_size = anx_get_physical_device_max_heap_size(pdevice);
1185 
1186    const uint32_t max_workgroup_size =
1187       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1188 
1189    const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
1190    const bool sparse_uses_trtt = pdevice->sparse_type == ANV_SPARSE_TYPE_TRTT;
1191 
1192    uint64_t sparse_addr_space_size =
1193       !has_sparse_or_fake ? 0 :
1194       sparse_uses_trtt ? pdevice->va.trtt.size :
1195       pdevice->va.high_heap.size;
1196 
1197    VkSampleCountFlags sample_counts =
1198       isl_device_get_sample_counts(&pdevice->isl_dev);
1199 
1200 #if DETECT_OS_ANDROID
1201    /* Used to fill struct VkPhysicalDevicePresentationPropertiesANDROID */
1202    uint64_t front_rendering_usage = 0;
1203    struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
1204    if (gralloc != NULL) {
1205       u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
1206       u_gralloc_destroy(&gralloc);
1207    }
1208 #endif /* DETECT_OS_ANDROID */
1209 
1210    struct anv_descriptor_limits desc_limits;
1211    get_device_descriptor_limits(pdevice, &desc_limits);
1212 
1213    *props = (struct vk_properties) {
1214       .apiVersion = ANV_API_VERSION,
1215       .driverVersion = vk_get_driver_version(),
1216       .vendorID = pdevice->instance->force_vk_vendor != 0 ?
1217                   pdevice->instance->force_vk_vendor : 0x8086,
1218       .deviceID = pdevice->info.pci_device_id,
1219       .deviceType = pdevice->info.has_local_mem ?
1220                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
1221                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1222 
1223       /* Limits: */
1224       .maxImageDimension1D                      = (1 << 14),
1225       .maxImageDimension2D                      = (1 << 14),
1226       .maxImageDimension3D                      = (1 << 11),
1227       .maxImageDimensionCube                    = (1 << 14),
1228       .maxImageArrayLayers                      = (1 << 11),
1229       .maxTexelBufferElements                   = 128 * 1024 * 1024,
1230       .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1231       .maxStorageBufferRange                    = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
1232       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1233       .maxMemoryAllocationCount                 = UINT32_MAX,
1234       .maxSamplerAllocationCount                = 64 * 1024,
1235       .bufferImageGranularity                   = 1,
1236       .sparseAddressSpaceSize                   = sparse_addr_space_size,
1237       .maxBoundDescriptorSets                   = MAX_SETS,
1238       .maxPerStageDescriptorSamplers            = desc_limits.max_samplers,
1239       .maxPerStageDescriptorUniformBuffers      = desc_limits.max_ubos,
1240       .maxPerStageDescriptorStorageBuffers      = desc_limits.max_ssbos,
1241       .maxPerStageDescriptorSampledImages       = desc_limits.max_images,
1242       .maxPerStageDescriptorStorageImages       = desc_limits.max_images,
1243       .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1244       .maxPerStageResources                     = desc_limits.max_resources,
1245       .maxDescriptorSetSamplers                 = desc_limits.max_samplers,
1246       .maxDescriptorSetUniformBuffers           = desc_limits.max_ubos,
1247       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1248       .maxDescriptorSetStorageBuffers           = desc_limits.max_ssbos,
1249       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1250       .maxDescriptorSetSampledImages            = desc_limits.max_images,
1251       .maxDescriptorSetStorageImages            = desc_limits.max_images,
1252       .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1253       .maxVertexInputAttributes                 = MAX_VES,
1254       .maxVertexInputBindings                   = MAX_VBS,
1255       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1256        *
1257        * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1258        */
1259       .maxVertexInputAttributeOffset            = 2047,
1260       /* Skylake PRMs: Volume 2d: Command Reference: Structures:
1261        *
1262        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1263        */
1264       .maxVertexInputBindingStride              = 4095,
1265       .maxVertexOutputComponents                = 128,
1266       .maxTessellationGenerationLevel           = 64,
1267       .maxTessellationPatchSize                 = 32,
1268       .maxTessellationControlPerVertexInputComponents = 128,
1269       .maxTessellationControlPerVertexOutputComponents = 128,
1270       .maxTessellationControlPerPatchOutputComponents = 128,
1271       .maxTessellationControlTotalOutputComponents = 2048,
1272       .maxTessellationEvaluationInputComponents = 128,
1273       .maxTessellationEvaluationOutputComponents = 128,
1274       .maxGeometryShaderInvocations             = 32,
1275       .maxGeometryInputComponents               = 128,
1276       .maxGeometryOutputComponents              = 128,
1277       .maxGeometryOutputVertices                = 256,
1278       .maxGeometryTotalOutputComponents         = 1024,
1279       .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1280       .maxFragmentOutputAttachments             = 8,
1281       .maxFragmentDualSrcAttachments            = 1,
1282       .maxFragmentCombinedOutputResources       = MAX_RTS + desc_limits.max_ssbos +
1283                                                   desc_limits.max_images,
1284       .maxComputeSharedMemorySize               = intel_device_info_get_max_slm_size(&pdevice->info),
1285       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1286       .maxComputeWorkGroupInvocations           = max_workgroup_size,
1287       .maxComputeWorkGroupSize = {
1288          max_workgroup_size,
1289          max_workgroup_size,
1290          max_workgroup_size,
1291       },
1292       .subPixelPrecisionBits                    = 8,
1293       .subTexelPrecisionBits                    = 8,
1294       .mipmapPrecisionBits                      = 8,
1295       .maxDrawIndexedIndexValue                 = UINT32_MAX,
1296       .maxDrawIndirectCount                     = UINT32_MAX,
1297       .maxSamplerLodBias                        = 16,
1298       .maxSamplerAnisotropy                     = 16,
1299       .maxViewports                             = MAX_VIEWPORTS,
1300       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1301       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1302       .viewportSubPixelBits                     = 13, /* We take a float? */
1303       .minMemoryMapAlignment                    = 4096, /* A page */
1304       /* The dataport requires texel alignment so we need to assume a worst
1305        * case of R32G32B32A32 which is 16 bytes.
1306        */
1307       .minTexelBufferOffsetAlignment            = 16,
1308       .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
1309       .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
1310       .minTexelOffset                           = -8,
1311       .maxTexelOffset                           = 7,
1312       .minTexelGatherOffset                     = -32,
1313       .maxTexelGatherOffset                     = 31,
1314       .minInterpolationOffset                   = -0.5,
1315       .maxInterpolationOffset                   = 0.4375,
1316       .subPixelInterpolationOffsetBits          = 4,
1317       .maxFramebufferWidth                      = (1 << 14),
1318       .maxFramebufferHeight                     = (1 << 14),
1319       .maxFramebufferLayers                     = (1 << 11),
1320       .framebufferColorSampleCounts             = sample_counts,
1321       .framebufferDepthSampleCounts             = sample_counts,
1322       .framebufferStencilSampleCounts           = sample_counts,
1323       .framebufferNoAttachmentsSampleCounts     = sample_counts,
1324       .maxColorAttachments                      = MAX_RTS,
1325       .sampledImageColorSampleCounts            = sample_counts,
1326       .sampledImageIntegerSampleCounts          = sample_counts,
1327       .sampledImageDepthSampleCounts            = sample_counts,
1328       .sampledImageStencilSampleCounts          = sample_counts,
1329       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
1330       .maxSampleMaskWords                       = 1,
1331       .timestampComputeAndGraphics              = true,
1332       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
1333       .maxClipDistances                         = 8,
1334       .maxCullDistances                         = 8,
1335       .maxCombinedClipAndCullDistances          = 8,
1336       .discreteQueuePriorities                  = 2,
1337       .pointSizeRange                           = { 0.125, 255.875 },
1338       /* While SKL and up support much wider lines than we are setting here,
1339        * in practice we run into conformance issues if we go past this limit.
1340        * Since the Windows driver does the same, it's probably fair to assume
1341        * that no one needs more than this.
1342        */
1343       .lineWidthRange                           = { 0.0, 8.0 },
1344       .pointSizeGranularity                     = (1.0 / 8.0),
1345       .lineWidthGranularity                     = (1.0 / 128.0),
1346       .strictLines                              = false,
1347       .standardSampleLocations                  = true,
1348       .optimalBufferCopyOffsetAlignment         = 128,
1349       .optimalBufferCopyRowPitchAlignment       = 128,
1350       .nonCoherentAtomSize                      = 64,
1351 
1352       /* Sparse: */
1353       .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
1354       .sparseResidencyStandard2DMultisampleBlockShape = false,
1355       .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
1356       .sparseResidencyAlignedMipSize = false,
1357       .sparseResidencyNonResidentStrict = has_sparse_or_fake,
1358 
1359       /* VK_KHR_cooperative_matrix */
1360       .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1361 
1362       /* Vulkan 1.4 */
1363       .dynamicRenderingLocalReadDepthStencilAttachments = true,
1364       .dynamicRenderingLocalReadMultisampledAttachments = true,
1365    };
1366 
1367    snprintf(props->deviceName, sizeof(props->deviceName),
1368             "%s", pdevice->info.name);
1369    memcpy(props->pipelineCacheUUID,
1370           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1371 
1372    get_properties_1_1(pdevice, props);
1373    get_properties_1_2(pdevice, props);
1374    get_properties_1_3(pdevice, props);
1375 
1376    /* VK_KHR_acceleration_structure */
1377    {
1378       props->maxGeometryCount = (1u << 24) - 1;
1379       props->maxInstanceCount = (1u << 24) - 1;
1380       props->maxPrimitiveCount = (1u << 29) - 1;
1381       props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
1382       props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
1383       props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
1384       props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
1385       props->minAccelerationStructureScratchOffsetAlignment = 64;
1386    }
1387 
1388    /* VK_KHR_compute_shader_derivatives */
1389    {
1390       props->meshAndTaskShaderDerivatives = pdevice->info.has_mesh_shading;
1391    }
1392 
1393    /* VK_KHR_fragment_shading_rate */
1394    {
1395       props->primitiveFragmentShadingRateWithMultipleViewports =
1396          pdevice->info.has_coarse_pixel_primitive_and_cb;
1397       props->layeredShadingRateAttachments =
1398       pdevice->info.has_coarse_pixel_primitive_and_cb;
1399       props->fragmentShadingRateNonTrivialCombinerOps =
1400          pdevice->info.has_coarse_pixel_primitive_and_cb;
1401       props->maxFragmentSize = (VkExtent2D) { 4, 4 };
1402       props->maxFragmentSizeAspectRatio =
1403          pdevice->info.has_coarse_pixel_primitive_and_cb ?
1404          2 : 4;
1405       props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1406          (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
1407       props->maxFragmentShadingRateRasterizationSamples =
1408       pdevice->info.has_coarse_pixel_primitive_and_cb ?
1409          VK_SAMPLE_COUNT_4_BIT :  VK_SAMPLE_COUNT_16_BIT;
1410       props->fragmentShadingRateWithShaderDepthStencilWrites = false;
1411       props->fragmentShadingRateWithSampleMask = true;
1412       props->fragmentShadingRateWithShaderSampleMask = false;
1413       props->fragmentShadingRateWithConservativeRasterization = true;
1414       props->fragmentShadingRateWithFragmentShaderInterlock = true;
1415       props->fragmentShadingRateWithCustomSampleLocations = true;
1416       props->fragmentShadingRateStrictMultiplyCombiner = true;
1417 
1418       if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
1419          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1420          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1421          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1422       } else {
1423          /* Those must be 0 if attachmentFragmentShadingRate is not supported. */
1424          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1425          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1426          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
1427       }
1428    }
1429 
1430    /* VK_KHR_maintenance5 */
1431    {
1432       props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1433       props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1434       props->depthStencilSwizzleOneSupport = true;
1435       props->polygonModePointSize = true;
1436       props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1437       props->nonStrictWideLinesUseParallelogram = false;
1438    }
1439 
1440    /* VK_KHR_maintenance6 */
1441    {
1442       props->blockTexelViewCompatibleMultipleLayers = true;
1443       props->maxCombinedImageSamplerDescriptorCount = 3;
1444       props->fragmentShadingRateClampCombinerInputs = true;
1445    }
1446 
1447    /* VK_KHR_maintenance7 */
1448    {
1449       props->robustFragmentShadingRateAttachmentAccess = true;
1450       props->separateDepthStencilAttachmentAccess = true;
1451       props->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1452       props->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1453       props->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1454       props->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1455       props->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1456       props->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1457    }
1458 
1459    /* VK_KHR_performance_query */
1460    {
1461       props->allowCommandBufferQueryCopies = false;
1462    }
1463 
1464    /* VK_KHR_push_descriptor */
1465    {
1466       props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1467    }
1468 
1469    /* VK_KHR_ray_tracing_pipeline */
1470    {
1471       /* TODO */
1472       props->shaderGroupHandleSize = 32;
1473       props->maxRayRecursionDepth = 31;
1474       /* MemRay::hitGroupSRStride is 16 bits */
1475       props->maxShaderGroupStride = UINT16_MAX;
1476       /* MemRay::hitGroupSRBasePtr requires 16B alignment */
1477       props->shaderGroupBaseAlignment = 16;
1478       props->shaderGroupHandleAlignment = 16;
1479       props->shaderGroupHandleCaptureReplaySize = 32;
1480       props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
1481       props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
1482    }
1483 
1484    /* VK_KHR_vertex_attribute_divisor */
1485    {
1486       props->maxVertexAttribDivisor = UINT32_MAX / 16;
1487       props->supportsNonZeroFirstInstance = true;
1488    }
1489 
1490    /* VK_EXT_conservative_rasterization */
1491    {
1492       /* There's nothing in the public docs about this value as far as I can
1493        * tell. However, this is the value the Windows driver reports and
1494        * there's a comment on a rejected HW feature in the internal docs that
1495        * says:
1496        *
1497        *    "This is similar to conservative rasterization, except the
1498        *    primitive area is not extended by 1/512 and..."
1499        *
1500        * That's a bit of an obtuse reference but it's the best we've got for
1501        * now.
1502        */
1503       props->primitiveOverestimationSize = 1.0f / 512.0f;
1504       props->maxExtraPrimitiveOverestimationSize = 0.0f;
1505       props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
1506       props->primitiveUnderestimation = false;
1507       props->conservativePointAndLineRasterization = false;
1508       props->degenerateTrianglesRasterized = true;
1509       props->degenerateLinesRasterized = false;
1510       props->fullyCoveredFragmentShaderInputVariable = false;
1511       props->conservativeRasterizationPostDepthCoverage = true;
1512    }
1513 
1514    /* VK_EXT_custom_border_color */
1515    {
1516       props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1517    }
1518 
1519    /* VK_EXT_descriptor_buffer */
1520    {
1521       props->combinedImageSamplerDescriptorSingleArray = true;
1522       props->bufferlessPushDescriptors = true;
1523       /* Written to the buffer before a timeline semaphore is signaled, but
1524        * after vkQueueSubmit().
1525        */
1526       props->allowSamplerImageViewPostSubmitCreation = true;
1527       props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;
1528 
1529       if (pdevice->uses_ex_bso) {
1530          props->maxDescriptorBufferBindings = MAX_SETS;
1531          props->maxResourceDescriptorBufferBindings = MAX_SETS;
1532          props->maxSamplerDescriptorBufferBindings = MAX_SETS;
1533          props->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1534       } else {
1535          props->maxDescriptorBufferBindings = 3; /* resources, samplers, push (we don't care about push) */
1536          props->maxResourceDescriptorBufferBindings = 1;
1537          props->maxSamplerDescriptorBufferBindings = 1;
1538          props->maxEmbeddedImmutableSamplerBindings = 1;
1539       }
1540       props->maxEmbeddedImmutableSamplers = MAX_EMBEDDED_SAMPLERS;
1541 
1542       /* Storing a 64bit address */
1543       props->bufferCaptureReplayDescriptorDataSize = 8;
1544       props->imageCaptureReplayDescriptorDataSize = 8;
1545       /* Offset inside the reserved border color pool */
1546       props->samplerCaptureReplayDescriptorDataSize = 4;
1547 
1548       /* Not affected by replay */
1549       props->imageViewCaptureReplayDescriptorDataSize = 0;
1550       /* The acceleration structure virtual address backing is coming from a
1551        * buffer, so as long as that buffer is captured/replayed correctly we
1552        * should always get the same address.
1553        */
1554       props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1555 
1556       props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE;
1557       props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
1558                                                         ANV_SURFACE_STATE_SIZE);
1559       props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1560       props->storageImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1561       props->uniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1562       props->robustUniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1563       props->storageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1564       props->robustStorageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1565       props->uniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1566       props->robustUniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1567       props->storageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1568       props->robustStorageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1569       props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE;
1570       props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor);
1571       props->maxSamplerDescriptorBufferRange = pdevice->va.dynamic_visible_pool.size;
1572       props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice,
1573                                                                                        true);
1574       props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1575       props->descriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1576       props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1577    }
1578 
1579    /* VK_EXT_extended_dynamic_state3 */
1580    {
1581       props->dynamicPrimitiveTopologyUnrestricted = true;
1582    }
1583 
1584    /* VK_EXT_external_memory_host */
1585    {
1586       props->minImportedHostPointerAlignment = 4096;
1587    }
1588 
1589    /* VK_EXT_graphics_pipeline_library */
1590    {
1591       props->graphicsPipelineLibraryFastLinking = true;
1592       props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1593    }
1594 
1595    /* VK_EXT_host_image_copy */
1596    {
1597       static const VkImageLayout supported_layouts[] = {
1598          VK_IMAGE_LAYOUT_GENERAL,
1599          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1600          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1601          VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1602          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1603          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1604          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1605          VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1606          VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1607          VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1608          VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1609          VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1610          VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1611          VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1612          VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1613          VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR,
1614          VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1615          VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR,
1616       };
1617 
1618       props->pCopySrcLayouts = (VkImageLayout *) supported_layouts;
1619       props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1620       props->pCopyDstLayouts = (VkImageLayout *) supported_layouts;
1621       props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1622 
1623       /* This UUID essentially tells you if you can share an optimal-tiling
1624        * image with another driver. Much of the tiling decisions are based on:
1625        *
1626        *    - device generation (different tilings based on generations)
1627        *    - device workarounds
1628        *    - driver build (as we implement workarounds or performance tunings,
1629        *      the tiling decision changes)
1630        *
1631        * So we're using a hash of the verx10 field + driver_build_sha1.
1632        *
1633        * Unfortunately there is a HW issue on SKL GT4 that makes it use some
1634        * different tilings sometimes (see isl_gfx7.c).
1635        */
1636       {
1637          struct mesa_sha1 sha1_ctx;
1638          uint8_t sha1[20];
1639 
1640          _mesa_sha1_init(&sha1_ctx);
1641          _mesa_sha1_update(&sha1_ctx, pdevice->driver_build_sha1,
1642                            sizeof(pdevice->driver_build_sha1));
1643          _mesa_sha1_update(&sha1_ctx, &pdevice->info.platform,
1644                            sizeof(pdevice->info.platform));
1645          if (pdevice->info.platform == INTEL_PLATFORM_SKL &&
1646              pdevice->info.gt == 4) {
1647             _mesa_sha1_update(&sha1_ctx, &pdevice->info.gt,
1648                               sizeof(pdevice->info.gt));
1649          }
1650          _mesa_sha1_final(&sha1_ctx, sha1);
1651 
1652          assert(ARRAY_SIZE(sha1) >= VK_UUID_SIZE);
1653          memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
1654       }
1655 
1656       /* System without ReBAR cannot map all memory types on the host and that
1657        * affects the memory types an image can use for host memory copies.
1658        *
1659        * System with compressed memory types also cannot expose all image
1660        * memory types for host image copies.
1661        */
1662       props->identicalMemoryTypeRequirements = pdevice->has_small_bar ||
1663          pdevice->memory.compressed_mem_types != 0;
1664    }
1665 
1666    /* VK_EXT_legacy_vertex_attributes */
1667    {
1668       props->nativeUnalignedPerformance = true;
1669    }
1670 
1671    /* VK_EXT_line_rasterization */
1672    {
1673       /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1674        * Rules - Legacy Mode", it says the following:
1675        *
1676        *    "Note that the device divides a pixel into a 16x16 array of
1677        *     subpixels, referenced by their upper left corners."
1678        *
1679        * This is the only known reference in the PRMs to the subpixel
1680        * precision of line rasterization and a "16x16 array of subpixels"
1681        * implies 4 subpixel precision bits. Empirical testing has shown that 4
1682        * subpixel precision bits applies to all line rasterization types.
1683        */
1684       props->lineSubPixelPrecisionBits = 4;
1685    }
1686 
1687    /* VK_EXT_map_memory_placed */
1688    {
1689       props->minPlacedMemoryMapAlignment = 4096;
1690    }
1691 
1692    /* VK_EXT_mesh_shader */
1693    {
1694       /* Bounded by the maximum representable size in
1695        * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
1696        */
1697       const uint32_t max_slm_size = intel_device_info_get_max_slm_size(devinfo);
1698 
1699       /* Bounded by the maximum representable size in
1700        * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
1701        */
1702       const uint32_t max_workgroup_size = 1 << 10;
1703 
1704       /* 3DMESH_3D limitation. */
1705       const uint32_t max_threadgroup_count = 1 << 22;
1706 
1707       /* 3DMESH_3D limitation. */
1708       const uint32_t max_threadgroup_xyz = 65535;
1709 
1710       const uint32_t max_urb_size = 64 * 1024;
1711 
1712       props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
1713       props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
1714       props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
1715       props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
1716 
1717       props->maxTaskWorkGroupInvocations = max_workgroup_size;
1718       props->maxTaskWorkGroupSize[0] = max_workgroup_size;
1719       props->maxTaskWorkGroupSize[1] = max_workgroup_size;
1720       props->maxTaskWorkGroupSize[2] = max_workgroup_size;
1721 
1722       /* TUE header with padding */
1723       const uint32_t task_payload_reserved = 32;
1724 
1725       props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
1726       props->maxTaskSharedMemorySize = max_slm_size;
1727       props->maxTaskPayloadAndSharedMemorySize =
1728          props->maxTaskPayloadSize +
1729          props->maxTaskSharedMemorySize;
1730 
1731       props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
1732       props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
1733       props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
1734       props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
1735 
1736       props->maxMeshWorkGroupInvocations = max_workgroup_size;
1737       props->maxMeshWorkGroupSize[0] = max_workgroup_size;
1738       props->maxMeshWorkGroupSize[1] = max_workgroup_size;
1739       props->maxMeshWorkGroupSize[2] = max_workgroup_size;
1740 
1741       props->maxMeshSharedMemorySize = max_slm_size;
1742       props->maxMeshPayloadAndSharedMemorySize =
1743          props->maxTaskPayloadSize +
1744          props->maxMeshSharedMemorySize;
1745 
1746       /* Unfortunately spec's formula for the max output size doesn't match our hardware
1747        * (because some per-primitive and per-vertex attributes have alignment restrictions),
1748        * so we have to advertise the minimum value mandated by the spec to not overflow it.
1749        */
1750       props->maxMeshOutputPrimitives = 256;
1751       props->maxMeshOutputVertices = 256;
1752 
1753       /* NumPrim + Primitive Data List */
1754       const uint32_t max_indices_memory =
1755          ALIGN(sizeof(uint32_t) +
1756                sizeof(uint32_t) * props->maxMeshOutputVertices, 32);
1757 
1758       props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
1759 
1760       props->maxMeshPayloadAndOutputMemorySize =
1761          props->maxTaskPayloadSize +
1762          props->maxMeshOutputMemorySize;
1763 
1764       props->maxMeshOutputComponents = 128;
1765 
1766       /* RTAIndex is 11-bits wide */
1767       props->maxMeshOutputLayers = 1 << 11;
1768 
1769       props->maxMeshMultiviewViewCount = 1;
1770 
1771       /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
1772       props->meshOutputPerVertexGranularity = 8;
1773       /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
1774       props->meshOutputPerPrimitiveGranularity = 8;
1775 
1776       /* SIMD16 */
1777       props->maxPreferredTaskWorkGroupInvocations = 16;
1778       props->maxPreferredMeshWorkGroupInvocations = 16;
1779 
1780       props->prefersLocalInvocationVertexOutput = false;
1781       props->prefersLocalInvocationPrimitiveOutput = false;
1782       props->prefersCompactVertexOutput = false;
1783       props->prefersCompactPrimitiveOutput = false;
1784 
1785       /* Spec minimum values */
1786       assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
1787       assert(props->maxTaskWorkGroupCount[0] >= 65535);
1788       assert(props->maxTaskWorkGroupCount[1] >= 65535);
1789       assert(props->maxTaskWorkGroupCount[2] >= 65535);
1790 
1791       assert(props->maxTaskWorkGroupInvocations >= 128);
1792       assert(props->maxTaskWorkGroupSize[0] >= 128);
1793       assert(props->maxTaskWorkGroupSize[1] >= 128);
1794       assert(props->maxTaskWorkGroupSize[2] >= 128);
1795 
1796       assert(props->maxTaskPayloadSize >= 16384);
1797       assert(props->maxTaskSharedMemorySize >= 32768);
1798       assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);
1799 
1800 
1801       assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
1802       assert(props->maxMeshWorkGroupCount[0] >= 65535);
1803       assert(props->maxMeshWorkGroupCount[1] >= 65535);
1804       assert(props->maxMeshWorkGroupCount[2] >= 65535);
1805 
1806       assert(props->maxMeshWorkGroupInvocations >= 128);
1807       assert(props->maxMeshWorkGroupSize[0] >= 128);
1808       assert(props->maxMeshWorkGroupSize[1] >= 128);
1809       assert(props->maxMeshWorkGroupSize[2] >= 128);
1810 
1811       assert(props->maxMeshSharedMemorySize >= 28672);
1812       assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
1813       assert(props->maxMeshOutputMemorySize >= 32768);
1814       assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);
1815 
1816       assert(props->maxMeshOutputComponents >= 128);
1817 
1818       assert(props->maxMeshOutputVertices >= 256);
1819       assert(props->maxMeshOutputPrimitives >= 256);
1820       assert(props->maxMeshOutputLayers >= 8);
1821       assert(props->maxMeshMultiviewViewCount >= 1);
1822    }
1823 
1824    /* VK_EXT_multi_draw */
1825    {
1826       props->maxMultiDrawCount = 2048;
1827    }
1828 
1829    /* VK_EXT_nested_command_buffer */
1830    {
1831       props->maxCommandBufferNestingLevel = UINT32_MAX;
1832    }
1833 
1834    /* VK_EXT_pci_bus_info */
1835    {
1836       props->pciDomain = pdevice->info.pci_domain;
1837       props->pciBus = pdevice->info.pci_bus;
1838       props->pciDevice = pdevice->info.pci_dev;
1839       props->pciFunction = pdevice->info.pci_func;
1840    }
1841 
1842    /* VK_EXT_physical_device_drm */
1843    {
1844       props->drmHasPrimary = pdevice->has_master;
1845       props->drmPrimaryMajor = pdevice->master_major;
1846       props->drmPrimaryMinor = pdevice->master_minor;
1847       props->drmHasRender = pdevice->has_local;
1848       props->drmRenderMajor = pdevice->local_major;
1849       props->drmRenderMinor = pdevice->local_minor;
1850    }
1851 
1852    /* VK_EXT_pipeline_robustness */
1853    {
1854       props->defaultRobustnessStorageBuffers =
1855          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1856       props->defaultRobustnessUniformBuffers =
1857          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1858       props->defaultRobustnessVertexInputs =
1859          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
1860       props->defaultRobustnessImages =
1861          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1862    }
1863 
1864    /* VK_EXT_provoking_vertex */
1865    {
1866       props->provokingVertexModePerPipeline = true;
1867       props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1868    }
1869 
1870    /* VK_EXT_robustness2 */
1871    {
1872       props->robustStorageBufferAccessSizeAlignment =
1873          ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1874       props->robustUniformBufferAccessSizeAlignment =
1875          ANV_UBO_ALIGNMENT;
1876    }
1877 
1878    /* VK_EXT_sample_locations */
1879    {
1880       props->sampleLocationSampleCounts =
1881          isl_device_get_sample_counts(&pdevice->isl_dev);
1882 
1883       /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1884       props->maxSampleLocationGridSize.width = 1;
1885       props->maxSampleLocationGridSize.height = 1;
1886 
1887       props->sampleLocationCoordinateRange[0] = 0;
1888       props->sampleLocationCoordinateRange[1] = 0.9375;
1889       props->sampleLocationSubPixelBits = 4;
1890 
1891       props->variableSampleLocations = true;
1892    }
1893 
1894    /* VK_EXT_shader_module_identifier */
1895    {
1896       STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1897                     sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1898       memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1899              vk_shaderModuleIdentifierAlgorithmUUID,
1900              sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1901    }
1902 
1903    /* VK_EXT_transform_feedback */
1904    {
1905       props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1906       props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1907       props->maxTransformFeedbackBufferSize = (1ull << 32);
1908       props->maxTransformFeedbackStreamDataSize = 128 * 4;
1909       props->maxTransformFeedbackBufferDataSize = 128 * 4;
1910       props->maxTransformFeedbackBufferDataStride = 2048;
1911       props->transformFeedbackQueries = true;
1912       props->transformFeedbackStreamsLinesTriangles = false;
1913       props->transformFeedbackRasterizationStreamSelect = false;
1914       props->transformFeedbackDraw = true;
1915    }
1916 
1917    /* VK_ANDROID_native_buffer */
1918 #if DETECT_OS_ANDROID
1919    {
1920       props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
1921    }
1922 #endif /* DETECT_OS_ANDROID */
1923 
1924 
1925    /* VK_MESA_image_alignment_control */
1926    {
1927       /* We support 4k/64k tiling alignments on most platforms */
1928       props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
1929    }
1930 }
1931 
1932 static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device * device,int fd)1933 anv_init_meminfo(struct anv_physical_device *device, int fd)
1934 {
1935    const struct intel_device_info *devinfo = &device->info;
1936 
1937    device->sys.region = &devinfo->mem.sram.mem;
1938    device->sys.size = devinfo->mem.sram.mappable.size;
1939    device->sys.available = devinfo->mem.sram.mappable.free;
1940 
1941    device->vram_mappable.region = &devinfo->mem.vram.mem;
1942    device->vram_mappable.size = devinfo->mem.vram.mappable.size;
1943    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1944 
1945    device->vram_non_mappable.region = &devinfo->mem.vram.mem;
1946    device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
1947    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1948 
1949    return VK_SUCCESS;
1950 }
1951 
1952 static void
anv_update_meminfo(struct anv_physical_device * device,int fd)1953 anv_update_meminfo(struct anv_physical_device *device, int fd)
1954 {
1955    if (!intel_device_info_update_memory_info(&device->info, fd))
1956       return;
1957 
1958    const struct intel_device_info *devinfo = &device->info;
1959    device->sys.available = devinfo->mem.sram.mappable.free;
1960    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1961    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1962 }
1963 
/* Build the Vulkan memory heap and memory type tables for this physical
 * device.
 *
 * Steps:
 *   1. Query region sizes from the kernel (anv_init_meminfo).
 *   2. Lay out 1-3 heaps depending on whether the device has VRAM and
 *      whether part of the VRAM is host-mappable.
 *   3. Let the KMD-specific backend (i915 or Xe) populate memory.types.
 *   4. Derive the type masks used at allocation time, duplicating types
 *      where required (fake "non-local" copies for a game workaround, and
 *      "dynamic visible" copies for descriptor buffers).
 *
 * Returns VK_SUCCESS or an initialization error from the meminfo query, the
 * backend type setup, or the coherency check at the end.
 */
static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (anv_physical_device_has_vram(device)) {
      /* We can create 2 or 3 different heaps when we have local memory
       * support, first heap with local memory size and second with system
       * memory size and the third is added only if part of the vram is
       * mappable to the host.
       */
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         /* If there is a vram_non_mappable, use that for the device only
          * heap. Otherwise use the vram_mappable.
          */
         .size = device->vram_non_mappable.size != 0 ?
                 device->vram_non_mappable.size : device->vram_mappable.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };
      /* Add an additional smaller vram mappable heap if we can't map all the
       * vram to the host.
       */
      if (device->vram_non_mappable.size > 0) {
         device->memory.heap_count++;
         device->memory.heaps[2] = (struct anv_memory_heap) {
            .size = device->vram_mappable.size,
            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
            .is_local_mem = true,
         };
      }
   } else {
      /* Integrated GPU: a single system-memory heap, still advertised as
       * device-local (is_local_mem stays false for the UMA case).
       */
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };
   }

   /* Hand off to the kernel-driver specific backend to fill in
    * device->memory.types / type_count.
    */
   switch (device->info.kmd_type) {
   case INTEL_KMD_TYPE_XE:
      result = anv_xe_physical_device_init_memory_types(device);
      break;
   case INTEL_KMD_TYPE_I915:
   default:
      result = anv_i915_physical_device_init_memory_types(device);
      break;
   }

   /* Strict '<' (not '<='): the duplication passes below may append more
    * types, so headroom must remain in the array.
    */
   assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));

   if (result != VK_SUCCESS)
      return result;

   /* Some games (e.g., Total War: WARHAMMER III) sometimes seem to expect to
    * find memory types both with and without
    * VK_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_BIT. So here we duplicate all our
    * memory types just to make these games happy.
    * This behavior is not spec-compliant as we still only have one heap that
    * is now inconsistent with some of the memory types, but the game doesn't
    * seem to care about it.
    */
   if (device->instance->anv_fake_nonlocal_memory &&
       !anv_physical_device_has_vram(device)) {
      const uint32_t base_types_count = device->memory.type_count;
      for (int i = 0; i < base_types_count; i++) {
         if (!(device->memory.types[i].propertyFlags &
               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
            continue;

         assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
         struct anv_memory_type *new_type =
            &device->memory.types[device->memory.type_count++];
         *new_type = device->memory.types[i];

         /* The appended copy keeps DEVICE_LOCAL; strip it from the original
          * so both variants are advertised.
          */
         device->memory.types[i].propertyFlags &=
            ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
      }
   }

   /* Replicate all non protected memory types for descriptor buffers because
    * we want to identify memory allocations to place them in the right memory
    * heap.
    */
   device->memory.default_buffer_mem_types =
      BITFIELD_RANGE(0, device->memory.type_count);
   device->memory.protected_mem_types = 0;
   device->memory.dynamic_visible_mem_types = 0;
   device->memory.compressed_mem_types = 0;

   /* Snapshot the count: the loop body appends new types, which must not be
    * revisited.
    */
   const uint32_t base_types_count = device->memory.type_count;
   for (int i = 0; i < base_types_count; i++) {
      bool skip = false;

      /* Protected types are excluded from the default buffer mask and are
       * not duplicated as dynamic-visible.
       */
      if (device->memory.types[i].propertyFlags &
          VK_MEMORY_PROPERTY_PROTECTED_BIT) {
         device->memory.protected_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      /* Same treatment for compressed types. */
      if (device->memory.types[i].compressed) {
         device->memory.compressed_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      if (skip)
         continue;

      /* Record the index the duplicate will occupy (type_count before the
       * increment below), then append a dynamic-visible copy.
       */
      device->memory.dynamic_visible_mem_types |=
         BITFIELD_BIT(device->memory.type_count);

      assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
      struct anv_memory_type *new_type =
         &device->memory.types[device->memory.type_count++];
      *new_type = device->memory.types[i];
      new_type->dynamic_visible = true;
   }

   assert(device->memory.type_count <= VK_MAX_MEMORY_TYPES);

   /* Host-visible but non-coherent memory needs explicit CPU cache flushes;
    * that path is only implemented where SUPPORT_INTEL_INTEGRATED_GPUS is
    * defined.
    */
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
         device->memory.need_flush = true;
#else
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "Memory configuration requires flushing, but it's not implemented for this architecture");
#endif
   }

   return VK_SUCCESS;
}
2111 
2112 static VkResult
anv_physical_device_init_uuids(struct anv_physical_device * device)2113 anv_physical_device_init_uuids(struct anv_physical_device *device)
2114 {
2115    const struct build_id_note *note =
2116       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
2117    if (!note) {
2118       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2119                        "Failed to find build-id");
2120    }
2121 
2122    unsigned build_id_len = build_id_length(note);
2123    if (build_id_len < 20) {
2124       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2125                        "build-id too short.  It needs to be a SHA");
2126    }
2127 
2128    memcpy(device->driver_build_sha1, build_id_data(note), 20);
2129 
2130    struct mesa_sha1 sha1_ctx;
2131    uint8_t sha1[20];
2132    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
2133 
2134    /* The pipeline cache UUID is used for determining when a pipeline cache is
2135     * invalid.  It needs both a driver build and the PCI ID of the device.
2136     */
2137    _mesa_sha1_init(&sha1_ctx);
2138    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
2139    brw_device_sha1_update(&sha1_ctx, &device->info);
2140    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
2141                      sizeof(device->always_use_bindless));
2142    _mesa_sha1_final(&sha1_ctx, sha1);
2143    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
2144 
2145    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
2146    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
2147 
2148    return VK_SUCCESS;
2149 }
2150 
/* Create the on-disk shader cache for this device (no-op when the build
 * lacks ENABLE_SHADER_CACHE).  The cache is keyed by "anv_<pci-id>", stamped
 * with the driver build SHA, and tagged with the compiler configuration so
 * incompatible caches are rejected.
 */
static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   /* "anv_" + 4 hex digits + NUL = 9 chars, hence the len check below. */
   char cache_name[10];
   ASSERTED int written = snprintf(cache_name, sizeof(cache_name), "anv_%04x",
                                   device->info.pci_device_id);
   assert(written == sizeof(cache_name) - 2);

   /* Format the 20-byte build SHA as a 40-char hex string + NUL. */
   char build_sha_str[41];
   _mesa_sha1_format(build_sha_str, device->driver_build_sha1);

   const uint64_t config_flags =
      brw_get_compiler_config_value(device->compiler);

   device->vk.disk_cache =
      disk_cache_create(cache_name, build_sha_str, config_flags);
#endif
}
2168 
2169 static void
anv_physical_device_free_disk_cache(struct anv_physical_device * device)2170 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
2171 {
2172 #ifdef ENABLE_SHADER_CACHE
2173    if (device->vk.disk_cache) {
2174       disk_cache_destroy(device->vk.disk_cache);
2175       device->vk.disk_cache = NULL;
2176    }
2177 #else
2178    assert(device->vk.disk_cache == NULL);
2179 #endif
2180 }
2181 
/* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
 * queue overrides.
 *
 * To override the number queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *  * "v" is for video queues with no graphics support
 *  * "b" is for copy (blitter) queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count, int *blit_count)
{
   /* -1 means "no override requested"; only non-negative values are copied
    * back to the caller's counts below.
    */
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   int v_override = -1;
   int blit_override = -1;
   const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");

   /* Override queues for Android HWUI that expects min 2 queues. */
#if DETECT_OS_ANDROID
   *gc_count = 2;
#endif

   if (env_ == NULL)
      return;

   /* strtok_r() mutates its input, so work on a private copy.  Bail out on
    * OOM: passing an unchecked NULL from strdup() into strtok_r() would be
    * undefined behavior, and keeping the default queue counts is a safe
    * fallback.
    */
   char *env = strdup(env_);
   if (env == NULL)
      return;

   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "v=", 2) == 0) {
         v_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "b=", 2) == 0) {
         blit_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);

   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   /* Vulkan requires queue family 0 to support graphics+compute, so warn if
    * the override leaves graphics-only queues without a gc family.
    */
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
   if (v_override >= 0)
      *v_count = v_override;
   if (blit_override >= 0)
      *blit_count = blit_override;
}
2256 
/* Populate pdevice->queue.families from the engines reported by the kernel.
 *
 * Family ordering (when engine info is available): graphics+compute first,
 * then graphics-only, compute-only, video, and copy — each family is only
 * emitted when its engine count is non-zero.  Without engine info a single
 * graphics+compute+transfer family is advertised.  Counts may be overridden
 * via ANV_QUEUE_OVERRIDE (see anv_override_engine_counts).
 */
static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;
   /* Sparse binding and protected bits are appended to every applicable
    * family only when the device supports them.
    */
   VkQueueFlags sparse_flags = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED ?
                               VK_QUEUE_SPARSE_BINDING_BIT : 0;
   VkQueueFlags protected_flag = pdevice->has_protected_contexts ?
                                 VK_QUEUE_PROTECTED_BIT : 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int v_count =
         intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
      int g_count = 0;
      int c_count = 0;
      /* Not only the Kernel needs to have vm_control, but it also needs to
       * have a new enough GuC and the interface to tell us so. This is
       * implemented in the common layer by is_guc_semaphore_functional() and
       * results in devinfo->engine_class_supported_count being adjusted,
       * which we read below.
       */
      const bool kernel_supports_non_render_engines = pdevice->has_vm_control;
      /* For now we're choosing to not expose non-render engines on i915.ko
       * even when the Kernel allows it. We have data suggesting it's not an
       * obvious win in terms of performance.
       */
      const bool can_use_non_render_engines =
         kernel_supports_non_render_engines &&
         pdevice->info.kmd_type == INTEL_KMD_TYPE_XE;

      if (can_use_non_render_engines) {
         c_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
      }

      /* Dedicated copy (blitter) queues are only exposed on verx10 >= 125
       * platforms, and only when non-render engines are usable at all.
       */
      int blit_count = 0;
      if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
         blit_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COPY];
      }

      anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count, &blit_count);

      /* Compute-only queues fall back to the render engine when no real
       * compute engine class is supported (e.g. after an override requested
       * c > 0 on hardware without CCS engines).
       */
      enum intel_engine_class compute_class =
         pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE] &&
         c_count >= 1 ? INTEL_ENGINE_CLASS_COMPUTE :
                        INTEL_ENGINE_CLASS_RENDER;

      if (gc_count > 0) {
         /* Only the graphics+compute family advertises perf query support. */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
            .supports_perf = true,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = c_count,
            .engine_class = compute_class,
         };
      }
      if (v_count > 0 && (pdevice->video_decode_enabled || pdevice->video_encode_enabled)) {
         /* HEVC support on Gfx9 is only available on VCS0. So limit the number of video queues
          * to the first VCS engine instance.
          *
          * We should be able to query HEVC support from the kernel using the engine query uAPI,
          * but this appears to be broken :
          *    https://gitlab.freedesktop.org/drm/intel/-/issues/8832
          *
          * When this bug is fixed we should be able to check HEVC support to determine the
          * correct number of queues.
          */
         /* TODO: enable protected content on video queue */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = (pdevice->video_decode_enabled ? VK_QUEUE_VIDEO_DECODE_BIT_KHR : 0) |
                          (pdevice->video_encode_enabled ? VK_QUEUE_VIDEO_ENCODE_BIT_KHR : 0),
            .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
            .engine_class = INTEL_ENGINE_CLASS_VIDEO,
         };
      }
      if (blit_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_TRANSFER_BIT |
                          protected_flag,
            .queueCount = blit_count,
            .engine_class = INTEL_ENGINE_CLASS_COPY,
         };
      }
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT |
                       sparse_flags,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      /* Redundant with the post-increment above, kept for clarity. */
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}
2379 
2380 static VkResult
anv_physical_device_get_parameters(struct anv_physical_device * device)2381 anv_physical_device_get_parameters(struct anv_physical_device *device)
2382 {
2383    switch (device->info.kmd_type) {
2384    case INTEL_KMD_TYPE_I915:
2385       return anv_i915_physical_device_get_parameters(device);
2386    case INTEL_KMD_TYPE_XE:
2387       return anv_xe_physical_device_get_parameters(device);
2388    default:
2389       unreachable("Missing");
2390       return VK_ERROR_UNKNOWN;
2391    }
2392 }
2393 
2394 VkResult
anv_physical_device_try_create(struct vk_instance * vk_instance,struct _drmDevice * drm_device,struct vk_physical_device ** out)2395 anv_physical_device_try_create(struct vk_instance *vk_instance,
2396                                struct _drmDevice *drm_device,
2397                                struct vk_physical_device **out)
2398 {
2399    struct anv_instance *instance =
2400       container_of(vk_instance, struct anv_instance, vk);
2401 
2402    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
2403        drm_device->bustype != DRM_BUS_PCI ||
2404        drm_device->deviceinfo.pci->vendor_id != 0x8086)
2405       return VK_ERROR_INCOMPATIBLE_DRIVER;
2406 
2407    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
2408    const char *path = drm_device->nodes[DRM_NODE_RENDER];
2409    VkResult result;
2410    int fd;
2411    int master_fd = -1;
2412 
2413    process_intel_debug_variable();
2414 
2415    fd = open(path, O_RDWR | O_CLOEXEC);
2416    if (fd < 0) {
2417       if (errno == ENOMEM) {
2418          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2419                           "Unable to open device %s: out of memory", path);
2420       }
2421       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2422                        "Unable to open device %s: %m", path);
2423    }
2424 
2425    struct intel_device_info devinfo;
2426    if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
2427       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2428       goto fail_fd;
2429    }
2430 
2431    if (devinfo.ver < 9) {
2432       /* Silently fail here, hasvk should pick up this device. */
2433       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2434       goto fail_fd;
2435    } else if (devinfo.probe_forced) {
2436       /* If INTEL_FORCE_PROBE was used, then the user has opted-in for
2437        * unsupported device support. No need to print a warning message.
2438        */
2439    } else if (devinfo.ver > 20) {
2440       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2441                          "Vulkan not yet supported on %s", devinfo.name);
2442       goto fail_fd;
2443    }
2444 
2445    /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
2446     * need to always disable preemption :
2447     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
2448     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
2449     */
2450    if (devinfo.verx10 == 120)
2451       BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);
2452 
2453    if (!devinfo.has_context_isolation) {
2454       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2455                          "Vulkan requires context isolation for %s", devinfo.name);
2456       goto fail_fd;
2457    }
2458 
2459    struct anv_physical_device *device =
2460       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
2461                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2462    if (device == NULL) {
2463       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2464       goto fail_fd;
2465    }
2466 
2467    struct vk_physical_device_dispatch_table dispatch_table;
2468    vk_physical_device_dispatch_table_from_entrypoints(
2469       &dispatch_table, &anv_physical_device_entrypoints, true);
2470    vk_physical_device_dispatch_table_from_entrypoints(
2471       &dispatch_table, &wsi_physical_device_entrypoints, false);
2472 
2473    result = vk_physical_device_init(&device->vk, &instance->vk,
2474                                     NULL, NULL, NULL, /* We set up extensions later */
2475                                     &dispatch_table);
2476    if (result != VK_SUCCESS) {
2477       vk_error(instance, result);
2478       goto fail_alloc;
2479    }
2480    device->instance = instance;
2481 
2482    assert(strlen(path) < ARRAY_SIZE(device->path));
2483    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
2484 
2485    device->info = devinfo;
2486 
2487    device->local_fd = fd;
2488    result = anv_physical_device_get_parameters(device);
2489    if (result != VK_SUCCESS)
2490       goto fail_base;
2491 
2492    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
2493                                               device->info.aperture_bytes;
2494 
2495    if (device->gtt_size < (4ULL << 30 /* GiB */)) {
2496       vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2497                 "GTT size too small: 0x%016"PRIx64, device->gtt_size);
2498       goto fail_base;
2499    }
2500 
2501    /* We currently only have the right bits for instructions in Gen12+. If the
2502     * kernel ever starts supporting that feature on previous generations,
2503     * we'll need to edit genxml prior to enabling here.
2504     */
2505    device->has_protected_contexts = device->info.ver >= 12 &&
2506       intel_gem_supports_protected_context(fd, device->info.kmd_type);
2507 
2508    /* Just pick one; they're all the same */
2509    device->has_astc_ldr =
2510       isl_format_supports_sampling(&device->info,
2511                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
2512    if (!device->has_astc_ldr &&
2513        driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
2514       device->emu_astc_ldr = true;
2515    if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
2516       device->flush_astc_ldr_void_extent_denorms =
2517          device->has_astc_ldr && !device->emu_astc_ldr;
2518    }
2519    device->disable_fcv = device->info.verx10 >= 125 ||
2520                          instance->disable_fcv;
2521 
2522    result = anv_physical_device_init_heaps(device, fd);
2523    if (result != VK_SUCCESS)
2524       goto fail_base;
2525 
2526    if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
2527       device->has_exec_timeline = false;
2528 
2529    device->has_cooperative_matrix =
2530       device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;
2531 
2532    unsigned st_idx = 0;
2533 
2534    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
2535    if (!device->has_exec_timeline)
2536       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
2537    device->sync_types[st_idx++] = &device->sync_syncobj_type;
2538 
2539    /* anv_bo_sync_type is only supported with i915 for now  */
2540    if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
2541       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
2542          device->sync_types[st_idx++] = &anv_bo_sync_type;
2543 
2544       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
2545          device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
2546          device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
2547       }
2548    } else {
2549       assert(vk_sync_type_is_drm_syncobj(&device->sync_syncobj_type));
2550       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
2551       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
2552    }
2553 
2554    device->sync_types[st_idx++] = NULL;
2555    assert(st_idx <= ARRAY_SIZE(device->sync_types));
2556    device->vk.supported_sync_types = device->sync_types;
2557 
2558    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
2559 
2560    device->always_use_bindless =
2561       debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
2562 
2563    device->use_call_secondary =
2564       !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
2565 
2566    device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
2567    device->video_encode_enabled = debug_get_bool_option("ANV_VIDEO_ENCODE", false);
2568 
2569    device->uses_ex_bso = device->info.verx10 >= 125;
2570 
2571    /* For now always use indirect descriptors. We'll update this
2572     * to !uses_ex_bso when all the infrastructure is built up.
2573     */
2574    device->indirect_descriptors =
2575       !device->uses_ex_bso ||
2576       driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
2577 
2578    device->alloc_aux_tt_mem =
2579       device->info.has_aux_map && device->info.verx10 >= 125;
2580    /* Check if we can read the GPU timestamp register from the CPU */
2581    uint64_t u64_ignore;
2582    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
2583                                                                device->info.kmd_type,
2584                                                                &u64_ignore);
2585 
2586    device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;
2587 
2588    /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
2589    if (debug_get_bool_option("ANV_SPARSE", true)) {
2590       if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
2591          if (debug_get_bool_option("ANV_SPARSE_USE_TRTT", false))
2592             device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2593          else
2594             device->sparse_type = ANV_SPARSE_TYPE_VM_BIND;
2595       } else {
2596          if (device->info.ver >= 12 && device->has_exec_timeline)
2597             device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2598       }
2599    }
2600    if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) {
2601       if (instance->has_fake_sparse)
2602          device->sparse_type = ANV_SPARSE_TYPE_FAKE;
2603    }
2604 
2605    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
2606       driQueryOptionb(&instance->dri_options, "always_flush_cache");
2607 
2608    device->compiler = brw_compiler_create(NULL, &device->info);
2609    if (device->compiler == NULL) {
2610       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2611       goto fail_base;
2612    }
2613    device->compiler->shader_debug_log = compiler_debug_log;
2614    device->compiler->shader_perf_log = compiler_perf_log;
2615    device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
2616    device->compiler->use_bindless_sampler_offset = false;
2617    device->compiler->spilling_rate =
2618       driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
2619 
2620    isl_device_init(&device->isl_dev, &device->info);
2621    device->isl_dev.buffer_length_in_aux_addr = !intel_needs_workaround(device->isl_dev.info, 14019708328);
2622    device->isl_dev.sampler_route_to_lsc =
2623       driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc");
2624 
2625    result = anv_physical_device_init_uuids(device);
2626    if (result != VK_SUCCESS)
2627       goto fail_compiler;
2628 
2629    anv_physical_device_init_va_ranges(device);
2630 
2631    anv_physical_device_init_disk_cache(device);
2632 
2633    if (instance->vk.enabled_extensions.KHR_display) {
2634       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
2635       if (master_fd >= 0) {
2636          /* fail if we don't have permission to even render on this device */
2637          if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
2638             close(master_fd);
2639             master_fd = -1;
2640          }
2641       }
2642    }
2643    device->master_fd = master_fd;
2644 
2645    device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
2646    intel_common_update_device_info(fd, &device->info);
2647 
2648    anv_physical_device_init_queue_families(device);
2649 
2650    anv_physical_device_init_perf(device, fd);
2651 
2652    /* Gather major/minor before WSI. */
2653    struct stat st;
2654 
2655    if (stat(primary_path, &st) == 0) {
2656       device->has_master = true;
2657       device->master_major = major(st.st_rdev);
2658       device->master_minor = minor(st.st_rdev);
2659    } else {
2660       device->has_master = false;
2661       device->master_major = 0;
2662       device->master_minor = 0;
2663    }
2664 
2665    if (stat(path, &st) == 0) {
2666       device->has_local = true;
2667       device->local_major = major(st.st_rdev);
2668       device->local_minor = minor(st.st_rdev);
2669    } else {
2670       device->has_local = false;
2671       device->local_major = 0;
2672       device->local_minor = 0;
2673    }
2674 
2675    device->has_small_bar = anv_physical_device_has_vram(device) &&
2676                            device->vram_non_mappable.size != 0;
2677 
2678    get_device_extensions(device, &device->vk.supported_extensions);
2679    get_features(device, &device->vk.supported_features);
2680    get_properties(device, &device->vk.properties);
2681 
2682    result = anv_init_wsi(device);
2683    if (result != VK_SUCCESS)
2684       goto fail_perf;
2685 
2686    anv_measure_device_init(device);
2687 
2688    anv_genX(&device->info, init_physical_device_state)(device);
2689 
2690    *out = &device->vk;
2691 
2692    return VK_SUCCESS;
2693 
2694 fail_perf:
2695    intel_perf_free(device->perf);
2696    free(device->engine_info);
2697    anv_physical_device_free_disk_cache(device);
2698 fail_compiler:
2699    ralloc_free(device->compiler);
2700 fail_base:
2701    vk_physical_device_finish(&device->vk);
2702 fail_alloc:
2703    vk_free(&instance->vk.alloc, device);
2704 fail_fd:
2705    close(fd);
2706    if (master_fd != -1)
2707       close(master_fd);
2708    return result;
2709 }
2710 
void
anv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct anv_physical_device *device =
      container_of(vk_device, struct anv_physical_device, vk);

   /* Tear down in roughly the reverse order of creation: WSI and measure
    * state first, then the engine info, disk cache, compiler and perf
    * allocations, and finally the kernel driver FDs and the base object.
    */
   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   intel_perf_free(device->perf);
   close(device->local_fd);
   /* master_fd is negative when no DRM master node was opened. */
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}
2729 
2730 static const VkQueueFamilyProperties
get_anv_queue_family_properties_template(const struct anv_physical_device * device)2731 get_anv_queue_family_properties_template(const struct anv_physical_device *device)
2732 {
2733 
2734    /*
2735     * For Xe2+:
2736     * Bspec 60411: Timestamp register can hold 64-bit value
2737     *
2738     * Platforms < Xe2:
2739     * Bpsec 46111: Timestamp register can hold only 36-bit
2740     *              value
2741     */
2742    const VkQueueFamilyProperties anv_queue_family_properties_template =
2743    {
2744       .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
2745       .minImageTransferGranularity = { 1, 1, 1 },
2746    };
2747 
2748    return anv_queue_family_properties_template;
2749 }
2750 
2751 static VkQueueFamilyProperties
anv_device_physical_get_queue_properties(const struct anv_physical_device * device,uint32_t family_index)2752 anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
2753                                          uint32_t family_index)
2754 {
2755    const struct anv_queue_family *family = &device->queue.families[family_index];
2756    VkQueueFamilyProperties properties =
2757       get_anv_queue_family_properties_template(device);
2758 
2759    properties.queueFlags = family->queueFlags;
2760    properties.queueCount = family->queueCount;
2761    return properties;
2762 }
2763 
void anv_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2*                   pQueueFamilyProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
      struct anv_queue_family *queue_family = &pdevice->queue.families[i];
      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties =
            anv_device_physical_get_queue_properties(pdevice, i);

         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
               VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
                  (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;

               /* Deliberately sorted low to high */
               const VkQueueGlobalPriorityKHR all_priorities[] = {
                  VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
               };

               /* Advertise every priority up to and including the maximum
                * the kernel supports.  Use a distinct loop variable so the
                * family index `i` above is not shadowed.
                */
               uint32_t count = 0;
               for (unsigned j = 0; j < ARRAY_SIZE(all_priorities); j++) {
                  if (all_priorities[j] > pdevice->max_context_priority)
                     break;

                  properties->priorities[count++] = all_priorities[j];
               }
               properties->priorityCount = count;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
               VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
                  (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
               prop->queryResultStatusSupport = VK_TRUE;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
               VkQueueFamilyVideoPropertiesKHR *prop =
                  (VkQueueFamilyVideoPropertiesKHR *)ext;

               /* Always initialize the output: a family with only the
                * encode flag (or neither flag) set would otherwise OR into,
                * or leave behind, an uninitialized caller-provided field.
                */
               prop->videoCodecOperations = 0;

               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
                  if (pdevice->info.ver >= 12)
                     prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
               }

               if (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
               }
               break;
            }
            default:
               vk_debug_ignored_stype(ext->sType);
            }
         }
      }
   }
}
2832 
void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   /* Copy the driver's memory type and heap tables into the Vulkan
    * output structure.
    */
   const uint32_t type_count = physical_device->memory.type_count;
   const uint32_t heap_count = physical_device->memory.heap_count;

   pMemoryProperties->memoryTypeCount = type_count;
   for (uint32_t t = 0; t < type_count; t++) {
      pMemoryProperties->memoryTypes[t].propertyFlags =
         physical_device->memory.types[t].propertyFlags;
      pMemoryProperties->memoryTypes[t].heapIndex =
         physical_device->memory.types[t].heapIndex;
   }

   pMemoryProperties->memoryHeapCount = heap_count;
   for (uint32_t h = 0; h < heap_count; h++) {
      pMemoryProperties->memoryHeaps[h].size =
         physical_device->memory.heaps[h].size;
      pMemoryProperties->memoryHeaps[h].flags =
         physical_device->memory.heaps[h].flags;
   }
}
2855 
/* Fill in VkPhysicalDeviceMemoryBudgetPropertiesEXT: per-heap usage and a
 * budget derived from the currently available system/VRAM memory, split
 * proportionally between heaps of the same memory class.
 */
static void
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   if (!device->vk.supported_extensions.EXT_memory_budget)
      return;

   /* Refresh the cached availability numbers (sys/vram_*.available) before
    * computing budgets.
    */
   anv_update_meminfo(device, device->local_fd);

   /* Total heap size per memory class, used below to give each heap a
    * proportional share of the available memory of its class.
    */
   VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
   for (size_t i = 0; i < device->memory.heap_count; i++) {
      if (device->memory.heaps[i].is_local_mem) {
         total_vram_heaps_size += device->memory.heaps[i].size;
      } else {
         total_sys_heaps_size += device->memory.heaps[i].size;
      }
   }

   for (size_t i = 0; i < device->memory.heap_count; i++) {
      VkDeviceSize heap_size = device->memory.heaps[i].size;
      VkDeviceSize heap_used = device->memory.heaps[i].used;
      VkDeviceSize heap_budget, total_heaps_size;
      uint64_t mem_available = 0;

      if (device->memory.heaps[i].is_local_mem) {
         total_heaps_size = total_vram_heaps_size;
         /* When a non-mappable VRAM region exists, heap 0 is the
          * non-mappable one; otherwise all VRAM availability comes from the
          * mappable region.
          */
         if (device->vram_non_mappable.size > 0 && i == 0) {
            mem_available = device->vram_non_mappable.available;
         } else {
            mem_available = device->vram_mappable.available;
         }
      } else {
         total_heaps_size = total_sys_heaps_size;
         mem_available = MIN2(device->sys.available, total_heaps_size);
      }

      /* This heap's share of the class' available memory, proportional to
       * its share of the class' total size.
       */
      double heap_proportion = (double) heap_size / total_heaps_size;
      VkDeviceSize available_prop = mem_available * heap_proportion;

      /*
       * Let's not incite the app to starve the system: report at most 90% of
       * the available heap memory.
       */
      uint64_t heap_available = available_prop * 9 / 10;
      heap_budget = MIN2(heap_size, heap_used + heap_available);

      /*
       * Round down to the nearest MB
       */
      heap_budget &= ~((1ull << 20) - 1);

      /*
       * The heapBudget value must be non-zero for array elements less than
       * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
       * value must be less than or equal to VkMemoryHeap::size for each heap.
       */
      assert(0 < heap_budget && heap_budget <= heap_size);

      memoryBudget->heapUsage[i] = heap_used;
      memoryBudget->heapBudget[i] = heap_budget;
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
    */
   for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
2928 
void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   /* Fill in the core Vulkan 1.0 memory properties first, then walk any
    * extension structures chained off pNext.
    */
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      if (ext->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT) {
         anv_get_memory_budget(physicalDevice,
                               (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext);
      } else {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}
2947 
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   /* Report a 1x1 sample-location grid when the requested sample count is
    * supported by the device, and 0x0 otherwise.
    */
   const bool supported =
      (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) != 0;
   const uint32_t dim = supported ? 1 : 0;

   pMultisampleProperties->maxSampleLocationGridSize =
      (VkExtent2D) { .width = dim, .height = dim };

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}
2971 
VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

/* Append one shading-rate entry (supported sample counts + fragment size)
 * to the output array.
 */
#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   /* BSpec 47003: There are a number of restrictions on the sample count
    * based off the coarse pixel size.  Indexed by coarse pixel area
    * (width * height).
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };

   /* Enumerate coarse pixel sizes from 4x4 down to 1x1 (largest first). */
   for (uint32_t x = 4; x >= 1; x /= 2) {
       for (uint32_t y = 4; y >= 1; y /= 2) {
          if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
             /* BSpec 47003:
              *   "CPsize 1x4 and 4x1 are not supported"
              */
             if ((x == 1 && y == 4) || (x == 4 && y == 1))
                continue;

             /* For size {1, 1}, the sample count must be ~0
              *
              * 4x2 is also a special case.
              */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else if (x == 4 && y == 2)
                append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
             else
                append_rate(cp_size_sample_limits[x * y], x, y);
          } else {
             /* For size {1, 1}, the sample count must be ~0 */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else
                append_rate(sample_counts, x, y);
          }
       }
   }

#undef append_rate

   return vk_outarray_status(&out);
}
3040 
3041 static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)3042 convert_component_type(enum intel_cooperative_matrix_component_type t)
3043 {
3044    switch (t) {
3045    case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
3046    case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
3047    case INTEL_CMAT_SINT32:  return VK_COMPONENT_TYPE_SINT32_KHR;
3048    case INTEL_CMAT_SINT8:   return VK_COMPONENT_TYPE_SINT8_KHR;
3049    case INTEL_CMAT_UINT32:  return VK_COMPONENT_TYPE_UINT32_KHR;
3050    case INTEL_CMAT_UINT8:   return VK_COMPONENT_TYPE_UINT8_KHR;
3051    }
3052    unreachable("invalid cooperative matrix component type in configuration");
3053 }
3054 
3055 static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)3056 convert_scope(enum intel_cmat_scope scope)
3057 {
3058    switch (scope) {
3059    case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
3060    default:
3061       unreachable("invalid cooperative matrix scope in configuration");
3062    }
3063 }
3064 
anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)3065 VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
3066    VkPhysicalDevice                            physicalDevice,
3067    uint32_t*                                   pPropertyCount,
3068    VkCooperativeMatrixPropertiesKHR*           pProperties)
3069 {
3070    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
3071    const struct intel_device_info *devinfo = &pdevice->info;
3072 
3073    assert(anv_has_cooperative_matrix(pdevice));
3074 
3075    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
3076 
3077    for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
3078       const struct intel_cooperative_matrix_configuration *cfg =
3079          &devinfo->cooperative_matrix_configurations[i];
3080 
3081       if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
3082          break;
3083 
3084       vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
3085          prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
3086 
3087          prop->MSize = cfg->m;
3088          prop->NSize = cfg->n;
3089          prop->KSize = cfg->k;
3090 
3091          prop->AType      = convert_component_type(cfg->a);
3092          prop->BType      = convert_component_type(cfg->b);
3093          prop->CType      = convert_component_type(cfg->c);
3094          prop->ResultType = convert_component_type(cfg->result);
3095 
3096          prop->saturatingAccumulation = VK_FALSE;
3097          prop->scope = convert_scope(cfg->scope);
3098       }
3099 
3100       /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
3101        *
3102        *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
3103        *    cooperative matrix operand must be present if and only if
3104        *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
3105        *    VK_TRUE.
3106        *
3107        * As a result, we have to advertise integer configs both with and
3108        * without this flag set.
3109        *
3110        * The DPAS instruction does not support the .sat modifier, so only
3111        * advertise the configurations when the DPAS would be lowered.
3112        *
3113        * FINISHME: It should be possible to do better than full lowering on
3114        * platforms that support DPAS. Emit a DPAS with a NULL accumulator
3115        * argument, then perform the correct sequence of saturating add
3116        * instructions.
3117        */
3118       if (cfg->a != INTEL_CMAT_FLOAT16 &&
3119           (devinfo->verx10 < 125 || debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
3120          vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
3121             prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
3122 
3123             prop->MSize = cfg->m;
3124             prop->NSize = cfg->n;
3125             prop->KSize = cfg->k;
3126 
3127             prop->AType      = convert_component_type(cfg->a);
3128             prop->BType      = convert_component_type(cfg->b);
3129             prop->CType      = convert_component_type(cfg->c);
3130             prop->ResultType = convert_component_type(cfg->result);
3131 
3132             prop->saturatingAccumulation = VK_TRUE;
3133             prop->scope = convert_scope(cfg->scope);
3134          }
3135       }
3136    }
3137 
3138    return vk_outarray_status(&out);
3139 }
3140