• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright © 2024 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #include "anv_private.h"
6 #include "anv_api_version.h"
7 #include "anv_measure.h"
8 
9 #include "i915/anv_device.h"
10 #include "xe/anv_device.h"
11 
12 #include "common/intel_common.h"
13 #include "common/intel_uuid.h"
14 
15 #include "perf/intel_perf.h"
16 
17 #include "git_sha1.h"
18 
19 #include "util/disk_cache.h"
20 #include "util/mesa-sha1.h"
21 
22 #include <xf86drm.h>
23 #include <fcntl.h>
24 #ifdef MAJOR_IN_SYSMACROS
25 #include <sys/sysmacros.h>
26 #endif
27 
/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
31 #define MAX_DEBUG_MESSAGE_LENGTH    4096
32 
33 static void
compiler_debug_log(void * data,UNUSED unsigned * id,const char * fmt,...)34 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
35 {
36    char str[MAX_DEBUG_MESSAGE_LENGTH];
37    struct anv_device *device = (struct anv_device *)data;
38    UNUSED struct anv_instance *instance = device->physical->instance;
39 
40    va_list args;
41    va_start(args, fmt);
42    (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
43    va_end(args);
44 
45    //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
46 }
47 
48 static void
compiler_perf_log(UNUSED void * data,UNUSED unsigned * id,const char * fmt,...)49 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
50 {
51    va_list args;
52    va_start(args, fmt);
53 
54    if (INTEL_DEBUG(DEBUG_PERF))
55       mesa_logd_v(fmt, args);
56 
57    va_end(args);
58 }
59 
/* Advertised per-device descriptor-count limits, derived from the size of
 * the descriptor heap divided by the per-descriptor size (see
 * get_device_descriptor_limits()).
 */
struct anv_descriptor_limits {
   uint32_t max_ubos;      /* max uniform buffer descriptors */
   uint32_t max_ssbos;     /* max storage buffer descriptors */
   uint32_t max_samplers;  /* max sampler descriptors */
   uint32_t max_images;    /* max image descriptors */
   uint32_t max_resources; /* minimum of the four limits above */
};
67 
68 static void
get_device_descriptor_limits(const struct anv_physical_device * device,struct anv_descriptor_limits * limits)69 get_device_descriptor_limits(const struct anv_physical_device *device,
70                              struct anv_descriptor_limits *limits)
71 {
72    memset(limits, 0, sizeof(*limits));
73 
74    /* It's a bit hard to exactly map our implementation to the limits
75     * described by Vulkan. The bindless surface handle in the extended message
76     * descriptors is 20 bits on <= Gfx12.0, 26 bits on >= Gfx12.5 and it's an
77     * index into the table of RENDER_SURFACE_STATE structs that starts at
78     * bindless surface base address. On <= Gfx12.0, this means that we can
79     * have at must 1M surface states allocated at any given time. Since most
80     * image views take two descriptors, this means we have a limit of about
81     * 500K image views. On >= Gfx12.5, we do not need 2 surfaces per
82     * descriptors and we have 33M+ descriptors (we have a 2GB limit, due to
83     * overlapping heaps for workarounds, but HW can do 4GB).
84     *
85     * However, on <= Gfx12.0, since we allocate surface states at
86     * vkCreateImageView time, this means our limit is actually something on
87     * the order of 500K image views allocated at any time. The actual limit
88     * describe by Vulkan, on the other hand, is a limit of how many you can
89     * have in a descriptor set. Assuming anyone using 1M descriptors will be
90     * using the same image view twice a bunch of times (or a bunch of null
91     * descriptors), we can safely advertise a larger limit here.
92     *
93     * Here we use the size of the heap in which the descriptors are stored and
94     * divide by the size of the descriptor to get a limit value.
95     */
96    const uint64_t descriptor_heap_size =
97       device->indirect_descriptors ?
98       device->va.indirect_descriptor_pool.size :
99       device->va.bindless_surface_state_pool.size;;
100 
101    const uint32_t buffer_descriptor_size =
102       device->indirect_descriptors ?
103       sizeof(struct anv_address_range_descriptor) :
104       ANV_SURFACE_STATE_SIZE;
105    const uint32_t image_descriptor_size =
106       device->indirect_descriptors ?
107       sizeof(struct anv_address_range_descriptor) :
108       ANV_SURFACE_STATE_SIZE;
109    const uint32_t sampler_descriptor_size =
110       device->indirect_descriptors ?
111       sizeof(struct anv_sampled_image_descriptor) :
112       ANV_SAMPLER_STATE_SIZE;
113 
114    limits->max_ubos = descriptor_heap_size / buffer_descriptor_size;
115    limits->max_ssbos = descriptor_heap_size / buffer_descriptor_size;
116    limits->max_images = descriptor_heap_size / image_descriptor_size;
117    limits->max_samplers = descriptor_heap_size / sampler_descriptor_size;
118 
119    limits->max_resources = UINT32_MAX;
120    limits->max_resources = MIN2(limits->max_resources, limits->max_ubos);
121    limits->max_resources = MIN2(limits->max_resources, limits->max_ssbos);
122    limits->max_resources = MIN2(limits->max_resources, limits->max_images);
123    limits->max_resources = MIN2(limits->max_resources, limits->max_samplers);
124 }
125 
/* Fill *ext with the set of device-level Vulkan extensions this physical
 * device advertises.  Most entries are unconditionally true; the remainder
 * are gated on hardware generation (device->info.ver / verx10), kernel or
 * sync capabilities, build-time options (ANV_USE_WSI_PLATFORM, VIDEO_CODEC_*,
 * DETECT_OS_ANDROID), or driconf/debug opt-ins.
 */
static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   /* External fence support requires the syncobj CPU-wait capability. */
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   /* Ray tracing needs both build-time support and HW capability. */
   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = true,
      .KHR_16bit_storage                     = !device->instance->no_16bit,
      .KHR_acceleration_structure            = rt_enabled,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = true,
      .KHR_calibrated_timestamps             = device->has_reg_timestamp,
      .KHR_compute_shader_derivatives        = true,
      .KHR_cooperative_matrix                = anv_has_cooperative_matrix(device),
      .KHR_copy_commands2                    = true,
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_dynamic_rendering                 = true,
      .KHR_dynamic_rendering_local_read      = true,
      .KHR_external_fence                    = has_syncobj_wait,
      .KHR_external_fence_fd                 = has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_fragment_shading_rate             = device->info.ver >= 11,
      .KHR_get_memory_requirements2          = true,
      .KHR_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_index_type_uint8                  = true,
      .KHR_line_rasterization                = true,
      .KHR_load_store_op_none                = true,
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_maintenance5                      = true,
      .KHR_maintenance6                      = true,
      .KHR_maintenance7                      = true,
      .KHR_map_memory2                       = true,
      .KHR_multiview                         = true,
      /* Perf queries need perf support plus either preemption hold or the
       * no-OA-config debug override, and secondary-call support.
       */
      .KHR_performance_query =
         device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      .KHR_pipeline_library                  = true,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor                   = true,
      .KHR_ray_query                         = rt_enabled,
      .KHR_ray_tracing_maintenance1          = rt_enabled,
      .KHR_ray_tracing_pipeline              = rt_enabled,
      .KHR_ray_tracing_position_fetch        = rt_enabled,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_atomic_int64               = true,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_expect_assume              = true,
      .KHR_shader_float16_int8               = !device->instance->no_16bit,
      .KHR_shader_float_controls             = true,
      .KHR_shader_float_controls2            = true,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_maximal_reconvergence      = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_quad_control               = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types    = true,
      .KHR_shader_subgroup_rotate            = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vertex_attribute_divisor          = true,
      /* Video extensions: gated on per-codec build support and the
       * runtime decode/encode enable flags.
       */
      .KHR_video_queue                       = device->video_decode_enabled || device->video_encode_enabled,
      .KHR_video_decode_queue                = device->video_decode_enabled,
      .KHR_video_decode_h264                 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
      .KHR_video_decode_h265                 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
      .KHR_video_decode_av1                  = device->info.ver >= 12 && VIDEO_CODEC_AV1DEC && device->video_decode_enabled,
      .KHR_video_encode_queue                = device->video_encode_enabled,
      .KHR_video_encode_h264                 = VIDEO_CODEC_H264ENC && device->video_encode_enabled,
      .KHR_video_encode_h265                 = device->info.ver >= 12 && VIDEO_CODEC_H265ENC && device->video_encode_enabled,
      .KHR_video_maintenance1                = (device->video_decode_enabled &&
                                               (VIDEO_CODEC_H264DEC || VIDEO_CODEC_H265DEC)) ||
                                               (device->video_encode_enabled &&
                                               (VIDEO_CODEC_H264ENC || VIDEO_CODEC_H265ENC)),
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_attachment_feedback_loop_layout   = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_border_color_swizzle              = true,
      .EXT_buffer_device_address             = true,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = true,
      .EXT_conservative_rasterization        = true,
      .EXT_custom_border_color               = true,
      .EXT_depth_bias_control                = true,
      .EXT_depth_clamp_control               = true,
      .EXT_depth_clamp_zero_one              = true,
      .EXT_depth_clip_control                = true,
      .EXT_depth_clip_enable                 = true,
      .EXT_depth_range_unrestricted          = device->info.ver >= 20,
      .EXT_descriptor_buffer                 = true,
      .EXT_descriptor_indexing               = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_extended_dynamic_state3           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_fragment_shader_interlock         = true,
      .EXT_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query             = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_graphics_pipeline_library         = !debug_get_bool_option("ANV_NO_GPL", false),
      .EXT_host_image_copy                   = !device->emu_astc_ldr,
      .EXT_host_query_reset                  = true,
      .EXT_image_2d_view_of_3d               = true,
      /* Because of Xe2 PAT selected compression and the Vulkan spec
       * requirement to always return the same memory types for Images with
       * same properties we can't support EXT_image_compression_control on Xe2+
       */
      .EXT_image_compression_control         = device->instance->compression_control_enabled &&
                                               device->info.ver < 20,
      .EXT_image_drm_format_modifier         = true,
      .EXT_image_robustness                  = true,
      .EXT_image_sliced_view_of_3d           = true,
      .EXT_image_view_min_lod                = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_legacy_dithering                  = true,
      .EXT_legacy_vertex_attributes          = true,
      .EXT_line_rasterization                = true,
      .EXT_load_store_op_none                = true,
      .EXT_map_memory_placed                 = device->info.has_mmap_offset,
      /* Enable the extension only if we have support on both the local &
       * system memory
       */
      .EXT_memory_budget                     = (!device->info.has_local_mem ||
                                                device->vram_mappable.available > 0) &&
                                               device->sys.available,
      .EXT_mesh_shader                       = device->info.has_mesh_shading,
      .EXT_multi_draw                        = true,
      .EXT_mutable_descriptor_type           = true,
      .EXT_nested_command_buffer             = true,
      .EXT_non_seamless_cube_map             = true,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_pipeline_library_group_handles    = rt_enabled,
      .EXT_pipeline_protected_access         = device->has_protected_contexts,
      .EXT_pipeline_robustness               = true,
      .EXT_post_depth_coverage               = true,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_primitives_generated_query        = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_sampler_filter_minmax             = true,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_atomic_float2              = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier          = true,
      .EXT_shader_replicated_composites      = true,
      .EXT_shader_stencil_export             = true,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1            = true,
#endif
      .EXT_texel_buffer_alignment            = true,
      .EXT_tooling_info                      = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_vertex_input_dynamic_state        = true,
      .EXT_ycbcr_2plane_444_formats          = true,
      .EXT_ycbcr_image_arrays                = true,
      .AMD_buffer_marker                     = true,
      .AMD_texture_gather_bias_lod           = device->info.ver >= 20,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      .INTEL_performance_query               = device->perf &&
                                               intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2       = true,
      .MESA_image_alignment_control          = true,
      .NV_compute_shader_derivatives         = true,
      .VALVE_mutable_descriptor_type         = true,
   };
}
371 
372 static void
get_features(const struct anv_physical_device * pdevice,struct vk_features * features)373 get_features(const struct anv_physical_device *pdevice,
374              struct vk_features *features)
375 {
376    struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
377 
378    const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
379 
380    const bool mesh_shader =
381       pdevice->vk.supported_extensions.EXT_mesh_shader;
382 
383    const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
384 
385    *features = (struct vk_features) {
386       /* Vulkan 1.0 */
387       .robustBufferAccess                       = true,
388       .fullDrawIndexUint32                      = true,
389       .imageCubeArray                           = true,
390       .independentBlend                         = true,
391       .geometryShader                           = true,
392       .tessellationShader                       = true,
393       .sampleRateShading                        = true,
394       .dualSrcBlend                             = true,
395       .logicOp                                  = true,
396       .multiDrawIndirect                        = true,
397       .drawIndirectFirstInstance                = true,
398       .depthClamp                               = true,
399       .depthBiasClamp                           = true,
400       .fillModeNonSolid                         = true,
401       .depthBounds                              = pdevice->info.ver >= 12,
402       .wideLines                                = true,
403       .largePoints                              = true,
404       .alphaToOne                               = true,
405       .multiViewport                            = true,
406       .samplerAnisotropy                        = true,
407       .textureCompressionETC2                   = true,
408       .textureCompressionASTC_LDR               = pdevice->has_astc_ldr ||
409                                                   pdevice->emu_astc_ldr,
410       .textureCompressionBC                     = true,
411       .occlusionQueryPrecise                    = true,
412       .pipelineStatisticsQuery                  = true,
413       .vertexPipelineStoresAndAtomics           = true,
414       .fragmentStoresAndAtomics                 = true,
415       .shaderTessellationAndGeometryPointSize   = true,
416       .shaderImageGatherExtended                = true,
417       .shaderStorageImageExtendedFormats        = true,
418       .shaderStorageImageMultisample            = false,
419       /* Gfx12.5 has all the required format supported in HW for typed
420        * read/writes
421        */
422       .shaderStorageImageReadWithoutFormat      = pdevice->info.verx10 >= 125,
423       .shaderStorageImageWriteWithoutFormat     = true,
424       .shaderUniformBufferArrayDynamicIndexing  = true,
425       .shaderSampledImageArrayDynamicIndexing   = true,
426       .shaderStorageBufferArrayDynamicIndexing  = true,
427       .shaderStorageImageArrayDynamicIndexing   = true,
428       .shaderClipDistance                       = true,
429       .shaderCullDistance                       = true,
430       .shaderFloat64                            = pdevice->info.has_64bit_float ||
431                                                   pdevice->instance->fp64_workaround_enabled,
432       .shaderInt64                              = true,
433       .shaderInt16                              = true,
434       .shaderResourceMinLod                     = true,
435       .shaderResourceResidency                  = has_sparse_or_fake,
436       .sparseBinding                            = has_sparse_or_fake,
437       .sparseResidencyAliased                   = has_sparse_or_fake,
438       .sparseResidencyBuffer                    = has_sparse_or_fake,
439       .sparseResidencyImage2D                   = has_sparse_or_fake,
440       .sparseResidencyImage3D                   = has_sparse_or_fake,
441       .sparseResidency2Samples                  = has_sparse_or_fake,
442       .sparseResidency4Samples                  = has_sparse_or_fake,
443       .sparseResidency8Samples                  = has_sparse_or_fake &&
444                                                   pdevice->info.verx10 != 125,
445       .sparseResidency16Samples                 = has_sparse_or_fake &&
446                                                   pdevice->info.verx10 != 125,
447       .variableMultisampleRate                  = true,
448       .inheritedQueries                         = true,
449 
450       /* Vulkan 1.1 */
451       .storageBuffer16BitAccess            = !pdevice->instance->no_16bit,
452       .uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit,
453       .storagePushConstant16               = true,
454       .storageInputOutput16                = false,
455       .multiview                           = true,
456       .multiviewGeometryShader             = true,
457       .multiviewTessellationShader         = true,
458       .variablePointersStorageBuffer       = true,
459       .variablePointers                    = true,
460       .protectedMemory                     = pdevice->has_protected_contexts,
461       .samplerYcbcrConversion              = true,
462       .shaderDrawParameters                = true,
463 
464       /* Vulkan 1.2 */
465       .samplerMirrorClampToEdge            = true,
466       .drawIndirectCount                   = true,
467       .storageBuffer8BitAccess             = true,
468       .uniformAndStorageBuffer8BitAccess   = true,
469       .storagePushConstant8                = true,
470       .shaderBufferInt64Atomics            = true,
471       .shaderSharedInt64Atomics            = false,
472       .shaderFloat16                       = !pdevice->instance->no_16bit,
473       .shaderInt8                          = !pdevice->instance->no_16bit,
474 
475       .descriptorIndexing                                 = true,
476       .shaderInputAttachmentArrayDynamicIndexing          = false,
477       .shaderUniformTexelBufferArrayDynamicIndexing       = true,
478       .shaderStorageTexelBufferArrayDynamicIndexing       = true,
479       .shaderUniformBufferArrayNonUniformIndexing         = true,
480       .shaderSampledImageArrayNonUniformIndexing          = true,
481       .shaderStorageBufferArrayNonUniformIndexing         = true,
482       .shaderStorageImageArrayNonUniformIndexing          = true,
483       .shaderInputAttachmentArrayNonUniformIndexing       = false,
484       .shaderUniformTexelBufferArrayNonUniformIndexing    = true,
485       .shaderStorageTexelBufferArrayNonUniformIndexing    = true,
486       .descriptorBindingUniformBufferUpdateAfterBind      = true,
487       .descriptorBindingSampledImageUpdateAfterBind       = true,
488       .descriptorBindingStorageImageUpdateAfterBind       = true,
489       .descriptorBindingStorageBufferUpdateAfterBind      = true,
490       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
491       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
492       .descriptorBindingUpdateUnusedWhilePending          = true,
493       .descriptorBindingPartiallyBound                    = true,
494       .descriptorBindingVariableDescriptorCount           = true,
495       .runtimeDescriptorArray                             = true,
496 
497       .samplerFilterMinmax                 = true,
498       .scalarBlockLayout                   = true,
499       .imagelessFramebuffer                = true,
500       .uniformBufferStandardLayout         = true,
501       .shaderSubgroupExtendedTypes         = true,
502       .separateDepthStencilLayouts         = true,
503       .hostQueryReset                      = true,
504       .timelineSemaphore                   = true,
505       .bufferDeviceAddress                 = true,
506       .bufferDeviceAddressCaptureReplay    = true,
507       .bufferDeviceAddressMultiDevice      = false,
508       .vulkanMemoryModel                   = true,
509       .vulkanMemoryModelDeviceScope        = true,
510       .vulkanMemoryModelAvailabilityVisibilityChains = true,
511       .shaderOutputViewportIndex           = true,
512       .shaderOutputLayer                   = true,
513       .subgroupBroadcastDynamicId          = true,
514 
515       /* Vulkan 1.3 */
516       .robustImageAccess = true,
517       .inlineUniformBlock = true,
518       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
519       .pipelineCreationCacheControl = true,
520       .privateData = true,
521       .shaderDemoteToHelperInvocation = true,
522       .shaderTerminateInvocation = true,
523       .subgroupSizeControl = true,
524       .computeFullSubgroups = true,
525       .synchronization2 = true,
526       .textureCompressionASTC_HDR = false,
527       .shaderZeroInitializeWorkgroupMemory = true,
528       .dynamicRendering = true,
529       .shaderIntegerDotProduct = true,
530       .maintenance4 = true,
531 
532       /* Vulkan 1.4 */
533       .pushDescriptor = true,
534 
535       /* VK_EXT_4444_formats */
536       .formatA4R4G4B4 = true,
537       .formatA4B4G4R4 = false,
538 
539       /* VK_KHR_acceleration_structure */
540       .accelerationStructure = rt_enabled,
541       .accelerationStructureCaptureReplay = false, /* TODO */
542       .accelerationStructureIndirectBuild = false, /* TODO */
543       .accelerationStructureHostCommands = false,
544       .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,
545 
546       /* VK_EXT_border_color_swizzle */
547       .borderColorSwizzle = true,
548       .borderColorSwizzleFromImage = true,
549 
550       /* VK_EXT_color_write_enable */
551       .colorWriteEnable = true,
552 
553       /* VK_EXT_image_2d_view_of_3d  */
554       .image2DViewOf3D = true,
555       .sampler2DViewOf3D = true,
556 
557       /* VK_EXT_image_sliced_view_of_3d */
558       .imageSlicedViewOf3D = true,
559 
560       /* VK_KHR_compute_shader_derivatives */
561       .computeDerivativeGroupQuads = true,
562       .computeDerivativeGroupLinear = true,
563 
564       /* VK_EXT_conditional_rendering */
565       .conditionalRendering = true,
566       .inheritedConditionalRendering = true,
567 
568       /* VK_EXT_custom_border_color */
569       .customBorderColors = true,
570       .customBorderColorWithoutFormat = true,
571 
572       /* VK_EXT_depth_clamp_zero_one */
573       .depthClampZeroOne = true,
574 
575       /* VK_EXT_depth_clip_enable */
576       .depthClipEnable = true,
577 
578       /* VK_EXT_fragment_shader_interlock */
579       .fragmentShaderSampleInterlock = true,
580       .fragmentShaderPixelInterlock = true,
581       .fragmentShaderShadingRateInterlock = false,
582 
583       /* VK_EXT_global_priority_query */
584       .globalPriorityQuery = true,
585 
586       /* VK_EXT_graphics_pipeline_library */
587       .graphicsPipelineLibrary =
588          pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,
589 
590       /* VK_KHR_fragment_shading_rate */
591       .pipelineFragmentShadingRate = true,
592       .primitiveFragmentShadingRate =
593          pdevice->info.has_coarse_pixel_primitive_and_cb,
594       .attachmentFragmentShadingRate =
595          pdevice->info.has_coarse_pixel_primitive_and_cb,
596 
597       /* VK_EXT_image_view_min_lod */
598       .minLod = true,
599 
600       /* VK_EXT_index_type_uint8 */
601       .indexTypeUint8 = true,
602 
603       /* VK_EXT_line_rasterization */
604       /* Rectangular lines must use the strict algorithm, which is not
605        * supported for wide lines prior to ICL.  See rasterization_mode for
606        * details and how the HW states are programmed.
607        */
608       .rectangularLines = pdevice->info.ver >= 10,
609       .bresenhamLines = true,
610       /* Support for Smooth lines with MSAA was removed on gfx11.  From the
611        * BSpec section "Multisample ModesState" table for "AA Line Support
612        * Requirements":
613        *
614        *    GFX10:BUG:######## 	NUM_MULTISAMPLES == 1
615        *
616        * Fortunately, this isn't a case most people care about.
617        */
618       .smoothLines = pdevice->info.ver < 10,
619       .stippledRectangularLines = false,
620       .stippledBresenhamLines = true,
621       .stippledSmoothLines = false,
622 
623       /* VK_NV_mesh_shader */
624       .taskShaderNV = false,
625       .meshShaderNV = false,
626 
627       /* VK_EXT_mesh_shader */
628       .taskShader = mesh_shader,
629       .meshShader = mesh_shader,
630       .multiviewMeshShader = false,
631       .primitiveFragmentShadingRateMeshShader = mesh_shader,
632       .meshShaderQueries = mesh_shader,
633 
634       /* VK_EXT_mutable_descriptor_type */
635       .mutableDescriptorType = true,
636 
637       /* VK_KHR_performance_query */
638       .performanceCounterQueryPools = true,
639       /* HW only supports a single configuration at a time. */
640       .performanceCounterMultipleQueryPools = false,
641 
642       /* VK_KHR_pipeline_executable_properties */
643       .pipelineExecutableInfo = true,
644 
645       /* VK_EXT_primitives_generated_query */
646       .primitivesGeneratedQuery = true,
647       .primitivesGeneratedQueryWithRasterizerDiscard = false,
648       .primitivesGeneratedQueryWithNonZeroStreams = false,
649 
650       /* VK_EXT_pipeline_library_group_handles */
651       .pipelineLibraryGroupHandles = true,
652 
653       /* VK_EXT_provoking_vertex */
654       .provokingVertexLast = true,
655       .transformFeedbackPreservesProvokingVertex = true,
656 
657       /* VK_KHR_ray_query */
658       .rayQuery = rt_enabled,
659 
660       /* VK_KHR_ray_tracing_maintenance1 */
661       .rayTracingMaintenance1 = rt_enabled,
662       .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,
663 
664       /* VK_KHR_ray_tracing_pipeline */
665       .rayTracingPipeline = rt_enabled,
666       .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
667       .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
668       .rayTracingPipelineTraceRaysIndirect = rt_enabled,
669       .rayTraversalPrimitiveCulling = rt_enabled,
670 
671       /* VK_EXT_robustness2 */
672       .robustBufferAccess2 = true,
673       .robustImageAccess2 = true,
674       .nullDescriptor = true,
675 
676       /* VK_EXT_shader_replicated_composites */
677       .shaderReplicatedComposites = true,
678 
679       /* VK_EXT_shader_atomic_float */
680       .shaderBufferFloat32Atomics =    true,
681       .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
682       .shaderBufferFloat64Atomics =
683          pdevice->info.has_64bit_float && pdevice->info.has_lsc,
684       .shaderBufferFloat64AtomicAdd =  pdevice->info.ver >= 20,
685       .shaderSharedFloat32Atomics =    true,
686       .shaderSharedFloat32AtomicAdd =  false,
687       .shaderSharedFloat64Atomics =    false,
688       .shaderSharedFloat64AtomicAdd =  false,
689       .shaderImageFloat32Atomics =     true,
690       .shaderImageFloat32AtomicAdd =   pdevice->info.ver >= 20,
691       .sparseImageFloat32Atomics =     false,
692       .sparseImageFloat32AtomicAdd =   false,
693 
694       /* VK_EXT_shader_atomic_float2 */
695       .shaderBufferFloat16Atomics      = pdevice->info.has_lsc,
696       .shaderBufferFloat16AtomicAdd    = false,
697       .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
698       .shaderBufferFloat32AtomicMinMax = true,
699       .shaderBufferFloat64AtomicMinMax =
700          pdevice->info.has_64bit_float && pdevice->info.has_lsc &&
701          pdevice->info.ver < 20,
702       .shaderSharedFloat16Atomics      = pdevice->info.has_lsc,
703       .shaderSharedFloat16AtomicAdd    = false,
704       .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
705       .shaderSharedFloat32AtomicMinMax = true,
706       .shaderSharedFloat64AtomicMinMax = false,
707       .shaderImageFloat32AtomicMinMax  = false,
708       .sparseImageFloat32AtomicMinMax  = false,
709 
710       /* VK_KHR_shader_clock */
711       .shaderSubgroupClock = true,
712       .shaderDeviceClock = false,
713 
714       /* VK_INTEL_shader_integer_functions2 */
715       .shaderIntegerFunctions2 = true,
716 
717       /* VK_EXT_shader_module_identifier */
718       .shaderModuleIdentifier = true,
719 
720       /* VK_KHR_shader_subgroup_uniform_control_flow */
721       .shaderSubgroupUniformControlFlow = true,
722 
723       /* VK_EXT_texel_buffer_alignment */
724       .texelBufferAlignment = true,
725 
726       /* VK_EXT_transform_feedback */
727       .transformFeedback = true,
728       .geometryStreams = true,
729 
730       /* VK_KHR_vertex_attribute_divisor */
731       .vertexAttributeInstanceRateDivisor = true,
732       .vertexAttributeInstanceRateZeroDivisor = true,
733 
734       /* VK_KHR_workgroup_memory_explicit_layout */
735       .workgroupMemoryExplicitLayout = true,
736       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
737       .workgroupMemoryExplicitLayout8BitAccess = true,
738       .workgroupMemoryExplicitLayout16BitAccess = true,
739 
740       /* VK_EXT_ycbcr_image_arrays */
741       .ycbcrImageArrays = true,
742 
743       /* VK_EXT_ycbcr_2plane_444_formats */
744       .ycbcr2plane444Formats = true,
745 
746       /* VK_EXT_extended_dynamic_state */
747       .extendedDynamicState = true,
748 
749       /* VK_EXT_extended_dynamic_state2 */
750       .extendedDynamicState2 = true,
751       .extendedDynamicState2LogicOp = true,
752       .extendedDynamicState2PatchControlPoints = true,
753 
754       /* VK_EXT_extended_dynamic_state3 */
755       .extendedDynamicState3PolygonMode = true,
756       .extendedDynamicState3TessellationDomainOrigin = true,
757       .extendedDynamicState3RasterizationStream = true,
758       .extendedDynamicState3LineStippleEnable = true,
759       .extendedDynamicState3LineRasterizationMode = true,
760       .extendedDynamicState3LogicOpEnable = true,
761       .extendedDynamicState3AlphaToOneEnable = true,
762       .extendedDynamicState3DepthClipEnable = true,
763       .extendedDynamicState3DepthClampEnable = true,
764       .extendedDynamicState3DepthClipNegativeOneToOne = true,
765       .extendedDynamicState3ProvokingVertexMode = true,
766       .extendedDynamicState3ColorBlendEnable = true,
767       .extendedDynamicState3ColorWriteMask = true,
768       .extendedDynamicState3ColorBlendEquation = true,
769       .extendedDynamicState3SampleLocationsEnable = true,
770       .extendedDynamicState3SampleMask = true,
771       .extendedDynamicState3ConservativeRasterizationMode = true,
772       .extendedDynamicState3AlphaToCoverageEnable = true,
773       .extendedDynamicState3RasterizationSamples = true,
774 
775       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
776       .extendedDynamicState3ViewportWScalingEnable = false,
777       .extendedDynamicState3ViewportSwizzle = false,
778       .extendedDynamicState3ShadingRateImageEnable = false,
779       .extendedDynamicState3CoverageToColorEnable = false,
780       .extendedDynamicState3CoverageToColorLocation = false,
781       .extendedDynamicState3CoverageModulationMode = false,
782       .extendedDynamicState3CoverageModulationTableEnable = false,
783       .extendedDynamicState3CoverageModulationTable = false,
784       .extendedDynamicState3CoverageReductionMode = false,
785       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
786       .extendedDynamicState3ColorBlendAdvanced = false,
787 
788       /* VK_EXT_multi_draw */
789       .multiDraw = true,
790 
791       /* VK_EXT_non_seamless_cube_map */
792       .nonSeamlessCubeMap = true,
793 
794       /* VK_EXT_primitive_topology_list_restart */
795       .primitiveTopologyListRestart = true,
796       .primitiveTopologyPatchListRestart = true,
797 
798       /* VK_EXT_depth_clamp_control */
799       .depthClampControl = true,
800 
801       /* VK_EXT_depth_clip_control */
802       .depthClipControl = true,
803 
804       /* VK_KHR_present_id */
805       .presentId = pdevice->vk.supported_extensions.KHR_present_id,
806 
807       /* VK_KHR_present_wait */
808       .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
809 
810       /* VK_EXT_vertex_input_dynamic_state */
811       .vertexInputDynamicState = true,
812 
813       /* VK_KHR_ray_tracing_position_fetch */
814       .rayTracingPositionFetch = rt_enabled,
815 
816       /* VK_EXT_dynamic_rendering_unused_attachments */
817       .dynamicRenderingUnusedAttachments = true,
818 
819       /* VK_EXT_depth_bias_control */
820       .depthBiasControl = true,
821       .floatRepresentation = true,
822       .leastRepresentableValueForceUnormRepresentation = false,
823       .depthBiasExact = true,
824 
825       /* VK_EXT_pipeline_robustness */
826       .pipelineRobustness = true,
827 
828       /* VK_KHR_maintenance5 */
829       .maintenance5 = true,
830 
831       /* VK_KHR_maintenance6 */
832       .maintenance6 = true,
833 
834       /* VK_EXT_nested_command_buffer */
835       .nestedCommandBuffer = true,
836       .nestedCommandBufferRendering = true,
837       .nestedCommandBufferSimultaneousUse = false,
838 
839       /* VK_KHR_cooperative_matrix */
840       .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),
841 
842       /* VK_KHR_shader_maximal_reconvergence */
843       .shaderMaximalReconvergence = true,
844 
845       /* VK_KHR_shader_subgroup_rotate */
846       .shaderSubgroupRotate = true,
847       .shaderSubgroupRotateClustered = true,
848 
849       /* VK_EXT_attachment_feedback_loop_layout */
850       .attachmentFeedbackLoopLayout = true,
851 
852       /* VK_EXT_attachment_feedback_loop_dynamic_state */
853       .attachmentFeedbackLoopDynamicState = true,
854 
855       /* VK_KHR_shader_expect_assume */
856       .shaderExpectAssume = true,
857 
858       /* VK_EXT_descriptor_buffer */
859       .descriptorBuffer = true,
860       .descriptorBufferCaptureReplay = true,
861       .descriptorBufferImageLayoutIgnored = false,
862       .descriptorBufferPushDescriptors = true,
863 
864       /* VK_EXT_map_memory_placed */
865       .memoryMapPlaced = true,
866       .memoryMapRangePlaced = false,
867       .memoryUnmapReserve = true,
868 
869       /* VK_KHR_shader_quad_control */
870       .shaderQuadControl = true,
871 
872 #ifdef ANV_USE_WSI_PLATFORM
873       /* VK_EXT_swapchain_maintenance1 */
874       .swapchainMaintenance1 = true,
875 #endif
876 
877       /* VK_KHR_video_maintenance1 */
878       .videoMaintenance1 = true,
879 
880       /* VK_EXT_image_compression_control */
881       .imageCompressionControl = true,
882 
883       /* VK_KHR_shader_float_controls2 */
884       .shaderFloatControls2 = true,
885 
886       /* VK_EXT_legacy_vertex_attributes */
887       .legacyVertexAttributes = true,
888 
889       /* VK_EXT_legacy_dithering */
890       .legacyDithering = true,
891 
892       /* VK_MESA_image_alignment_control */
893       .imageAlignmentControl = true,
894 
895       /* VK_KHR_maintenance7 */
896       .maintenance7 = true,
897 
898       /* VK_KHR_shader_relaxed_extended_instruction */
899       .shaderRelaxedExtendedInstruction = true,
900 
901       /* VK_KHR_dynamic_rendering_local_read */
902       .dynamicRenderingLocalRead = true,
903 
904       /* VK_EXT_pipeline_protected_access */
905       .pipelineProtectedAccess = pdevice->has_protected_contexts,
906 
907       /* VK_EXT_host_image_copy */
908       .hostImageCopy = true,
909    };
910 
911    /* The new DOOM and Wolfenstein games require depthBounds without
912     * checking for it.  They seem to run fine without it so just claim it's
913     * there and accept the consequences.
914     */
915    if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
916       features->depthBounds = true;
917 }
918 
/* Descriptor-count caps advertised through the physical-device limits
 * below (per-stage UBO / input-attachment counts and the per-set
 * input-attachment count).
 */
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
923 
924 static VkDeviceSize
anx_get_physical_device_max_heap_size(const struct anv_physical_device * pdevice)925 anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
926 {
927    VkDeviceSize ret = 0;
928 
929    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
930       if (pdevice->memory.heaps[i].size > ret)
931          ret = pdevice->memory.heaps[i].size;
932    }
933 
934    return ret;
935 }
936 
937 static void
get_properties_1_1(const struct anv_physical_device * pdevice,struct vk_properties * p)938 get_properties_1_1(const struct anv_physical_device *pdevice,
939                    struct vk_properties *p)
940 {
941    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
942    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
943    memset(p->deviceLUID, 0, VK_LUID_SIZE);
944    p->deviceNodeMask = 0;
945    p->deviceLUIDValid = false;
946 
947    p->subgroupSize = BRW_SUBGROUP_SIZE;
948    VkShaderStageFlags scalar_stages = 0;
949    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
950       scalar_stages |= mesa_to_vk_shader_stage(stage);
951    }
952    if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
953       scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
954                        VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
955                        VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
956                        VK_SHADER_STAGE_MISS_BIT_KHR |
957                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
958                        VK_SHADER_STAGE_CALLABLE_BIT_KHR;
959    }
960    if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
961       scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
962                        VK_SHADER_STAGE_MESH_BIT_EXT;
963    }
964    p->subgroupSupportedStages = scalar_stages;
965    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
966                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
967                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
968                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
969                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
970                                     VK_SUBGROUP_FEATURE_QUAD_BIT |
971                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
972                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
973                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
974                                     VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
975    p->subgroupQuadOperationsInAllStages = true;
976 
977    p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
978    p->maxMultiviewViewCount      = 16;
979    p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
980    /* Our protected implementation is a memory encryption mechanism, it
981     * shouldn't page fault, but it hangs the HW so in terms of user visibility
982     * it's similar to a fault.
983     */
984    p->protectedNoFault           = false;
985    /* This value doesn't matter for us today as our per-stage descriptors are
986     * the real limit.
987     */
988    p->maxPerSetDescriptors       = 1024;
989 
990    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
991       p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
992                                         pdevice->memory.heaps[i].size);
993    }
994 }
995 
/* Fill in the Vulkan 1.2 core properties: driver identification,
 * conformance version, float-controls behavior, descriptor-indexing
 * (update-after-bind) limits, depth/stencil resolve modes and timeline
 * semaphore limits.
 */
static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* CTS version this driver claims conformance against. */
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 4,
      .subminor = 0,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = true;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = true;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* Descriptor-indexing (update-after-bind) limits come from the
    * per-device descriptor limits helper.
    */
   struct anv_descriptor_limits desc_limits;
   get_device_descriptor_limits(pdevice, &desc_limits);

   p->maxUpdateAfterBindDescriptorsInAllPools            = desc_limits.max_resources;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = desc_limits.max_samplers;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = desc_limits.max_ubos;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = desc_limits.max_ssbos;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = desc_limits.max_resources;
   p->maxDescriptorSetUpdateAfterBindSamplers            = desc_limits.max_samplers;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = desc_limits.max_ubos;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = desc_limits.max_ssbos;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_AVERAGE_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}
1092 
/* Fill in the Vulkan 1.3 core properties: subgroup size control, inline
 * uniform blocks, integer dot-product acceleration flags and texel-buffer
 * alignment requirements.
 */
static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   /* Xe2 (ver 20) drops SIMD8, so the minimum subgroup size grows to 16. */
   if (pdevice->info.ver >= 20)
      p->minSubgroupSize = 16;
   else
      p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                   VK_SHADER_STAGE_TASK_BIT_EXT |
                                   VK_SHADER_STAGE_MESH_BIT_EXT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   /* Only the packed 4x8-bit dot products are HW-accelerated, and only on
    * gfx12+ (DP4A); everything else is lowered by the compiler.
    */
   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces.  It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}
1175 
1176 static void
get_properties(const struct anv_physical_device * pdevice,struct vk_properties * props)1177 get_properties(const struct anv_physical_device *pdevice,
1178                struct vk_properties *props)
1179 {
1180 
1181       const struct intel_device_info *devinfo = &pdevice->info;
1182 
1183    const VkDeviceSize max_heap_size = anx_get_physical_device_max_heap_size(pdevice);
1184 
1185    const uint32_t max_workgroup_size =
1186       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1187 
1188    const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
1189    const bool sparse_uses_trtt = pdevice->sparse_type == ANV_SPARSE_TYPE_TRTT;
1190 
1191    uint64_t sparse_addr_space_size =
1192       !has_sparse_or_fake ? 0 :
1193       sparse_uses_trtt ? pdevice->va.trtt.size :
1194       pdevice->va.high_heap.size;
1195 
1196    VkSampleCountFlags sample_counts =
1197       isl_device_get_sample_counts(&pdevice->isl_dev);
1198 
1199 #if DETECT_OS_ANDROID
1200    /* Used to fill struct VkPhysicalDevicePresentationPropertiesANDROID */
1201    uint64_t front_rendering_usage = 0;
1202    struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
1203    if (gralloc != NULL) {
1204       u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
1205       u_gralloc_destroy(&gralloc);
1206    }
1207 #endif /* DETECT_OS_ANDROID */
1208 
1209    struct anv_descriptor_limits desc_limits;
1210    get_device_descriptor_limits(pdevice, &desc_limits);
1211 
1212    *props = (struct vk_properties) {
1213       .apiVersion = ANV_API_VERSION,
1214       .driverVersion = vk_get_driver_version(),
1215       .vendorID = pdevice->instance->force_vk_vendor != 0 ?
1216                   pdevice->instance->force_vk_vendor : 0x8086,
1217       .deviceID = pdevice->info.pci_device_id,
1218       .deviceType = pdevice->info.has_local_mem ?
1219                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
1220                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1221 
1222       /* Limits: */
1223       .maxImageDimension1D                      = (1 << 14),
1224       .maxImageDimension2D                      = (1 << 14),
1225       .maxImageDimension3D                      = (1 << 11),
1226       .maxImageDimensionCube                    = (1 << 14),
1227       .maxImageArrayLayers                      = (1 << 11),
1228       .maxTexelBufferElements                   = 128 * 1024 * 1024,
1229       .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1230       .maxStorageBufferRange                    = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
1231       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1232       .maxMemoryAllocationCount                 = UINT32_MAX,
1233       .maxSamplerAllocationCount                = 64 * 1024,
1234       .bufferImageGranularity                   = 1,
1235       .sparseAddressSpaceSize                   = sparse_addr_space_size,
1236       .maxBoundDescriptorSets                   = MAX_SETS,
1237       .maxPerStageDescriptorSamplers            = desc_limits.max_samplers,
1238       .maxPerStageDescriptorUniformBuffers      = desc_limits.max_ubos,
1239       .maxPerStageDescriptorStorageBuffers      = desc_limits.max_ssbos,
1240       .maxPerStageDescriptorSampledImages       = desc_limits.max_images,
1241       .maxPerStageDescriptorStorageImages       = desc_limits.max_images,
1242       .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1243       .maxPerStageResources                     = desc_limits.max_resources,
1244       .maxDescriptorSetSamplers                 = desc_limits.max_samplers,
1245       .maxDescriptorSetUniformBuffers           = desc_limits.max_ubos,
1246       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1247       .maxDescriptorSetStorageBuffers           = desc_limits.max_ssbos,
1248       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1249       .maxDescriptorSetSampledImages            = desc_limits.max_images,
1250       .maxDescriptorSetStorageImages            = desc_limits.max_images,
1251       .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1252       .maxVertexInputAttributes                 = MAX_VES,
1253       .maxVertexInputBindings                   = MAX_VBS,
1254       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1255        *
1256        * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1257        */
1258       .maxVertexInputAttributeOffset            = 2047,
1259       /* Skylake PRMs: Volume 2d: Command Reference: Structures:
1260        *
1261        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1262        */
1263       .maxVertexInputBindingStride              = 4095,
1264       .maxVertexOutputComponents                = 128,
1265       .maxTessellationGenerationLevel           = 64,
1266       .maxTessellationPatchSize                 = 32,
1267       .maxTessellationControlPerVertexInputComponents = 128,
1268       .maxTessellationControlPerVertexOutputComponents = 128,
1269       .maxTessellationControlPerPatchOutputComponents = 128,
1270       .maxTessellationControlTotalOutputComponents = 2048,
1271       .maxTessellationEvaluationInputComponents = 128,
1272       .maxTessellationEvaluationOutputComponents = 128,
1273       .maxGeometryShaderInvocations             = 32,
1274       .maxGeometryInputComponents               = 128,
1275       .maxGeometryOutputComponents              = 128,
1276       .maxGeometryOutputVertices                = 256,
1277       .maxGeometryTotalOutputComponents         = 1024,
1278       .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1279       .maxFragmentOutputAttachments             = 8,
1280       .maxFragmentDualSrcAttachments            = 1,
1281       .maxFragmentCombinedOutputResources       = MAX_RTS + desc_limits.max_ssbos +
1282                                                   desc_limits.max_images,
1283       .maxComputeSharedMemorySize               = intel_device_info_get_max_slm_size(&pdevice->info),
1284       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1285       .maxComputeWorkGroupInvocations           = max_workgroup_size,
1286       .maxComputeWorkGroupSize = {
1287          max_workgroup_size,
1288          max_workgroup_size,
1289          max_workgroup_size,
1290       },
1291       .subPixelPrecisionBits                    = 8,
1292       .subTexelPrecisionBits                    = 8,
1293       .mipmapPrecisionBits                      = 8,
1294       .maxDrawIndexedIndexValue                 = UINT32_MAX,
1295       .maxDrawIndirectCount                     = UINT32_MAX,
1296       .maxSamplerLodBias                        = 16,
1297       .maxSamplerAnisotropy                     = 16,
1298       .maxViewports                             = MAX_VIEWPORTS,
1299       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1300       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1301       .viewportSubPixelBits                     = 13, /* We take a float? */
1302       .minMemoryMapAlignment                    = 4096, /* A page */
1303       /* The dataport requires texel alignment so we need to assume a worst
1304        * case of R32G32B32A32 which is 16 bytes.
1305        */
1306       .minTexelBufferOffsetAlignment            = 16,
1307       .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
1308       .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
1309       .minTexelOffset                           = -8,
1310       .maxTexelOffset                           = 7,
1311       .minTexelGatherOffset                     = -32,
1312       .maxTexelGatherOffset                     = 31,
1313       .minInterpolationOffset                   = -0.5,
1314       .maxInterpolationOffset                   = 0.4375,
1315       .subPixelInterpolationOffsetBits          = 4,
1316       .maxFramebufferWidth                      = (1 << 14),
1317       .maxFramebufferHeight                     = (1 << 14),
1318       .maxFramebufferLayers                     = (1 << 11),
1319       .framebufferColorSampleCounts             = sample_counts,
1320       .framebufferDepthSampleCounts             = sample_counts,
1321       .framebufferStencilSampleCounts           = sample_counts,
1322       .framebufferNoAttachmentsSampleCounts     = sample_counts,
1323       .maxColorAttachments                      = MAX_RTS,
1324       .sampledImageColorSampleCounts            = sample_counts,
1325       .sampledImageIntegerSampleCounts          = sample_counts,
1326       .sampledImageDepthSampleCounts            = sample_counts,
1327       .sampledImageStencilSampleCounts          = sample_counts,
1328       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
1329       .maxSampleMaskWords                       = 1,
1330       .timestampComputeAndGraphics              = true,
1331       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
1332       .maxClipDistances                         = 8,
1333       .maxCullDistances                         = 8,
1334       .maxCombinedClipAndCullDistances          = 8,
1335       .discreteQueuePriorities                  = 2,
1336       .pointSizeRange                           = { 0.125, 255.875 },
1337       /* While SKL and up support much wider lines than we are setting here,
1338        * in practice we run into conformance issues if we go past this limit.
1339        * Since the Windows driver does the same, it's probably fair to assume
1340        * that no one needs more than this.
1341        */
1342       .lineWidthRange                           = { 0.0, 8.0 },
1343       .pointSizeGranularity                     = (1.0 / 8.0),
1344       .lineWidthGranularity                     = (1.0 / 128.0),
1345       .strictLines                              = false,
1346       .standardSampleLocations                  = true,
1347       .optimalBufferCopyOffsetAlignment         = 128,
1348       .optimalBufferCopyRowPitchAlignment       = 128,
1349       .nonCoherentAtomSize                      = 64,
1350 
1351       /* Sparse: */
1352       .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
1353       .sparseResidencyStandard2DMultisampleBlockShape = false,
1354       .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
1355       .sparseResidencyAlignedMipSize = false,
1356       .sparseResidencyNonResidentStrict = has_sparse_or_fake,
1357 
1358       /* VK_KHR_cooperative_matrix */
1359       .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1360 
1361       /* Vulkan 1.4 */
1362       .dynamicRenderingLocalReadDepthStencilAttachments = true,
1363       .dynamicRenderingLocalReadMultisampledAttachments = true,
1364    };
1365 
1366    snprintf(props->deviceName, sizeof(props->deviceName),
1367             "%s", pdevice->info.name);
1368    memcpy(props->pipelineCacheUUID,
1369           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1370 
1371    get_properties_1_1(pdevice, props);
1372    get_properties_1_2(pdevice, props);
1373    get_properties_1_3(pdevice, props);
1374 
1375    /* VK_KHR_acceleration_structure */
1376    {
1377       props->maxGeometryCount = (1u << 24) - 1;
1378       props->maxInstanceCount = (1u << 24) - 1;
1379       props->maxPrimitiveCount = (1u << 29) - 1;
1380       props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
1381       props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
1382       props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
1383       props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
1384       props->minAccelerationStructureScratchOffsetAlignment = 64;
1385    }
1386 
1387    /* VK_KHR_compute_shader_derivatives */
1388    {
1389       props->meshAndTaskShaderDerivatives = pdevice->info.has_mesh_shading;
1390    }
1391 
1392    /* VK_KHR_fragment_shading_rate */
1393    {
1394       props->primitiveFragmentShadingRateWithMultipleViewports =
1395          pdevice->info.has_coarse_pixel_primitive_and_cb;
1396       props->layeredShadingRateAttachments =
1397       pdevice->info.has_coarse_pixel_primitive_and_cb;
1398       props->fragmentShadingRateNonTrivialCombinerOps =
1399          pdevice->info.has_coarse_pixel_primitive_and_cb;
1400       props->maxFragmentSize = (VkExtent2D) { 4, 4 };
1401       props->maxFragmentSizeAspectRatio =
1402          pdevice->info.has_coarse_pixel_primitive_and_cb ?
1403          2 : 4;
1404       props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1405          (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
1406       props->maxFragmentShadingRateRasterizationSamples =
1407       pdevice->info.has_coarse_pixel_primitive_and_cb ?
1408          VK_SAMPLE_COUNT_4_BIT :  VK_SAMPLE_COUNT_16_BIT;
1409       props->fragmentShadingRateWithShaderDepthStencilWrites = false;
1410       props->fragmentShadingRateWithSampleMask = true;
1411       props->fragmentShadingRateWithShaderSampleMask = false;
1412       props->fragmentShadingRateWithConservativeRasterization = true;
1413       props->fragmentShadingRateWithFragmentShaderInterlock = true;
1414       props->fragmentShadingRateWithCustomSampleLocations = true;
1415       props->fragmentShadingRateStrictMultiplyCombiner = true;
1416 
1417       if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
1418          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1419          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1420          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1421       } else {
1422          /* Those must be 0 if attachmentFragmentShadingRate is not supported. */
1423          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1424          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1425          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
1426       }
1427    }
1428 
1429    /* VK_KHR_maintenance5 */
1430    {
1431       props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1432       props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1433       props->depthStencilSwizzleOneSupport = true;
1434       props->polygonModePointSize = true;
1435       props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1436       props->nonStrictWideLinesUseParallelogram = false;
1437    }
1438 
1439    /* VK_KHR_maintenance6 */
1440    {
1441       props->blockTexelViewCompatibleMultipleLayers = true;
1442       props->maxCombinedImageSamplerDescriptorCount = 3;
1443       props->fragmentShadingRateClampCombinerInputs = true;
1444    }
1445 
1446    /* VK_KHR_maintenance7 */
1447    {
1448       props->robustFragmentShadingRateAttachmentAccess = true;
1449       props->separateDepthStencilAttachmentAccess = true;
1450       props->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1451       props->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1452       props->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1453       props->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1454       props->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1455       props->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1456    }
1457 
1458    /* VK_KHR_performance_query */
1459    {
1460       props->allowCommandBufferQueryCopies = false;
1461    }
1462 
1463    /* VK_KHR_push_descriptor */
1464    {
1465       props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1466    }
1467 
1468    /* VK_KHR_ray_tracing_pipeline */
1469    {
1470       /* TODO */
1471       props->shaderGroupHandleSize = 32;
1472       props->maxRayRecursionDepth = 31;
1473       /* MemRay::hitGroupSRStride is 16 bits */
1474       props->maxShaderGroupStride = UINT16_MAX;
1475       /* MemRay::hitGroupSRBasePtr requires 16B alignment */
1476       props->shaderGroupBaseAlignment = 16;
1477       props->shaderGroupHandleAlignment = 16;
1478       props->shaderGroupHandleCaptureReplaySize = 32;
1479       props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
1480       props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
1481    }
1482 
1483    /* VK_KHR_vertex_attribute_divisor */
1484    {
1485       props->maxVertexAttribDivisor = UINT32_MAX / 16;
1486       props->supportsNonZeroFirstInstance = true;
1487    }
1488 
1489    /* VK_EXT_conservative_rasterization */
1490    {
1491       /* There's nothing in the public docs about this value as far as I can
1492        * tell. However, this is the value the Windows driver reports and
1493        * there's a comment on a rejected HW feature in the internal docs that
1494        * says:
1495        *
1496        *    "This is similar to conservative rasterization, except the
1497        *    primitive area is not extended by 1/512 and..."
1498        *
1499        * That's a bit of an obtuse reference but it's the best we've got for
1500        * now.
1501        */
1502       props->primitiveOverestimationSize = 1.0f / 512.0f;
1503       props->maxExtraPrimitiveOverestimationSize = 0.0f;
1504       props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
1505       props->primitiveUnderestimation = false;
1506       props->conservativePointAndLineRasterization = false;
1507       props->degenerateTrianglesRasterized = true;
1508       props->degenerateLinesRasterized = false;
1509       props->fullyCoveredFragmentShaderInputVariable = false;
1510       props->conservativeRasterizationPostDepthCoverage = true;
1511    }
1512 
1513    /* VK_EXT_custom_border_color */
1514    {
1515       props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1516    }
1517 
1518    /* VK_EXT_descriptor_buffer */
1519    {
1520       props->combinedImageSamplerDescriptorSingleArray = true;
1521       props->bufferlessPushDescriptors = true;
1522       /* Written to the buffer before a timeline semaphore is signaled, but
1523        * after vkQueueSubmit().
1524        */
1525       props->allowSamplerImageViewPostSubmitCreation = true;
1526       props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;
1527 
1528       if (pdevice->uses_ex_bso) {
1529          props->maxDescriptorBufferBindings = MAX_SETS;
1530          props->maxResourceDescriptorBufferBindings = MAX_SETS;
1531          props->maxSamplerDescriptorBufferBindings = MAX_SETS;
1532          props->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1533       } else {
1534          props->maxDescriptorBufferBindings = 3; /* resources, samplers, push (we don't care about push) */
1535          props->maxResourceDescriptorBufferBindings = 1;
1536          props->maxSamplerDescriptorBufferBindings = 1;
1537          props->maxEmbeddedImmutableSamplerBindings = 1;
1538       }
1539       props->maxEmbeddedImmutableSamplers = MAX_EMBEDDED_SAMPLERS;
1540 
1541       /* Storing a 64bit address */
1542       props->bufferCaptureReplayDescriptorDataSize = 8;
1543       props->imageCaptureReplayDescriptorDataSize = 8;
1544       /* Offset inside the reserved border color pool */
1545       props->samplerCaptureReplayDescriptorDataSize = 4;
1546 
1547       /* Not affected by replay */
1548       props->imageViewCaptureReplayDescriptorDataSize = 0;
1549       /* The acceleration structure virtual address backing is coming from a
1550        * buffer, so as long as that buffer is captured/replayed correctly we
1551        * should always get the same address.
1552        */
1553       props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1554 
1555       props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE;
1556       props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
1557                                                         ANV_SURFACE_STATE_SIZE);
1558       props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1559       props->storageImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1560       props->uniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1561       props->robustUniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1562       props->storageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1563       props->robustStorageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1564       props->uniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1565       props->robustUniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1566       props->storageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1567       props->robustStorageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1568       props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE;
1569       props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor);
1570       props->maxSamplerDescriptorBufferRange = pdevice->va.dynamic_visible_pool.size;
1571       props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice,
1572                                                                                        true);
1573       props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1574       props->descriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1575       props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1576    }
1577 
1578    /* VK_EXT_extended_dynamic_state3 */
1579    {
1580       props->dynamicPrimitiveTopologyUnrestricted = true;
1581    }
1582 
1583    /* VK_EXT_external_memory_host */
1584    {
1585       props->minImportedHostPointerAlignment = 4096;
1586    }
1587 
1588    /* VK_EXT_graphics_pipeline_library */
1589    {
1590       props->graphicsPipelineLibraryFastLinking = true;
1591       props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1592    }
1593 
1594    /* VK_EXT_host_image_copy */
1595    {
1596       static const VkImageLayout supported_layouts[] = {
1597          VK_IMAGE_LAYOUT_GENERAL,
1598          VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1599          VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1600          VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1601          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1602          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1603          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1604          VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1605          VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1606          VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1607          VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1608          VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1609          VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1610          VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1611          VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1612          VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR,
1613          VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1614          VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR,
1615       };
1616 
1617       props->pCopySrcLayouts = (VkImageLayout *) supported_layouts;
1618       props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1619       props->pCopyDstLayouts = (VkImageLayout *) supported_layouts;
1620       props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1621 
1622       /* This UUID essentially tells you if you can share an optimially tiling
1623        * image with another driver. Much of the tiling decisions are based on :
1624        *
1625        *    - device generation (different tilings based on generations)
1626        *    - device workarounds
1627        *    - driver build (as we implement workarounds or performance tunings,
1628        *      the tiling decision changes)
1629        *
1630        * So we're using a hash of the verx10 field + driver_build_sha1.
1631        *
1632        * Unfortunately there is a HW issue on SKL GT4 that makes it use some
1633        * different tilings sometimes (see isl_gfx7.c).
1634        */
1635       {
1636          struct mesa_sha1 sha1_ctx;
1637          uint8_t sha1[20];
1638 
1639          _mesa_sha1_init(&sha1_ctx);
1640          _mesa_sha1_update(&sha1_ctx, pdevice->driver_build_sha1,
1641                            sizeof(pdevice->driver_build_sha1));
1642          _mesa_sha1_update(&sha1_ctx, &pdevice->info.platform,
1643                            sizeof(pdevice->info.platform));
1644          if (pdevice->info.platform == INTEL_PLATFORM_SKL &&
1645              pdevice->info.gt == 4) {
1646             _mesa_sha1_update(&sha1_ctx, &pdevice->info.gt,
1647                               sizeof(pdevice->info.gt));
1648          }
1649          _mesa_sha1_final(&sha1_ctx, sha1);
1650 
1651          assert(ARRAY_SIZE(sha1) >= VK_UUID_SIZE);
1652          memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
1653       }
1654 
1655       /* System without ReBAR cannot map all memory types on the host and that
1656        * affects the memory types an image can use for host memory copies.
1657        *
1658        * System with compressed memory types also cannot expose all image
1659        * memory types for host image copies.
1660        */
1661       props->identicalMemoryTypeRequirements = pdevice->has_small_bar ||
1662          pdevice->memory.compressed_mem_types != 0;
1663    }
1664 
1665    /* VK_EXT_legacy_vertex_attributes */
1666    {
1667       props->nativeUnalignedPerformance = true;
1668    }
1669 
1670    /* VK_EXT_line_rasterization */
1671    {
1672       /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1673        * Rules - Legacy Mode", it says the following:
1674        *
1675        *    "Note that the device divides a pixel into a 16x16 array of
1676        *     subpixels, referenced by their upper left corners."
1677        *
1678        * This is the only known reference in the PRMs to the subpixel
1679        * precision of line rasterization and a "16x16 array of subpixels"
1680        * implies 4 subpixel precision bits. Empirical testing has shown that 4
1681        * subpixel precision bits applies to all line rasterization types.
1682        */
1683       props->lineSubPixelPrecisionBits = 4;
1684    }
1685 
1686    /* VK_EXT_map_memory_placed */
1687    {
1688       props->minPlacedMemoryMapAlignment = 4096;
1689    }
1690 
1691    /* VK_EXT_mesh_shader */
1692    {
1693       /* Bounded by the maximum representable size in
1694        * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
1695        */
1696       const uint32_t max_slm_size = intel_device_info_get_max_slm_size(devinfo);
1697 
1698       /* Bounded by the maximum representable size in
1699        * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
1700        */
1701       const uint32_t max_workgroup_size = 1 << 10;
1702 
1703       /* 3DMESH_3D limitation. */
1704       const uint32_t max_threadgroup_count = 1 << 22;
1705 
1706       /* 3DMESH_3D limitation. */
1707       const uint32_t max_threadgroup_xyz = 65535;
1708 
1709       const uint32_t max_urb_size = 64 * 1024;
1710 
1711       props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
1712       props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
1713       props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
1714       props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
1715 
1716       props->maxTaskWorkGroupInvocations = max_workgroup_size;
1717       props->maxTaskWorkGroupSize[0] = max_workgroup_size;
1718       props->maxTaskWorkGroupSize[1] = max_workgroup_size;
1719       props->maxTaskWorkGroupSize[2] = max_workgroup_size;
1720 
1721       /* TUE header with padding */
1722       const uint32_t task_payload_reserved = 32;
1723 
1724       props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
1725       props->maxTaskSharedMemorySize = max_slm_size;
1726       props->maxTaskPayloadAndSharedMemorySize =
1727          props->maxTaskPayloadSize +
1728          props->maxTaskSharedMemorySize;
1729 
1730       props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
1731       props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
1732       props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
1733       props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
1734 
1735       props->maxMeshWorkGroupInvocations = max_workgroup_size;
1736       props->maxMeshWorkGroupSize[0] = max_workgroup_size;
1737       props->maxMeshWorkGroupSize[1] = max_workgroup_size;
1738       props->maxMeshWorkGroupSize[2] = max_workgroup_size;
1739 
1740       props->maxMeshSharedMemorySize = max_slm_size;
1741       props->maxMeshPayloadAndSharedMemorySize =
1742          props->maxTaskPayloadSize +
1743          props->maxMeshSharedMemorySize;
1744 
1745       /* Unfortunately spec's formula for the max output size doesn't match our hardware
1746        * (because some per-primitive and per-vertex attributes have alignment restrictions),
1747        * so we have to advertise the minimum value mandated by the spec to not overflow it.
1748        */
1749       props->maxMeshOutputPrimitives = 256;
1750       props->maxMeshOutputVertices = 256;
1751 
1752       /* NumPrim + Primitive Data List */
1753       const uint32_t max_indices_memory =
1754          ALIGN(sizeof(uint32_t) +
1755                sizeof(uint32_t) * props->maxMeshOutputVertices, 32);
1756 
1757       props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
1758 
1759       props->maxMeshPayloadAndOutputMemorySize =
1760          props->maxTaskPayloadSize +
1761          props->maxMeshOutputMemorySize;
1762 
1763       props->maxMeshOutputComponents = 128;
1764 
1765       /* RTAIndex is 11-bits wide */
1766       props->maxMeshOutputLayers = 1 << 11;
1767 
1768       props->maxMeshMultiviewViewCount = 1;
1769 
1770       /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
1771       props->meshOutputPerVertexGranularity = 8;
1772       /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
1773       props->meshOutputPerPrimitiveGranularity = 8;
1774 
1775       /* SIMD16 */
1776       props->maxPreferredTaskWorkGroupInvocations = 16;
1777       props->maxPreferredMeshWorkGroupInvocations = 16;
1778 
1779       props->prefersLocalInvocationVertexOutput = false;
1780       props->prefersLocalInvocationPrimitiveOutput = false;
1781       props->prefersCompactVertexOutput = false;
1782       props->prefersCompactPrimitiveOutput = false;
1783 
1784       /* Spec minimum values */
1785       assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
1786       assert(props->maxTaskWorkGroupCount[0] >= 65535);
1787       assert(props->maxTaskWorkGroupCount[1] >= 65535);
1788       assert(props->maxTaskWorkGroupCount[2] >= 65535);
1789 
1790       assert(props->maxTaskWorkGroupInvocations >= 128);
1791       assert(props->maxTaskWorkGroupSize[0] >= 128);
1792       assert(props->maxTaskWorkGroupSize[1] >= 128);
1793       assert(props->maxTaskWorkGroupSize[2] >= 128);
1794 
1795       assert(props->maxTaskPayloadSize >= 16384);
1796       assert(props->maxTaskSharedMemorySize >= 32768);
1797       assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);
1798 
1799 
1800       assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
1801       assert(props->maxMeshWorkGroupCount[0] >= 65535);
1802       assert(props->maxMeshWorkGroupCount[1] >= 65535);
1803       assert(props->maxMeshWorkGroupCount[2] >= 65535);
1804 
1805       assert(props->maxMeshWorkGroupInvocations >= 128);
1806       assert(props->maxMeshWorkGroupSize[0] >= 128);
1807       assert(props->maxMeshWorkGroupSize[1] >= 128);
1808       assert(props->maxMeshWorkGroupSize[2] >= 128);
1809 
1810       assert(props->maxMeshSharedMemorySize >= 28672);
1811       assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
1812       assert(props->maxMeshOutputMemorySize >= 32768);
1813       assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);
1814 
1815       assert(props->maxMeshOutputComponents >= 128);
1816 
1817       assert(props->maxMeshOutputVertices >= 256);
1818       assert(props->maxMeshOutputPrimitives >= 256);
1819       assert(props->maxMeshOutputLayers >= 8);
1820       assert(props->maxMeshMultiviewViewCount >= 1);
1821    }
1822 
1823    /* VK_EXT_multi_draw */
1824    {
1825       props->maxMultiDrawCount = 2048;
1826    }
1827 
1828    /* VK_EXT_nested_command_buffer */
1829    {
1830       props->maxCommandBufferNestingLevel = UINT32_MAX;
1831    }
1832 
1833    /* VK_EXT_pci_bus_info */
1834    {
1835       props->pciDomain = pdevice->info.pci_domain;
1836       props->pciBus = pdevice->info.pci_bus;
1837       props->pciDevice = pdevice->info.pci_dev;
1838       props->pciFunction = pdevice->info.pci_func;
1839    }
1840 
1841    /* VK_EXT_physical_device_drm */
1842    {
1843       props->drmHasPrimary = pdevice->has_master;
1844       props->drmPrimaryMajor = pdevice->master_major;
1845       props->drmPrimaryMinor = pdevice->master_minor;
1846       props->drmHasRender = pdevice->has_local;
1847       props->drmRenderMajor = pdevice->local_major;
1848       props->drmRenderMinor = pdevice->local_minor;
1849    }
1850 
1851    /* VK_EXT_pipeline_robustness */
1852    {
1853       props->defaultRobustnessStorageBuffers =
1854          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1855       props->defaultRobustnessUniformBuffers =
1856          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1857       props->defaultRobustnessVertexInputs =
1858          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
1859       props->defaultRobustnessImages =
1860          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1861    }
1862 
1863    /* VK_EXT_provoking_vertex */
1864    {
1865       props->provokingVertexModePerPipeline = true;
1866       props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1867    }
1868 
1869    /* VK_EXT_robustness2 */
1870    {
1871       props->robustStorageBufferAccessSizeAlignment =
1872          ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1873       props->robustUniformBufferAccessSizeAlignment =
1874          ANV_UBO_ALIGNMENT;
1875    }
1876 
1877    /* VK_EXT_sample_locations */
1878    {
1879       props->sampleLocationSampleCounts =
1880          isl_device_get_sample_counts(&pdevice->isl_dev);
1881 
1882       /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1883       props->maxSampleLocationGridSize.width = 1;
1884       props->maxSampleLocationGridSize.height = 1;
1885 
1886       props->sampleLocationCoordinateRange[0] = 0;
1887       props->sampleLocationCoordinateRange[1] = 0.9375;
1888       props->sampleLocationSubPixelBits = 4;
1889 
1890       props->variableSampleLocations = true;
1891    }
1892 
1893    /* VK_EXT_shader_module_identifier */
1894    {
1895       STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1896                     sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1897       memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1898              vk_shaderModuleIdentifierAlgorithmUUID,
1899              sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1900    }
1901 
1902    /* VK_EXT_transform_feedback */
1903    {
1904       props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1905       props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1906       props->maxTransformFeedbackBufferSize = (1ull << 32);
1907       props->maxTransformFeedbackStreamDataSize = 128 * 4;
1908       props->maxTransformFeedbackBufferDataSize = 128 * 4;
1909       props->maxTransformFeedbackBufferDataStride = 2048;
1910       props->transformFeedbackQueries = true;
1911       props->transformFeedbackStreamsLinesTriangles = false;
1912       props->transformFeedbackRasterizationStreamSelect = false;
1913       props->transformFeedbackDraw = true;
1914    }
1915 
1916    /* VK_ANDROID_native_buffer */
1917 #if DETECT_OS_ANDROID
1918    {
1919       props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
1920    }
1921 #endif /* DETECT_OS_ANDROID */
1922 
1923 
1924    /* VK_MESA_image_alignment_control */
1925    {
1926       /* We support 4k/64k tiling alignments on most platforms */
1927       props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
1928    }
1929 }
1930 
1931 static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device * device,int fd)1932 anv_init_meminfo(struct anv_physical_device *device, int fd)
1933 {
1934    const struct intel_device_info *devinfo = &device->info;
1935 
1936    device->sys.region = &devinfo->mem.sram.mem;
1937    device->sys.size = devinfo->mem.sram.mappable.size;
1938    device->sys.available = devinfo->mem.sram.mappable.free;
1939 
1940    device->vram_mappable.region = &devinfo->mem.vram.mem;
1941    device->vram_mappable.size = devinfo->mem.vram.mappable.size;
1942    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1943 
1944    device->vram_non_mappable.region = &devinfo->mem.vram.mem;
1945    device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
1946    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1947 
1948    return VK_SUCCESS;
1949 }
1950 
1951 static void
anv_update_meminfo(struct anv_physical_device * device,int fd)1952 anv_update_meminfo(struct anv_physical_device *device, int fd)
1953 {
1954    if (!intel_device_info_update_memory_info(&device->info, fd))
1955       return;
1956 
1957    const struct intel_device_info *devinfo = &device->info;
1958    device->sys.available = devinfo->mem.sram.mappable.free;
1959    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1960    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1961 }
1962 
1963 static VkResult
anv_physical_device_init_heaps(struct anv_physical_device * device,int fd)1964 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
1965 {
1966    VkResult result = anv_init_meminfo(device, fd);
1967    if (result != VK_SUCCESS)
1968       return result;
1969 
1970    assert(device->sys.size != 0);
1971 
1972    if (anv_physical_device_has_vram(device)) {
1973       /* We can create 2 or 3 different heaps when we have local memory
1974        * support, first heap with local memory size and second with system
1975        * memory size and the third is added only if part of the vram is
1976        * mappable to the host.
1977        */
1978       device->memory.heap_count = 2;
1979       device->memory.heaps[0] = (struct anv_memory_heap) {
1980          /* If there is a vram_non_mappable, use that for the device only
1981           * heap. Otherwise use the vram_mappable.
1982           */
1983          .size = device->vram_non_mappable.size != 0 ?
1984                  device->vram_non_mappable.size : device->vram_mappable.size,
1985          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1986          .is_local_mem = true,
1987       };
1988       device->memory.heaps[1] = (struct anv_memory_heap) {
1989          .size = device->sys.size,
1990          .flags = 0,
1991          .is_local_mem = false,
1992       };
1993       /* Add an additional smaller vram mappable heap if we can't map all the
1994        * vram to the host.
1995        */
1996       if (device->vram_non_mappable.size > 0) {
1997          device->memory.heap_count++;
1998          device->memory.heaps[2] = (struct anv_memory_heap) {
1999             .size = device->vram_mappable.size,
2000             .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
2001             .is_local_mem = true,
2002          };
2003       }
2004    } else {
2005       device->memory.heap_count = 1;
2006       device->memory.heaps[0] = (struct anv_memory_heap) {
2007          .size = device->sys.size,
2008          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
2009          .is_local_mem = false,
2010       };
2011    }
2012 
2013    switch (device->info.kmd_type) {
2014    case INTEL_KMD_TYPE_XE:
2015       result = anv_xe_physical_device_init_memory_types(device);
2016       break;
2017    case INTEL_KMD_TYPE_I915:
2018    default:
2019       result = anv_i915_physical_device_init_memory_types(device);
2020       break;
2021    }
2022 
2023    assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
2024 
2025    if (result != VK_SUCCESS)
2026       return result;
2027 
2028    /* Some games (e.g., Total War: WARHAMMER III) sometimes seem to expect to
2029     * find memory types both with and without
2030     * VK_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_BIT. So here we duplicate all our
2031     * memory types just to make these games happy.
2032     * This behavior is not spec-compliant as we still only have one heap that
2033     * is now inconsistent with some of the memory types, but the game doesn't
2034     * seem to care about it.
2035     */
2036    if (device->instance->anv_fake_nonlocal_memory &&
2037        !anv_physical_device_has_vram(device)) {
2038       const uint32_t base_types_count = device->memory.type_count;
2039       for (int i = 0; i < base_types_count; i++) {
2040          if (!(device->memory.types[i].propertyFlags &
2041                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
2042             continue;
2043 
2044          assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
2045          struct anv_memory_type *new_type =
2046             &device->memory.types[device->memory.type_count++];
2047          *new_type = device->memory.types[i];
2048 
2049          device->memory.types[i].propertyFlags &=
2050             ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
2051       }
2052    }
2053 
2054    /* Replicate all non protected memory types for descriptor buffers because
2055     * we want to identify memory allocations to place them in the right memory
2056     * heap.
2057     */
2058    device->memory.default_buffer_mem_types =
2059       BITFIELD_RANGE(0, device->memory.type_count);
2060    device->memory.protected_mem_types = 0;
2061    device->memory.dynamic_visible_mem_types = 0;
2062    device->memory.compressed_mem_types = 0;
2063 
2064    const uint32_t base_types_count = device->memory.type_count;
2065    for (int i = 0; i < base_types_count; i++) {
2066       bool skip = false;
2067 
2068       if (device->memory.types[i].propertyFlags &
2069           VK_MEMORY_PROPERTY_PROTECTED_BIT) {
2070          device->memory.protected_mem_types |= BITFIELD_BIT(i);
2071          device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
2072          skip = true;
2073       }
2074 
2075       if (device->memory.types[i].compressed) {
2076          device->memory.compressed_mem_types |= BITFIELD_BIT(i);
2077          device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
2078          skip = true;
2079       }
2080 
2081       if (skip)
2082          continue;
2083 
2084       device->memory.dynamic_visible_mem_types |=
2085          BITFIELD_BIT(device->memory.type_count);
2086 
2087       assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
2088       struct anv_memory_type *new_type =
2089          &device->memory.types[device->memory.type_count++];
2090       *new_type = device->memory.types[i];
2091       new_type->dynamic_visible = true;
2092    }
2093 
2094    assert(device->memory.type_count <= VK_MAX_MEMORY_TYPES);
2095 
2096    for (unsigned i = 0; i < device->memory.type_count; i++) {
2097       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
2098       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
2099           !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
2100 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2101          device->memory.need_flush = true;
2102 #else
2103          return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2104                           "Memory configuration requires flushing, but it's not implemented for this architecture");
2105 #endif
2106    }
2107 
2108    return VK_SUCCESS;
2109 }
2110 
2111 static VkResult
anv_physical_device_init_uuids(struct anv_physical_device * device)2112 anv_physical_device_init_uuids(struct anv_physical_device *device)
2113 {
2114    const struct build_id_note *note =
2115       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
2116    if (!note) {
2117       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2118                        "Failed to find build-id");
2119    }
2120 
2121    unsigned build_id_len = build_id_length(note);
2122    if (build_id_len < 20) {
2123       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2124                        "build-id too short.  It needs to be a SHA");
2125    }
2126 
2127    memcpy(device->driver_build_sha1, build_id_data(note), 20);
2128 
2129    struct mesa_sha1 sha1_ctx;
2130    uint8_t sha1[20];
2131    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
2132 
2133    /* The pipeline cache UUID is used for determining when a pipeline cache is
2134     * invalid.  It needs both a driver build and the PCI ID of the device.
2135     */
2136    _mesa_sha1_init(&sha1_ctx);
2137    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
2138    brw_device_sha1_update(&sha1_ctx, &device->info);
2139    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
2140                      sizeof(device->always_use_bindless));
2141    _mesa_sha1_final(&sha1_ctx, sha1);
2142    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
2143 
2144    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
2145    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
2146 
2147    return VK_SUCCESS;
2148 }
2149 
static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   /* Cache name: "anv_" followed by the 4 hex digits of the PCI device id. */
   char cache_name[10];
   ASSERTED int len = snprintf(cache_name, sizeof(cache_name), "anv_%04x",
                               device->info.pci_device_id);
   /* "anv_" + 4 hex digits == 8 characters. */
   assert(len == sizeof(cache_name) - 2);

   /* The driver's build-id SHA serves as the cache "timestamp". */
   char build_sha[41];
   _mesa_sha1_format(build_sha, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->vk.disk_cache = disk_cache_create(cache_name, build_sha, driver_flags);
#endif
}
2167 
2168 static void
anv_physical_device_free_disk_cache(struct anv_physical_device * device)2169 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
2170 {
2171 #ifdef ENABLE_SHADER_CACHE
2172    if (device->vk.disk_cache) {
2173       disk_cache_destroy(device->vk.disk_cache);
2174       device->vk.disk_cache = NULL;
2175    }
2176 #else
2177    assert(device->vk.disk_cache == NULL);
2178 #endif
2179 }
2180 
2181 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
2182  * queue overrides.
2183  *
 * To override the number of queues:
2185  *  * "gc" is for graphics queues with compute support
2186  *  * "g" is for graphics queues with no compute support
2187  *  * "c" is for compute queues with no graphics support
2188  *  * "v" is for video queues with no graphics support
2189  *  * "b" is for copy (blitter) queues with no graphics support
2190  *
2191  * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
2192  * advertised queues to be 2 queues with graphics+compute support, and 1 queue
2193  * with compute-only support.
2194  *
2195  * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
2196  * include 1 queue with compute-only support, but it will not change the
2197  * number of graphics+compute queues.
2198  *
2199  * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
2200  * to include 1 queue with compute-only support, and it would override the
2201  * number of graphics+compute queues to be 0.
2202  */
2203 static void
anv_override_engine_counts(int * gc_count,int * g_count,int * c_count,int * v_count,int * blit_count)2204 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count, int *blit_count)
2205 {
2206    int gc_override = -1;
2207    int g_override = -1;
2208    int c_override = -1;
2209    int v_override = -1;
2210    int blit_override = -1;
2211    const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");
2212 
2213    /* Override queues for Android HWUI that expects min 2 queues. */
2214 #if DETECT_OS_ANDROID
2215    *gc_count = 2;
2216 #endif
2217 
2218    if (env_ == NULL)
2219       return;
2220 
2221    char *env = strdup(env_);
2222    char *save = NULL;
2223    char *next = strtok_r(env, ",", &save);
2224    while (next != NULL) {
2225       if (strncmp(next, "gc=", 3) == 0) {
2226          gc_override = strtol(next + 3, NULL, 0);
2227       } else if (strncmp(next, "g=", 2) == 0) {
2228          g_override = strtol(next + 2, NULL, 0);
2229       } else if (strncmp(next, "c=", 2) == 0) {
2230          c_override = strtol(next + 2, NULL, 0);
2231       } else if (strncmp(next, "v=", 2) == 0) {
2232          v_override = strtol(next + 2, NULL, 0);
2233       } else if (strncmp(next, "b=", 2) == 0) {
2234          blit_override = strtol(next + 2, NULL, 0);
2235       } else {
2236          mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
2237       }
2238       next = strtok_r(NULL, ",", &save);
2239    }
2240    free(env);
2241    if (gc_override >= 0)
2242       *gc_count = gc_override;
2243    if (g_override >= 0)
2244       *g_count = g_override;
2245    if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
2246       mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
2247                 "Vulkan specification");
2248    if (c_override >= 0)
2249       *c_count = c_override;
2250    if (v_override >= 0)
2251       *v_count = v_override;
2252    if (blit_override >= 0)
2253       *blit_count = blit_override;
2254 }
2255 
/* Enumerate the queue families advertised for this physical device, based on
 * the kernel-reported engine topology (with ANV_QUEUE_OVERRIDE applied).
 * Falls back to a single graphics+compute family when no engine info is
 * available.
 */
static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;
   /* Sparse binding is advertised on every non-video family whenever any
    * sparse implementation (vm_bind, TR-TT or fake) is available.
    */
   VkQueueFlags sparse_flags = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED ?
                               VK_QUEUE_SPARSE_BINDING_BIT : 0;
   VkQueueFlags protected_flag = pdevice->has_protected_contexts ?
                                 VK_QUEUE_PROTECTED_BIT : 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int v_count =
         intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
      int g_count = 0;
      int c_count = 0;
      /* Not only the Kernel needs to have vm_control, but it also needs to
       * have a new enough GuC and the interface to tell us so. This is
       * implemented in the common layer by is_guc_semaphore_functional() and
       * results in devinfo->engine_class_supported_count being adjusted,
       * which we read below.
       */
      const bool kernel_supports_non_render_engines = pdevice->has_vm_control;
      /* For now we're choosing to not expose non-render engines on i915.ko
       * even when the Kernel allows it. We have data suggesting it's not an
       * obvious win in terms of performance.
       */
      const bool can_use_non_render_engines =
         kernel_supports_non_render_engines &&
         pdevice->info.kmd_type == INTEL_KMD_TYPE_XE;

      if (can_use_non_render_engines) {
         c_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
      }

      /* Dedicated copy (blitter) queues are only exposed on verx10 >= 125. */
      int blit_count = 0;
      if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
         blit_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COPY];
      }

      anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count, &blit_count);

      /* Compute-only families run on CCS when supported, otherwise they are
       * backed by the render engine.
       */
      enum intel_engine_class compute_class =
         pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE] &&
         c_count >= 1 ? INTEL_ENGINE_CLASS_COMPUTE :
                        INTEL_ENGINE_CLASS_RENDER;

      if (gc_count > 0) {
         /* Graphics+compute family; the only one that supports perf queries. */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
            .supports_perf = true,
         };
      }
      if (g_count > 0) {
         /* Graphics-only family (no COMPUTE bit), still on the render engine. */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = c_count,
            .engine_class = compute_class,
         };
      }
      if (v_count > 0 && (pdevice->video_decode_enabled || pdevice->video_encode_enabled)) {
         /* HEVC support on Gfx9 is only available on VCS0. So limit the number of video queues
          * to the first VCS engine instance.
          *
          * We should be able to query HEVC support from the kernel using the engine query uAPI,
          * but this appears to be broken :
          *    https://gitlab.freedesktop.org/drm/intel/-/issues/8832
          *
          * When this bug is fixed we should be able to check HEVC support to determine the
          * correct number of queues.
          */
         /* TODO: enable protected content on video queue */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = (pdevice->video_decode_enabled ? VK_QUEUE_VIDEO_DECODE_BIT_KHR : 0) |
                          (pdevice->video_encode_enabled ? VK_QUEUE_VIDEO_ENCODE_BIT_KHR : 0),
            .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
            .engine_class = INTEL_ENGINE_CLASS_VIDEO,
         };
      }
      if (blit_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_TRANSFER_BIT |
                          protected_flag,
            .queueCount = blit_count,
            .engine_class = INTEL_ENGINE_CLASS_COPY,
         };
      }
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT |
                       sparse_flags,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      /* NOTE(review): redundant — family_count is already 1 after the
       * post-increment on the assignment above.
       */
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}
2378 
2379 static VkResult
anv_physical_device_get_parameters(struct anv_physical_device * device)2380 anv_physical_device_get_parameters(struct anv_physical_device *device)
2381 {
2382    switch (device->info.kmd_type) {
2383    case INTEL_KMD_TYPE_I915:
2384       return anv_i915_physical_device_get_parameters(device);
2385    case INTEL_KMD_TYPE_XE:
2386       return anv_xe_physical_device_get_parameters(device);
2387    default:
2388       unreachable("Missing");
2389       return VK_ERROR_UNKNOWN;
2390    }
2391 }
2392 
2393 VkResult
anv_physical_device_try_create(struct vk_instance * vk_instance,struct _drmDevice * drm_device,struct vk_physical_device ** out)2394 anv_physical_device_try_create(struct vk_instance *vk_instance,
2395                                struct _drmDevice *drm_device,
2396                                struct vk_physical_device **out)
2397 {
2398    struct anv_instance *instance =
2399       container_of(vk_instance, struct anv_instance, vk);
2400 
2401    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
2402        drm_device->bustype != DRM_BUS_PCI ||
2403        drm_device->deviceinfo.pci->vendor_id != 0x8086)
2404       return VK_ERROR_INCOMPATIBLE_DRIVER;
2405 
2406    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
2407    const char *path = drm_device->nodes[DRM_NODE_RENDER];
2408    VkResult result;
2409    int fd;
2410    int master_fd = -1;
2411 
2412    process_intel_debug_variable();
2413 
2414    fd = open(path, O_RDWR | O_CLOEXEC);
2415    if (fd < 0) {
2416       if (errno == ENOMEM) {
2417          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2418                           "Unable to open device %s: out of memory", path);
2419       }
2420       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2421                        "Unable to open device %s: %m", path);
2422    }
2423 
2424    struct intel_device_info devinfo;
2425    if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
2426       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2427       goto fail_fd;
2428    }
2429 
2430    if (devinfo.ver < 9) {
2431       /* Silently fail here, hasvk should pick up this device. */
2432       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2433       goto fail_fd;
2434    } else if (devinfo.probe_forced) {
2435       /* If INTEL_FORCE_PROBE was used, then the user has opted-in for
2436        * unsupported device support. No need to print a warning message.
2437        */
2438    } else if (devinfo.ver > 20) {
2439       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2440                          "Vulkan not yet supported on %s", devinfo.name);
2441       goto fail_fd;
2442    }
2443 
2444    /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
2445     * need to always disable preemption :
2446     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
2447     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
2448     */
2449    if (devinfo.verx10 == 120)
2450       BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);
2451 
2452    if (!devinfo.has_context_isolation) {
2453       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2454                          "Vulkan requires context isolation for %s", devinfo.name);
2455       goto fail_fd;
2456    }
2457 
2458    struct anv_physical_device *device =
2459       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
2460                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2461    if (device == NULL) {
2462       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2463       goto fail_fd;
2464    }
2465 
2466    struct vk_physical_device_dispatch_table dispatch_table;
2467    vk_physical_device_dispatch_table_from_entrypoints(
2468       &dispatch_table, &anv_physical_device_entrypoints, true);
2469    vk_physical_device_dispatch_table_from_entrypoints(
2470       &dispatch_table, &wsi_physical_device_entrypoints, false);
2471 
2472    result = vk_physical_device_init(&device->vk, &instance->vk,
2473                                     NULL, NULL, NULL, /* We set up extensions later */
2474                                     &dispatch_table);
2475    if (result != VK_SUCCESS) {
2476       vk_error(instance, result);
2477       goto fail_alloc;
2478    }
2479    device->instance = instance;
2480 
2481    assert(strlen(path) < ARRAY_SIZE(device->path));
2482    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
2483 
2484    device->info = devinfo;
2485 
2486    device->local_fd = fd;
2487    result = anv_physical_device_get_parameters(device);
2488    if (result != VK_SUCCESS)
2489       goto fail_base;
2490 
2491    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
2492                                               device->info.aperture_bytes;
2493 
2494    if (device->gtt_size < (4ULL << 30 /* GiB */)) {
2495       vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2496                 "GTT size too small: 0x%016"PRIx64, device->gtt_size);
2497       goto fail_base;
2498    }
2499 
2500    /* We currently only have the right bits for instructions in Gen12+. If the
2501     * kernel ever starts supporting that feature on previous generations,
2502     * we'll need to edit genxml prior to enabling here.
2503     */
2504    device->has_protected_contexts = device->info.ver >= 12 &&
2505       intel_gem_supports_protected_context(fd, device->info.kmd_type);
2506 
2507    /* Just pick one; they're all the same */
2508    device->has_astc_ldr =
2509       isl_format_supports_sampling(&device->info,
2510                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
2511    if (!device->has_astc_ldr &&
2512        driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
2513       device->emu_astc_ldr = true;
2514    if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
2515       device->flush_astc_ldr_void_extent_denorms =
2516          device->has_astc_ldr && !device->emu_astc_ldr;
2517    }
2518    device->disable_fcv = device->info.verx10 >= 125 ||
2519                          instance->disable_fcv;
2520 
2521    result = anv_physical_device_init_heaps(device, fd);
2522    if (result != VK_SUCCESS)
2523       goto fail_base;
2524 
2525    if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
2526       device->has_exec_timeline = false;
2527 
2528    device->has_cooperative_matrix =
2529       device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;
2530 
2531    unsigned st_idx = 0;
2532 
2533    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
2534    if (!device->has_exec_timeline)
2535       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
2536    device->sync_types[st_idx++] = &device->sync_syncobj_type;
2537 
2538    /* anv_bo_sync_type is only supported with i915 for now  */
2539    if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
2540       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
2541          device->sync_types[st_idx++] = &anv_bo_sync_type;
2542 
2543       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
2544          device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
2545          device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
2546       }
2547    } else {
2548       assert(vk_sync_type_is_drm_syncobj(&device->sync_syncobj_type));
2549       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
2550       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
2551    }
2552 
2553    device->sync_types[st_idx++] = NULL;
2554    assert(st_idx <= ARRAY_SIZE(device->sync_types));
2555    device->vk.supported_sync_types = device->sync_types;
2556 
2557    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
2558 
2559    device->always_use_bindless =
2560       debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
2561 
2562    device->use_call_secondary =
2563       !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
2564 
2565    device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
2566    device->video_encode_enabled = debug_get_bool_option("ANV_VIDEO_ENCODE", false);
2567 
2568    device->uses_ex_bso = device->info.verx10 >= 125;
2569 
2570    /* For now always use indirect descriptors. We'll update this
2571     * to !uses_ex_bso when all the infrastructure is built up.
2572     */
2573    device->indirect_descriptors =
2574       !device->uses_ex_bso ||
2575       driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
2576 
2577    device->alloc_aux_tt_mem =
2578       device->info.has_aux_map && device->info.verx10 >= 125;
2579    /* Check if we can read the GPU timestamp register from the CPU */
2580    uint64_t u64_ignore;
2581    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
2582                                                                device->info.kmd_type,
2583                                                                &u64_ignore);
2584 
2585    device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;
2586 
2587    /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
2588    if (debug_get_bool_option("ANV_SPARSE", true)) {
2589       if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
2590          if (debug_get_bool_option("ANV_SPARSE_USE_TRTT", false))
2591             device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2592          else
2593             device->sparse_type = ANV_SPARSE_TYPE_VM_BIND;
2594       } else {
2595          if (device->info.ver >= 12 && device->has_exec_timeline)
2596             device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2597       }
2598    }
2599    if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) {
2600       if (instance->has_fake_sparse)
2601          device->sparse_type = ANV_SPARSE_TYPE_FAKE;
2602    }
2603 
2604    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
2605       driQueryOptionb(&instance->dri_options, "always_flush_cache");
2606 
2607    device->compiler = brw_compiler_create(NULL, &device->info);
2608    if (device->compiler == NULL) {
2609       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2610       goto fail_base;
2611    }
2612    device->compiler->shader_debug_log = compiler_debug_log;
2613    device->compiler->shader_perf_log = compiler_perf_log;
2614    device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
2615    device->compiler->use_bindless_sampler_offset = false;
2616    device->compiler->spilling_rate =
2617       driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
2618 
2619    isl_device_init(&device->isl_dev, &device->info);
2620    device->isl_dev.buffer_length_in_aux_addr = !intel_needs_workaround(device->isl_dev.info, 14019708328);
2621    device->isl_dev.sampler_route_to_lsc =
2622       driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc");
2623 
2624    result = anv_physical_device_init_uuids(device);
2625    if (result != VK_SUCCESS)
2626       goto fail_compiler;
2627 
2628    anv_physical_device_init_va_ranges(device);
2629 
2630    anv_physical_device_init_disk_cache(device);
2631 
2632    if (instance->vk.enabled_extensions.KHR_display) {
2633       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
2634       if (master_fd >= 0) {
2635          /* fail if we don't have permission to even render on this device */
2636          if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
2637             close(master_fd);
2638             master_fd = -1;
2639          }
2640       }
2641    }
2642    device->master_fd = master_fd;
2643 
2644    device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
2645    intel_common_update_device_info(fd, &device->info);
2646 
2647    anv_physical_device_init_queue_families(device);
2648 
2649    anv_physical_device_init_perf(device, fd);
2650 
2651    /* Gather major/minor before WSI. */
2652    struct stat st;
2653 
2654    if (stat(primary_path, &st) == 0) {
2655       device->has_master = true;
2656       device->master_major = major(st.st_rdev);
2657       device->master_minor = minor(st.st_rdev);
2658    } else {
2659       device->has_master = false;
2660       device->master_major = 0;
2661       device->master_minor = 0;
2662    }
2663 
2664    if (stat(path, &st) == 0) {
2665       device->has_local = true;
2666       device->local_major = major(st.st_rdev);
2667       device->local_minor = minor(st.st_rdev);
2668    } else {
2669       device->has_local = false;
2670       device->local_major = 0;
2671       device->local_minor = 0;
2672    }
2673 
2674    device->has_small_bar = anv_physical_device_has_vram(device) &&
2675                            device->vram_non_mappable.size != 0;
2676 
2677    get_device_extensions(device, &device->vk.supported_extensions);
2678    get_features(device, &device->vk.supported_features);
2679    get_properties(device, &device->vk.properties);
2680 
2681    result = anv_init_wsi(device);
2682    if (result != VK_SUCCESS)
2683       goto fail_perf;
2684 
2685    anv_measure_device_init(device);
2686 
2687    anv_genX(&device->info, init_physical_device_state)(device);
2688 
2689    *out = &device->vk;
2690 
2691    return VK_SUCCESS;
2692 
2693 fail_perf:
2694    intel_perf_free(device->perf);
2695    free(device->engine_info);
2696    anv_physical_device_free_disk_cache(device);
2697 fail_compiler:
2698    ralloc_free(device->compiler);
2699 fail_base:
2700    vk_physical_device_finish(&device->vk);
2701 fail_alloc:
2702    vk_free(&instance->vk.alloc, device);
2703 fail_fd:
2704    close(fd);
2705    if (master_fd != -1)
2706       close(master_fd);
2707    return result;
2708 }
2709 
void
anv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct anv_physical_device *device =
      container_of(vk_device, struct anv_physical_device, vk);

   /* Release everything acquired during physical-device creation.  The
    * sequence below mirrors (in reverse) the creation function's
    * initialization order and matches its fail_* cleanup labels, so keep
    * both sides in sync when adding new resources.
    */
   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   intel_perf_free(device->perf);
   close(device->local_fd);
   /* master_fd is optional; it stays negative when no primary (display)
    * node was opened during creation.
    */
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}
2728 
2729 static const VkQueueFamilyProperties
get_anv_queue_family_properties_template(const struct anv_physical_device * device)2730 get_anv_queue_family_properties_template(const struct anv_physical_device *device)
2731 {
2732 
2733    /*
2734     * For Xe2+:
2735     * Bspec 60411: Timestamp register can hold 64-bit value
2736     *
2737     * Platforms < Xe2:
2738     * Bpsec 46111: Timestamp register can hold only 36-bit
2739     *              value
2740     */
2741    const VkQueueFamilyProperties anv_queue_family_properties_template =
2742    {
2743       .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
2744       .minImageTransferGranularity = { 1, 1, 1 },
2745    };
2746 
2747    return anv_queue_family_properties_template;
2748 }
2749 
2750 static VkQueueFamilyProperties
anv_device_physical_get_queue_properties(const struct anv_physical_device * device,uint32_t family_index)2751 anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
2752                                          uint32_t family_index)
2753 {
2754    const struct anv_queue_family *family = &device->queue.families[family_index];
2755    VkQueueFamilyProperties properties =
2756       get_anv_queue_family_properties_template(device);
2757 
2758    properties.queueFlags = family->queueFlags;
2759    properties.queueCount = family->queueCount;
2760    return properties;
2761 }
2762 
void anv_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2*                   pQueueFamilyProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
      struct anv_queue_family *queue_family = &pdevice->queue.families[i];
      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties =
            anv_device_physical_get_queue_properties(pdevice, i);

         /* Fill in any extension structs chained off each property entry. */
         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
               VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
                  (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;

               /* Deliberately sorted low to high */
               VkQueueGlobalPriorityKHR all_priorities[] = {
                  VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
               };

               /* Advertise every priority up to and including the maximum
                * the kernel context supports.  Note: use 'j' here, reusing
                * 'i' would shadow the queue-family loop index above.
                */
               uint32_t count = 0;
               for (unsigned j = 0; j < ARRAY_SIZE(all_priorities); j++) {
                  if (all_priorities[j] > pdevice->max_context_priority)
                     break;

                  properties->priorities[count++] = all_priorities[j];
               }
               properties->priorityCount = count;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
               VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
                  (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
               prop->queryResultStatusSupport = VK_TRUE;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
               VkQueueFamilyVideoPropertiesKHR *prop =
                  (VkQueueFamilyVideoPropertiesKHR *)ext;
               /* The struct is app-allocated and may contain garbage, and
                * the encode path below ORs into this field, so start from
                * zero.  Without this, an encode-only family would OR flags
                * into an uninitialized value.
                */
               prop->videoCodecOperations = 0;
               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
                  /* AV1 decode requires Gfx12+. */
                  if (pdevice->info.ver >= 12)
                     prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
               }

               if (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
               }
               break;
            }
            default:
               vk_debug_ignored_stype(ext->sType);
               break;
            }
         }
      }
   }
}
2831 
void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   const uint32_t type_count = physical_device->memory.type_count;
   const uint32_t heap_count = physical_device->memory.heap_count;

   /* Copy out the memory types advertised for this device. */
   pMemoryProperties->memoryTypeCount = type_count;
   for (uint32_t t = 0; t < type_count; t++) {
      pMemoryProperties->memoryTypes[t] = (VkMemoryType) {
         .propertyFlags = physical_device->memory.types[t].propertyFlags,
         .heapIndex     = physical_device->memory.types[t].heapIndex,
      };
   }

   /* And the heaps those types allocate from. */
   pMemoryProperties->memoryHeapCount = heap_count;
   for (uint32_t h = 0; h < heap_count; h++) {
      pMemoryProperties->memoryHeaps[h] = (VkMemoryHeap) {
         .size    = physical_device->memory.heaps[h].size,
         .flags   = physical_device->memory.heaps[h].flags,
      };
   }
}
2854 
/* Fill in VK_EXT_memory_budget data: per-heap usage and a budget derived
 * from the memory currently available on the system, split proportionally
 * between heaps of the same kind (VRAM vs system RAM).
 */
static void
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   if (!device->vk.supported_extensions.EXT_memory_budget)
      return;

   /* Refresh cached available/used numbers from the kernel first. */
   anv_update_meminfo(device, device->local_fd);

   /* Total size per heap kind, used below to compute each heap's share. */
   VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
   for (size_t i = 0; i < device->memory.heap_count; i++) {
      if (device->memory.heaps[i].is_local_mem) {
         total_vram_heaps_size += device->memory.heaps[i].size;
      } else {
         total_sys_heaps_size += device->memory.heaps[i].size;
      }
   }

   for (size_t i = 0; i < device->memory.heap_count; i++) {
      VkDeviceSize heap_size = device->memory.heaps[i].size;
      VkDeviceSize heap_used = device->memory.heaps[i].used;
      VkDeviceSize heap_budget, total_heaps_size;
      uint64_t mem_available = 0;

      if (device->memory.heaps[i].is_local_mem) {
         total_heaps_size = total_vram_heaps_size;
         /* When the device has a non-mappable VRAM region, heap 0 is that
          * region; otherwise all VRAM heaps draw from the mappable pool.
          */
         if (device->vram_non_mappable.size > 0 && i == 0) {
            mem_available = device->vram_non_mappable.available;
         } else {
            mem_available = device->vram_mappable.available;
         }
      } else {
         total_heaps_size = total_sys_heaps_size;
         mem_available = MIN2(device->sys.available, total_heaps_size);
      }

      /* This heap's proportional share of the available memory. */
      double heap_proportion = (double) heap_size / total_heaps_size;
      VkDeviceSize available_prop = mem_available * heap_proportion;

      /*
       * Let's not incite the app to starve the system: report at most 90% of
       * the available heap memory.
       */
      uint64_t heap_available = available_prop * 9 / 10;
      heap_budget = MIN2(heap_size, heap_used + heap_available);

      /*
       * Round down to the nearest MB
       */
      heap_budget &= ~((1ull << 20) - 1);

      /*
       * The heapBudget value must be non-zero for array elements less than
       * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
       * value must be less than or equal to VkMemoryHeap::size for each heap.
       */
      assert(0 < heap_budget && heap_budget <= heap_size);

      memoryBudget->heapUsage[i] = heap_used;
      memoryBudget->heapBudget[i] = heap_budget;
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
    */
   for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
2927 
void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   /* Core memory properties first, then any extension structs chained on
    * the pNext list.
    */
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      if (ext->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT)
         anv_get_memory_budget(physicalDevice, (void *)ext);
      else
         vk_debug_ignored_stype(ext->sType);
   }
}
2946 
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   /* Report a 1x1 sample-location grid when the requested sample count is
    * supported by the device, and 0x0 otherwise.
    */
   const bool supported =
      (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) != 0;
   const uint32_t dim = supported ? 1 : 0;

   pMultisampleProperties->maxSampleLocationGridSize =
      (VkExtent2D) { .width = dim, .height = dim };

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}
2970 
/* Enumerate the fragment shading rates (coarse pixel sizes) the device
 * supports, together with the sample counts valid for each rate.  Rates
 * are emitted from largest (4x4) to smallest (1x1) fragment size.
 */
VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

/* Append one shading-rate entry with the given sample mask and size. */
#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   /* BSpec 47003: There are a number of restrictions on the sample count
    * based off the coarse pixel size.  Indexed by coarse pixel area
    * (width * height).
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };

   for (uint32_t x = 4; x >= 1; x /= 2) {
       for (uint32_t y = 4; y >= 1; y /= 2) {
          if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
             /* BSpec 47003:
              *   "CPsize 1x4 and 4x1 are not supported"
              */
             if ((x == 1 && y == 4) || (x == 4 && y == 1))
                continue;

             /* For size {1, 1}, the sample count must be ~0
              *
              * 4x2 is also a special case: only single-sampled.
              */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else if (x == 4 && y == 2)
                append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
             else
                append_rate(cp_size_sample_limits[x * y], x, y);
          } else {
             /* For size {1, 1}, the sample count must be ~0 */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else
                append_rate(sample_counts, x, y);
          }
       }
   }

#undef append_rate

   return vk_outarray_status(&out);
}
3039 
3040 static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)3041 convert_component_type(enum intel_cooperative_matrix_component_type t)
3042 {
3043    switch (t) {
3044    case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
3045    case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
3046    case INTEL_CMAT_SINT32:  return VK_COMPONENT_TYPE_SINT32_KHR;
3047    case INTEL_CMAT_SINT8:   return VK_COMPONENT_TYPE_SINT8_KHR;
3048    case INTEL_CMAT_UINT32:  return VK_COMPONENT_TYPE_UINT32_KHR;
3049    case INTEL_CMAT_UINT8:   return VK_COMPONENT_TYPE_UINT8_KHR;
3050    }
3051    unreachable("invalid cooperative matrix component type in configuration");
3052 }
3053 
3054 static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)3055 convert_scope(enum intel_cmat_scope scope)
3056 {
3057    switch (scope) {
3058    case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
3059    default:
3060       unreachable("invalid cooperative matrix scope in configuration");
3061    }
3062 }
3063 
anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)3064 VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
3065    VkPhysicalDevice                            physicalDevice,
3066    uint32_t*                                   pPropertyCount,
3067    VkCooperativeMatrixPropertiesKHR*           pProperties)
3068 {
3069    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
3070    const struct intel_device_info *devinfo = &pdevice->info;
3071 
3072    assert(anv_has_cooperative_matrix(pdevice));
3073 
3074    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
3075 
3076    for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
3077       const struct intel_cooperative_matrix_configuration *cfg =
3078          &devinfo->cooperative_matrix_configurations[i];
3079 
3080       if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
3081          break;
3082 
3083       vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
3084          prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
3085 
3086          prop->MSize = cfg->m;
3087          prop->NSize = cfg->n;
3088          prop->KSize = cfg->k;
3089 
3090          prop->AType      = convert_component_type(cfg->a);
3091          prop->BType      = convert_component_type(cfg->b);
3092          prop->CType      = convert_component_type(cfg->c);
3093          prop->ResultType = convert_component_type(cfg->result);
3094 
3095          prop->saturatingAccumulation = VK_FALSE;
3096          prop->scope = convert_scope(cfg->scope);
3097       }
3098 
3099       /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
3100        *
3101        *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
3102        *    cooperative matrix operand must be present if and only if
3103        *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
3104        *    VK_TRUE.
3105        *
3106        * As a result, we have to advertise integer configs both with and
3107        * without this flag set.
3108        *
3109        * The DPAS instruction does not support the .sat modifier, so only
3110        * advertise the configurations when the DPAS would be lowered.
3111        *
3112        * FINISHME: It should be possible to do better than full lowering on
3113        * platforms that support DPAS. Emit a DPAS with a NULL accumulator
3114        * argument, then perform the correct sequence of saturating add
3115        * instructions.
3116        */
3117       if (cfg->a != INTEL_CMAT_FLOAT16 &&
3118           (devinfo->verx10 < 125 || debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
3119          vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
3120             prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
3121 
3122             prop->MSize = cfg->m;
3123             prop->NSize = cfg->n;
3124             prop->KSize = cfg->k;
3125 
3126             prop->AType      = convert_component_type(cfg->a);
3127             prop->BType      = convert_component_type(cfg->b);
3128             prop->CType      = convert_component_type(cfg->c);
3129             prop->ResultType = convert_component_type(cfg->result);
3130 
3131             prop->saturatingAccumulation = VK_TRUE;
3132             prop->scope = convert_scope(cfg->scope);
3133          }
3134       }
3135    }
3136 
3137    return vk_outarray_status(&out);
3138 }
3139