/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"
#include "anv_api_version.h"
#include "anv_measure.h"

#include "i915/anv_device.h"
#include "xe/anv_device.h"

#include "common/intel_common.h"
#include "common/intel_uuid.h"

#include "perf/intel_perf.h"

#include "git_sha1.h"

#include "util/disk_cache.h"
#include "util/mesa-sha1.h"

#include <xf86drm.h>
#include <fcntl.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

/* This is probably far too big, but it reflects the max message size used
 * by OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH 4096

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}

struct anv_descriptor_limits {
   uint32_t max_ubos;
   uint32_t max_ssbos;
   uint32_t max_samplers;
   uint32_t max_images;
   uint32_t max_resources;
};

static void
get_device_descriptor_limits(const struct anv_physical_device *device,
                             struct anv_descriptor_limits *limits)
{
   memset(limits, 0, sizeof(*limits));

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan. The bindless surface handle in the extended message
    * descriptors is 20 bits on <= Gfx12.0, 26 bits on >= Gfx12.5, and it's an
    * index into the table of RENDER_SURFACE_STATE structs that starts at
    * bindless surface base address. On <= Gfx12.0, this means that we can
    * have at most 1M surface states allocated at any given time. Since most
    * image views take two descriptors, this means we have a limit of about
    * 500K image views. On >= Gfx12.5, we do not need 2 surfaces per
    * descriptor and we have 33M+ descriptors (we have a 2GB limit, due to
    * overlapping heaps for workarounds, but HW can do 4GB).
    *
    * However, on <= Gfx12.0, since we allocate surface states at
    * vkCreateImageView time, this means our limit is actually something on
    * the order of 500K image views allocated at any time. The actual limit
    * described by Vulkan, on the other hand, is a limit on how many you can
    * have in a descriptor set. Assuming anyone using 1M descriptors will be
    * using the same image view twice a bunch of times (or a bunch of null
    * descriptors), we can safely advertise a larger limit here.
    *
    * Here we use the size of the heap in which the descriptors are stored
    * and divide by the size of the descriptor to get a limit value.
    */
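   /* Illustrative example (hypothetical numbers, not tied to any specific
    * platform): a 1 GiB descriptor heap holding 64-byte surface-state
    * descriptors would yield 1073741824 / 64 = 16777216 (~16M) descriptors
    * per type, and max_resources below becomes the minimum across all
    * descriptor types.
    */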
   const uint64_t descriptor_heap_size =
      device->indirect_descriptors ?
      device->va.indirect_descriptor_pool.size :
      device->va.bindless_surface_state_pool.size;

   const uint32_t buffer_descriptor_size =
      device->indirect_descriptors ?
      sizeof(struct anv_address_range_descriptor) :
      ANV_SURFACE_STATE_SIZE;
   const uint32_t image_descriptor_size =
      device->indirect_descriptors ?
      sizeof(struct anv_address_range_descriptor) :
      ANV_SURFACE_STATE_SIZE;
   const uint32_t sampler_descriptor_size =
      device->indirect_descriptors ?
      sizeof(struct anv_sampled_image_descriptor) :
      ANV_SAMPLER_STATE_SIZE;

   limits->max_ubos = descriptor_heap_size / buffer_descriptor_size;
   limits->max_ssbos = descriptor_heap_size / buffer_descriptor_size;
   limits->max_images = descriptor_heap_size / image_descriptor_size;
   limits->max_samplers = descriptor_heap_size / sampler_descriptor_size;

   limits->max_resources = UINT32_MAX;
   limits->max_resources = MIN2(limits->max_resources, limits->max_ubos);
   limits->max_resources = MIN2(limits->max_resources, limits->max_ssbos);
   limits->max_resources = MIN2(limits->max_resources, limits->max_images);
   limits->max_resources = MIN2(limits->max_resources, limits->max_samplers);
}

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = !device->instance->no_16bit,
      .KHR_acceleration_structure = rt_enabled,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = device->has_reg_timestamp,
      .KHR_compute_shader_derivatives = true,
      .KHR_cooperative_matrix = anv_has_cooperative_matrix(device),
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = has_syncobj_wait,
      .KHR_external_fence_fd = has_syncobj_wait,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->info.ver >= 11,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = device->max_context_priority >=
                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_maintenance7 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query =
         device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = rt_enabled,
      .KHR_ray_tracing_maintenance1 = rt_enabled,
      .KHR_ray_tracing_pipeline = rt_enabled,
      .KHR_ray_tracing_position_fetch = rt_enabled,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = !device->instance->no_16bit,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_video_queue = device->video_decode_enabled || device->video_encode_enabled,
      .KHR_video_decode_queue = device->video_decode_enabled,
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
      .KHR_video_decode_av1 = device->info.ver >= 12 && VIDEO_CODEC_AV1DEC && device->video_decode_enabled,
      .KHR_video_encode_queue = device->video_encode_enabled,
      .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && device->video_encode_enabled,
      .KHR_video_encode_h265 = device->info.ver >= 12 && VIDEO_CODEC_H265ENC && device->video_encode_enabled,
      .KHR_video_maintenance1 = (device->video_decode_enabled &&
                                 (VIDEO_CODEC_H264DEC || VIDEO_CODEC_H265DEC)) ||
                                (device->video_encode_enabled &&
                                 (VIDEO_CODEC_H264ENC || VIDEO_CODEC_H265ENC)),
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_border_color_swizzle = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = device->has_reg_timestamp,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = true,
      .EXT_custom_border_color = true,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = device->info.ver >= 20,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = true,
      .EXT_fragment_shader_interlock = true,
      .EXT_global_priority = device->max_context_priority >=
                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query = device->max_context_priority >=
                                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_graphics_pipeline_library = !debug_get_bool_option("ANV_NO_GPL", false),
      .EXT_host_image_copy = !device->emu_astc_ldr,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      /* Because of Xe2 PAT-selected compression and the Vulkan spec
       * requirement to always return the same memory types for images with
       * the same properties, we can't support EXT_image_compression_control
       * on Xe2+.
       */
      .EXT_image_compression_control = device->instance->compression_control_enabled &&
                                       device->info.ver < 20,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = device->info.has_mmap_offset,
      /* Enable the extension only if we have support on both local &
       * system memory.
       */
      .EXT_memory_budget = (!device->info.has_local_mem ||
                            device->vram_mappable.available > 0) &&
                           device->sys.available,
      .EXT_mesh_shader = device->info.has_mesh_shading,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = rt_enabled,
      .EXT_pipeline_protected_access = device->has_protected_contexts,
      .EXT_pipeline_robustness = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = true,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,
      .EXT_ycbcr_2plane_444_formats = true,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_texture_gather_bias_lod = device->info.ver >= 20,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_performance_query = device->perf &&
                                 intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2 = true,
      .MESA_image_alignment_control = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;

   const bool mesh_shader =
      pdevice->vk.supported_extensions.EXT_mesh_shader;

   const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdevice->info.ver >= 12,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = pdevice->has_astc_ldr ||
                                    pdevice->emu_astc_ldr,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      /* Gfx12.5 has all the required formats supported in HW for typed
       * reads/writes.
       */
      .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.has_64bit_float ||
                       pdevice->instance->fp64_workaround_enabled,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = has_sparse_or_fake,
      .sparseBinding = has_sparse_or_fake,
      .sparseResidencyAliased = has_sparse_or_fake,
      .sparseResidencyBuffer = has_sparse_or_fake,
      .sparseResidencyImage2D = has_sparse_or_fake,
      .sparseResidencyImage3D = has_sparse_or_fake,
      .sparseResidency2Samples = has_sparse_or_fake,
      .sparseResidency4Samples = has_sparse_or_fake,
      .sparseResidency8Samples = has_sparse_or_fake &&
                                 pdevice->info.verx10 != 125,
      .sparseResidency16Samples = has_sparse_or_fake &&
                                  pdevice->info.verx10 != 125,
      .variableMultisampleRate = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit,
      .storagePushConstant16 = true,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = pdevice->has_protected_contexts,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .shaderBufferInt64Atomics = true,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = !pdevice->instance->no_16bit,
      .shaderInt8 = !pdevice->instance->no_16bit,

      .descriptorIndexing = true,
      .shaderInputAttachmentArrayDynamicIndexing = false,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = true,
      .shaderSampledImageArrayNonUniformIndexing = true,
      .shaderStorageBufferArrayNonUniformIndexing = true,
      .shaderStorageImageArrayNonUniformIndexing = true,
      .shaderInputAttachmentArrayNonUniformIndexing = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing = true,
      .shaderStorageTexelBufferArrayNonUniformIndexing = true,
      .descriptorBindingUniformBufferUpdateAfterBind = true,
      .descriptorBindingSampledImageUpdateAfterBind = true,
      .descriptorBindingStorageImageUpdateAfterBind = true,
      .descriptorBindingStorageBufferUpdateAfterBind = true,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
      .descriptorBindingUpdateUnusedWhilePending = true,
      .descriptorBindingPartiallyBound = true,
      .descriptorBindingVariableDescriptorCount = true,
      .runtimeDescriptorArray = true,

      .samplerFilterMinmax = true,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = true,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex = true,
      .shaderOutputLayer = true,
      .subgroupBroadcastDynamicId = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* Vulkan 1.4 */
      .pushDescriptor = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_KHR_acceleration_structure */
      .accelerationStructure = rt_enabled,
      .accelerationStructureCaptureReplay = false, /* TODO */
      .accelerationStructureIndirectBuild = false, /* TODO */
      .accelerationStructureHostCommands = false,
      .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = true,

      /* VK_EXT_image_sliced_view_of_3d */
      .imageSlicedViewOf3D = true,

      /* VK_KHR_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = true,
      .inheritedConditionalRendering = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = true,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_EXT_fragment_shader_interlock */
      .fragmentShaderSampleInterlock = true,
      .fragmentShaderPixelInterlock = true,
      .fragmentShaderShadingRateInterlock = false,

      /* VK_EXT_global_priority_query */
      .globalPriorityQuery = true,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary =
         pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,

      /* VK_KHR_fragment_shading_rate */
      .pipelineFragmentShadingRate = true,
      .primitiveFragmentShadingRate =
         pdevice->info.has_coarse_pixel_primitive_and_cb,
      .attachmentFragmentShadingRate =
         pdevice->info.has_coarse_pixel_primitive_and_cb,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL. See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = pdevice->info.ver >= 10,
      .bresenhamLines = true,
      /* Support for Smooth lines with MSAA was removed on gfx11. From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:######## NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_NV_mesh_shader */
      .taskShaderNV = false,
      .meshShaderNV = false,

      /* VK_EXT_mesh_shader */
      .taskShader = mesh_shader,
      .meshShader = mesh_shader,
      .multiviewMeshShader = false,
      .primitiveFragmentShadingRateMeshShader = mesh_shader,
      .meshShaderQueries = mesh_shader,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_pipeline_library_group_handles */
      .pipelineLibraryGroupHandles = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_KHR_ray_query */
      .rayQuery = rt_enabled,

      /* VK_KHR_ray_tracing_maintenance1 */
      .rayTracingMaintenance1 = rt_enabled,
      .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,

      /* VK_KHR_ray_tracing_pipeline */
      .rayTracingPipeline = rt_enabled,
      .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
      .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
      .rayTracingPipelineTraceRaysIndirect = rt_enabled,
      .rayTraversalPrimitiveCulling = rt_enabled,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_replicated_composites */
      .shaderReplicatedComposites = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics = true,
      .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd = pdevice->info.ver >= 20,
      .shaderSharedFloat32Atomics = true,
      .shaderSharedFloat32AtomicAdd = false,
      .shaderSharedFloat64Atomics = false,
      .shaderSharedFloat64AtomicAdd = false,
      .shaderImageFloat32Atomics = true,
      .shaderImageFloat32AtomicAdd = pdevice->info.ver >= 20,
      .sparseImageFloat32Atomics = false,
      .sparseImageFloat32AtomicAdd = false,

      /* VK_EXT_shader_atomic_float2 */
      .shaderBufferFloat16Atomics = pdevice->info.has_lsc,
      .shaderBufferFloat16AtomicAdd = false,
      .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
      .shaderBufferFloat32AtomicMinMax = true,
      .shaderBufferFloat64AtomicMinMax =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc &&
         pdevice->info.ver < 20,
      .shaderSharedFloat16Atomics = pdevice->info.has_lsc,
      .shaderSharedFloat16AtomicAdd = false,
      .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
      .shaderSharedFloat32AtomicMinMax = true,
      .shaderSharedFloat64AtomicMinMax = false,
      .shaderImageFloat32AtomicMinMax = false,
      .sparseImageFloat32AtomicMinMax = false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_KHR_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_ycbcr_2plane_444_formats */
      .ycbcr2plane444Formats = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = true,

      /* VK_EXT_extended_dynamic_state3 */
      .extendedDynamicState3PolygonMode = true,
      .extendedDynamicState3TessellationDomainOrigin = true,
      .extendedDynamicState3RasterizationStream = true,
      .extendedDynamicState3LineStippleEnable = true,
      .extendedDynamicState3LineRasterizationMode = true,
      .extendedDynamicState3LogicOpEnable = true,
      .extendedDynamicState3AlphaToOneEnable = true,
      .extendedDynamicState3DepthClipEnable = true,
      .extendedDynamicState3DepthClampEnable = true,
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
      .extendedDynamicState3ProvokingVertexMode = true,
      .extendedDynamicState3ColorBlendEnable = true,
      .extendedDynamicState3ColorWriteMask = true,
      .extendedDynamicState3ColorBlendEquation = true,
      .extendedDynamicState3SampleLocationsEnable = true,
      .extendedDynamicState3SampleMask = true,
      .extendedDynamicState3ConservativeRasterizationMode = true,
      .extendedDynamicState3AlphaToCoverageEnable = true,
      .extendedDynamicState3RasterizationSamples = true,

      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
      .extendedDynamicState3ViewportWScalingEnable = false,
      .extendedDynamicState3ViewportSwizzle = false,
      .extendedDynamicState3ShadingRateImageEnable = false,
      .extendedDynamicState3CoverageToColorEnable = false,
      .extendedDynamicState3CoverageToColorLocation = false,
      .extendedDynamicState3CoverageModulationMode = false,
      .extendedDynamicState3CoverageModulationTableEnable = false,
      .extendedDynamicState3CoverageModulationTable = false,
      .extendedDynamicState3CoverageReductionMode = false,
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
      .extendedDynamicState3ColorBlendAdvanced = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clamp_control */
      .depthClampControl = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_EXT_vertex_input_dynamic_state */
      .vertexInputDynamicState = true,

      /* VK_KHR_ray_tracing_position_fetch */
      .rayTracingPositionFetch = rt_enabled,

      /* VK_EXT_dynamic_rendering_unused_attachments */
      .dynamicRenderingUnusedAttachments = true,

      /* VK_EXT_depth_bias_control */
      .depthBiasControl = true,
      .floatRepresentation = true,
      .leastRepresentableValueForceUnormRepresentation = false,
      .depthBiasExact = true,

      /* VK_EXT_pipeline_robustness */
      .pipelineRobustness = true,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

      /* VK_KHR_maintenance6 */
      .maintenance6 = true,

      /* VK_EXT_nested_command_buffer */
      .nestedCommandBuffer = true,
      .nestedCommandBufferRendering = true,
      .nestedCommandBufferSimultaneousUse = false,

      /* VK_KHR_cooperative_matrix */
      .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),

      /* VK_KHR_shader_maximal_reconvergence */
      .shaderMaximalReconvergence = true,

      /* VK_KHR_shader_subgroup_rotate */
      .shaderSubgroupRotate = true,
      .shaderSubgroupRotateClustered = true,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_attachment_feedback_loop_dynamic_state */
      .attachmentFeedbackLoopDynamicState = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_EXT_descriptor_buffer */
      .descriptorBuffer = true,
      .descriptorBufferCaptureReplay = true,
      .descriptorBufferImageLayoutIgnored = false,
      .descriptorBufferPushDescriptors = true,

      /* VK_EXT_map_memory_placed */
      .memoryMapPlaced = true,
      .memoryMapRangePlaced = false,
      .memoryUnmapReserve = true,

      /* VK_KHR_shader_quad_control */
      .shaderQuadControl = true,

#ifdef ANV_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif

      /* VK_KHR_video_maintenance1 */
      .videoMaintenance1 = true,

      /* VK_EXT_image_compression_control */
      .imageCompressionControl = true,

      /* VK_KHR_shader_float_controls2 */
      .shaderFloatControls2 = true,

      /* VK_EXT_legacy_vertex_attributes */
      .legacyVertexAttributes = true,

      /* VK_EXT_legacy_dithering */
      .legacyDithering = true,

      /* VK_MESA_image_alignment_control */
      .imageAlignmentControl = true,

      /* VK_KHR_maintenance7 */
      .maintenance7 = true,

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,

      /* VK_KHR_dynamic_rendering_local_read */
      .dynamicRenderingLocalRead = true,

      /* VK_EXT_pipeline_protected_access */
      .pipelineProtectedAccess = pdevice->has_protected_contexts,

      /* VK_EXT_host_image_copy */
      .hostImageCopy = true,
   };

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it. They seem to run fine without it, so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
}

#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256

static VkDeviceSize
anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
{
   VkDeviceSize ret = 0;

   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
      if (pdevice->memory.heaps[i].size > ret)
         ret = pdevice->memory.heaps[i].size;
   }

   return ret;
}

static void
get_properties_1_1(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
940 {
941 memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
942 memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
943 memset(p->deviceLUID, 0, VK_LUID_SIZE);
944 p->deviceNodeMask = 0;
945 p->deviceLUIDValid = false;
946
947 p->subgroupSize = BRW_SUBGROUP_SIZE;
948 VkShaderStageFlags scalar_stages = 0;
949 for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
950 scalar_stages |= mesa_to_vk_shader_stage(stage);
951 }
952 if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
953 scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
954 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
955 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
956 VK_SHADER_STAGE_MISS_BIT_KHR |
957 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
958 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
959 }
960 if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
961 scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
962 VK_SHADER_STAGE_MESH_BIT_EXT;
963 }
964 p->subgroupSupportedStages = scalar_stages;
965 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
966 VK_SUBGROUP_FEATURE_VOTE_BIT |
967 VK_SUBGROUP_FEATURE_BALLOT_BIT |
968 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
969 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
970 VK_SUBGROUP_FEATURE_QUAD_BIT |
971 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
972 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
973 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
974 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
975 p->subgroupQuadOperationsInAllStages = true;
976
977 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
978 p->maxMultiviewViewCount = 16;
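   /* Presumably dividing by the 16-view maximum above keeps
    * instanceIndex * viewCount within a 32-bit range; this rationale is an
    * inference, not taken from the HW docs.
    */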
   p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
   /* Our protected implementation is a memory encryption mechanism, it
    * shouldn't page fault, but it hangs the HW so in terms of user visibility
    * it's similar to a fault.
    */
   p->protectedNoFault = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors = 1024;

   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
      p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
                                        pdevice->memory.heaps[i].size);
   }
}

static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 4,
      .subminor = 0,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    *    "math - Extended Math Function
    *    [...]
    *    Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16 = false;
   p->shaderDenormPreserveFloat16 = true;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = true;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = true;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = true;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = true;
   p->shaderDenormPreserveFloat64 = true;
   p->shaderRoundingModeRTEFloat64 = true;
   p->shaderRoundingModeRTZFloat64 = true;
   p->shaderSignedZeroInfNanPreserveFloat64 = true;

   struct anv_descriptor_limits desc_limits;
   get_device_descriptor_limits(pdevice, &desc_limits);

   p->maxUpdateAfterBindDescriptorsInAllPools = desc_limits.max_resources;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = desc_limits.max_samplers;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = desc_limits.max_ubos;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = desc_limits.max_ssbos;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources = desc_limits.max_resources;
   p->maxDescriptorSetUpdateAfterBindSamplers = desc_limits.max_samplers;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = desc_limits.max_ubos;
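   /* MAX_DYNAMIC_BUFFERS is split evenly between dynamic uniform and
    * dynamic storage buffers below; the maintenance7 properties further
    * down report the combined MAX_DYNAMIC_BUFFERS totals.
    */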
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = desc_limits.max_ssbos;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindStorageImages = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                   VK_RESOLVE_MODE_AVERAGE_BIT |
                                   VK_RESOLVE_MODE_MIN_BIT |
                                   VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                     VK_RESOLVE_MODE_MIN_BIT |
                                     VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone = true;
   p->independentResolve = true;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}

static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   if (pdevice->info.ver >= 20)
      p->minSubgroupSize = 16;
   else
      p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                   VK_SHADER_STAGE_TASK_BIT_EXT |
                                   VK_SHADER_STAGE_MESH_BIT_EXT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
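   /* Worked example (an assumption about the intended worst case, but
    * consistent with the minTexelBufferOffsetAlignment comment further
    * down): a 4-component 32-bit texel such as VK_FORMAT_R32G32B32A32_SFLOAT
    * is 4 * 4 = 16 bytes, hence the 16-byte alignment below.
    */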
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces. It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

static void
get_properties(const struct anv_physical_device *pdevice,
               struct vk_properties *props)
{
   const struct intel_device_info *devinfo = &pdevice->info;

   const VkDeviceSize max_heap_size = anx_get_physical_device_max_heap_size(pdevice);

   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
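   /* Worked example (illustrative thread count, not a statement about any
    * particular SKU): with 64 max CS workgroup threads this is
    * MIN2(1024, 32 * 64) = 1024, i.e. the cap comes from the 1024 limit
    * rather than the thread count.
    */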

   const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
   const bool sparse_uses_trtt = pdevice->sparse_type == ANV_SPARSE_TYPE_TRTT;

   uint64_t sparse_addr_space_size =
      !has_sparse_or_fake ? 0 :
      sparse_uses_trtt ? pdevice->va.trtt.size :
      pdevice->va.high_heap.size;

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

#if DETECT_OS_ANDROID
   /* Used to fill struct VkPhysicalDevicePresentationPropertiesANDROID */
   uint64_t front_rendering_usage = 0;
   struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
   if (gralloc != NULL) {
      u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
      u_gralloc_destroy(&gralloc);
   }
#endif /* DETECT_OS_ANDROID */

   struct anv_descriptor_limits desc_limits;
   get_device_descriptor_limits(pdevice, &desc_limits);

   *props = (struct vk_properties) {
      .apiVersion = ANV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = pdevice->instance->force_vk_vendor != 0 ?
                  pdevice->instance->force_vk_vendor : 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = pdevice->info.has_local_mem ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Limits: */
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = sparse_addr_space_size,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = desc_limits.max_samplers,
      .maxPerStageDescriptorUniformBuffers = desc_limits.max_ubos,
      .maxPerStageDescriptorStorageBuffers = desc_limits.max_ssbos,
      .maxPerStageDescriptorSampledImages = desc_limits.max_images,
      .maxPerStageDescriptorStorageImages = desc_limits.max_images,
      .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources = desc_limits.max_resources,
      .maxDescriptorSetSamplers = desc_limits.max_samplers,
      .maxDescriptorSetUniformBuffers = desc_limits.max_ubos,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = desc_limits.max_ssbos,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = desc_limits.max_images,
      .maxDescriptorSetStorageImages = desc_limits.max_images,
      .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes = MAX_VES,
      .maxVertexInputBindings = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset = 2047,
      /* Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride = 4095,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 128,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + desc_limits.max_ssbos +
                                            desc_limits.max_images,
      .maxComputeSharedMemorySize = intel_device_info_get_max_slm_size(&pdevice->info),
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 13, /* We take a float? */
      .minMemoryMapAlignment = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment = 16,
      .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
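      /* 0.4375 = 7/16: with the 4 subPixelInterpolationOffsetBits below,
       * the offset granularity is 1/16, so the largest representable value
       * under +0.5 is 0.5 - 1/16 (an inference from the bit count, not a
       * PRM citation).
       */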
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 11),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
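      /* Nanoseconds per timestamp tick. As an illustration only (the
       * frequency varies by platform): a 12 MHz timestamp would give a
       * period of 1000000000.0 / 12000000 = ~83.33 ns.
       */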
      .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange = { 0.0, 8.0 },
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 128.0),
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,

      /* Sparse: */
      .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
      .sparseResidencyStandard2DMultisampleBlockShape = false,
      .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
      .sparseResidencyAlignedMipSize = false,
      .sparseResidencyNonResidentStrict = has_sparse_or_fake,

      /* VK_KHR_cooperative_matrix */
      .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,

      /* Vulkan 1.4 */
      .dynamicRenderingLocalReadDepthStencilAttachments = true,
      .dynamicRenderingLocalReadMultisampledAttachments = true,
   };

   snprintf(props->deviceName, sizeof(props->deviceName),
            "%s", pdevice->info.name);
   memcpy(props->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);

   get_properties_1_1(pdevice, props);
   get_properties_1_2(pdevice, props);
   get_properties_1_3(pdevice, props);

   /* VK_KHR_acceleration_structure */
   {
      props->maxGeometryCount = (1u << 24) - 1;
      props->maxInstanceCount = (1u << 24) - 1;
      props->maxPrimitiveCount = (1u << 29) - 1;
      props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
      props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
      props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
      props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
      props->minAccelerationStructureScratchOffsetAlignment = 64;
   }

   /* VK_KHR_compute_shader_derivatives */
   {
      props->meshAndTaskShaderDerivatives = pdevice->info.has_mesh_shading;
   }

   /* VK_KHR_fragment_shading_rate */
   {
      props->primitiveFragmentShadingRateWithMultipleViewports =
         pdevice->info.has_coarse_pixel_primitive_and_cb;
      props->layeredShadingRateAttachments =
         pdevice->info.has_coarse_pixel_primitive_and_cb;
      props->fragmentShadingRateNonTrivialCombinerOps =
         pdevice->info.has_coarse_pixel_primitive_and_cb;
      props->maxFragmentSize = (VkExtent2D) { 4, 4 };
      props->maxFragmentSizeAspectRatio =
         pdevice->info.has_coarse_pixel_primitive_and_cb ?
         2 : 4;
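      /* Follows from the values above and below: the 4x4 max fragment size
       * times 4 rasterization samples gives 64 coverage samples when
       * coarse-pixel primitive/CB is supported, or times 16 samples gives
       * 256 otherwise.
       */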
1404 props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1405 (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
      props->maxFragmentShadingRateRasterizationSamples =
         pdevice->info.has_coarse_pixel_primitive_and_cb ?
         VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
      props->fragmentShadingRateWithShaderDepthStencilWrites = false;
      props->fragmentShadingRateWithSampleMask = true;
      props->fragmentShadingRateWithShaderSampleMask = false;
      props->fragmentShadingRateWithConservativeRasterization = true;
      props->fragmentShadingRateWithFragmentShaderInterlock = true;
      props->fragmentShadingRateWithCustomSampleLocations = true;
      props->fragmentShadingRateStrictMultiplyCombiner = true;

      if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
         props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
         props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
         props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
      } else {
         /* Those must be 0 if attachmentFragmentShadingRate is not supported. */
         props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
         props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
         props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
      }
   }

   /* VK_KHR_maintenance5 */
   {
      props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
      props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
      props->depthStencilSwizzleOneSupport = true;
      props->polygonModePointSize = true;
      props->nonStrictSinglePixelWideLinesUseParallelogram = false;
      props->nonStrictWideLinesUseParallelogram = false;
   }

   /* VK_KHR_maintenance6 */
   {
      props->blockTexelViewCompatibleMultipleLayers = true;
      props->maxCombinedImageSamplerDescriptorCount = 3;
      props->fragmentShadingRateClampCombinerInputs = true;
   }

   /* VK_KHR_maintenance7 */
   {
      props->robustFragmentShadingRateAttachmentAccess = true;
      props->separateDepthStencilAttachmentAccess = true;
      props->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
   }

   /* VK_KHR_performance_query */
   {
      props->allowCommandBufferQueryCopies = false;
   }

   /* VK_KHR_push_descriptor */
   {
      props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
   }

   /* VK_KHR_ray_tracing_pipeline */
   {
      /* TODO */
      props->shaderGroupHandleSize = 32;
      props->maxRayRecursionDepth = 31;
      /* MemRay::hitGroupSRStride is 16 bits */
      props->maxShaderGroupStride = UINT16_MAX;
      /* MemRay::hitGroupSRBasePtr requires 16B alignment */
      props->shaderGroupBaseAlignment = 16;
      props->shaderGroupHandleAlignment = 16;
      props->shaderGroupHandleCaptureReplaySize = 32;
      props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
      props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
   }

   /* VK_KHR_vertex_attribute_divisor */
   {
      props->maxVertexAttribDivisor = UINT32_MAX / 16;
      props->supportsNonZeroFirstInstance = true;
   }

   /* VK_EXT_conservative_rasterization */
   {
      /* There's nothing in the public docs about this value as far as I can
       * tell. However, this is the value the Windows driver reports and
       * there's a comment on a rejected HW feature in the internal docs that
       * says:
       *
       *    "This is similar to conservative rasterization, except the
       *    primitive area is not extended by 1/512 and..."
       *
       * That's a bit of an obtuse reference but it's the best we've got for
       * now.
       */
      props->primitiveOverestimationSize = 1.0f / 512.0f;
      props->maxExtraPrimitiveOverestimationSize = 0.0f;
      props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
      props->primitiveUnderestimation = false;
      props->conservativePointAndLineRasterization = false;
      props->degenerateTrianglesRasterized = true;
      props->degenerateLinesRasterized = false;
      props->fullyCoveredFragmentShaderInputVariable = false;
      props->conservativeRasterizationPostDepthCoverage = true;
   }

   /* VK_EXT_custom_border_color */
   {
      props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
   }

   /* VK_EXT_descriptor_buffer */
   {
      props->combinedImageSamplerDescriptorSingleArray = true;
      props->bufferlessPushDescriptors = true;
      /* Written to the buffer before a timeline semaphore is signaled, but
       * after vkQueueSubmit().
       */
      props->allowSamplerImageViewPostSubmitCreation = true;
      props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;

      if (pdevice->uses_ex_bso) {
         props->maxDescriptorBufferBindings = MAX_SETS;
         props->maxResourceDescriptorBufferBindings = MAX_SETS;
         props->maxSamplerDescriptorBufferBindings = MAX_SETS;
         props->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
      } else {
         props->maxDescriptorBufferBindings = 3; /* resources, samplers, push (we don't care about push) */
         props->maxResourceDescriptorBufferBindings = 1;
         props->maxSamplerDescriptorBufferBindings = 1;
         props->maxEmbeddedImmutableSamplerBindings = 1;
      }
      props->maxEmbeddedImmutableSamplers = MAX_EMBEDDED_SAMPLERS;

      /* Storing a 64bit address */
      props->bufferCaptureReplayDescriptorDataSize = 8;
      props->imageCaptureReplayDescriptorDataSize = 8;
      /* Offset inside the reserved border color pool */
      props->samplerCaptureReplayDescriptorDataSize = 4;

      /* Not affected by replay */
      props->imageViewCaptureReplayDescriptorDataSize = 0;
      /* The acceleration structure virtual address backing is coming from a
       * buffer, so as long as that buffer is captured/replayed correctly we
       * should always get the same address.
       */
      props->accelerationStructureCaptureReplayDescriptorDataSize = 0;

      props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE;
      props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
                                                        ANV_SURFACE_STATE_SIZE);
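      /* Illustrative only, with hypothetical state sizes (the real values
       * are the ANV_*_STATE_SIZE defines): a 64-byte surface state plus a
       * 32-byte sampler state would give align(64 + 32, 64) = 128 bytes.
       */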
      props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->storageImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->uniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustUniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->storageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustStorageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->uniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustUniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->storageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustStorageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor);
      props->maxSamplerDescriptorBufferRange = pdevice->va.dynamic_visible_pool.size;
      props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice,
                                                                                       true);
      props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
      props->descriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
      props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
   }

   /* VK_EXT_extended_dynamic_state3 */
   {
      props->dynamicPrimitiveTopologyUnrestricted = true;
   }

   /* VK_EXT_external_memory_host */
   {
      props->minImportedHostPointerAlignment = 4096;
   }

   /* VK_EXT_graphics_pipeline_library */
   {
      props->graphicsPipelineLibraryFastLinking = true;
      props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
   }

   /* VK_EXT_host_image_copy */
   {
      static const VkImageLayout supported_layouts[] = {
         VK_IMAGE_LAYOUT_GENERAL,
         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
         VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
         VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
         VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
         VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
         VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
         VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR,
         VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
         VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR,
      };

      props->pCopySrcLayouts = (VkImageLayout *) supported_layouts;
      props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
      props->pCopyDstLayouts = (VkImageLayout *) supported_layouts;
      props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);

      /* This UUID essentially tells you whether you can share an optimally
       * tiled image with another driver. Most of the tiling decisions are
       * based on:
       *
       *    - device generation (different tilings based on generations)
       *    - device workarounds
       *    - driver build (as we implement workarounds or performance
       *      tunings, the tiling decision changes)
       *
       * So we're using a hash of the platform field + driver_build_sha1.
       *
       * Unfortunately there is a HW issue on SKL GT4 that makes it use some
       * different tilings sometimes (see isl_gfx7.c).
       */
      {
         struct mesa_sha1 sha1_ctx;
         uint8_t sha1[20];

         _mesa_sha1_init(&sha1_ctx);
         _mesa_sha1_update(&sha1_ctx, pdevice->driver_build_sha1,
                           sizeof(pdevice->driver_build_sha1));
         _mesa_sha1_update(&sha1_ctx, &pdevice->info.platform,
                           sizeof(pdevice->info.platform));
         if (pdevice->info.platform == INTEL_PLATFORM_SKL &&
             pdevice->info.gt == 4) {
            _mesa_sha1_update(&sha1_ctx, &pdevice->info.gt,
                              sizeof(pdevice->info.gt));
         }
         _mesa_sha1_final(&sha1_ctx, sha1);

         assert(ARRAY_SIZE(sha1) >= VK_UUID_SIZE);
         memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
      }

      /* Systems without ReBAR cannot map all memory types on the host, and
       * that affects the memory types an image can use for host memory
       * copies.
       *
       * Systems with compressed memory types also cannot expose all image
       * memory types for host image copies.
       */
      props->identicalMemoryTypeRequirements = pdevice->has_small_bar ||
                                               pdevice->memory.compressed_mem_types != 0;
   }

   /* VK_EXT_legacy_vertex_attributes */
   {
      props->nativeUnalignedPerformance = true;
   }

   /* VK_EXT_line_rasterization */
   {
      /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
       * Rules - Legacy Mode", it says the following:
       *
       *    "Note that the device divides a pixel into a 16x16 array of
       *    subpixels, referenced by their upper left corners."
       *
       * This is the only known reference in the PRMs to the subpixel
       * precision of line rasterization and a "16x16 array of subpixels"
       * implies 4 subpixel precision bits, since 16 = 2^4. Empirical testing
       * has shown that 4 subpixel precision bits apply to all line
       * rasterization types.
       */
      props->lineSubPixelPrecisionBits = 4;
   }

   /* VK_EXT_map_memory_placed */
   {
      props->minPlacedMemoryMapAlignment = 4096;
   }

   /* VK_EXT_mesh_shader */
   {
      /* Bounded by the maximum representable size in
       * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
       */
      const uint32_t max_slm_size = intel_device_info_get_max_slm_size(devinfo);

      /* Bounded by the maximum representable size in
       * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
       */
      const uint32_t max_workgroup_size = 1 << 10;

      /* 3DMESH_3D limitation. */
      const uint32_t max_threadgroup_count = 1 << 22;

      /* 3DMESH_3D limitation. */
      const uint32_t max_threadgroup_xyz = 65535;

      const uint32_t max_urb_size = 64 * 1024;

      props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
      props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
      props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
      props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;

      props->maxTaskWorkGroupInvocations = max_workgroup_size;
      props->maxTaskWorkGroupSize[0] = max_workgroup_size;
      props->maxTaskWorkGroupSize[1] = max_workgroup_size;
      props->maxTaskWorkGroupSize[2] = max_workgroup_size;

      /* TUE header with padding */
      const uint32_t task_payload_reserved = 32;

      props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
      props->maxTaskSharedMemorySize = max_slm_size;
      props->maxTaskPayloadAndSharedMemorySize =
         props->maxTaskPayloadSize +
         props->maxTaskSharedMemorySize;

      props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
      props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
      props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
      props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;

      props->maxMeshWorkGroupInvocations = max_workgroup_size;
      props->maxMeshWorkGroupSize[0] = max_workgroup_size;
      props->maxMeshWorkGroupSize[1] = max_workgroup_size;
      props->maxMeshWorkGroupSize[2] = max_workgroup_size;

      props->maxMeshSharedMemorySize = max_slm_size;
      props->maxMeshPayloadAndSharedMemorySize =
         props->maxTaskPayloadSize +
         props->maxMeshSharedMemorySize;

      /* Unfortunately the spec's formula for the max output size doesn't
       * match our hardware (because some per-primitive and per-vertex
       * attributes have alignment restrictions), so we have to advertise
       * the minimum value mandated by the spec to not overflow it.
       */
      props->maxMeshOutputPrimitives = 256;
      props->maxMeshOutputVertices = 256;

      /* NumPrim + Primitive Data List */
      const uint32_t max_indices_memory =
         ALIGN(sizeof(uint32_t) +
               sizeof(uint32_t) * props->maxMeshOutputVertices, 32);

      props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
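      /* With maxMeshOutputVertices = 256 the index data needs
       * ALIGN(4 + 4 * 256, 32) = 1056 bytes, so the expression above
       * evaluates to MIN2(65536 - 1056, 32768) = 32768.
       */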

      props->maxMeshPayloadAndOutputMemorySize =
         props->maxTaskPayloadSize +
         props->maxMeshOutputMemorySize;

      props->maxMeshOutputComponents = 128;

      /* RTAIndex is 11-bits wide */
      props->maxMeshOutputLayers = 1 << 11;

      props->maxMeshMultiviewViewCount = 1;

      /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
      props->meshOutputPerVertexGranularity = 8;
      /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
      props->meshOutputPerPrimitiveGranularity = 8;

      /* SIMD16 */
      props->maxPreferredTaskWorkGroupInvocations = 16;
      props->maxPreferredMeshWorkGroupInvocations = 16;

      props->prefersLocalInvocationVertexOutput = false;
      props->prefersLocalInvocationPrimitiveOutput = false;
      props->prefersCompactVertexOutput = false;
      props->prefersCompactPrimitiveOutput = false;

      /* Spec minimum values */
      assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
      assert(props->maxTaskWorkGroupCount[0] >= 65535);
      assert(props->maxTaskWorkGroupCount[1] >= 65535);
      assert(props->maxTaskWorkGroupCount[2] >= 65535);

      assert(props->maxTaskWorkGroupInvocations >= 128);
      assert(props->maxTaskWorkGroupSize[0] >= 128);
      assert(props->maxTaskWorkGroupSize[1] >= 128);
      assert(props->maxTaskWorkGroupSize[2] >= 128);

      assert(props->maxTaskPayloadSize >= 16384);
      assert(props->maxTaskSharedMemorySize >= 32768);
      assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);

      assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
      assert(props->maxMeshWorkGroupCount[0] >= 65535);
      assert(props->maxMeshWorkGroupCount[1] >= 65535);
      assert(props->maxMeshWorkGroupCount[2] >= 65535);

      assert(props->maxMeshWorkGroupInvocations >= 128);
      assert(props->maxMeshWorkGroupSize[0] >= 128);
      assert(props->maxMeshWorkGroupSize[1] >= 128);
      assert(props->maxMeshWorkGroupSize[2] >= 128);

      assert(props->maxMeshSharedMemorySize >= 28672);
      assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
      assert(props->maxMeshOutputMemorySize >= 32768);
      assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);

      assert(props->maxMeshOutputComponents >= 128);

      assert(props->maxMeshOutputVertices >= 256);
      assert(props->maxMeshOutputPrimitives >= 256);
      assert(props->maxMeshOutputLayers >= 8);
      assert(props->maxMeshMultiviewViewCount >= 1);
   }

   /* VK_EXT_multi_draw */
   {
      props->maxMultiDrawCount = 2048;
   }

   /* VK_EXT_nested_command_buffer */
   {
      props->maxCommandBufferNestingLevel = UINT32_MAX;
   }

   /* VK_EXT_pci_bus_info */
   {
      props->pciDomain = pdevice->info.pci_domain;
      props->pciBus = pdevice->info.pci_bus;
      props->pciDevice = pdevice->info.pci_dev;
      props->pciFunction = pdevice->info.pci_func;
   }

   /* VK_EXT_physical_device_drm */
   {
      props->drmHasPrimary = pdevice->has_master;
      props->drmPrimaryMajor = pdevice->master_major;
      props->drmPrimaryMinor = pdevice->master_minor;
      props->drmHasRender = pdevice->has_local;
      props->drmRenderMajor = pdevice->local_major;
      props->drmRenderMinor = pdevice->local_minor;
   }

   /* VK_EXT_pipeline_robustness */
   {
      props->defaultRobustnessStorageBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
      props->defaultRobustnessUniformBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
      props->defaultRobustnessVertexInputs =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
      props->defaultRobustnessImages =
         VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
   }

   /* VK_EXT_provoking_vertex */
   {
      props->provokingVertexModePerPipeline = true;
      props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
   }

   /* VK_EXT_robustness2 */
   {
      props->robustStorageBufferAccessSizeAlignment =
         ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
      props->robustUniformBufferAccessSizeAlignment =
         ANV_UBO_ALIGNMENT;
   }

   /* VK_EXT_sample_locations */
   {
      props->sampleLocationSampleCounts =
         isl_device_get_sample_counts(&pdevice->isl_dev);

      /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
      props->maxSampleLocationGridSize.width = 1;
      props->maxSampleLocationGridSize.height = 1;

      props->sampleLocationCoordinateRange[0] = 0;
      props->sampleLocationCoordinateRange[1] = 0.9375;
      props->sampleLocationSubPixelBits = 4;

      props->variableSampleLocations = true;
   }

   /* VK_EXT_shader_module_identifier */
   {
      STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                    sizeof(props->shaderModuleIdentifierAlgorithmUUID));
      memcpy(props->shaderModuleIdentifierAlgorithmUUID,
             vk_shaderModuleIdentifierAlgorithmUUID,
             sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   }

   /* VK_EXT_transform_feedback */
   {
      props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
      props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
      props->maxTransformFeedbackBufferSize = (1ull << 32);
      props->maxTransformFeedbackStreamDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataStride = 2048;
      props->transformFeedbackQueries = true;
      props->transformFeedbackStreamsLinesTriangles = false;
      props->transformFeedbackRasterizationStreamSelect = false;
      props->transformFeedbackDraw = true;
   }

   /* VK_ANDROID_native_buffer */
#if DETECT_OS_ANDROID
   {
      props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
   }
#endif /* DETECT_OS_ANDROID */

   /* VK_MESA_image_alignment_control */
   {
      /* We support 4k/64k tiling alignments on most platforms */
      props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
   }
}

static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->sys.region = &devinfo->mem.sram.mem;
   device->sys.size = devinfo->mem.sram.mappable.size;
   device->sys.available = devinfo->mem.sram.mappable.free;

   device->vram_mappable.region = &devinfo->mem.vram.mem;
   device->vram_mappable.size = devinfo->mem.vram.mappable.size;
   device->vram_mappable.available = devinfo->mem.vram.mappable.free;

   device->vram_non_mappable.region = &devinfo->mem.vram.mem;
   device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;

   return VK_SUCCESS;
}

static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   if (!intel_device_info_update_memory_info(&device->info, fd))
      return;

   const struct intel_device_info *devinfo = &device->info;
   device->sys.available = devinfo->mem.sram.mappable.free;
   device->vram_mappable.available = devinfo->mem.vram.mappable.free;
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
}

static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (anv_physical_device_has_vram(device)) {
      /* We can create 2 or 3 different heaps when we have local memory
       * support: the first heap has the local memory size, the second the
       * system memory size, and a third is added only when just part of the
       * vram is mappable to the host.
       */
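      /* For example, with hypothetical sizes (not from any real SKU): a
       * device with 8 GiB of vram of which only 256 MiB is host-mappable,
       * plus 16 GiB of system RAM, would get heap 0 = 7.75 GiB
       * (non-mappable vram), heap 1 = 16 GiB (system) and
       * heap 2 = 256 MiB (mappable vram).
       */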
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         /* If there is a vram_non_mappable, use that for the device only
          * heap. Otherwise use the vram_mappable.
          */
         .size = device->vram_non_mappable.size != 0 ?
                 device->vram_non_mappable.size : device->vram_mappable.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };
      /* Add an additional smaller vram mappable heap if we can't map all the
       * vram to the host.
       */
      if (device->vram_non_mappable.size > 0) {
         device->memory.heap_count++;
         device->memory.heaps[2] = (struct anv_memory_heap) {
            .size = device->vram_mappable.size,
            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
            .is_local_mem = true,
         };
      }
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };
   }

   switch (device->info.kmd_type) {
   case INTEL_KMD_TYPE_XE:
      result = anv_xe_physical_device_init_memory_types(device);
      break;
   case INTEL_KMD_TYPE_I915:
   default:
      result = anv_i915_physical_device_init_memory_types(device);
      break;
   }

   assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));

   if (result != VK_SUCCESS)
      return result;

   /* Some games (e.g., Total War: WARHAMMER III) sometimes seem to expect to
    * find memory types both with and without
    * VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT. So here we duplicate all our
    * memory types just to make these games happy.
    * This behavior is not spec-compliant as we still only have one heap that
    * is now inconsistent with some of the memory types, but the games don't
    * seem to care about it.
    */
   if (device->instance->anv_fake_nonlocal_memory &&
       !anv_physical_device_has_vram(device)) {
      const uint32_t base_types_count = device->memory.type_count;
      for (int i = 0; i < base_types_count; i++) {
         if (!(device->memory.types[i].propertyFlags &
               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
            continue;

         assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
         struct anv_memory_type *new_type =
            &device->memory.types[device->memory.type_count++];
         *new_type = device->memory.types[i];

         device->memory.types[i].propertyFlags &=
            ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
      }
   }

   /* Replicate all non-protected memory types for descriptor buffers because
    * we want to identify memory allocations to place them in the right
    * memory heap.
    */
   device->memory.default_buffer_mem_types =
      BITFIELD_RANGE(0, device->memory.type_count);
   device->memory.protected_mem_types = 0;
   device->memory.dynamic_visible_mem_types = 0;
   device->memory.compressed_mem_types = 0;

   const uint32_t base_types_count = device->memory.type_count;
   for (int i = 0; i < base_types_count; i++) {
      bool skip = false;

      if (device->memory.types[i].propertyFlags &
          VK_MEMORY_PROPERTY_PROTECTED_BIT) {
         device->memory.protected_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      if (device->memory.types[i].compressed) {
         device->memory.compressed_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      if (skip)
         continue;

      device->memory.dynamic_visible_mem_types |=
         BITFIELD_BIT(device->memory.type_count);

      assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
      struct anv_memory_type *new_type =
         &device->memory.types[device->memory.type_count++];
      *new_type = device->memory.types[i];
      new_type->dynamic_visible = true;
   }

   assert(device->memory.type_count <= VK_MAX_MEMORY_TYPES);

   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
         device->memory.need_flush = true;
#else
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "Memory configuration requires flushing, but it's "
                          "not implemented for this architecture");
#endif
   }

   return VK_SUCCESS;
}

static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short. It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   brw_device_sha1_update(&sha1_ctx, &device->info);
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[10];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
                               device->info.pci_device_id);
   assert(len == sizeof(renderer) - 2);

   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->vk.disk_cache) {
      disk_cache_destroy(device->vk.disk_cache);
      device->vk.disk_cache = NULL;
   }
#else
   assert(device->vk.disk_cache == NULL);
#endif
}

/* The ANV_QUEUE_OVERRIDE environment variable is a comma-separated list of
 * queue overrides.
 *
 * To override the number of queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *  * "v" is for video queues with no graphics support
 *  * "b" is for copy (blitter) queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count,
                           int *v_count, int *blit_count)
{
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   int v_override = -1;
   int blit_override = -1;
   const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");

   /* Override queues for Android HWUI that expects min 2 queues. */
#if DETECT_OS_ANDROID
   *gc_count = 2;
#endif

   if (env_ == NULL)
      return;

   char *env = strdup(env_);
   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "v=", 2) == 0) {
         v_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "b=", 2) == 0) {
         blit_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);
   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
   if (v_override >= 0)
      *v_count = v_override;
   if (blit_override >= 0)
      *blit_count = blit_override;
}

static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;
   VkQueueFlags sparse_flags = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED ?
                               VK_QUEUE_SPARSE_BINDING_BIT : 0;
   VkQueueFlags protected_flag = pdevice->has_protected_contexts ?
                                 VK_QUEUE_PROTECTED_BIT : 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int v_count =
         intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
      int g_count = 0;
      int c_count = 0;
      /* Not only does the Kernel need to have vm_control, it also needs a
       * new enough GuC and the interface to tell us so. This is implemented
       * in the common layer by is_guc_semaphore_functional() and results in
       * devinfo->engine_class_supported_count being adjusted, which we read
       * below.
       */
      const bool kernel_supports_non_render_engines = pdevice->has_vm_control;
      /* For now we're choosing to not expose non-render engines on i915.ko
       * even when the Kernel allows it. We have data suggesting it's not an
       * obvious win in terms of performance.
       */
      const bool can_use_non_render_engines =
         kernel_supports_non_render_engines &&
         pdevice->info.kmd_type == INTEL_KMD_TYPE_XE;

      if (can_use_non_render_engines) {
         c_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
      }

      int blit_count = 0;
      if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
         blit_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COPY];
      }

      anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count, &blit_count);

      enum intel_engine_class compute_class =
         pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE] &&
         c_count >= 1 ? INTEL_ENGINE_CLASS_COMPUTE :
                        INTEL_ENGINE_CLASS_RENDER;

      if (gc_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
            .supports_perf = true,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = c_count,
            .engine_class = compute_class,
         };
      }
      if (v_count > 0 && (pdevice->video_decode_enabled || pdevice->video_encode_enabled)) {
         /* HEVC support on Gfx9 is only available on VCS0. So limit the
          * number of video queues to the first VCS engine instance.
          *
          * We should be able to query HEVC support from the kernel using the
          * engine query uAPI, but this appears to be broken:
          * https://gitlab.freedesktop.org/drm/intel/-/issues/8832
          *
          * When this bug is fixed we should be able to check HEVC support to
          * determine the correct number of queues.
          */
         /* TODO: enable protected content on video queue */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = (pdevice->video_decode_enabled ? VK_QUEUE_VIDEO_DECODE_BIT_KHR : 0) |
                          (pdevice->video_encode_enabled ? VK_QUEUE_VIDEO_ENCODE_BIT_KHR : 0),
            .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
            .engine_class = INTEL_ENGINE_CLASS_VIDEO,
         };
      }
      if (blit_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_TRANSFER_BIT |
                          protected_flag,
            .queueCount = blit_count,
            .engine_class = INTEL_ENGINE_CLASS_COPY,
         };
      }
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT |
                       sparse_flags,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}

static VkResult
anv_physical_device_get_parameters(struct anv_physical_device *device)
{
   switch (device->info.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      return anv_i915_physical_device_get_parameters(device);
   case INTEL_KMD_TYPE_XE:
      return anv_xe_physical_device_get_parameters(device);
   default:
      unreachable("Missing");
      return VK_ERROR_UNKNOWN;
   }
}

VkResult
anv_physical_device_try_create(struct vk_instance *vk_instance,
                               struct _drmDevice *drm_device,
                               struct vk_physical_device **out)
{
   struct anv_instance *instance =
      container_of(vk_instance, struct anv_instance, vk);

   if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
       drm_device->bustype != DRM_BUS_PCI ||
       drm_device->deviceinfo.pci->vendor_id != 0x8086)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;

   process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (errno == ENOMEM) {
         return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "Unable to open device %s: out of memory", path);
      }
      return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Unable to open device %s: %m", path);
   }

   struct intel_device_info devinfo;
   if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   }

   if (devinfo.ver < 9) {
      /* Silently fail here, hasvk should pick up this device. */
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   } else if (devinfo.probe_forced) {
      /* If INTEL_FORCE_PROBE was used, then the user has opted-in for
       * unsupported device support. No need to print a warning message.
       */
   } else if (devinfo.ver > 20) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan not yet supported on %s", devinfo.name);
      goto fail_fd;
   }

   /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
    * need to always disable preemption:
    * - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
    * - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
    */
   if (devinfo.verx10 == 120)
      BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);

   if (!devinfo.has_context_isolation) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan requires context isolation for %s", devinfo.name);
      goto fail_fd;
   }

   struct anv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (device == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, NULL, NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_alloc;
   }
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);

   device->info = devinfo;

   device->local_fd = fd;
   result = anv_physical_device_get_parameters(device);
   if (result != VK_SUCCESS)
      goto fail_base;

   device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
                                              device->info.aperture_bytes;

   if (device->gtt_size < (4ULL << 30 /* GiB */)) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "GTT size too small: 0x%016"PRIx64, device->gtt_size);
      goto fail_base;
   }

   /* We currently only have the right bits for instructions in Gen12+. If the
    * kernel ever starts supporting that feature on previous generations,
    * we'll need to edit genxml prior to enabling here.
    */
   device->has_protected_contexts = device->info.ver >= 12 &&
      intel_gem_supports_protected_context(fd, device->info.kmd_type);

   /* Just pick one; they're all the same */
   device->has_astc_ldr =
      isl_format_supports_sampling(&device->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
   if (!device->has_astc_ldr &&
       driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
      device->emu_astc_ldr = true;
   if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
      device->flush_astc_ldr_void_extent_denorms =
         device->has_astc_ldr && !device->emu_astc_ldr;
   }
   device->disable_fcv = device->info.verx10 >= 125 ||
                         instance->disable_fcv;

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail_base;

   if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
      device->has_exec_timeline = false;

   device->has_cooperative_matrix =
      device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;

   unsigned st_idx = 0;

   device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
   if (!device->has_exec_timeline)
      device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
   device->sync_types[st_idx++] = &device->sync_syncobj_type;

   /* anv_bo_sync_type is only supported with i915 for now */
   if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
      if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
         device->sync_types[st_idx++] = &anv_bo_sync_type;

      if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
         device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
         device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
      }
   } else {
      assert(vk_sync_type_is_drm_syncobj(&device->sync_syncobj_type));
      assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
      assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
   }

   device->sync_types[st_idx++] = NULL;
   assert(st_idx <= ARRAY_SIZE(device->sync_types));
   device->vk.supported_sync_types = device->sync_types;

   device->vk.pipeline_cache_import_ops = anv_cache_import_ops;

   device->always_use_bindless =
      debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);

   device->use_call_secondary =
      !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);

   device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
   device->video_encode_enabled = debug_get_bool_option("ANV_VIDEO_ENCODE", false);

   device->uses_ex_bso = device->info.verx10 >= 125;

   /* For now always use indirect descriptors. We'll update this
    * to !uses_ex_bso when all the infrastructure is built up.
    */
   device->indirect_descriptors =
      !device->uses_ex_bso ||
      driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");

   device->alloc_aux_tt_mem =
      device->info.has_aux_map && device->info.verx10 >= 125;
   /* Check if we can read the GPU timestamp register from the CPU */
   uint64_t u64_ignore;
   device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
                                                               device->info.kmd_type,
                                                               &u64_ignore);

   device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;

   /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
   if (debug_get_bool_option("ANV_SPARSE", true)) {
      if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
         if (debug_get_bool_option("ANV_SPARSE_USE_TRTT", false))
            device->sparse_type = ANV_SPARSE_TYPE_TRTT;
         else
            device->sparse_type = ANV_SPARSE_TYPE_VM_BIND;
      } else {
         if (device->info.ver >= 12 && device->has_exec_timeline)
            device->sparse_type = ANV_SPARSE_TYPE_TRTT;
      }
   }
   if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) {
      if (instance->has_fake_sparse)
         device->sparse_type = ANV_SPARSE_TYPE_FAKE;
   }

   device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
      driQueryOptionb(&instance->dri_options, "always_flush_cache");

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_base;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
   device->compiler->use_bindless_sampler_offset = false;
   device->compiler->spilling_rate =
      driQueryOptioni(&instance->dri_options, "shader_spilling_rate");

   isl_device_init(&device->isl_dev, &device->info);
   device->isl_dev.buffer_length_in_aux_addr =
      !intel_needs_workaround(device->isl_dev.info, 14019708328);
   device->isl_dev.sampler_route_to_lsc =
      driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc");

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail_compiler;

   anv_physical_device_init_va_ranges(device);

   anv_physical_device_init_disk_cache(device);

   if (instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         /* fail if we don't have permission to even render on this device */
         if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
   device->master_fd = master_fd;

   device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
   intel_common_update_device_info(fd, &device->info);

   anv_physical_device_init_queue_families(device);

   anv_physical_device_init_perf(device, fd);

   /* Gather major/minor before WSI. */
   struct stat st;

   if (stat(primary_path, &st) == 0) {
      device->has_master = true;
      device->master_major = major(st.st_rdev);
      device->master_minor = minor(st.st_rdev);
   } else {
      device->has_master = false;
      device->master_major = 0;
      device->master_minor = 0;
   }

   if (stat(path, &st) == 0) {
      device->has_local = true;
      device->local_major = major(st.st_rdev);
      device->local_minor = minor(st.st_rdev);
   } else {
      device->has_local = false;
      device->local_major = 0;
      device->local_minor = 0;
   }

   device->has_small_bar = anv_physical_device_has_vram(device) &&
                           device->vram_non_mappable.size != 0;

   get_device_extensions(device, &device->vk.supported_extensions);
   get_features(device, &device->vk.supported_features);
   get_properties(device, &device->vk.properties);

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS)
      goto fail_perf;

   anv_measure_device_init(device);

   anv_genX(&device->info, init_physical_device_state)(device);

   *out = &device->vk;

   return VK_SUCCESS;

fail_perf:
   intel_perf_free(device->perf);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
fail_compiler:
   ralloc_free(device->compiler);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

void
anv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct anv_physical_device *device =
      container_of(vk_device, struct anv_physical_device, vk);

   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   intel_perf_free(device->perf);
   close(device->local_fd);
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const VkQueueFamilyProperties
get_anv_queue_family_properties_template(const struct anv_physical_device *device)
{
   /*
    * For Xe2+:
    * Bspec 60411: Timestamp register can hold a 64-bit value.
    *
    * Platforms < Xe2:
    * Bspec 46111: Timestamp register can hold only a 36-bit value.
    */
   const VkQueueFamilyProperties anv_queue_family_properties_template =
   {
      .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
      .minImageTransferGranularity = { 1, 1, 1 },
   };
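   /* Illustrative wrap-around math with a hypothetical 12 MHz timestamp
    * clock (not a statement about any particular SKU): a 36-bit timestamp
    * wraps after (1ull << 36) / 12000000 = ~5726 seconds, i.e. roughly
    * 95 minutes, while a 64-bit timestamp effectively never wraps.
    */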

   return anv_queue_family_properties_template;
}

static VkQueueFamilyProperties
anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
                                         uint32_t family_index)
{
   const struct anv_queue_family *family = &device->queue.families[family_index];
   VkQueueFamilyProperties properties =
      get_anv_queue_family_properties_template(device);

   properties.queueFlags = family->queueFlags;
   properties.queueCount = family->queueCount;
   return properties;
}

void anv_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2*                   pQueueFamilyProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
      struct anv_queue_family *queue_family = &pdevice->queue.families[i];
      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties =
            anv_device_physical_get_queue_properties(pdevice, i);

         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
               VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
                  (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;

               /* Deliberately sorted low to high */
               VkQueueGlobalPriorityKHR all_priorities[] = {
                  VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
               };

               uint32_t count = 0;
               for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
                  if (all_priorities[i] > pdevice->max_context_priority)
                     break;

                  properties->priorities[count++] = all_priorities[i];
               }
               properties->priorityCount = count;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
               VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
                  (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
               prop->queryResultStatusSupport = VK_TRUE;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
               VkQueueFamilyVideoPropertiesKHR *prop =
                  (VkQueueFamilyVideoPropertiesKHR *)ext;
               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
                  prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
                  if (pdevice->info.ver >= 12)
                     prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
               }

               if (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
               }
               break;
            }
            default:
               vk_debug_ignored_stype(ext->sType);
            }
         }
      }
   }
}

void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
      pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
         .propertyFlags = physical_device->memory.types[i].propertyFlags,
         .heapIndex = physical_device->memory.types[i].heapIndex,
      };
   }

   pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
   for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
      pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
         .size = physical_device->memory.heaps[i].size,
         .flags = physical_device->memory.heaps[i].flags,
      };
   }
}
2854
2855 static void
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   if (!device->vk.supported_extensions.EXT_memory_budget)
      return;

   anv_update_meminfo(device, device->local_fd);

   VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
   for (size_t i = 0; i < device->memory.heap_count; i++) {
      if (device->memory.heaps[i].is_local_mem) {
         total_vram_heaps_size += device->memory.heaps[i].size;
      } else {
         total_sys_heaps_size += device->memory.heaps[i].size;
      }
   }

   for (size_t i = 0; i < device->memory.heap_count; i++) {
      VkDeviceSize heap_size = device->memory.heaps[i].size;
      VkDeviceSize heap_used = device->memory.heaps[i].used;
      VkDeviceSize heap_budget, total_heaps_size;
      uint64_t mem_available = 0;

      if (device->memory.heaps[i].is_local_mem) {
         total_heaps_size = total_vram_heaps_size;
         if (device->vram_non_mappable.size > 0 && i == 0) {
            mem_available = device->vram_non_mappable.available;
         } else {
            mem_available = device->vram_mappable.available;
         }
      } else {
         total_heaps_size = total_sys_heaps_size;
         mem_available = MIN2(device->sys.available, total_heaps_size);
      }

      double heap_proportion = (double) heap_size / total_heaps_size;
      VkDeviceSize available_prop = mem_available * heap_proportion;

      /*
       * Let's not incite the app to starve the system: report at most 90% of
       * the available heap memory.
       */
      uint64_t heap_available = available_prop * 9 / 10;
      heap_budget = MIN2(heap_size, heap_used + heap_available);

      /*
       * Round down to the nearest MiB.
       */
      heap_budget &= ~((1ull << 20) - 1);
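
      /* Illustrative example (made-up numbers): with a single 16 GiB system
       * heap of which 4 GiB is used and the OS reports 8 GiB available,
       * heap_proportion = 1.0 and heap_available = 8 GiB * 9 / 10 = 7.2 GiB,
       * so heap_budget = MIN2(16 GiB, 4 GiB + 7.2 GiB) = 11.2 GiB, rounded
       * down to a whole MiB.
       */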

      /*
       * The heapBudget value must be non-zero for array elements less than
       * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
       * value must be less than or equal to VkMemoryHeap::size for each heap.
       */
      assert(0 < heap_budget && heap_budget <= heap_size);

      memoryBudget->heapUsage[i] = heap_used;
      memoryBudget->heapBudget[i] = heap_budget;
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
    */
   for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}

void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
         anv_get_memory_budget(physicalDevice, (void*)ext);
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

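   /* Per VK_EXT_sample_locations, a 1x1 maxSampleLocationGridSize means
    * custom sample locations can only be specified at per-pixel granularity,
    * while 0x0 marks sample counts with no custom-location support.
    */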
   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}

VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)
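   /* Usage: append_rate(~0, 1, 1) advertises a 1x1 fragment size as valid
    * for any sample count, while append_rate(ISL_SAMPLE_COUNT_1_BIT, 4, 2)
    * restricts a 4x2 coarse pixel to single-sampled rendering.
    */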

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   /* BSpec 47003: There are a number of restrictions on the sample count
    * based on the coarse pixel size.
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };
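   /* Reading the table: a coarse pixel covering N pixels (N = width *
    * height) is indexed by N and may only be combined with the listed
    * sample counts, e.g. a 2x2 coarse pixel (N = 4) supports at most 4x
    * MSAA.
    */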

   for (uint32_t x = 4; x >= 1; x /= 2) {
      for (uint32_t y = 4; y >= 1; y /= 2) {
         if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
            /* BSpec 47003:
             * "CPsize 1x4 and 4x1 are not supported"
             */
            if ((x == 1 && y == 4) || (x == 4 && y == 1))
               continue;

            /* For size {1, 1}, the sample count must be ~0.
             *
             * 4x2 is also a special case: it is limited to single-sampled
             * rendering.
             */
            if (x == 1 && y == 1)
               append_rate(~0, x, y);
            else if (x == 4 && y == 2)
               append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
            else
               append_rate(cp_size_sample_limits[x * y], x, y);
         } else {
            /* For size {1, 1}, the sample count must be ~0 */
            if (x == 1 && y == 1)
               append_rate(~0, x, y);
            else
               append_rate(sample_counts, x, y);
         }
      }
   }

#undef append_rate

   return vk_outarray_status(&out);
}

static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)
{
   switch (t) {
   case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
   case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
   case INTEL_CMAT_SINT32:  return VK_COMPONENT_TYPE_SINT32_KHR;
   case INTEL_CMAT_SINT8:   return VK_COMPONENT_TYPE_SINT8_KHR;
   case INTEL_CMAT_UINT32:  return VK_COMPONENT_TYPE_UINT32_KHR;
   case INTEL_CMAT_UINT8:   return VK_COMPONENT_TYPE_UINT8_KHR;
   }
   unreachable("invalid cooperative matrix component type in configuration");
}

static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)
{
   switch (scope) {
   case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
   default:
      unreachable("invalid cooperative matrix scope in configuration");
   }
}

VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pPropertyCount,
    VkCooperativeMatrixPropertiesKHR*           pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct intel_device_info *devinfo = &pdevice->info;

   assert(anv_has_cooperative_matrix(pdevice));

   VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);

   for (unsigned i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
      const struct intel_cooperative_matrix_configuration *cfg =
         &devinfo->cooperative_matrix_configurations[i];

      if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
         break;

      vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
         prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;

         prop->MSize = cfg->m;
         prop->NSize = cfg->n;
         prop->KSize = cfg->k;

         prop->AType = convert_component_type(cfg->a);
         prop->BType = convert_component_type(cfg->b);
         prop->CType = convert_component_type(cfg->c);
         prop->ResultType = convert_component_type(cfg->result);

         prop->saturatingAccumulation = VK_FALSE;
         prop->scope = convert_scope(cfg->scope);
      }

      /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
       *
       *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
       *    cooperative matrix operand must be present if and only if
       *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
       *    VK_TRUE.
       *
       * As a result, we have to advertise integer configs both with and
       * without this flag set.
       *
       * The DPAS instruction does not support the .sat modifier, so only
       * advertise the configurations when the DPAS would be lowered.
       *
       * FINISHME: It should be possible to do better than full lowering on
       * platforms that support DPAS. Emit a DPAS with a NULL accumulator
       * argument, then perform the correct sequence of saturating add
       * instructions.
       */
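      /* For example, an all-integer config (say, SINT8 inputs with a SINT32
       * accumulator) ends up advertised twice when DPAS is lowered: once
       * above with saturatingAccumulation = VK_FALSE and once below with
       * VK_TRUE.
       */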
      if (cfg->a != INTEL_CMAT_FLOAT16 &&
          (devinfo->verx10 < 125 || debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
         vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
            prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;

            prop->MSize = cfg->m;
            prop->NSize = cfg->n;
            prop->KSize = cfg->k;

            prop->AType = convert_component_type(cfg->a);
            prop->BType = convert_component_type(cfg->b);
            prop->CType = convert_component_type(cfg->c);
            prop->ResultType = convert_component_type(cfg->result);

            prop->saturatingAccumulation = VK_TRUE;
            prop->scope = convert_scope(cfg->scope);
         }
      }
   }

   return vk_outarray_status(&out);
}