/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"
#include "anv_api_version.h"
#include "anv_measure.h"

#include "i915/anv_device.h"
#include "xe/anv_device.h"

#include "common/intel_common.h"
#include "common/intel_uuid.h"

#include "perf/intel_perf.h"

#include "git_sha1.h"

#include "util/disk_cache.h"
#include "util/mesa-sha1.h"

#include <xf86drm.h>
#include <fcntl.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

/* This is probably far too big but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH 4096

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}
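
/* These perf messages only surface when the corresponding debug flag is set.
 * A minimal usage sketch (assuming the usual Mesa INTEL_DEBUG environment
 * variable and its "perf" flag):
 *
 *    INTEL_DEBUG=perf ./my_vulkan_app
 */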

struct anv_descriptor_limits {
   uint32_t max_ubos;
   uint32_t max_ssbos;
   uint32_t max_samplers;
   uint32_t max_images;
   uint32_t max_resources;
};

static void
get_device_descriptor_limits(const struct anv_physical_device *device,
                             struct anv_descriptor_limits *limits)
{
   memset(limits, 0, sizeof(*limits));

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan. The bindless surface handle in the extended message
    * descriptors is 20 bits on <= Gfx12.0, 26 bits on >= Gfx12.5 and it's an
    * index into the table of RENDER_SURFACE_STATE structs that starts at
    * bindless surface base address. On <= Gfx12.0, this means that we can
    * have at most 1M surface states allocated at any given time. Since most
    * image views take two descriptors, this means we have a limit of about
    * 500K image views. On >= Gfx12.5, we do not need 2 surfaces per
    * descriptor and we have 33M+ descriptors (we have a 2GB limit, due to
    * overlapping heaps for workarounds, but HW can do 4GB).
    *
    * However, on <= Gfx12.0, since we allocate surface states at
    * vkCreateImageView time, this means our limit is actually something on
    * the order of 500K image views allocated at any time. The actual limit
    * described by Vulkan, on the other hand, is a limit of how many you can
    * have in a descriptor set. Assuming anyone using 1M descriptors will be
    * using the same image view twice a bunch of times (or a bunch of null
    * descriptors), we can safely advertise a larger limit here.
    *
    * Here we use the size of the heap in which the descriptors are stored and
    * divide by the size of the descriptor to get a limit value.
    */
   const uint64_t descriptor_heap_size =
      device->indirect_descriptors ?
      device->va.indirect_descriptor_pool.size :
      device->va.bindless_surface_state_pool.size;

   const uint32_t buffer_descriptor_size =
      device->indirect_descriptors ?
      sizeof(struct anv_address_range_descriptor) :
      ANV_SURFACE_STATE_SIZE;
   const uint32_t image_descriptor_size =
      device->indirect_descriptors ?
      sizeof(struct anv_address_range_descriptor) :
      ANV_SURFACE_STATE_SIZE;
   const uint32_t sampler_descriptor_size =
      device->indirect_descriptors ?
      sizeof(struct anv_sampled_image_descriptor) :
      ANV_SAMPLER_STATE_SIZE;

   limits->max_ubos = descriptor_heap_size / buffer_descriptor_size;
   limits->max_ssbos = descriptor_heap_size / buffer_descriptor_size;
   limits->max_images = descriptor_heap_size / image_descriptor_size;
   limits->max_samplers = descriptor_heap_size / sampler_descriptor_size;

   limits->max_resources = UINT32_MAX;
   limits->max_resources = MIN2(limits->max_resources, limits->max_ubos);
   limits->max_resources = MIN2(limits->max_resources, limits->max_ssbos);
   limits->max_resources = MIN2(limits->max_resources, limits->max_images);
   limits->max_resources = MIN2(limits->max_resources, limits->max_samplers);
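
   /* Worked example with hypothetical numbers, just to illustrate the
    * division above: a 1 GiB descriptor heap holding 32-byte buffer
    * descriptors would yield 1073741824 / 32 = 33554432 (~33.5M) for
    * max_ubos/max_ssbos, which is the order of magnitude discussed in the
    * comment at the top of this function. The real heap and descriptor
    * sizes depend on the platform and descriptor model.
    */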
}

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = !device->instance->no_16bit,
      .KHR_acceleration_structure = rt_enabled,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = device->has_reg_timestamp,
      .KHR_compute_shader_derivatives = true,
      .KHR_cooperative_matrix = anv_has_cooperative_matrix(device),
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = has_syncobj_wait,
      .KHR_external_fence_fd = has_syncobj_wait,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->info.ver >= 11,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = device->max_context_priority >=
                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_maintenance7 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query =
         device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way. */
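      /* A sketch of the opt-in path (assuming the standard drirc XML
       * format): an entry like the following in a drirc file would flip
       * the driQueryOptionb() check below for a given executable:
       *
       *    <device driver="anv">
       *       <application name="My App" executable="my_app">
       *          <option name="vk_khr_present_wait" value="true" />
       *       </application>
       *    </device>
       */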
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = rt_enabled,
      .KHR_ray_tracing_maintenance1 = rt_enabled,
      .KHR_ray_tracing_pipeline = rt_enabled,
      .KHR_ray_tracing_position_fetch = rt_enabled,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = !device->instance->no_16bit,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_video_queue = device->video_decode_enabled || device->video_encode_enabled,
      .KHR_video_decode_queue = device->video_decode_enabled,
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
      .KHR_video_decode_av1 = device->info.ver >= 12 && VIDEO_CODEC_AV1DEC && device->video_decode_enabled,
      .KHR_video_encode_queue = device->video_encode_enabled,
      .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && device->video_encode_enabled,
      .KHR_video_encode_h265 = device->info.ver >= 12 && VIDEO_CODEC_H265ENC && device->video_encode_enabled,
      .KHR_video_maintenance1 = (device->video_decode_enabled &&
                                 (VIDEO_CODEC_H264DEC || VIDEO_CODEC_H265DEC)) ||
                                (device->video_encode_enabled &&
                                 (VIDEO_CODEC_H264ENC || VIDEO_CODEC_H265ENC)),
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_border_color_swizzle = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = device->has_reg_timestamp,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = true,
      .EXT_custom_border_color = true,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = device->info.ver >= 20,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = true,
      .EXT_fragment_shader_interlock = true,
      .EXT_global_priority = device->max_context_priority >=
                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query = device->max_context_priority >=
                                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_graphics_pipeline_library = !debug_get_bool_option("ANV_NO_GPL", false),
      .EXT_host_image_copy = !device->emu_astc_ldr,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      /* Because of Xe2 PAT-selected compression and the Vulkan spec
       * requirement to always return the same memory types for images with
       * the same properties, we can't support EXT_image_compression_control
       * on Xe2+.
       */
      .EXT_image_compression_control = device->instance->compression_control_enabled &&
                                       device->info.ver < 20,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = device->info.has_mmap_offset,
      /* Enable the extension only if we have support on both local & system
       * memory.
       */
      .EXT_memory_budget = (!device->info.has_local_mem ||
                            device->vram_mappable.available > 0) &&
                           device->sys.available,
      .EXT_mesh_shader = device->info.has_mesh_shading,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = rt_enabled,
      .EXT_pipeline_protected_access = device->has_protected_contexts,
      .EXT_pipeline_robustness = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = true,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,
      .EXT_ycbcr_2plane_444_formats = true,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_texture_gather_bias_lod = device->info.ver >= 20,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_performance_query = device->perf &&
                                 intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2 = true,
      .MESA_image_alignment_control = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;

   const bool mesh_shader =
      pdevice->vk.supported_extensions.EXT_mesh_shader;

   const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdevice->info.ver >= 12,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = pdevice->has_astc_ldr ||
                                    pdevice->emu_astc_ldr,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      /* Gfx12.5 has all the required formats supported in HW for typed
       * reads/writes.
       */
      .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.has_64bit_float ||
                       pdevice->instance->fp64_workaround_enabled,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = has_sparse_or_fake,
      .sparseBinding = has_sparse_or_fake,
      .sparseResidencyAliased = has_sparse_or_fake,
      .sparseResidencyBuffer = has_sparse_or_fake,
      .sparseResidencyImage2D = has_sparse_or_fake,
      .sparseResidencyImage3D = has_sparse_or_fake,
      .sparseResidency2Samples = has_sparse_or_fake,
      .sparseResidency4Samples = has_sparse_or_fake,
      .sparseResidency8Samples = has_sparse_or_fake &&
                                 pdevice->info.verx10 != 125,
      .sparseResidency16Samples = has_sparse_or_fake &&
                                  pdevice->info.verx10 != 125,
      .variableMultisampleRate = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit,
      .storagePushConstant16 = true,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = pdevice->has_protected_contexts,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .shaderBufferInt64Atomics = true,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = !pdevice->instance->no_16bit,
      .shaderInt8 = !pdevice->instance->no_16bit,

      .descriptorIndexing = true,
      .shaderInputAttachmentArrayDynamicIndexing = false,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = true,
      .shaderSampledImageArrayNonUniformIndexing = true,
      .shaderStorageBufferArrayNonUniformIndexing = true,
      .shaderStorageImageArrayNonUniformIndexing = true,
      .shaderInputAttachmentArrayNonUniformIndexing = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing = true,
      .shaderStorageTexelBufferArrayNonUniformIndexing = true,
      .descriptorBindingUniformBufferUpdateAfterBind = true,
      .descriptorBindingSampledImageUpdateAfterBind = true,
      .descriptorBindingStorageImageUpdateAfterBind = true,
      .descriptorBindingStorageBufferUpdateAfterBind = true,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
      .descriptorBindingUpdateUnusedWhilePending = true,
      .descriptorBindingPartiallyBound = true,
      .descriptorBindingVariableDescriptorCount = true,
      .runtimeDescriptorArray = true,

      .samplerFilterMinmax = true,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = true,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex = true,
      .shaderOutputLayer = true,
      .subgroupBroadcastDynamicId = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* Vulkan 1.4 */
      .pushDescriptor = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_KHR_acceleration_structure */
      .accelerationStructure = rt_enabled,
      .accelerationStructureCaptureReplay = false, /* TODO */
      .accelerationStructureIndirectBuild = false, /* TODO */
      .accelerationStructureHostCommands = false,
      .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = true,

      /* VK_EXT_image_sliced_view_of_3d */
      .imageSlicedViewOf3D = true,

      /* VK_KHR_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = true,
      .inheritedConditionalRendering = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat =
         pdevice->instance->custom_border_colors_without_format,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_EXT_fragment_shader_interlock */
      .fragmentShaderSampleInterlock = true,
      .fragmentShaderPixelInterlock = true,
      .fragmentShaderShadingRateInterlock = false,

      /* VK_EXT_global_priority_query */
      .globalPriorityQuery = true,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary =
         pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,

      /* VK_KHR_fragment_shading_rate */
      .pipelineFragmentShadingRate = true,
      .primitiveFragmentShadingRate =
         pdevice->info.has_coarse_pixel_primitive_and_cb,
      .attachmentFragmentShadingRate =
         pdevice->info.has_coarse_pixel_primitive_and_cb,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL. See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = pdevice->info.ver >= 10,
      .bresenhamLines = true,
      /* Support for Smooth lines with MSAA was removed on gfx11. From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:######## NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_NV_mesh_shader */
      .taskShaderNV = false,
      .meshShaderNV = false,

      /* VK_EXT_mesh_shader */
      .taskShader = mesh_shader,
      .meshShader = mesh_shader,
      .multiviewMeshShader = false,
      .primitiveFragmentShadingRateMeshShader = mesh_shader,
      .meshShaderQueries = mesh_shader,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_pipeline_library_group_handles */
      .pipelineLibraryGroupHandles = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_KHR_ray_query */
      .rayQuery = rt_enabled,

      /* VK_KHR_ray_tracing_maintenance1 */
      .rayTracingMaintenance1 = rt_enabled,
      .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,

      /* VK_KHR_ray_tracing_pipeline */
      .rayTracingPipeline = rt_enabled,
      .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
      .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
      .rayTracingPipelineTraceRaysIndirect = rt_enabled,
      .rayTraversalPrimitiveCulling = rt_enabled,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_replicated_composites */
      .shaderReplicatedComposites = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics = true,
      .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd = pdevice->info.ver >= 20,
      .shaderSharedFloat32Atomics = true,
      .shaderSharedFloat32AtomicAdd = false,
      .shaderSharedFloat64Atomics = false,
      .shaderSharedFloat64AtomicAdd = false,
      .shaderImageFloat32Atomics = true,
      .shaderImageFloat32AtomicAdd = pdevice->info.ver >= 20,
      .sparseImageFloat32Atomics = false,
      .sparseImageFloat32AtomicAdd = false,

      /* VK_EXT_shader_atomic_float2 */
      .shaderBufferFloat16Atomics = pdevice->info.has_lsc,
      .shaderBufferFloat16AtomicAdd = false,
      .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
      .shaderBufferFloat32AtomicMinMax = true,
      .shaderBufferFloat64AtomicMinMax =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc &&
         pdevice->info.ver < 20,
      .shaderSharedFloat16Atomics = pdevice->info.has_lsc,
      .shaderSharedFloat16AtomicAdd = false,
      .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
      .shaderSharedFloat32AtomicMinMax = true,
      .shaderSharedFloat64AtomicMinMax = false,
      .shaderImageFloat32AtomicMinMax = false,
      .sparseImageFloat32AtomicMinMax = false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_KHR_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_ycbcr_2plane_444_formats */
      .ycbcr2plane444Formats = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = true,

      /* VK_EXT_extended_dynamic_state3 */
      .extendedDynamicState3PolygonMode = true,
      .extendedDynamicState3TessellationDomainOrigin = true,
      .extendedDynamicState3RasterizationStream = true,
      .extendedDynamicState3LineStippleEnable = true,
      .extendedDynamicState3LineRasterizationMode = true,
      .extendedDynamicState3LogicOpEnable = true,
      .extendedDynamicState3AlphaToOneEnable = true,
      .extendedDynamicState3DepthClipEnable = true,
      .extendedDynamicState3DepthClampEnable = true,
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
      .extendedDynamicState3ProvokingVertexMode = true,
      .extendedDynamicState3ColorBlendEnable = true,
      .extendedDynamicState3ColorWriteMask = true,
      .extendedDynamicState3ColorBlendEquation = true,
      .extendedDynamicState3SampleLocationsEnable = true,
      .extendedDynamicState3SampleMask = true,
      .extendedDynamicState3ConservativeRasterizationMode = true,
      .extendedDynamicState3AlphaToCoverageEnable = true,
      .extendedDynamicState3RasterizationSamples = true,

      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
      .extendedDynamicState3ViewportWScalingEnable = false,
      .extendedDynamicState3ViewportSwizzle = false,
      .extendedDynamicState3ShadingRateImageEnable = false,
      .extendedDynamicState3CoverageToColorEnable = false,
      .extendedDynamicState3CoverageToColorLocation = false,
      .extendedDynamicState3CoverageModulationMode = false,
      .extendedDynamicState3CoverageModulationTableEnable = false,
      .extendedDynamicState3CoverageModulationTable = false,
      .extendedDynamicState3CoverageReductionMode = false,
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
      .extendedDynamicState3ColorBlendAdvanced = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clamp_control */
      .depthClampControl = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_EXT_vertex_input_dynamic_state */
      .vertexInputDynamicState = true,

      /* VK_KHR_ray_tracing_position_fetch */
      .rayTracingPositionFetch = rt_enabled,

      /* VK_EXT_dynamic_rendering_unused_attachments */
      .dynamicRenderingUnusedAttachments = true,

      /* VK_EXT_depth_bias_control */
      .depthBiasControl = true,
      .floatRepresentation = true,
      .leastRepresentableValueForceUnormRepresentation = false,
      .depthBiasExact = true,

      /* VK_EXT_pipeline_robustness */
      .pipelineRobustness = true,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

      /* VK_KHR_maintenance6 */
      .maintenance6 = true,

      /* VK_EXT_nested_command_buffer */
      .nestedCommandBuffer = true,
      .nestedCommandBufferRendering = true,
      .nestedCommandBufferSimultaneousUse = false,

      /* VK_KHR_cooperative_matrix */
      .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),

      /* VK_KHR_shader_maximal_reconvergence */
      .shaderMaximalReconvergence = true,

      /* VK_KHR_shader_subgroup_rotate */
      .shaderSubgroupRotate = true,
      .shaderSubgroupRotateClustered = true,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_attachment_feedback_loop_dynamic_state */
      .attachmentFeedbackLoopDynamicState = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_EXT_descriptor_buffer */
      .descriptorBuffer = true,
      .descriptorBufferCaptureReplay = true,
      .descriptorBufferImageLayoutIgnored = false,
      .descriptorBufferPushDescriptors = true,

      /* VK_EXT_map_memory_placed */
      .memoryMapPlaced = true,
      .memoryMapRangePlaced = false,
      .memoryUnmapReserve = true,

      /* VK_KHR_shader_quad_control */
      .shaderQuadControl = true,

#ifdef ANV_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif

      /* VK_KHR_video_maintenance1 */
      .videoMaintenance1 = true,

      /* VK_EXT_image_compression_control */
      .imageCompressionControl = true,

      /* VK_KHR_shader_float_controls2 */
      .shaderFloatControls2 = true,

      /* VK_EXT_legacy_vertex_attributes */
      .legacyVertexAttributes = true,

      /* VK_EXT_legacy_dithering */
      .legacyDithering = true,

      /* VK_MESA_image_alignment_control */
      .imageAlignmentControl = true,

      /* VK_KHR_maintenance7 */
      .maintenance7 = true,

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,

      /* VK_KHR_dynamic_rendering_local_read */
      .dynamicRenderingLocalRead = true,

      /* VK_EXT_pipeline_protected_access */
      .pipelineProtectedAccess = pdevice->has_protected_contexts,

      /* VK_EXT_host_image_copy */
      .hostImageCopy = true,
   };

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it. They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
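
   /* For reference: engine_name here originates from the application's
    * VkApplicationInfo, so an idTech title would have created its instance
    * with something like the (hypothetical) snippet below:
    *
    *    const VkApplicationInfo app = {
    *       .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
    *       .pEngineName = "idTech",
    *    };
    */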
}

#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256

static VkDeviceSize
anv_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
{
   VkDeviceSize ret = 0;

   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
      if (pdevice->memory.heaps[i].size > ret)
         ret = pdevice->memory.heaps[i].size;
   }

   return ret;
}

static void
get_properties_1_1(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = BRW_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
      scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                       VK_SHADER_STAGE_MISS_BIT_KHR |
                       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
   }
   if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
      scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
                       VK_SHADER_STAGE_MESH_BIT_EXT;
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                    VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount = 16;
   p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
   /* Our protected implementation is a memory encryption mechanism, it
    * shouldn't page fault, but it hangs the HW so in terms of user visibility
    * it's similar to a fault.
    */
   p->protectedNoFault = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors = 1024;

   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
      p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
                                        pdevice->memory.heaps[i].size);
   }
}

static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 4,
      .subminor = 0,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    *    "math - Extended Math Function
    *    [...]
    *    Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16 = false;
   p->shaderDenormPreserveFloat16 = true;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = true;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = true;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = true;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = true;
   p->shaderDenormPreserveFloat64 = true;
   p->shaderRoundingModeRTEFloat64 = true;
   p->shaderRoundingModeRTZFloat64 = true;
   p->shaderSignedZeroInfNanPreserveFloat64 = true;

   struct anv_descriptor_limits desc_limits;
   get_device_descriptor_limits(pdevice, &desc_limits);

   p->maxUpdateAfterBindDescriptorsInAllPools = desc_limits.max_resources;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = desc_limits.max_samplers;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = desc_limits.max_ubos;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = desc_limits.max_ssbos;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = desc_limits.max_images;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources = desc_limits.max_resources;
   p->maxDescriptorSetUpdateAfterBindSamplers = desc_limits.max_samplers;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = desc_limits.max_ubos;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = desc_limits.max_ssbos;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindStorageImages = desc_limits.max_images;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                   VK_RESOLVE_MODE_AVERAGE_BIT |
                                   VK_RESOLVE_MODE_MIN_BIT |
                                   VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                     VK_RESOLVE_MODE_MIN_BIT |
                                     VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone = true;
   p->independentResolve = true;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}

static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
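   /* An editorial note on the check below: Gfx20+ (Xe2) is assumed here to
    * have dropped SIMD8 dispatch, so the smallest subgroup size the compiler
    * can produce on those platforms is 16; older platforms can still go down
    * to SIMD8.
    */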
   if (pdevice->info.ver >= 20)
      p->minSubgroupSize = 16;
   else
      p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                   VK_SHADER_STAGE_TASK_BIT_EXT |
                                   VK_SHADER_STAGE_MESH_BIT_EXT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces. It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

static void
get_properties(const struct anv_physical_device *pdevice,
               struct vk_properties *props)
{
   const struct intel_device_info *devinfo = &pdevice->info;

   const VkDeviceSize max_heap_size = anv_get_physical_device_max_heap_size(pdevice);

   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
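   /* Worked example (hypothetical thread count): with 64 compute threads at
    * SIMD32, 32 * 64 = 2048, so the MIN2() above clamps the advertised
    * maximum workgroup size to 1024 invocations.
    */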

   const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
   const bool sparse_uses_trtt = pdevice->sparse_type == ANV_SPARSE_TYPE_TRTT;

   uint64_t sparse_addr_space_size =
      !has_sparse_or_fake ? 0 :
      sparse_uses_trtt ? pdevice->va.trtt.size :
      pdevice->va.high_heap.size;

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

#if DETECT_OS_ANDROID
   /* Used to fill struct VkPhysicalDevicePresentationPropertiesANDROID */
   uint64_t front_rendering_usage = 0;
   struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
   if (gralloc != NULL) {
      u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
      u_gralloc_destroy(&gralloc);
   }
#endif /* DETECT_OS_ANDROID */

   struct anv_descriptor_limits desc_limits;
   get_device_descriptor_limits(pdevice, &desc_limits);

   *props = (struct vk_properties) {
      .apiVersion = ANV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = pdevice->instance->force_vk_vendor != 0 ?
                  pdevice->instance->force_vk_vendor : 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = pdevice->info.has_local_mem ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Limits: */
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = sparse_addr_space_size,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = desc_limits.max_samplers,
      .maxPerStageDescriptorUniformBuffers = desc_limits.max_ubos,
      .maxPerStageDescriptorStorageBuffers = desc_limits.max_ssbos,
      .maxPerStageDescriptorSampledImages = desc_limits.max_images,
      .maxPerStageDescriptorStorageImages = desc_limits.max_images,
      .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources = desc_limits.max_resources,
      .maxDescriptorSetSamplers = desc_limits.max_samplers,
      .maxDescriptorSetUniformBuffers = desc_limits.max_ubos,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = desc_limits.max_ssbos,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = desc_limits.max_images,
      .maxDescriptorSetStorageImages = desc_limits.max_images,
      .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes = MAX_VES,
      .maxVertexInputBindings = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset = 2047,
      /* Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride = 4095,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 128,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + desc_limits.max_ssbos +
                                            desc_limits.max_images,
      .maxComputeSharedMemorySize = intel_device_info_get_max_slm_size(&pdevice->info),
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 13, /* We take a float? */
      .minMemoryMapAlignment = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment = 16,
      .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 11),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
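      /* Worked example (hypothetical clock): a 12 MHz timestamp frequency
       * would give 1e9 / 12000000 ≈ 83.33 ns per timestamp tick below.
       */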
1332 .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
1333 .maxClipDistances = 8,
1334 .maxCullDistances = 8,
1335 .maxCombinedClipAndCullDistances = 8,
1336 .discreteQueuePriorities = 2,
1337 .pointSizeRange = { 0.125, 255.875 },
1338 /* While SKL and up support much wider lines than we are setting here,
1339 * in practice we run into conformance issues if we go past this limit.
1340 * Since the Windows driver does the same, it's probably fair to assume
1341 * that no one needs more than this.
1342 */
1343 .lineWidthRange = { 0.0, 8.0 },
1344 .pointSizeGranularity = (1.0 / 8.0),
1345 .lineWidthGranularity = (1.0 / 128.0),
1346 .strictLines = false,
1347 .standardSampleLocations = true,
1348 .optimalBufferCopyOffsetAlignment = 128,
1349 .optimalBufferCopyRowPitchAlignment = 128,
1350 .nonCoherentAtomSize = 64,
1351
1352 /* Sparse: */
1353 .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
1354 .sparseResidencyStandard2DMultisampleBlockShape = false,
1355 .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
1356 .sparseResidencyAlignedMipSize = false,
1357 .sparseResidencyNonResidentStrict = has_sparse_or_fake,
1358
1359 /* VK_KHR_cooperative_matrix */
1360 .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1361
1362 /* Vulkan 1.4 */
1363 .dynamicRenderingLocalReadDepthStencilAttachments = true,
1364 .dynamicRenderingLocalReadMultisampledAttachments = true,
1365 };
1366
1367 snprintf(props->deviceName, sizeof(props->deviceName),
1368 "%s", pdevice->info.name);
1369 memcpy(props->pipelineCacheUUID,
1370 pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1371
1372 get_properties_1_1(pdevice, props);
1373 get_properties_1_2(pdevice, props);
1374 get_properties_1_3(pdevice, props);
1375
1376 /* VK_KHR_acceleration_structure */
1377 {
1378 props->maxGeometryCount = (1u << 24) - 1;
1379 props->maxInstanceCount = (1u << 24) - 1;
1380 props->maxPrimitiveCount = (1u << 29) - 1;
1381 props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
1382 props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
1383 props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
1384 props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
1385 props->minAccelerationStructureScratchOffsetAlignment = 64;
1386 }
1387
1388 /* VK_KHR_compute_shader_derivatives */
1389 {
1390 props->meshAndTaskShaderDerivatives = pdevice->info.has_mesh_shading;
1391 }
1392
1393 /* VK_KHR_fragment_shading_rate */
1394 {
1395 props->primitiveFragmentShadingRateWithMultipleViewports =
1396 pdevice->info.has_coarse_pixel_primitive_and_cb;
1397 props->layeredShadingRateAttachments =
1398 pdevice->info.has_coarse_pixel_primitive_and_cb;
1399 props->fragmentShadingRateNonTrivialCombinerOps =
1400 pdevice->info.has_coarse_pixel_primitive_and_cb;
1401 props->maxFragmentSize = (VkExtent2D) { 4, 4 };
1402 props->maxFragmentSizeAspectRatio =
1403 pdevice->info.has_coarse_pixel_primitive_and_cb ?
1404 2 : 4;
1405 props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1406 (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
1407 props->maxFragmentShadingRateRasterizationSamples =
1408 pdevice->info.has_coarse_pixel_primitive_and_cb ?
1409 VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
1410 props->fragmentShadingRateWithShaderDepthStencilWrites = false;
1411 props->fragmentShadingRateWithSampleMask = true;
1412 props->fragmentShadingRateWithShaderSampleMask = false;
1413 props->fragmentShadingRateWithConservativeRasterization = true;
1414 props->fragmentShadingRateWithFragmentShaderInterlock = true;
1415 props->fragmentShadingRateWithCustomSampleLocations = true;
1416 props->fragmentShadingRateStrictMultiplyCombiner = true;
1417
1418 if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
1419 props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1420 props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1421 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1422 } else {
1423 /* These must be 0 if attachmentFragmentShadingRate is not supported. */
1424 props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1425 props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1426 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
1427 }
1428 }
1429
1430 /* VK_KHR_maintenance5 */
1431 {
1432 props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1433 props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1434 props->depthStencilSwizzleOneSupport = true;
1435 props->polygonModePointSize = true;
1436 props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1437 props->nonStrictWideLinesUseParallelogram = false;
1438 }
1439
1440 /* VK_KHR_maintenance6 */
1441 {
1442 props->blockTexelViewCompatibleMultipleLayers = true;
1443 props->maxCombinedImageSamplerDescriptorCount = 3;
1444 props->fragmentShadingRateClampCombinerInputs = true;
1445 }
1446
1447 /* VK_KHR_maintenance7 */
1448 {
1449 props->robustFragmentShadingRateAttachmentAccess = true;
1450 props->separateDepthStencilAttachmentAccess = true;
1451 props->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1452 props->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1453 props->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1454 props->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1455 props->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1456 props->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1457 }
1458
1459 /* VK_KHR_performance_query */
1460 {
1461 props->allowCommandBufferQueryCopies = false;
1462 }
1463
1464 /* VK_KHR_push_descriptor */
1465 {
1466 props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1467 }
1468
1469 /* VK_KHR_ray_tracing_pipeline */
1470 {
1471 /* TODO */
1472 props->shaderGroupHandleSize = 32;
1473 props->maxRayRecursionDepth = 31;
1474 /* MemRay::hitGroupSRStride is 16 bits */
1475 props->maxShaderGroupStride = UINT16_MAX;
1476 /* MemRay::hitGroupSRBasePtr requires 16B alignment */
1477 props->shaderGroupBaseAlignment = 16;
1478 props->shaderGroupHandleAlignment = 16;
1479 props->shaderGroupHandleCaptureReplaySize = 32;
1480 props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
1481 props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
1482 }
1483
1484 /* VK_KHR_vertex_attribute_divisor */
1485 {
1486 props->maxVertexAttribDivisor = UINT32_MAX / 16;
1487 props->supportsNonZeroFirstInstance = true;
1488 }
1489
1490 /* VK_EXT_conservative_rasterization */
1491 {
1492 /* There's nothing in the public docs about this value as far as I can
1493 * tell. However, this is the value the Windows driver reports and
1494 * there's a comment on a rejected HW feature in the internal docs that
1495 * says:
1496 *
1497 * "This is similar to conservative rasterization, except the
1498 * primitive area is not extended by 1/512 and..."
1499 *
1500 * That's a bit of an obtuse reference but it's the best we've got for
1501 * now.
1502 */
1503 props->primitiveOverestimationSize = 1.0f / 512.0f;
1504 props->maxExtraPrimitiveOverestimationSize = 0.0f;
1505 props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
1506 props->primitiveUnderestimation = false;
1507 props->conservativePointAndLineRasterization = false;
1508 props->degenerateTrianglesRasterized = true;
1509 props->degenerateLinesRasterized = false;
1510 props->fullyCoveredFragmentShaderInputVariable = false;
1511 props->conservativeRasterizationPostDepthCoverage = true;
1512 }
1513
1514 /* VK_EXT_custom_border_color */
1515 {
1516 props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1517 }
1518
1519 /* VK_EXT_descriptor_buffer */
1520 {
1521 props->combinedImageSamplerDescriptorSingleArray = true;
1522 props->bufferlessPushDescriptors = true;
1523 /* Written to the buffer before a timeline semaphore is signaled, but
1524 * after vkQueueSubmit().
1525 */
1526 props->allowSamplerImageViewPostSubmitCreation = true;
1527 props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;
1528
1529 if (pdevice->uses_ex_bso) {
1530 props->maxDescriptorBufferBindings = MAX_SETS;
1531 props->maxResourceDescriptorBufferBindings = MAX_SETS;
1532 props->maxSamplerDescriptorBufferBindings = MAX_SETS;
1533 props->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1534 } else {
1535 props->maxDescriptorBufferBindings = 3; /* resources, samplers, push (we don't care about push) */
1536 props->maxResourceDescriptorBufferBindings = 1;
1537 props->maxSamplerDescriptorBufferBindings = 1;
1538 props->maxEmbeddedImmutableSamplerBindings = 1;
1539 }
1540 props->maxEmbeddedImmutableSamplers = MAX_EMBEDDED_SAMPLERS;
1541
1542 /* Storing a 64bit address */
1543 props->bufferCaptureReplayDescriptorDataSize = 8;
1544 props->imageCaptureReplayDescriptorDataSize = 8;
1545 /* Offset inside the reserved border color pool */
1546 props->samplerCaptureReplayDescriptorDataSize = 4;
1547
1548 /* Not affected by replay */
1549 props->imageViewCaptureReplayDescriptorDataSize = 0;
1550 /* The acceleration structure virtual address backing is coming from a
1551 * buffer, so as long as that buffer is captured/replayed correctly we
1552 * should always get the same address.
1553 */
1554 props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1555
1556 props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE;
1557 props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
1558 ANV_SURFACE_STATE_SIZE);
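      /* Illustrative only; the real values are whatever the ANV_*_STATE_SIZE
       * constants are on this build. If, hypothetically, ANV_SURFACE_STATE_SIZE
       * were 64 and ANV_SAMPLER_STATE_SIZE were 32, the combined descriptor
       * would be align(64 + 32, 64) = 128 bytes, padded so the surface state
       * of the next array element stays surface-state aligned.
       */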
1559 props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1560 props->storageImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1561 props->uniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1562 props->robustUniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1563 props->storageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1564 props->robustStorageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1565 props->uniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1566 props->robustUniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1567 props->storageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1568 props->robustStorageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1569 props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE;
1570 props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor);
1571 props->maxSamplerDescriptorBufferRange = pdevice->va.dynamic_visible_pool.size;
1572 props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice,
1573 true);
1574 props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1575 props->descriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1576 props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1577 }
1578
1579 /* VK_EXT_extended_dynamic_state3 */
1580 {
1581 props->dynamicPrimitiveTopologyUnrestricted = true;
1582 }
1583
1584 /* VK_EXT_external_memory_host */
1585 {
1586 props->minImportedHostPointerAlignment = 4096;
1587 }
1588
1589 /* VK_EXT_graphics_pipeline_library */
1590 {
1591 props->graphicsPipelineLibraryFastLinking = true;
1592 props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1593 }
1594
1595 /* VK_EXT_host_image_copy */
1596 {
1597 static const VkImageLayout supported_layouts[] = {
1598 VK_IMAGE_LAYOUT_GENERAL,
1599 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1600 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1601 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1602 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1603 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1604 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1605 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1606 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1607 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1608 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1609 VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1610 VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1611 VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1612 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1613 VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR,
1614 VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1615 VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR,
1616 };
1617
1618 props->pCopySrcLayouts = (VkImageLayout *) supported_layouts;
1619 props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1620 props->pCopyDstLayouts = (VkImageLayout *) supported_layouts;
1621 props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1622
1623 /* This UUID essentially tells you whether you can share an optimally
1624 * tiled image with another driver. Most of the tiling decisions are based on:
1625 *
1626 * - device generation (different tilings based on generations)
1627 * - device workarounds
1628 * - driver build (as we implement workarounds or performance tunings,
1629 * the tiling decisions change)
1630 *
1631 * So we're using a hash of the platform field + driver_build_sha1.
1632 *
1633 * Unfortunately there is a HW issue on SKL GT4 that makes it use some
1634 * different tilings sometimes (see isl_gfx7.c).
1635 */
1636 {
1637 struct mesa_sha1 sha1_ctx;
1638 uint8_t sha1[20];
1639
1640 _mesa_sha1_init(&sha1_ctx);
1641 _mesa_sha1_update(&sha1_ctx, pdevice->driver_build_sha1,
1642 sizeof(pdevice->driver_build_sha1));
1643 _mesa_sha1_update(&sha1_ctx, &pdevice->info.platform,
1644 sizeof(pdevice->info.platform));
1645 if (pdevice->info.platform == INTEL_PLATFORM_SKL &&
1646 pdevice->info.gt == 4) {
1647 _mesa_sha1_update(&sha1_ctx, &pdevice->info.gt,
1648 sizeof(pdevice->info.gt));
1649 }
1650 _mesa_sha1_final(&sha1_ctx, sha1);
1651
1652 assert(ARRAY_SIZE(sha1) >= VK_UUID_SIZE);
1653 memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
1654 }
1655
1656 /* Systems without ReBAR cannot map all memory types on the host, and
1657 * that affects which memory types an image can use for host memory copies.
1658 *
1659 * Systems with compressed memory types likewise cannot expose all image
1660 * memory types for host image copies.
1661 */
1662 props->identicalMemoryTypeRequirements = pdevice->has_small_bar ||
1663 pdevice->memory.compressed_mem_types != 0;
1664 }
1665
1666 /* VK_EXT_legacy_vertex_attributes */
1667 {
1668 props->nativeUnalignedPerformance = true;
1669 }
1670
1671 /* VK_EXT_line_rasterization */
1672 {
1673 /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1674 * Rules - Legacy Mode", it says the following:
1675 *
1676 * "Note that the device divides a pixel into a 16x16 array of
1677 * subpixels, referenced by their upper left corners."
1678 *
1679 * This is the only known reference in the PRMs to the subpixel
1680 * precision of line rasterization and a "16x16 array of subpixels"
1681 * implies 4 subpixel precision bits. Empirical testing has shown that 4
1682 * subpixel precision bits apply to all line rasterization types.
1683 */
1684 props->lineSubPixelPrecisionBits = 4;
1685 }
1686
1687 /* VK_EXT_map_memory_placed */
1688 {
1689 props->minPlacedMemoryMapAlignment = 4096;
1690 }
1691
1692 /* VK_EXT_mesh_shader */
1693 {
1694 /* Bounded by the maximum representable size in
1695 * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
1696 */
1697 const uint32_t max_slm_size = intel_device_info_get_max_slm_size(devinfo);
1698
1699 /* Bounded by the maximum representable size in
1700 * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
1701 */
1702 const uint32_t max_workgroup_size = 1 << 10;
1703
1704 /* 3DMESH_3D limitation. */
1705 const uint32_t max_threadgroup_count = 1 << 22;
1706
1707 /* 3DMESH_3D limitation. */
1708 const uint32_t max_threadgroup_xyz = 65535;
1709
1710 const uint32_t max_urb_size = 64 * 1024;
1711
1712 props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
1713 props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
1714 props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
1715 props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
1716
1717 props->maxTaskWorkGroupInvocations = max_workgroup_size;
1718 props->maxTaskWorkGroupSize[0] = max_workgroup_size;
1719 props->maxTaskWorkGroupSize[1] = max_workgroup_size;
1720 props->maxTaskWorkGroupSize[2] = max_workgroup_size;
1721
1722 /* TUE header with padding */
1723 const uint32_t task_payload_reserved = 32;
1724
1725 props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
1726 props->maxTaskSharedMemorySize = max_slm_size;
1727 props->maxTaskPayloadAndSharedMemorySize =
1728 props->maxTaskPayloadSize +
1729 props->maxTaskSharedMemorySize;
1730
1731 props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
1732 props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
1733 props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
1734 props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
1735
1736 props->maxMeshWorkGroupInvocations = max_workgroup_size;
1737 props->maxMeshWorkGroupSize[0] = max_workgroup_size;
1738 props->maxMeshWorkGroupSize[1] = max_workgroup_size;
1739 props->maxMeshWorkGroupSize[2] = max_workgroup_size;
1740
1741 props->maxMeshSharedMemorySize = max_slm_size;
1742 props->maxMeshPayloadAndSharedMemorySize =
1743 props->maxTaskPayloadSize +
1744 props->maxMeshSharedMemorySize;
1745
1746 /* Unfortunately the spec's formula for the max output size doesn't match our
1747 * hardware (some per-primitive and per-vertex attributes have alignment
1748 * restrictions), so we have to advertise the minimum value mandated by the spec to avoid overflowing it.
1749 */
1750 props->maxMeshOutputPrimitives = 256;
1751 props->maxMeshOutputVertices = 256;
1752
1753 /* NumPrim + Primitive Data List */
1754 const uint32_t max_indices_memory =
1755 ALIGN(sizeof(uint32_t) +
1756 sizeof(uint32_t) * props->maxMeshOutputVertices, 32);
1757
1758 props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
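      /* A worked example of the expression above: with maxMeshOutputVertices
       * = 256, max_indices_memory = ALIGN(4 + 4 * 256, 32) = ALIGN(1028, 32)
       * = 1056 bytes, so this is MIN2(65536 - 1056, 32768) = 32768.
       */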
1759
1760 props->maxMeshPayloadAndOutputMemorySize =
1761 props->maxTaskPayloadSize +
1762 props->maxMeshOutputMemorySize;
1763
1764 props->maxMeshOutputComponents = 128;
1765
1766 /* RTAIndex is 11-bits wide */
1767 props->maxMeshOutputLayers = 1 << 11;
1768
1769 props->maxMeshMultiviewViewCount = 1;
1770
1771 /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
1772 props->meshOutputPerVertexGranularity = 8;
1773 /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
1774 props->meshOutputPerPrimitiveGranularity = 8;
1775
1776 /* SIMD16 */
1777 props->maxPreferredTaskWorkGroupInvocations = 16;
1778 props->maxPreferredMeshWorkGroupInvocations = 16;
1779
1780 props->prefersLocalInvocationVertexOutput = false;
1781 props->prefersLocalInvocationPrimitiveOutput = false;
1782 props->prefersCompactVertexOutput = false;
1783 props->prefersCompactPrimitiveOutput = false;
1784
1785 /* Spec minimum values */
1786 assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
1787 assert(props->maxTaskWorkGroupCount[0] >= 65535);
1788 assert(props->maxTaskWorkGroupCount[1] >= 65535);
1789 assert(props->maxTaskWorkGroupCount[2] >= 65535);
1790
1791 assert(props->maxTaskWorkGroupInvocations >= 128);
1792 assert(props->maxTaskWorkGroupSize[0] >= 128);
1793 assert(props->maxTaskWorkGroupSize[1] >= 128);
1794 assert(props->maxTaskWorkGroupSize[2] >= 128);
1795
1796 assert(props->maxTaskPayloadSize >= 16384);
1797 assert(props->maxTaskSharedMemorySize >= 32768);
1798 assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);
1799
1800
1801 assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
1802 assert(props->maxMeshWorkGroupCount[0] >= 65535);
1803 assert(props->maxMeshWorkGroupCount[1] >= 65535);
1804 assert(props->maxMeshWorkGroupCount[2] >= 65535);
1805
1806 assert(props->maxMeshWorkGroupInvocations >= 128);
1807 assert(props->maxMeshWorkGroupSize[0] >= 128);
1808 assert(props->maxMeshWorkGroupSize[1] >= 128);
1809 assert(props->maxMeshWorkGroupSize[2] >= 128);
1810
1811 assert(props->maxMeshSharedMemorySize >= 28672);
1812 assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
1813 assert(props->maxMeshOutputMemorySize >= 32768);
1814 assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);
1815
1816 assert(props->maxMeshOutputComponents >= 128);
1817
1818 assert(props->maxMeshOutputVertices >= 256);
1819 assert(props->maxMeshOutputPrimitives >= 256);
1820 assert(props->maxMeshOutputLayers >= 8);
1821 assert(props->maxMeshMultiviewViewCount >= 1);
1822 }
1823
1824 /* VK_EXT_multi_draw */
1825 {
1826 props->maxMultiDrawCount = 2048;
1827 }
1828
1829 /* VK_EXT_nested_command_buffer */
1830 {
1831 props->maxCommandBufferNestingLevel = UINT32_MAX;
1832 }
1833
1834 /* VK_EXT_pci_bus_info */
1835 {
1836 props->pciDomain = pdevice->info.pci_domain;
1837 props->pciBus = pdevice->info.pci_bus;
1838 props->pciDevice = pdevice->info.pci_dev;
1839 props->pciFunction = pdevice->info.pci_func;
1840 }
1841
1842 /* VK_EXT_physical_device_drm */
1843 {
1844 props->drmHasPrimary = pdevice->has_master;
1845 props->drmPrimaryMajor = pdevice->master_major;
1846 props->drmPrimaryMinor = pdevice->master_minor;
1847 props->drmHasRender = pdevice->has_local;
1848 props->drmRenderMajor = pdevice->local_major;
1849 props->drmRenderMinor = pdevice->local_minor;
1850 }
1851
1852 /* VK_EXT_pipeline_robustness */
1853 {
1854 props->defaultRobustnessStorageBuffers =
1855 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1856 props->defaultRobustnessUniformBuffers =
1857 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1858 props->defaultRobustnessVertexInputs =
1859 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
1860 props->defaultRobustnessImages =
1861 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1862 }
1863
1864 /* VK_EXT_provoking_vertex */
1865 {
1866 props->provokingVertexModePerPipeline = true;
1867 props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1868 }
1869
1870 /* VK_EXT_robustness2 */
1871 {
1872 props->robustStorageBufferAccessSizeAlignment =
1873 ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1874 props->robustUniformBufferAccessSizeAlignment =
1875 ANV_UBO_ALIGNMENT;
1876 }
1877
1878 /* VK_EXT_sample_locations */
1879 {
1880 props->sampleLocationSampleCounts =
1881 isl_device_get_sample_counts(&pdevice->isl_dev);
1882
1883 /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1884 props->maxSampleLocationGridSize.width = 1;
1885 props->maxSampleLocationGridSize.height = 1;
1886
1887 props->sampleLocationCoordinateRange[0] = 0;
1888 props->sampleLocationCoordinateRange[1] = 0.9375;
1889 props->sampleLocationSubPixelBits = 4;
1890
1891 props->variableSampleLocations = true;
1892 }
1893
1894 /* VK_EXT_shader_module_identifier */
1895 {
1896 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1897 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1898 memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1899 vk_shaderModuleIdentifierAlgorithmUUID,
1900 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1901 }
1902
1903 /* VK_EXT_transform_feedback */
1904 {
1905 props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1906 props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1907 props->maxTransformFeedbackBufferSize = (1ull << 32);
1908 props->maxTransformFeedbackStreamDataSize = 128 * 4;
1909 props->maxTransformFeedbackBufferDataSize = 128 * 4;
1910 props->maxTransformFeedbackBufferDataStride = 2048;
1911 props->transformFeedbackQueries = true;
1912 props->transformFeedbackStreamsLinesTriangles = false;
1913 props->transformFeedbackRasterizationStreamSelect = false;
1914 props->transformFeedbackDraw = true;
1915 }
1916
1917 /* VK_ANDROID_native_buffer */
1918 #if DETECT_OS_ANDROID
1919 {
1920 props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
1921 }
1922 #endif /* DETECT_OS_ANDROID */
1923
1924
1925 /* VK_MESA_image_alignment_control */
1926 {
1927 /* We support 4k/64k tiling alignments on most platforms */
1928 props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
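      /* That is, bits 12 and 16 set (mask 0x11000): callers may request
       * 4KiB (1 << 12) or 64KiB (1 << 16) image alignment.
       */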
1929 }
1930 }
1931
1932 static VkResult MUST_CHECK
1933 anv_init_meminfo(struct anv_physical_device *device, int fd)
1934 {
1935 const struct intel_device_info *devinfo = &device->info;
1936
1937 device->sys.region = &devinfo->mem.sram.mem;
1938 device->sys.size = devinfo->mem.sram.mappable.size;
1939 device->sys.available = devinfo->mem.sram.mappable.free;
1940
1941 device->vram_mappable.region = &devinfo->mem.vram.mem;
1942 device->vram_mappable.size = devinfo->mem.vram.mappable.size;
1943 device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1944
1945 device->vram_non_mappable.region = &devinfo->mem.vram.mem;
1946 device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
1947 device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1948
1949 return VK_SUCCESS;
1950 }
1951
1952 static void
1953 anv_update_meminfo(struct anv_physical_device *device, int fd)
1954 {
1955 if (!intel_device_info_update_memory_info(&device->info, fd))
1956 return;
1957
1958 const struct intel_device_info *devinfo = &device->info;
1959 device->sys.available = devinfo->mem.sram.mappable.free;
1960 device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1961 device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1962 }
1963
1964 static VkResult
1965 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
1966 {
1967 VkResult result = anv_init_meminfo(device, fd);
1968 if (result != VK_SUCCESS)
1969 return result;
1970
1971 assert(device->sys.size != 0);
1972
1973 if (anv_physical_device_has_vram(device)) {
1974 /* We can create 2 or 3 different heaps when we have local memory
1975 * support: the first heap has the local memory size, the second has the
1976 * system memory size, and a third is added only when part of the vram
1977 * is not mappable to the host.
1978 */
1979 device->memory.heap_count = 2;
1980 device->memory.heaps[0] = (struct anv_memory_heap) {
1981 /* If there is a vram_non_mappable, use that for the device only
1982 * heap. Otherwise use the vram_mappable.
1983 */
1984 .size = device->vram_non_mappable.size != 0 ?
1985 device->vram_non_mappable.size : device->vram_mappable.size,
1986 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1987 .is_local_mem = true,
1988 };
1989 device->memory.heaps[1] = (struct anv_memory_heap) {
1990 .size = device->sys.size,
1991 .flags = 0,
1992 .is_local_mem = false,
1993 };
1994 /* Add an additional smaller vram mappable heap if we can't map all the
1995 * vram to the host.
1996 */
1997 if (device->vram_non_mappable.size > 0) {
1998 device->memory.heap_count++;
1999 device->memory.heaps[2] = (struct anv_memory_heap) {
2000 .size = device->vram_mappable.size,
2001 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
2002 .is_local_mem = true,
2003 };
2004 }
2005 } else {
2006 device->memory.heap_count = 1;
2007 device->memory.heaps[0] = (struct anv_memory_heap) {
2008 .size = device->sys.size,
2009 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
2010 .is_local_mem = false,
2011 };
2012 }
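   /* To illustrate the layout built above with hypothetical numbers: a
    * discrete GPU with 8GiB of vram of which only 256MiB is host-mappable
    * would expose heap 0 = 7.75GiB of non-mappable vram (DEVICE_LOCAL),
    * heap 1 = system RAM, and heap 2 = 256MiB of mappable vram
    * (DEVICE_LOCAL). A dGPU whose vram is fully mappable only gets heaps 0
    * and 1, and an integrated GPU gets a single DEVICE_LOCAL system RAM
    * heap.
    */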
2013
2014 switch (device->info.kmd_type) {
2015 case INTEL_KMD_TYPE_XE:
2016 result = anv_xe_physical_device_init_memory_types(device);
2017 break;
2018 case INTEL_KMD_TYPE_I915:
2019 default:
2020 result = anv_i915_physical_device_init_memory_types(device);
2021 break;
2022 }
2023
2024 assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
2025
2026 if (result != VK_SUCCESS)
2027 return result;
2028
2029 /* Some games (e.g., Total War: WARHAMMER III) sometimes seem to expect to
2030 * find memory types both with and without
2031 * VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT. So here we duplicate all our
2032 * memory types just to make these games happy.
2033 * This behavior is not spec-compliant as we still only have one heap,
2034 * which is now inconsistent with some of the memory types, but the
2035 * games don't seem to care.
2036 */
2037 if (device->instance->anv_fake_nonlocal_memory &&
2038 !anv_physical_device_has_vram(device)) {
2039 const uint32_t base_types_count = device->memory.type_count;
2040 for (int i = 0; i < base_types_count; i++) {
2041 if (!(device->memory.types[i].propertyFlags &
2042 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
2043 continue;
2044
2045 assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
2046 struct anv_memory_type *new_type =
2047 &device->memory.types[device->memory.type_count++];
2048 *new_type = device->memory.types[i];
2049
2050 device->memory.types[i].propertyFlags &=
2051 ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
2052 }
2053 }
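   /* As a hypothetical example of the duplication above: an integrated GPU
    * exposing a single DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT type
    * would, after this loop, expose two types sharing the same heap: the
    * original with DEVICE_LOCAL stripped, plus a duplicate that keeps it.
    */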
2054
2055 /* Replicate all non-protected memory types for descriptor buffers, because
2056 * we want to be able to identify these memory allocations and place them
2057 * in the right memory heap.
2058 */
2059 device->memory.default_buffer_mem_types =
2060 BITFIELD_RANGE(0, device->memory.type_count);
2061 device->memory.protected_mem_types = 0;
2062 device->memory.dynamic_visible_mem_types = 0;
2063 device->memory.compressed_mem_types = 0;
2064
2065 const uint32_t base_types_count = device->memory.type_count;
2066 for (int i = 0; i < base_types_count; i++) {
2067 bool skip = false;
2068
2069 if (device->memory.types[i].propertyFlags &
2070 VK_MEMORY_PROPERTY_PROTECTED_BIT) {
2071 device->memory.protected_mem_types |= BITFIELD_BIT(i);
2072 device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
2073 skip = true;
2074 }
2075
2076 if (device->memory.types[i].compressed) {
2077 device->memory.compressed_mem_types |= BITFIELD_BIT(i);
2078 device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
2079 skip = true;
2080 }
2081
2082 if (skip)
2083 continue;
2084
2085 device->memory.dynamic_visible_mem_types |=
2086 BITFIELD_BIT(device->memory.type_count);
2087
2088 assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
2089 struct anv_memory_type *new_type =
2090 &device->memory.types[device->memory.type_count++];
2091 *new_type = device->memory.types[i];
2092 new_type->dynamic_visible = true;
2093 }
2094
2095 assert(device->memory.type_count <= VK_MAX_MEMORY_TYPES);
2096
2097 for (unsigned i = 0; i < device->memory.type_count; i++) {
2098 VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
2099 if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
2100 !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
2101 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2102 device->memory.need_flush = true;
2103 #else
2104 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2105 "Memory configuration requires flushing, but it's not implemented for this architecture");
2106 #endif
2107 }
2108
2109 return VK_SUCCESS;
2110 }
2111
2112 static VkResult
2113 anv_physical_device_init_uuids(struct anv_physical_device *device)
2114 {
2115 const struct build_id_note *note =
2116 build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
2117 if (!note) {
2118 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2119 "Failed to find build-id");
2120 }
2121
2122 unsigned build_id_len = build_id_length(note);
2123 if (build_id_len < 20) {
2124 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2125 "build-id too short. It needs to be a SHA");
2126 }
2127
2128 memcpy(device->driver_build_sha1, build_id_data(note), 20);
2129
2130 struct mesa_sha1 sha1_ctx;
2131 uint8_t sha1[20];
2132 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
2133
2134 /* The pipeline cache UUID is used for determining when a pipeline cache is
2135 * invalid. It needs both a driver build and the PCI ID of the device.
2136 */
2137 _mesa_sha1_init(&sha1_ctx);
2138 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
2139 brw_device_sha1_update(&sha1_ctx, &device->info);
2140 _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
2141 sizeof(device->always_use_bindless));
2142 _mesa_sha1_final(&sha1_ctx, sha1);
2143 memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
2144
2145 intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
2146 intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
2147
2148 return VK_SUCCESS;
2149 }
2150
2151 static void
2152 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
2153 {
2154 #ifdef ENABLE_SHADER_CACHE
2155 char renderer[10];
2156 ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
2157 device->info.pci_device_id);
2158 assert(len == sizeof(renderer) - 2);
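   /* "anv_" plus 4 hex digits is 8 characters, i.e. sizeof(renderer) - 2,
    * which leaves room for the terminating NUL in the 10-byte buffer.
    */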
2159
2160 char timestamp[41];
2161 _mesa_sha1_format(timestamp, device->driver_build_sha1);
2162
2163 const uint64_t driver_flags =
2164 brw_get_compiler_config_value(device->compiler);
2165 device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
2166 #endif
2167 }
2168
2169 static void
2170 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
2171 {
2172 #ifdef ENABLE_SHADER_CACHE
2173 if (device->vk.disk_cache) {
2174 disk_cache_destroy(device->vk.disk_cache);
2175 device->vk.disk_cache = NULL;
2176 }
2177 #else
2178 assert(device->vk.disk_cache == NULL);
2179 #endif
2180 }
2181
2182 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
2183 * queue overrides.
2184 *
2185 * To override the number of queues:
2186 * * "gc" is for graphics queues with compute support
2187 * * "g" is for graphics queues with no compute support
2188 * * "c" is for compute queues with no graphics support
2189 * * "v" is for video queues with no graphics support
2190 * * "b" is for copy (blitter) queues with no graphics support
2191 *
2192 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
2193 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
2194 * with compute-only support.
2195 *
2196 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
2197 * include 1 queue with compute-only support, but it will not change the
2198 * number of graphics+compute queues.
2199 *
2200 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
2201 * to include 1 queue with compute-only support, and it would override the
2202 * number of graphics+compute queues to be 0.
2203 */
2204 static void
2205 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count, int *blit_count)
2206 {
2207 int gc_override = -1;
2208 int g_override = -1;
2209 int c_override = -1;
2210 int v_override = -1;
2211 int blit_override = -1;
2212 const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");
2213
2214 /* Override queue counts for Android HWUI, which expects a minimum of 2 queues. */
2215 #if DETECT_OS_ANDROID
2216 *gc_count = 2;
2217 #endif
2218
2219 if (env_ == NULL)
2220 return;
2221
2222 char *env = strdup(env_);
2223 char *save = NULL;
2224 char *next = strtok_r(env, ",", &save);
2225 while (next != NULL) {
2226 if (strncmp(next, "gc=", 3) == 0) {
2227 gc_override = strtol(next + 3, NULL, 0);
2228 } else if (strncmp(next, "g=", 2) == 0) {
2229 g_override = strtol(next + 2, NULL, 0);
2230 } else if (strncmp(next, "c=", 2) == 0) {
2231 c_override = strtol(next + 2, NULL, 0);
2232 } else if (strncmp(next, "v=", 2) == 0) {
2233 v_override = strtol(next + 2, NULL, 0);
2234 } else if (strncmp(next, "b=", 2) == 0) {
2235 blit_override = strtol(next + 2, NULL, 0);
2236 } else {
2237 mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
2238 }
2239 next = strtok_r(NULL, ",", &save);
2240 }
2241 free(env);
2242 if (gc_override >= 0)
2243 *gc_count = gc_override;
2244 if (g_override >= 0)
2245 *g_count = g_override;
2246 if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
2247 mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
2248 "Vulkan specification");
2249 if (c_override >= 0)
2250 *c_count = c_override;
2251 if (v_override >= 0)
2252 *v_count = v_override;
2253 if (blit_override >= 0)
2254 *blit_count = blit_override;
2255 }
2256
2257 static void
2258 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
2259 {
2260 uint32_t family_count = 0;
2261 VkQueueFlags sparse_flags = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED ?
2262 VK_QUEUE_SPARSE_BINDING_BIT : 0;
2263 VkQueueFlags protected_flag = pdevice->has_protected_contexts ?
2264 VK_QUEUE_PROTECTED_BIT : 0;
2265
2266 if (pdevice->engine_info) {
2267 int gc_count =
2268 intel_engines_count(pdevice->engine_info,
2269 INTEL_ENGINE_CLASS_RENDER);
2270 int v_count =
2271 intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
2272 int g_count = 0;
2273 int c_count = 0;
2274 /* The kernel not only needs to have vm_control, it also needs a new
2275 * enough GuC and the interface to tell us so. This is
2276 * implemented in the common layer by is_guc_semaphore_functional() and
2277 * results in devinfo->engine_class_supported_count being adjusted,
2278 * which we read below.
2279 */
2280 const bool kernel_supports_non_render_engines = pdevice->has_vm_control;
2281 /* For now we're choosing to not expose non-render engines on i915.ko
2282 * even when the Kernel allows it. We have data suggesting it's not an
2283 * obvious win in terms of performance.
2284 */
2285 const bool can_use_non_render_engines =
2286 kernel_supports_non_render_engines &&
2287 pdevice->info.kmd_type == INTEL_KMD_TYPE_XE;
2288
2289 if (can_use_non_render_engines) {
2290 c_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
2291 }
2292
2293 int blit_count = 0;
2294 if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
2295 blit_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COPY];
2296 }
2297
2298 anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count, &blit_count);
2299
2300 enum intel_engine_class compute_class =
2301 pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE] &&
2302 c_count >= 1 ? INTEL_ENGINE_CLASS_COMPUTE :
2303 INTEL_ENGINE_CLASS_RENDER;
2304
2305 if (gc_count > 0) {
2306 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2307 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2308 VK_QUEUE_COMPUTE_BIT |
2309 VK_QUEUE_TRANSFER_BIT |
2310 sparse_flags |
2311 protected_flag,
2312 .queueCount = gc_count,
2313 .engine_class = INTEL_ENGINE_CLASS_RENDER,
2314 .supports_perf = true,
2315 };
2316 }
2317 if (g_count > 0) {
2318 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2319 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2320 VK_QUEUE_TRANSFER_BIT |
2321 sparse_flags |
2322 protected_flag,
2323 .queueCount = g_count,
2324 .engine_class = INTEL_ENGINE_CLASS_RENDER,
2325 };
2326 }
2327 if (c_count > 0) {
2328 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2329 .queueFlags = VK_QUEUE_COMPUTE_BIT |
2330 VK_QUEUE_TRANSFER_BIT |
2331 sparse_flags |
2332 protected_flag,
2333 .queueCount = c_count,
2334 .engine_class = compute_class,
2335 };
2336 }
2337 if (v_count > 0 && (pdevice->video_decode_enabled || pdevice->video_encode_enabled)) {
2338 /* HEVC support on Gfx9 is only available on VCS0. So limit the number of video queues
2339 * to the first VCS engine instance.
2340 *
2341 * We should be able to query HEVC support from the kernel using the engine query uAPI,
2342 * but this appears to be broken:
2343 * https://gitlab.freedesktop.org/drm/intel/-/issues/8832
2344 *
2345 * When this bug is fixed we should be able to check HEVC support to determine the
2346 * correct number of queues.
2347 */
2348 /* TODO: enable protected content on video queue */
2349 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2350 .queueFlags = (pdevice->video_decode_enabled ? VK_QUEUE_VIDEO_DECODE_BIT_KHR : 0) |
2351 (pdevice->video_encode_enabled ? VK_QUEUE_VIDEO_ENCODE_BIT_KHR : 0),
2352 .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
2353 .engine_class = INTEL_ENGINE_CLASS_VIDEO,
2354 };
2355 }
2356 if (blit_count > 0) {
2357 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2358 .queueFlags = VK_QUEUE_TRANSFER_BIT |
2359 protected_flag,
2360 .queueCount = blit_count,
2361 .engine_class = INTEL_ENGINE_CLASS_COPY,
2362 };
2363 }
2364 } else {
2365 /* Default to a single render queue */
2366 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2367 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2368 VK_QUEUE_COMPUTE_BIT |
2369 VK_QUEUE_TRANSFER_BIT |
2370 sparse_flags,
2371 .queueCount = 1,
2372 .engine_class = INTEL_ENGINE_CLASS_RENDER,
2373 };
2375 }
2376 assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
2377 pdevice->queue.family_count = family_count;
2378 }
2379
2380 static VkResult
2381 anv_physical_device_get_parameters(struct anv_physical_device *device)
2382 {
2383 switch (device->info.kmd_type) {
2384 case INTEL_KMD_TYPE_I915:
2385 return anv_i915_physical_device_get_parameters(device);
2386 case INTEL_KMD_TYPE_XE:
2387 return anv_xe_physical_device_get_parameters(device);
2388 default:
2389 unreachable("Missing");
2390 return VK_ERROR_UNKNOWN;
2391 }
2392 }
2393
2394 VkResult
2395 anv_physical_device_try_create(struct vk_instance *vk_instance,
2396 struct _drmDevice *drm_device,
2397 struct vk_physical_device **out)
2398 {
2399 struct anv_instance *instance =
2400 container_of(vk_instance, struct anv_instance, vk);
2401
2402 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
2403 drm_device->bustype != DRM_BUS_PCI ||
2404 drm_device->deviceinfo.pci->vendor_id != 0x8086)
2405 return VK_ERROR_INCOMPATIBLE_DRIVER;
2406
2407 const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
2408 const char *path = drm_device->nodes[DRM_NODE_RENDER];
2409 VkResult result;
2410 int fd;
2411 int master_fd = -1;
2412
2413 process_intel_debug_variable();
2414
2415 fd = open(path, O_RDWR | O_CLOEXEC);
2416 if (fd < 0) {
2417 if (errno == ENOMEM) {
2418 return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2419 "Unable to open device %s: out of memory", path);
2420 }
2421 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2422 "Unable to open device %s: %m", path);
2423 }
2424
2425 struct intel_device_info devinfo;
2426 if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
2427 result = VK_ERROR_INCOMPATIBLE_DRIVER;
2428 goto fail_fd;
2429 }
2430
2431 if (devinfo.ver < 9) {
2432 /* Silently fail here; hasvk should pick up this device. */
2433 result = VK_ERROR_INCOMPATIBLE_DRIVER;
2434 goto fail_fd;
2435 } else if (devinfo.probe_forced) {
2436 /* If INTEL_FORCE_PROBE was used, then the user has opted in to
2437 * unsupported device support. No need to print a warning message.
2438 */
2439 } else if (devinfo.ver > 20) {
2440 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2441 "Vulkan not yet supported on %s", devinfo.name);
2442 goto fail_fd;
2443 }
2444
2445 /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
2446 * need to always disable preemption:
2447 * - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
2448 * - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
2449 */
2450 if (devinfo.verx10 == 120)
2451 BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);
2452
2453 if (!devinfo.has_context_isolation) {
2454 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2455 "Vulkan requires context isolation for %s", devinfo.name);
2456 goto fail_fd;
2457 }
2458
2459 struct anv_physical_device *device =
2460 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
2461 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2462 if (device == NULL) {
2463 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2464 goto fail_fd;
2465 }
2466
2467 struct vk_physical_device_dispatch_table dispatch_table;
2468 vk_physical_device_dispatch_table_from_entrypoints(
2469 &dispatch_table, &anv_physical_device_entrypoints, true);
2470 vk_physical_device_dispatch_table_from_entrypoints(
2471 &dispatch_table, &wsi_physical_device_entrypoints, false);
2472
2473 result = vk_physical_device_init(&device->vk, &instance->vk,
2474 NULL, NULL, NULL, /* We set up extensions later */
2475 &dispatch_table);
2476 if (result != VK_SUCCESS) {
2477 vk_error(instance, result);
2478 goto fail_alloc;
2479 }
2480 device->instance = instance;
2481
2482 assert(strlen(path) < ARRAY_SIZE(device->path));
2483 snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
2484
2485 device->info = devinfo;
2486
2487 device->local_fd = fd;
2488 result = anv_physical_device_get_parameters(device);
2489 if (result != VK_SUCCESS)
2490 goto fail_base;
2491
2492 device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
2493 device->info.aperture_bytes;
2494
2495 if (device->gtt_size < (4ULL << 30 /* GiB */)) {
2496 vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2497 "GTT size too small: 0x%016"PRIx64, device->gtt_size);
2498 goto fail_base;
2499 }
2500
2501 /* We currently only have the right bits for instructions in Gen12+. If the
2502 * kernel ever starts supporting that feature on previous generations,
2503 * we'll need to edit genxml prior to enabling here.
2504 */
2505 device->has_protected_contexts = device->info.ver >= 12 &&
2506 intel_gem_supports_protected_context(fd, device->info.kmd_type);
2507
2508 /* Just pick one; they're all the same */
2509 device->has_astc_ldr =
2510 isl_format_supports_sampling(&device->info,
2511 ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
2512 if (!device->has_astc_ldr &&
2513 driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
2514 device->emu_astc_ldr = true;
2515 if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
2516 device->flush_astc_ldr_void_extent_denorms =
2517 device->has_astc_ldr && !device->emu_astc_ldr;
2518 }
2519 device->disable_fcv = device->info.verx10 >= 125 ||
2520 instance->disable_fcv;
2521
2522 result = anv_physical_device_init_heaps(device, fd);
2523 if (result != VK_SUCCESS)
2524 goto fail_base;
2525
2526 if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
2527 device->has_exec_timeline = false;
2528
2529 device->has_cooperative_matrix =
2530 device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;
2531
2532 unsigned st_idx = 0;
2533
2534 device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
2535 if (!device->has_exec_timeline)
2536 device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
2537 device->sync_types[st_idx++] = &device->sync_syncobj_type;
2538
2539 /* anv_bo_sync_type is only supported with i915 for now */
2540 if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
2541 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
2542 device->sync_types[st_idx++] = &anv_bo_sync_type;
2543
2544 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
2545 device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
2546 device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
2547 }
2548 } else {
2549 assert(vk_sync_type_is_drm_syncobj(&device->sync_syncobj_type));
2550 assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
2551 assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
2552 }
2553
2554 device->sync_types[st_idx++] = NULL;
2555 assert(st_idx <= ARRAY_SIZE(device->sync_types));
2556 device->vk.supported_sync_types = device->sync_types;
2557
2558 device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
2559
2560 device->always_use_bindless =
2561 debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
2562
2563 device->use_call_secondary =
2564 !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
2565
2566 device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
2567 device->video_encode_enabled = debug_get_bool_option("ANV_VIDEO_ENCODE", false);
2568
2569 device->uses_ex_bso = device->info.verx10 >= 125;
2570
2571 /* For now always use indirect descriptors. We'll update this
2572 * to !uses_ex_bso when all the infrastructure is built up.
2573 */
2574 device->indirect_descriptors =
2575 !device->uses_ex_bso ||
2576 driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
2577
2578 device->alloc_aux_tt_mem =
2579 device->info.has_aux_map && device->info.verx10 >= 125;
2580 /* Check if we can read the GPU timestamp register from the CPU */
2581 uint64_t u64_ignore;
2582 device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
2583 device->info.kmd_type,
2584 &u64_ignore);
2585
2586 device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;
2587
2588 /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
2589 if (debug_get_bool_option("ANV_SPARSE", true)) {
2590 if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
2591 if (debug_get_bool_option("ANV_SPARSE_USE_TRTT", false))
2592 device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2593 else
2594 device->sparse_type = ANV_SPARSE_TYPE_VM_BIND;
2595 } else {
2596 if (device->info.ver >= 12 && device->has_exec_timeline)
2597 device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2598 }
2599 }
2600 if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) {
2601 if (instance->has_fake_sparse)
2602 device->sparse_type = ANV_SPARSE_TYPE_FAKE;
2603 }
2604
2605 device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
2606 driQueryOptionb(&instance->dri_options, "always_flush_cache");
2607
2608 device->compiler = brw_compiler_create(NULL, &device->info);
2609 if (device->compiler == NULL) {
2610 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2611 goto fail_base;
2612 }
2613 device->compiler->shader_debug_log = compiler_debug_log;
2614 device->compiler->shader_perf_log = compiler_perf_log;
2615 device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
2616 device->compiler->use_bindless_sampler_offset = false;
2617 device->compiler->spilling_rate =
2618 driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
2619
2620 isl_device_init(&device->isl_dev, &device->info);
2621 device->isl_dev.buffer_length_in_aux_addr = !intel_needs_workaround(device->isl_dev.info, 14019708328);
2622 device->isl_dev.sampler_route_to_lsc =
2623 driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc");
2624
2625 result = anv_physical_device_init_uuids(device);
2626 if (result != VK_SUCCESS)
2627 goto fail_compiler;
2628
2629 anv_physical_device_init_va_ranges(device);
2630
2631 anv_physical_device_init_disk_cache(device);
2632
2633 if (instance->vk.enabled_extensions.KHR_display) {
2634 master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
2635 if (master_fd >= 0) {
2636 /* fail if we don't have permission to even render on this device */
2637 if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
2638 close(master_fd);
2639 master_fd = -1;
2640 }
2641 }
2642 }
2643 device->master_fd = master_fd;
2644
2645 device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
2646 intel_common_update_device_info(fd, &device->info);
2647
2648 anv_physical_device_init_queue_families(device);
2649
2650 anv_physical_device_init_perf(device, fd);
2651
2652 /* Gather major/minor before WSI. */
2653 struct stat st;
2654
2655 if (stat(primary_path, &st) == 0) {
2656 device->has_master = true;
2657 device->master_major = major(st.st_rdev);
2658 device->master_minor = minor(st.st_rdev);
2659 } else {
2660 device->has_master = false;
2661 device->master_major = 0;
2662 device->master_minor = 0;
2663 }
2664
2665 if (stat(path, &st) == 0) {
2666 device->has_local = true;
2667 device->local_major = major(st.st_rdev);
2668 device->local_minor = minor(st.st_rdev);
2669 } else {
2670 device->has_local = false;
2671 device->local_major = 0;
2672 device->local_minor = 0;
2673 }
2674
2675 device->has_small_bar = anv_physical_device_has_vram(device) &&
2676 device->vram_non_mappable.size != 0;
2677
2678 get_device_extensions(device, &device->vk.supported_extensions);
2679 get_features(device, &device->vk.supported_features);
2680 get_properties(device, &device->vk.properties);
2681
2682 result = anv_init_wsi(device);
2683 if (result != VK_SUCCESS)
2684 goto fail_perf;
2685
2686 anv_measure_device_init(device);
2687
2688 anv_genX(&device->info, init_physical_device_state)(device);
2689
2690 *out = &device->vk;
2691
2692 return VK_SUCCESS;
2693
2694 fail_perf:
2695 intel_perf_free(device->perf);
2696 free(device->engine_info);
2697 anv_physical_device_free_disk_cache(device);
2698 fail_compiler:
2699 ralloc_free(device->compiler);
2700 fail_base:
2701 vk_physical_device_finish(&device->vk);
2702 fail_alloc:
2703 vk_free(&instance->vk.alloc, device);
2704 fail_fd:
2705 close(fd);
2706 if (master_fd != -1)
2707 close(master_fd);
2708 return result;
2709 }
2710
2711 void
2712 anv_physical_device_destroy(struct vk_physical_device *vk_device)
2713 {
2714 struct anv_physical_device *device =
2715 container_of(vk_device, struct anv_physical_device, vk);
2716
2717 anv_finish_wsi(device);
2718 anv_measure_device_destroy(device);
2719 free(device->engine_info);
2720 anv_physical_device_free_disk_cache(device);
2721 ralloc_free(device->compiler);
2722 intel_perf_free(device->perf);
2723 close(device->local_fd);
2724 if (device->master_fd >= 0)
2725 close(device->master_fd);
2726 vk_physical_device_finish(&device->vk);
2727 vk_free(&device->instance->vk.alloc, device);
2728 }
2729
2730 static const VkQueueFamilyProperties
2731 get_anv_queue_family_properties_template(const struct anv_physical_device *device)
2732 {
2733
2734 /*
2735 * For Xe2+:
2736 * Bspec 60411: Timestamp register can hold 64-bit value
2737 *
2738 * Platforms < Xe2:
2739 * Bspec 46111: Timestamp register can hold only a 36-bit
2740 * value
2741 */
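   /* As a rough illustration (the 19.2MHz timestamp frequency here is
    * hypothetical): a 36-bit counter wraps after 2^36 / 19.2e6 ~= 3580
    * seconds, i.e. just under an hour, while a 64-bit counter effectively
    * never wraps.
    */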
2742 const VkQueueFamilyProperties anv_queue_family_properties_template =
2743 {
2744 .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
2745 .minImageTransferGranularity = { 1, 1, 1 },
2746 };
2747
2748 return anv_queue_family_properties_template;
2749 }
2750
2751 static VkQueueFamilyProperties
2752 anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
2753 uint32_t family_index)
2754 {
2755 const struct anv_queue_family *family = &device->queue.families[family_index];
2756 VkQueueFamilyProperties properties =
2757 get_anv_queue_family_properties_template(device);
2758
2759 properties.queueFlags = family->queueFlags;
2760 properties.queueCount = family->queueCount;
2761 return properties;
2762 }
2763
2764 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2765 VkPhysicalDevice physicalDevice,
2766 uint32_t* pQueueFamilyPropertyCount,
2767 VkQueueFamilyProperties2* pQueueFamilyProperties)
2768 {
2769 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2770 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2771 pQueueFamilyProperties, pQueueFamilyPropertyCount);
2772
2773 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2774 struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2775 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2776 p->queueFamilyProperties =
2777 anv_device_physical_get_queue_properties(pdevice, i);
2778
2779 vk_foreach_struct(ext, p->pNext) {
2780 switch (ext->sType) {
2781 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2782 VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2783 (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2784
2785 /* Deliberately sorted low to high */
2786 VkQueueGlobalPriorityKHR all_priorities[] = {
2787 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2788 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2789 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2790 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2791 };
2792
2793 uint32_t count = 0;
2794 for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2795 if (all_priorities[i] > pdevice->max_context_priority)
2796 break;
2797
2798 properties->priorities[count++] = all_priorities[i];
2799 }
2800 properties->priorityCount = count;
2801 break;
2802 }
2803 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
2804 VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
2805 (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
2806 prop->queryResultStatusSupport = VK_TRUE;
2807 break;
2808 }
2809 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
2810 VkQueueFamilyVideoPropertiesKHR *prop =
2811 (VkQueueFamilyVideoPropertiesKHR *)ext;
2812 if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
2813 prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
2814 VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
2815 if (pdevice->info.ver >= 12)
2816 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
2817 }
2818
2819 if (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
2820 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
2821 VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
2822 }
2823 break;
2824 }
2825 default:
2826 vk_debug_ignored_stype(ext->sType);
2827 }
2828 }
2829 }
2830 }
2831 }
2832
2833 void anv_GetPhysicalDeviceMemoryProperties(
2834 VkPhysicalDevice physicalDevice,
2835 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2836 {
2837 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2838
2839 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2840 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2841 pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2842 .propertyFlags = physical_device->memory.types[i].propertyFlags,
2843 .heapIndex = physical_device->memory.types[i].heapIndex,
2844 };
2845 }
2846
2847 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2848 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2849 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2850 .size = physical_device->memory.heaps[i].size,
2851 .flags = physical_device->memory.heaps[i].flags,
2852 };
2853 }
2854 }
2855
2856 static void
2857 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2858 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2859 {
2860 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2861
2862 if (!device->vk.supported_extensions.EXT_memory_budget)
2863 return;
2864
2865 anv_update_meminfo(device, device->local_fd);
2866
2867 VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2868 for (size_t i = 0; i < device->memory.heap_count; i++) {
2869 if (device->memory.heaps[i].is_local_mem) {
2870 total_vram_heaps_size += device->memory.heaps[i].size;
2871 } else {
2872 total_sys_heaps_size += device->memory.heaps[i].size;
2873 }
2874 }
2875
2876 for (size_t i = 0; i < device->memory.heap_count; i++) {
2877 VkDeviceSize heap_size = device->memory.heaps[i].size;
2878 VkDeviceSize heap_used = device->memory.heaps[i].used;
2879 VkDeviceSize heap_budget, total_heaps_size;
2880 uint64_t mem_available = 0;
2881
2882 if (device->memory.heaps[i].is_local_mem) {
2883 total_heaps_size = total_vram_heaps_size;
2884 if (device->vram_non_mappable.size > 0 && i == 0) {
2885 mem_available = device->vram_non_mappable.available;
2886 } else {
2887 mem_available = device->vram_mappable.available;
2888 }
2889 } else {
2890 total_heaps_size = total_sys_heaps_size;
2891 mem_available = MIN2(device->sys.available, total_heaps_size);
2892 }
2893
2894 double heap_proportion = (double) heap_size / total_heaps_size;
2895 VkDeviceSize available_prop = mem_available * heap_proportion;
2896
2897 /*
2898 * Let's not incite the app to starve the system: report at most 90% of
2899 * the available heap memory.
2900 */
2901 uint64_t heap_available = available_prop * 9 / 10;
2902 heap_budget = MIN2(heap_size, heap_used + heap_available);
2903
2904 /*
2905 * Round down to the nearest MB
2906 */
2907 heap_budget &= ~((1ull << 20) - 1);
2908
2909 /*
2910 * The heapBudget value must be non-zero for array elements less than
2911 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2912 * value must be less than or equal to VkMemoryHeap::size for each heap.
2913 */
2914 assert(0 < heap_budget && heap_budget <= heap_size);
2915
2916 memoryBudget->heapUsage[i] = heap_used;
2917 memoryBudget->heapBudget[i] = heap_budget;
2918 }
2919
2920 /* The heapBudget and heapUsage values must be zero for array elements
2921 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2922 */
2923 for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2924 memoryBudget->heapBudget[i] = 0;
2925 memoryBudget->heapUsage[i] = 0;
2926 }
2927 }
2928
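/* Worked example of the computation above (illustrative numbers only): a
 * single system heap of size 16 GiB with sys.available = 10 GiB gives
 * mem_available = MIN2(10 GiB, 16 GiB) = 10 GiB and heap_proportion = 1.0,
 * so heap_available = 10 GiB * 9 / 10 = 9 GiB. With heap_used = 1 GiB,
 * heap_budget = MIN2(16 GiB, 1 GiB + 9 GiB) = 10 GiB, which is already
 * MiB-aligned, so the rounding step is a no-op.
 */
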
void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
         anv_get_memory_budget(physicalDevice, (void*)ext);
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

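/* Example (a sketch, not driver code): querying the budget from an
 * application by chaining VkPhysicalDeviceMemoryBudgetPropertiesEXT, which
 * routes into anv_get_memory_budget() above.
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(pdev, &props);
 *    for (uint32_t i = 0; i < props.memoryProperties.memoryHeapCount; i++)
 *       printf("heap %u: used %llu budget %llu\n", i,
 *              (unsigned long long)budget.heapUsage[i],
 *              (unsigned long long)budget.heapBudget[i]);
 */
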
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}

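/* Example (a sketch): because anv reports a 1x1 grid for supported sample
 * counts, VK_EXT_sample_locations users can program custom locations per
 * pixel but not across a larger texel grid.
 *
 *    VkMultisamplePropertiesEXT ms = {
 *       .sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT,
 *    };
 *    vkGetPhysicalDeviceMultisamplePropertiesEXT(pdev, VK_SAMPLE_COUNT_4_BIT, &ms);
 *    assert(ms.maxSampleLocationGridSize.width <= 1 &&
 *           ms.maxSampleLocationGridSize.height <= 1);
 */
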
VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);
   /* BSpec 47003: There are a number of restrictions on the sample count
    * based on the coarse pixel size.
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1] = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
            ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4] = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8] = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };

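   /* Reading cp_size_sample_limits: the index is the coarse pixel area
    * (x * y). For example, a 2x2 coarse pixel (area 4) supports up to 4x
    * MSAA, while a 2x4 coarse pixel (area 8) is limited to 2x MSAA.
    */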
   for (uint32_t x = 4; x >= 1; x /= 2) {
      for (uint32_t y = 4; y >= 1; y /= 2) {
         if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
            /* BSpec 47003:
             * "CPsize 1x4 and 4x1 are not supported"
             */
            if ((x == 1 && y == 4) || (x == 4 && y == 1))
               continue;

            /* For size {1, 1}, the sample count must be ~0.
             *
             * 4x2 is also a special case.
             */
            if (x == 1 && y == 1)
               append_rate(~0, x, y);
            else if (x == 4 && y == 2)
               append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
            else
               append_rate(cp_size_sample_limits[x * y], x, y);
         } else {
            /* For size {1, 1}, the sample count must be ~0 */
            if (x == 1 && y == 1)
               append_rate(~0, x, y);
            else
               append_rate(sample_counts, x, y);
         }
      }
   }

#undef append_rate

   return vk_outarray_status(&out);
}

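/* Example (a sketch, not driver code): the standard two-call enumeration
 * against this entry point. The spec requires the caller to set sType on
 * each element before the second call; the driver fills in the rest.
 *
 *    uint32_t n = 0;
 *    vkGetPhysicalDeviceFragmentShadingRatesKHR(pdev, &n, NULL);
 *    VkPhysicalDeviceFragmentShadingRateKHR *rates = calloc(n, sizeof(*rates));
 *    for (uint32_t i = 0; i < n; i++)
 *       rates[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR;
 *    vkGetPhysicalDeviceFragmentShadingRatesKHR(pdev, &n, rates);
 */
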
static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)
{
   switch (t) {
   case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
   case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
   case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR;
   case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR;
   case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR;
   case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR;
   }
   unreachable("invalid cooperative matrix component type in configuration");
}

static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)
{
   switch (scope) {
   case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
   default:
      unreachable("invalid cooperative matrix scope in configuration");
   }
}

VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pPropertyCount,
    VkCooperativeMatrixPropertiesKHR*           pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct intel_device_info *devinfo = &pdevice->info;

   assert(anv_has_cooperative_matrix(pdevice));

   VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);

   for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
      const struct intel_cooperative_matrix_configuration *cfg =
         &devinfo->cooperative_matrix_configurations[i];

      if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
         break;

      vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
         prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;

         prop->MSize = cfg->m;
         prop->NSize = cfg->n;
         prop->KSize = cfg->k;

         prop->AType = convert_component_type(cfg->a);
         prop->BType = convert_component_type(cfg->b);
         prop->CType = convert_component_type(cfg->c);
         prop->ResultType = convert_component_type(cfg->result);

         prop->saturatingAccumulation = VK_FALSE;
         prop->scope = convert_scope(cfg->scope);
      }

      /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
       *
       *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
       *    cooperative matrix operand must be present if and only if
       *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
       *    VK_TRUE.
       *
       * As a result, we have to advertise integer configs both with and
       * without this flag set.
       *
       * The DPAS instruction does not support the .sat modifier, so only
       * advertise these configurations when DPAS is going to be lowered.
       *
       * FINISHME: It should be possible to do better than full lowering on
       * platforms that support DPAS. Emit a DPAS with a NULL accumulator
       * argument, then perform the correct sequence of saturating add
       * instructions.
       */
      if (cfg->a != INTEL_CMAT_FLOAT16 &&
          (devinfo->verx10 < 125 ||
           debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
         vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
            prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;

            prop->MSize = cfg->m;
            prop->NSize = cfg->n;
            prop->KSize = cfg->k;

            prop->AType = convert_component_type(cfg->a);
            prop->BType = convert_component_type(cfg->b);
            prop->CType = convert_component_type(cfg->c);
            prop->ResultType = convert_component_type(cfg->result);

            prop->saturatingAccumulation = VK_TRUE;
            prop->scope = convert_scope(cfg->scope);
         }
      }
   }

   return vk_outarray_status(&out);
}

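/* Example (a sketch, not driver code): enumerating the advertised shapes
 * with the usual two-call idiom. A shader using SPV_KHR_cooperative_matrix
 * would then pick one of the returned MxNxK configurations, e.g. FLOAT16
 * A/B operands with a FLOAT32 accumulator.
 *
 *    uint32_t n = 0;
 *    vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(pdev, &n, NULL);
 *    VkCooperativeMatrixPropertiesKHR *cfgs = calloc(n, sizeof(*cfgs));
 *    for (uint32_t i = 0; i < n; i++)
 *       cfgs[i].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
 *    vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(pdev, &n, cfgs);
 */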