1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #ifdef MAJOR_IN_MKDEV
29 #include <sys/mkdev.h>
30 #endif
31 #ifdef MAJOR_IN_SYSMACROS
32 #include <sys/sysmacros.h>
33 #endif
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <unistd.h>
37 #include <fcntl.h>
38 #include "drm-uapi/drm_fourcc.h"
39 #include "drm-uapi/drm.h"
40 #include <xf86drm.h>
41 
42 #include "anv_private.h"
43 #include "anv_measure.h"
44 #include "util/u_debug.h"
45 #include "util/build_id.h"
46 #include "util/disk_cache.h"
47 #include "util/mesa-sha1.h"
48 #include "util/os_file.h"
49 #include "util/os_misc.h"
50 #include "util/u_atomic.h"
51 #if DETECT_OS_ANDROID
52 #include "util/u_gralloc/u_gralloc.h"
53 #endif
54 #include "util/u_string.h"
55 #include "util/driconf.h"
56 #include "git_sha1.h"
57 #include "vk_common_entrypoints.h"
58 #include "vk_util.h"
59 #include "vk_deferred_operation.h"
60 #include "vk_drm_syncobj.h"
61 #include "common/intel_aux_map.h"
62 #include "common/intel_uuid.h"
63 #include "perf/intel_perf.h"
64 
65 #include "i915/anv_device.h"
66 #include "xe/anv_device.h"
67 #include "xe/anv_queue.h"
68 
69 #include "genxml/gen7_pack.h"
70 #include "genxml/genX_bits.h"
71 
72 static const driOptionDescription anv_dri_options[] = {
73    DRI_CONF_SECTION_PERFORMANCE
74       DRI_CONF_ADAPTIVE_SYNC(true)
75       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
76       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
77       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
78       DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
79       DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
80       DRI_CONF_ANV_DISABLE_FCV(false)
81       DRI_CONF_ANV_EXTERNAL_MEMORY_IMPLICIT_SYNC(true)
82       DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
83       DRI_CONF_ANV_FORCE_FILTER_ADDR_ROUNDING(false)
84       DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
85       DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
86       DRI_CONF_ANV_GENERATED_INDIRECT_RING_THRESHOLD(100)
87       DRI_CONF_NO_16BIT(false)
88       DRI_CONF_INTEL_ENABLE_WA_14018912822(false)
89       DRI_CONF_ANV_QUERY_CLEAR_WITH_BLORP_THRESHOLD(6)
90       DRI_CONF_ANV_QUERY_COPY_WITH_SHADER_THRESHOLD(6)
91       DRI_CONF_ANV_FORCE_INDIRECT_DESCRIPTORS(false)
92       DRI_CONF_SHADER_SPILLING_RATE(0)
93       DRI_CONF_OPT_B(intel_tbimr, true, "Enable TBIMR tiled rendering")
94    DRI_CONF_SECTION_END
95 
96    DRI_CONF_SECTION_DEBUG
97       DRI_CONF_ALWAYS_FLUSH_CACHE(false)
98       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
99       DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
100       DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
101       DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
102       DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2)
103       DRI_CONF_FORCE_VK_VENDOR(0)
104       DRI_CONF_FAKE_SPARSE(false)
105 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 34
106       DRI_CONF_VK_REQUIRE_ASTC(true)
107 #else
108       DRI_CONF_VK_REQUIRE_ASTC(false)
109 #endif
110    DRI_CONF_SECTION_END
111 
112    DRI_CONF_SECTION_QUALITY
113       DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
114    DRI_CONF_SECTION_END
115 };
116 
117 /* This is probably far too big but it reflects the max size used for messages
118  * in OpenGL's KHR_debug.
119  */
120 #define MAX_DEBUG_MESSAGE_LENGTH    4096
121 
122 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
123 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
124 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
125 #endif
126 
127 static void
128 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
129 {
130    char str[MAX_DEBUG_MESSAGE_LENGTH];
131    struct anv_device *device = (struct anv_device *)data;
132    UNUSED struct anv_instance *instance = device->physical->instance;
133 
134    va_list args;
135    va_start(args, fmt);
136    (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
137    va_end(args);
138 
139    //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
140 }
141 
142 static void
143 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
144 {
145    va_list args;
146    va_start(args, fmt);
147 
148    if (INTEL_DEBUG(DEBUG_PERF))
149       mesa_logd_v(fmt, args);
150 
151    va_end(args);
152 }
153 
154 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
155     defined(VK_USE_PLATFORM_XCB_KHR) || \
156     defined(VK_USE_PLATFORM_XLIB_KHR) || \
157     defined(VK_USE_PLATFORM_DISPLAY_KHR)
158 #define ANV_USE_WSI_PLATFORM
159 #endif
160 
161 #ifdef ANDROID_STRICT
162 #if ANDROID_API_LEVEL >= 33
163 #define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
164 #else
165 #define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
166 #endif
167 #else
168 #define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
169 #endif
170 
171 VkResult anv_EnumerateInstanceVersion(
172     uint32_t*                                   pApiVersion)
173 {
174     *pApiVersion = ANV_API_VERSION;
175     return VK_SUCCESS;
176 }
177 
178 static const struct vk_instance_extension_table instance_extensions = {
179    .KHR_device_group_creation                = true,
180    .KHR_external_fence_capabilities          = true,
181    .KHR_external_memory_capabilities         = true,
182    .KHR_external_semaphore_capabilities      = true,
183    .KHR_get_physical_device_properties2      = true,
184    .EXT_debug_report                         = true,
185    .EXT_debug_utils                          = true,
186 
187 #ifdef ANV_USE_WSI_PLATFORM
188    .KHR_get_surface_capabilities2            = true,
189    .KHR_surface                              = true,
190    .KHR_surface_protected_capabilities       = true,
191    .EXT_swapchain_colorspace                 = true,
192 #endif
193 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
194    .KHR_wayland_surface                      = true,
195 #endif
196 #ifdef VK_USE_PLATFORM_XCB_KHR
197    .KHR_xcb_surface                          = true,
198 #endif
199 #ifdef VK_USE_PLATFORM_XLIB_KHR
200    .KHR_xlib_surface                         = true,
201 #endif
202 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
203    .EXT_acquire_xlib_display                 = true,
204 #endif
205 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
206    .KHR_display                              = true,
207    .KHR_get_display_properties2              = true,
208    .EXT_direct_mode_display                  = true,
209    .EXT_display_surface_counter              = true,
210    .EXT_acquire_drm_display                  = true,
211 #endif
212 #ifndef VK_USE_PLATFORM_WIN32_KHR
213    .EXT_headless_surface                     = true,
214 #endif
215 };
216 
217 static void
218 get_device_extensions(const struct anv_physical_device *device,
219                       struct vk_device_extension_table *ext)
220 {
221    const bool has_syncobj_wait =
222       (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
223 
224    const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;
225 
226    *ext = (struct vk_device_extension_table) {
227       .KHR_8bit_storage                      = true,
228       .KHR_16bit_storage                     = !device->instance->no_16bit,
229       .KHR_acceleration_structure            = rt_enabled,
230       .KHR_bind_memory2                      = true,
231       .KHR_buffer_device_address             = true,
232       .KHR_calibrated_timestamps             = device->has_reg_timestamp,
233       .KHR_copy_commands2                    = true,
234       .KHR_cooperative_matrix                = anv_has_cooperative_matrix(device),
235       .KHR_create_renderpass2                = true,
236       .KHR_dedicated_allocation              = true,
237       .KHR_deferred_host_operations          = true,
238       .KHR_depth_stencil_resolve             = true,
239       .KHR_descriptor_update_template        = true,
240       .KHR_device_group                      = true,
241       .KHR_draw_indirect_count               = true,
242       .KHR_driver_properties                 = true,
243       .KHR_dynamic_rendering                 = true,
244       .KHR_external_fence                    = has_syncobj_wait,
245       .KHR_external_fence_fd                 = has_syncobj_wait,
246       .KHR_external_memory                   = true,
247       .KHR_external_memory_fd                = true,
248       .KHR_external_semaphore                = true,
249       .KHR_external_semaphore_fd             = true,
250       .KHR_format_feature_flags2             = true,
251       .KHR_fragment_shading_rate             = device->info.ver >= 11,
252       .KHR_get_memory_requirements2          = true,
253       .KHR_global_priority                   = device->max_context_priority >=
254                                                VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
255       .KHR_image_format_list                 = true,
256       .KHR_imageless_framebuffer             = true,
257 #ifdef ANV_USE_WSI_PLATFORM
258       .KHR_incremental_present               = true,
259 #endif
260       .KHR_index_type_uint8                  = true,
261       .KHR_line_rasterization                = true,
262       .KHR_load_store_op_none                = true,
263       .KHR_maintenance1                      = true,
264       .KHR_maintenance2                      = true,
265       .KHR_maintenance3                      = true,
266       .KHR_maintenance4                      = true,
267       .KHR_maintenance5                      = true,
268       .KHR_maintenance6                      = true,
269       .KHR_map_memory2                       = true,
270       .KHR_multiview                         = true,
271       .KHR_performance_query =
272          device->perf &&
273          (device->perf->i915_perf_version >= 3 ||
274           INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
275          device->use_call_secondary,
276       .KHR_pipeline_executable_properties    = true,
277       .KHR_pipeline_library                  = true,
278       /* Hide these behind dri configs for now since we cannot implement them reliably on
279        * all surfaces yet. There is no surface capability query for present wait/id,
280        * but the feature is useful enough to hide behind an opt-in mechanism for now.
281        * If the instance only enables surface extensions that unconditionally support present wait,
282        * we can also expose the extension that way. */
283       .KHR_present_id =
284          driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
285          wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
286       .KHR_present_wait =
287          driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
288          wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
289       .KHR_push_descriptor                   = true,
290       .KHR_ray_query                         = rt_enabled,
291       .KHR_ray_tracing_maintenance1          = rt_enabled,
292       .KHR_ray_tracing_pipeline              = rt_enabled,
293       .KHR_ray_tracing_position_fetch        = rt_enabled,
294       .KHR_relaxed_block_layout              = true,
295       .KHR_sampler_mirror_clamp_to_edge      = true,
296       .KHR_sampler_ycbcr_conversion          = true,
297       .KHR_separate_depth_stencil_layouts    = true,
298       .KHR_shader_atomic_int64               = true,
299       .KHR_shader_clock                      = true,
300       .KHR_shader_draw_parameters            = true,
301       .KHR_shader_expect_assume              = true,
302       .KHR_shader_float16_int8               = !device->instance->no_16bit,
303       .KHR_shader_float_controls             = true,
304       .KHR_shader_integer_dot_product        = true,
305       .KHR_shader_non_semantic_info          = true,
306       .KHR_shader_subgroup_extended_types    = true,
307       .KHR_shader_subgroup_rotate            = true,
308       .KHR_shader_subgroup_uniform_control_flow = true,
309       .KHR_shader_terminate_invocation       = true,
310       .KHR_spirv_1_4                         = true,
311       .KHR_storage_buffer_storage_class      = true,
312 #ifdef ANV_USE_WSI_PLATFORM
313       .KHR_swapchain                         = true,
314       .KHR_swapchain_mutable_format          = true,
315 #endif
316       .KHR_synchronization2                  = true,
317       .KHR_timeline_semaphore                = true,
318       .KHR_uniform_buffer_standard_layout    = true,
319       .KHR_variable_pointers                 = true,
320       .KHR_vertex_attribute_divisor          = true,
321       .KHR_video_queue                       = device->video_decode_enabled,
322       .KHR_video_decode_queue                = device->video_decode_enabled,
323       .KHR_video_decode_h264                 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
324       .KHR_video_decode_h265                 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
325       .KHR_vulkan_memory_model               = true,
326       .KHR_workgroup_memory_explicit_layout  = true,
327       .KHR_zero_initialize_workgroup_memory  = true,
328       .EXT_4444_formats                      = true,
329       .EXT_attachment_feedback_loop_layout   = true,
330       .EXT_attachment_feedback_loop_dynamic_state = true,
331       .EXT_border_color_swizzle              = true,
332       .EXT_buffer_device_address             = true,
333       .EXT_calibrated_timestamps             = device->has_reg_timestamp,
334       .EXT_color_write_enable                = true,
335       .EXT_conditional_rendering             = true,
336       .EXT_conservative_rasterization        = true,
337       .EXT_custom_border_color               = true,
338       .EXT_depth_bias_control                = true,
339       .EXT_depth_clamp_zero_one              = true,
340       .EXT_depth_clip_control                = true,
341       .EXT_depth_range_unrestricted          = device->info.ver >= 20,
342       .EXT_depth_clip_enable                 = true,
343       .EXT_descriptor_indexing               = true,
344 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
345       .EXT_display_control                   = true,
346 #endif
347       .EXT_dynamic_rendering_unused_attachments = true,
348       .EXT_extended_dynamic_state            = true,
349       .EXT_extended_dynamic_state2           = true,
350       .EXT_extended_dynamic_state3           = true,
351       .EXT_external_memory_dma_buf           = true,
352       .EXT_external_memory_host              = true,
353       .EXT_fragment_shader_interlock         = true,
354       .EXT_global_priority                   = device->max_context_priority >=
355                                                VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
356       .EXT_global_priority_query             = device->max_context_priority >=
357                                                VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
358       .EXT_graphics_pipeline_library         = !debug_get_bool_option("ANV_NO_GPL", false),
359       .EXT_host_query_reset                  = true,
360       .EXT_image_2d_view_of_3d               = true,
361       .EXT_image_robustness                  = true,
362       .EXT_image_drm_format_modifier         = true,
363       .EXT_image_sliced_view_of_3d           = true,
364       .EXT_image_view_min_lod                = true,
365       .EXT_index_type_uint8                  = true,
366       .EXT_inline_uniform_block              = true,
367       .EXT_line_rasterization                = true,
368       .EXT_load_store_op_none                = true,
369       /* Enable the extension only if we have support for both local and
370        * system memory.
371        */
372       .EXT_memory_budget                     = (!device->info.has_local_mem ||
373                                                 device->vram_mappable.available > 0) &&
374                                                device->sys.available,
375       .EXT_mesh_shader                       = device->info.has_mesh_shading,
376       .EXT_mutable_descriptor_type           = true,
377       .EXT_nested_command_buffer             = true,
378       .EXT_non_seamless_cube_map             = true,
379       .EXT_pci_bus_info                      = true,
380       .EXT_physical_device_drm               = true,
381       .EXT_pipeline_creation_cache_control   = true,
382       .EXT_pipeline_creation_feedback        = true,
383       .EXT_pipeline_library_group_handles    = rt_enabled,
384       .EXT_pipeline_robustness               = true,
385       .EXT_post_depth_coverage               = true,
386       .EXT_primitives_generated_query        = true,
387       .EXT_primitive_topology_list_restart   = true,
388       .EXT_private_data                      = true,
389       .EXT_provoking_vertex                  = true,
390       .EXT_queue_family_foreign              = true,
391       .EXT_robustness2                       = true,
392       .EXT_sample_locations                  = true,
393       .EXT_sampler_filter_minmax             = true,
394       .EXT_scalar_block_layout               = true,
395       .EXT_separate_stencil_usage            = true,
396       .EXT_shader_atomic_float               = true,
397       .EXT_shader_atomic_float2              = true,
398       .EXT_shader_demote_to_helper_invocation = true,
399       .EXT_shader_module_identifier          = true,
400       .EXT_shader_stencil_export             = true,
401       .EXT_shader_subgroup_ballot            = true,
402       .EXT_shader_subgroup_vote              = true,
403       .EXT_shader_viewport_index_layer       = true,
404       .EXT_subgroup_size_control             = true,
405       .EXT_texel_buffer_alignment            = true,
406       .EXT_tooling_info                      = true,
407       .EXT_transform_feedback                = true,
408       .EXT_vertex_attribute_divisor          = true,
409       .EXT_vertex_input_dynamic_state        = true,
410       .EXT_ycbcr_image_arrays                = true,
411       .AMD_buffer_marker                     = true,
412       .AMD_texture_gather_bias_lod           = device->info.ver >= 20,
413 #if DETECT_OS_ANDROID
414       .ANDROID_external_memory_android_hardware_buffer = true,
415       .ANDROID_native_buffer                 = true,
416 #endif
417       .GOOGLE_decorate_string                = true,
418       .GOOGLE_hlsl_functionality1            = true,
419       .GOOGLE_user_type                      = true,
420       .INTEL_performance_query               = device->perf &&
421                                                device->perf->i915_perf_version >= 3,
422       .INTEL_shader_integer_functions2       = true,
423       .EXT_multi_draw                        = true,
424       .NV_compute_shader_derivatives         = true,
425       .VALVE_mutable_descriptor_type         = true,
426    };
427 }
428 
429 static void
430 get_features(const struct anv_physical_device *pdevice,
431              struct vk_features *features)
432 {
433    struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
434 
435    const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
436 
437    const bool mesh_shader =
438       pdevice->vk.supported_extensions.EXT_mesh_shader;
439 
440    const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse ||
441                                    pdevice->has_sparse;
442 
443    *features = (struct vk_features) {
444       /* Vulkan 1.0 */
445       .robustBufferAccess                       = true,
446       .fullDrawIndexUint32                      = true,
447       .imageCubeArray                           = true,
448       .independentBlend                         = true,
449       .geometryShader                           = true,
450       .tessellationShader                       = true,
451       .sampleRateShading                        = true,
452       .dualSrcBlend                             = true,
453       .logicOp                                  = true,
454       .multiDrawIndirect                        = true,
455       .drawIndirectFirstInstance                = true,
456       .depthClamp                               = true,
457       .depthBiasClamp                           = true,
458       .fillModeNonSolid                         = true,
459       .depthBounds                              = pdevice->info.ver >= 12,
460       .wideLines                                = true,
461       .largePoints                              = true,
462       .alphaToOne                               = true,
463       .multiViewport                            = true,
464       .samplerAnisotropy                        = true,
465       .textureCompressionETC2                   = true,
466       .textureCompressionASTC_LDR               = pdevice->has_astc_ldr ||
467                                                   pdevice->emu_astc_ldr,
468       .textureCompressionBC                     = true,
469       .occlusionQueryPrecise                    = true,
470       .pipelineStatisticsQuery                  = true,
471       .vertexPipelineStoresAndAtomics           = true,
472       .fragmentStoresAndAtomics                 = true,
473       .shaderTessellationAndGeometryPointSize   = true,
474       .shaderImageGatherExtended                = true,
475       .shaderStorageImageExtendedFormats        = true,
476       .shaderStorageImageMultisample            = false,
477       /* Gfx12.5 has all the required formats supported in HW for typed
478        * read/writes
479        */
480       .shaderStorageImageReadWithoutFormat      = pdevice->info.verx10 >= 125,
481       .shaderStorageImageWriteWithoutFormat     = true,
482       .shaderUniformBufferArrayDynamicIndexing  = true,
483       .shaderSampledImageArrayDynamicIndexing   = true,
484       .shaderStorageBufferArrayDynamicIndexing  = true,
485       .shaderStorageImageArrayDynamicIndexing   = true,
486       .shaderClipDistance                       = true,
487       .shaderCullDistance                       = true,
488       .shaderFloat64                            = pdevice->info.has_64bit_float,
489       .shaderInt64                              = true,
490       .shaderInt16                              = true,
491       .shaderResourceMinLod                     = true,
492       .shaderResourceResidency                  = has_sparse_or_fake,
493       .sparseBinding                            = has_sparse_or_fake,
494       .sparseResidencyAliased                   = has_sparse_or_fake,
495       .sparseResidencyBuffer                    = has_sparse_or_fake,
496       .sparseResidencyImage2D                   = has_sparse_or_fake,
497       .sparseResidencyImage3D                   = has_sparse_or_fake,
498       .sparseResidency2Samples                  = false,
499       .sparseResidency4Samples                  = false,
500       .sparseResidency8Samples                  = false,
501       .sparseResidency16Samples                 = false,
502       .variableMultisampleRate                  = true,
503       .inheritedQueries                         = true,
504 
505       /* Vulkan 1.1 */
506       .storageBuffer16BitAccess            = !pdevice->instance->no_16bit,
507       .uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit,
508       .storagePushConstant16               = true,
509       .storageInputOutput16                = false,
510       .multiview                           = true,
511       .multiviewGeometryShader             = true,
512       .multiviewTessellationShader         = true,
513       .variablePointersStorageBuffer       = true,
514       .variablePointers                    = true,
515       .protectedMemory                     = pdevice->has_protected_contexts,
516       .samplerYcbcrConversion              = true,
517       .shaderDrawParameters                = true,
518 
519       /* Vulkan 1.2 */
520       .samplerMirrorClampToEdge            = true,
521       .drawIndirectCount                   = true,
522       .storageBuffer8BitAccess             = true,
523       .uniformAndStorageBuffer8BitAccess   = true,
524       .storagePushConstant8                = true,
525       .shaderBufferInt64Atomics            = true,
526       .shaderSharedInt64Atomics            = false,
527       .shaderFloat16                       = !pdevice->instance->no_16bit,
528       .shaderInt8                          = !pdevice->instance->no_16bit,
529 
530       .descriptorIndexing                                 = true,
531       .shaderInputAttachmentArrayDynamicIndexing          = false,
532       .shaderUniformTexelBufferArrayDynamicIndexing       = true,
533       .shaderStorageTexelBufferArrayDynamicIndexing       = true,
534       .shaderUniformBufferArrayNonUniformIndexing         = true,
535       .shaderSampledImageArrayNonUniformIndexing          = true,
536       .shaderStorageBufferArrayNonUniformIndexing         = true,
537       .shaderStorageImageArrayNonUniformIndexing          = true,
538       .shaderInputAttachmentArrayNonUniformIndexing       = false,
539       .shaderUniformTexelBufferArrayNonUniformIndexing    = true,
540       .shaderStorageTexelBufferArrayNonUniformIndexing    = true,
541       .descriptorBindingUniformBufferUpdateAfterBind      = true,
542       .descriptorBindingSampledImageUpdateAfterBind       = true,
543       .descriptorBindingStorageImageUpdateAfterBind       = true,
544       .descriptorBindingStorageBufferUpdateAfterBind      = true,
545       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
546       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
547       .descriptorBindingUpdateUnusedWhilePending          = true,
548       .descriptorBindingPartiallyBound                    = true,
549       .descriptorBindingVariableDescriptorCount           = true,
550       .runtimeDescriptorArray                             = true,
551 
552       .samplerFilterMinmax                 = true,
553       .scalarBlockLayout                   = true,
554       .imagelessFramebuffer                = true,
555       .uniformBufferStandardLayout         = true,
556       .shaderSubgroupExtendedTypes         = true,
557       .separateDepthStencilLayouts         = true,
558       .hostQueryReset                      = true,
559       .timelineSemaphore                   = true,
560       .bufferDeviceAddress                 = true,
561       .bufferDeviceAddressCaptureReplay    = true,
562       .bufferDeviceAddressMultiDevice      = false,
563       .vulkanMemoryModel                   = true,
564       .vulkanMemoryModelDeviceScope        = true,
565       .vulkanMemoryModelAvailabilityVisibilityChains = true,
566       .shaderOutputViewportIndex           = true,
567       .shaderOutputLayer                   = true,
568       .subgroupBroadcastDynamicId          = true,
569 
570       /* Vulkan 1.3 */
571       .robustImageAccess = true,
572       .inlineUniformBlock = true,
573       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
574       .pipelineCreationCacheControl = true,
575       .privateData = true,
576       .shaderDemoteToHelperInvocation = true,
577       .shaderTerminateInvocation = true,
578       .subgroupSizeControl = true,
579       .computeFullSubgroups = true,
580       .synchronization2 = true,
581       .textureCompressionASTC_HDR = false,
582       .shaderZeroInitializeWorkgroupMemory = true,
583       .dynamicRendering = true,
584       .shaderIntegerDotProduct = true,
585       .maintenance4 = true,
586 
587       /* VK_EXT_4444_formats */
588       .formatA4R4G4B4 = true,
589       .formatA4B4G4R4 = false,
590 
591       /* VK_KHR_acceleration_structure */
592       .accelerationStructure = rt_enabled,
593       .accelerationStructureCaptureReplay = false, /* TODO */
594       .accelerationStructureIndirectBuild = false, /* TODO */
595       .accelerationStructureHostCommands = false,
596       .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,
597 
598       /* VK_EXT_border_color_swizzle */
599       .borderColorSwizzle = true,
600       .borderColorSwizzleFromImage = true,
601 
602       /* VK_EXT_color_write_enable */
603       .colorWriteEnable = true,
604 
605       /* VK_EXT_image_2d_view_of_3d  */
606       .image2DViewOf3D = true,
607       .sampler2DViewOf3D = true,
608 
609       /* VK_EXT_image_sliced_view_of_3d */
610       .imageSlicedViewOf3D = true,
611 
612       /* VK_NV_compute_shader_derivatives */
613       .computeDerivativeGroupQuads = true,
614       .computeDerivativeGroupLinear = true,
615 
616       /* VK_EXT_conditional_rendering */
617       .conditionalRendering = true,
618       .inheritedConditionalRendering = true,
619 
620       /* VK_EXT_custom_border_color */
621       .customBorderColors = true,
622       .customBorderColorWithoutFormat = true,
623 
624       /* VK_EXT_depth_clamp_zero_one */
625       .depthClampZeroOne = true,
626 
627       /* VK_EXT_depth_clip_enable */
628       .depthClipEnable = true,
629 
630       /* VK_EXT_fragment_shader_interlock */
631       .fragmentShaderSampleInterlock = true,
632       .fragmentShaderPixelInterlock = true,
633       .fragmentShaderShadingRateInterlock = false,
634 
635       /* VK_EXT_global_priority_query */
636       .globalPriorityQuery = true,
637 
638       /* VK_EXT_graphics_pipeline_library */
639       .graphicsPipelineLibrary =
640          pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,
641 
642       /* VK_KHR_fragment_shading_rate */
643       .pipelineFragmentShadingRate = true,
644       .primitiveFragmentShadingRate =
645          pdevice->info.has_coarse_pixel_primitive_and_cb,
646       .attachmentFragmentShadingRate =
647          pdevice->info.has_coarse_pixel_primitive_and_cb,
648 
649       /* VK_EXT_image_view_min_lod */
650       .minLod = true,
651 
652       /* VK_EXT_index_type_uint8 */
653       .indexTypeUint8 = true,
654 
655       /* VK_EXT_line_rasterization */
656       /* Rectangular lines must use the strict algorithm, which is not
657        * supported for wide lines prior to ICL.  See rasterization_mode for
658        * details and how the HW states are programmed.
659        */
660       .rectangularLines = pdevice->info.ver >= 10,
661       .bresenhamLines = true,
662       /* Support for Smooth lines with MSAA was removed on gfx11.  From the
663        * BSpec section "Multisample ModesState" table for "AA Line Support
664        * Requirements":
665        *
666        *    GFX10:BUG:######## 	NUM_MULTISAMPLES == 1
667        *
668        * Fortunately, this isn't a case most people care about.
669        */
670       .smoothLines = pdevice->info.ver < 10,
671       .stippledRectangularLines = false,
672       .stippledBresenhamLines = true,
673       .stippledSmoothLines = false,
674 
675       /* VK_NV_mesh_shader */
676       .taskShaderNV = false,
677       .meshShaderNV = false,
678 
679       /* VK_EXT_mesh_shader */
680       .taskShader = mesh_shader,
681       .meshShader = mesh_shader,
682       .multiviewMeshShader = false,
683       .primitiveFragmentShadingRateMeshShader = mesh_shader,
684       .meshShaderQueries = false,
685 
686       /* VK_EXT_mutable_descriptor_type */
687       .mutableDescriptorType = true,
688 
689       /* VK_KHR_performance_query */
690       .performanceCounterQueryPools = true,
691       /* HW only supports a single configuration at a time. */
692       .performanceCounterMultipleQueryPools = false,
693 
694       /* VK_KHR_pipeline_executable_properties */
695       .pipelineExecutableInfo = true,
696 
697       /* VK_EXT_primitives_generated_query */
698       .primitivesGeneratedQuery = true,
699       .primitivesGeneratedQueryWithRasterizerDiscard = false,
700       .primitivesGeneratedQueryWithNonZeroStreams = false,
701 
702       /* VK_EXT_pipeline_library_group_handles */
703       .pipelineLibraryGroupHandles = true,
704 
705       /* VK_EXT_provoking_vertex */
706       .provokingVertexLast = true,
707       .transformFeedbackPreservesProvokingVertex = true,
708 
709       /* VK_KHR_ray_query */
710       .rayQuery = rt_enabled,
711 
712       /* VK_KHR_ray_tracing_maintenance1 */
713       .rayTracingMaintenance1 = rt_enabled,
714       .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,
715 
716       /* VK_KHR_ray_tracing_pipeline */
717       .rayTracingPipeline = rt_enabled,
718       .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
719       .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
720       .rayTracingPipelineTraceRaysIndirect = rt_enabled,
721       .rayTraversalPrimitiveCulling = rt_enabled,
722 
723       /* VK_EXT_robustness2 */
724       .robustBufferAccess2 = true,
725       .robustImageAccess2 = true,
726       .nullDescriptor = true,
727 
728       /* VK_EXT_shader_atomic_float */
729       .shaderBufferFloat32Atomics =    true,
730       .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
731       .shaderBufferFloat64Atomics =
732          pdevice->info.has_64bit_float && pdevice->info.has_lsc,
733       .shaderBufferFloat64AtomicAdd =  false,
734       .shaderSharedFloat32Atomics =    true,
735       .shaderSharedFloat32AtomicAdd =  false,
736       .shaderSharedFloat64Atomics =    false,
737       .shaderSharedFloat64AtomicAdd =  false,
738       .shaderImageFloat32Atomics =     true,
739       .shaderImageFloat32AtomicAdd =   false,
740       .sparseImageFloat32Atomics =     false,
741       .sparseImageFloat32AtomicAdd =   false,
742 
743       /* VK_EXT_shader_atomic_float2 */
744       .shaderBufferFloat16Atomics      = pdevice->info.has_lsc,
745       .shaderBufferFloat16AtomicAdd    = false,
746       .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
747       .shaderBufferFloat32AtomicMinMax = true,
748       .shaderBufferFloat64AtomicMinMax =
749          pdevice->info.has_64bit_float && pdevice->info.has_lsc,
750       .shaderSharedFloat16Atomics      = pdevice->info.has_lsc,
751       .shaderSharedFloat16AtomicAdd    = false,
752       .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
753       .shaderSharedFloat32AtomicMinMax = true,
754       .shaderSharedFloat64AtomicMinMax = false,
755       .shaderImageFloat32AtomicMinMax  = false,
756       .sparseImageFloat32AtomicMinMax  = false,
757 
758       /* VK_KHR_shader_clock */
759       .shaderSubgroupClock = true,
760       .shaderDeviceClock = false,
761 
762       /* VK_INTEL_shader_integer_functions2 */
763       .shaderIntegerFunctions2 = true,
764 
765       /* VK_EXT_shader_module_identifier */
766       .shaderModuleIdentifier = true,
767 
768       /* VK_KHR_shader_subgroup_uniform_control_flow */
769       .shaderSubgroupUniformControlFlow = true,
770 
771       /* VK_EXT_texel_buffer_alignment */
772       .texelBufferAlignment = true,
773 
774       /* VK_EXT_transform_feedback */
775       .transformFeedback = true,
776       .geometryStreams = true,
777 
778       /* VK_KHR_vertex_attribute_divisor */
779       .vertexAttributeInstanceRateDivisor = true,
780       .vertexAttributeInstanceRateZeroDivisor = true,
781 
782       /* VK_KHR_workgroup_memory_explicit_layout */
783       .workgroupMemoryExplicitLayout = true,
784       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
785       .workgroupMemoryExplicitLayout8BitAccess = true,
786       .workgroupMemoryExplicitLayout16BitAccess = true,
787 
788       /* VK_EXT_ycbcr_image_arrays */
789       .ycbcrImageArrays = true,
790 
791       /* VK_EXT_extended_dynamic_state */
792       .extendedDynamicState = true,
793 
794       /* VK_EXT_extended_dynamic_state2 */
795       .extendedDynamicState2 = true,
796       .extendedDynamicState2LogicOp = true,
797       .extendedDynamicState2PatchControlPoints = true,
798 
799       /* VK_EXT_extended_dynamic_state3 */
800       .extendedDynamicState3PolygonMode = true,
801       .extendedDynamicState3TessellationDomainOrigin = true,
802       .extendedDynamicState3RasterizationStream = true,
803       .extendedDynamicState3LineStippleEnable = true,
804       .extendedDynamicState3LineRasterizationMode = true,
805       .extendedDynamicState3LogicOpEnable = true,
806       .extendedDynamicState3AlphaToOneEnable = true,
807       .extendedDynamicState3DepthClipEnable = true,
808       .extendedDynamicState3DepthClampEnable = true,
809       .extendedDynamicState3DepthClipNegativeOneToOne = true,
810       .extendedDynamicState3ProvokingVertexMode = true,
811       .extendedDynamicState3ColorBlendEnable = true,
812       .extendedDynamicState3ColorWriteMask = true,
813       .extendedDynamicState3ColorBlendEquation = true,
814       .extendedDynamicState3SampleLocationsEnable = true,
815       .extendedDynamicState3SampleMask = true,
816       .extendedDynamicState3ConservativeRasterizationMode = true,
817 
818       .extendedDynamicState3RasterizationSamples = false,
819       .extendedDynamicState3AlphaToCoverageEnable = false,
820       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
821       .extendedDynamicState3ViewportWScalingEnable = false,
822       .extendedDynamicState3ViewportSwizzle = false,
823       .extendedDynamicState3ShadingRateImageEnable = false,
824       .extendedDynamicState3CoverageToColorEnable = false,
825       .extendedDynamicState3CoverageToColorLocation = false,
826       .extendedDynamicState3CoverageModulationMode = false,
827       .extendedDynamicState3CoverageModulationTableEnable = false,
828       .extendedDynamicState3CoverageModulationTable = false,
829       .extendedDynamicState3CoverageReductionMode = false,
830       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
831       .extendedDynamicState3ColorBlendAdvanced = false,
832 
833       /* VK_EXT_multi_draw */
834       .multiDraw = true,
835 
836       /* VK_EXT_non_seamless_cube_map */
837       .nonSeamlessCubeMap = true,
838 
839       /* VK_EXT_primitive_topology_list_restart */
840       .primitiveTopologyListRestart = true,
841       .primitiveTopologyPatchListRestart = true,
842 
843       /* VK_EXT_depth_clip_control */
844       .depthClipControl = true,
845 
846       /* VK_KHR_present_id */
847       .presentId = pdevice->vk.supported_extensions.KHR_present_id,
848 
849       /* VK_KHR_present_wait */
850       .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
851 
852       /* VK_EXT_vertex_input_dynamic_state */
853       .vertexInputDynamicState = true,
854 
855       /* VK_KHR_ray_tracing_position_fetch */
856       .rayTracingPositionFetch = rt_enabled,
857 
858       /* VK_EXT_dynamic_rendering_unused_attachments */
859       .dynamicRenderingUnusedAttachments = true,
860 
861       /* VK_EXT_depth_bias_control */
862       .depthBiasControl = true,
863       .floatRepresentation = true,
864       .leastRepresentableValueForceUnormRepresentation = false,
865       .depthBiasExact = true,
866 
867       /* VK_EXT_pipeline_robustness */
868       .pipelineRobustness = true,
869 
870       /* VK_KHR_maintenance5 */
871       .maintenance5 = true,
872 
873       /* VK_KHR_maintenance6 */
874       .maintenance6 = true,
875 
876       /* VK_EXT_nested_command_buffer */
877       .nestedCommandBuffer = true,
878       .nestedCommandBufferRendering = true,
879       .nestedCommandBufferSimultaneousUse = false,
880 
881       /* VK_KHR_cooperative_matrix */
882       .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),
883 
884 
885       /* VK_KHR_shader_subgroup_rotate */
886       .shaderSubgroupRotate = true,
887       .shaderSubgroupRotateClustered = true,
888 
889       /* VK_EXT_attachment_feedback_loop_layout */
890       .attachmentFeedbackLoopLayout = true,
891 
892       /* VK_EXT_attachment_feedback_loop_dynamic_state */
893       .attachmentFeedbackLoopDynamicState = true,
894 
895       /* VK_KHR_shader_expect_assume */
896       .shaderExpectAssume = true,
897    };
898 
899    /* The new DOOM and Wolfenstein games require depthBounds without
900     * checking for it.  They seem to run fine without it so just claim it's
901     * there and accept the consequences.
902     */
903    if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
904       features->depthBounds = true;
905 }
906 
907 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
908 
909 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
910 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
911 
912 #define MAX_CUSTOM_BORDER_COLORS                   4096
913 
914 static VkDeviceSize
915 anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
916 {
917    VkDeviceSize ret = 0;
918 
919    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
920       if (pdevice->memory.heaps[i].size > ret)
921          ret = pdevice->memory.heaps[i].size;
922    }
923 
924    return ret;
925 }
926 
927 static void
928 get_properties_1_1(const struct anv_physical_device *pdevice,
929                    struct vk_properties *p)
930 {
931    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
932    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
933    memset(p->deviceLUID, 0, VK_LUID_SIZE);
934    p->deviceNodeMask = 0;
935    p->deviceLUIDValid = false;
936 
937    p->subgroupSize = BRW_SUBGROUP_SIZE;
938    VkShaderStageFlags scalar_stages = 0;
939    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
940       scalar_stages |= mesa_to_vk_shader_stage(stage);
941    }
942    if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
943       scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
944                        VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
945                        VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
946                        VK_SHADER_STAGE_MISS_BIT_KHR |
947                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
948                        VK_SHADER_STAGE_CALLABLE_BIT_KHR;
949    }
950    if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
951       scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
952                        VK_SHADER_STAGE_MESH_BIT_EXT;
953    }
954    p->subgroupSupportedStages = scalar_stages;
955    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
956                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
957                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
958                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
959                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
960                                     VK_SUBGROUP_FEATURE_QUAD_BIT |
961                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
962                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
963                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
964                                     VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
965    p->subgroupQuadOperationsInAllStages = true;
966 
967    p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
968    p->maxMultiviewViewCount      = 16;
969    p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
970    /* Our protected implementation is a memory encryption mechanism; it
971     * doesn't page fault.
972     */
973    p->protectedNoFault           = true;
974    /* This value doesn't matter for us today as our per-stage descriptors are
975     * the real limit.
976     */
977    p->maxPerSetDescriptors       = 1024;
978 
979    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
980       p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
981                                         pdevice->memory.heaps[i].size);
982    }
983 }
984 
985 static void
986 get_properties_1_2(const struct anv_physical_device *pdevice,
987                    struct vk_properties *p)
988 {
989    p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
990    memset(p->driverName, 0, sizeof(p->driverName));
991    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
992             "Intel open-source Mesa driver");
993    memset(p->driverInfo, 0, sizeof(p->driverInfo));
994    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
995             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
996 
997    p->conformanceVersion = (VkConformanceVersion) {
998       .major = 1,
999       .minor = 3,
1000       .subminor = 6,
1001       .patch = 0,
1002    };
1003 
1004    p->denormBehaviorIndependence =
1005       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1006    p->roundingModeIndependence =
1007       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
1008 
1009    /* Broadwell does not support HF denorms and there are restrictions on
1010     * other gens. According to Kabylake's PRM:
1011     *
1012     * "math - Extended Math Function
1013     * [...]
1014     * Restriction : Half-float denorms are always retained."
1015     */
1016    p->shaderDenormFlushToZeroFloat16         = false;
1017    p->shaderDenormPreserveFloat16            = true;
1018    p->shaderRoundingModeRTEFloat16           = true;
1019    p->shaderRoundingModeRTZFloat16           = true;
1020    p->shaderSignedZeroInfNanPreserveFloat16  = true;
1021 
1022    p->shaderDenormFlushToZeroFloat32         = true;
1023    p->shaderDenormPreserveFloat32            = true;
1024    p->shaderRoundingModeRTEFloat32           = true;
1025    p->shaderRoundingModeRTZFloat32           = true;
1026    p->shaderSignedZeroInfNanPreserveFloat32  = true;
1027 
1028    p->shaderDenormFlushToZeroFloat64         = true;
1029    p->shaderDenormPreserveFloat64            = true;
1030    p->shaderRoundingModeRTEFloat64           = true;
1031    p->shaderRoundingModeRTZFloat64           = true;
1032    p->shaderSignedZeroInfNanPreserveFloat64  = true;
1033 
1034    /* It's a bit hard to exactly map our implementation to the limits
1035     * described by Vulkan.  The bindless surface handle in the extended
1036     * message descriptors is 20 bits and it's an index into the table of
1037     * RENDER_SURFACE_STATE structs that starts at bindless surface base
1038     * address.  This means that we can have at most 1M surface states
1039     * allocated at any given time.  Since most image views take two
1040     * descriptors, this means we have a limit of about 500K image views.
1041     *
1042     * However, since we allocate surface states at vkCreateImageView time,
1043     * this means our limit is actually something on the order of 500K image
1044     * views allocated at any time.  The actual limit described by Vulkan, on
1045     * the other hand, is a limit of how many you can have in a descriptor set.
1046     * Assuming anyone using 1M descriptors will be using the same image view
1047     * twice a bunch of times (or a bunch of null descriptors), we can safely
1048     * advertise a larger limit here.
1049     */
1050    const unsigned max_bindless_views =
1051       anv_physical_device_bindless_heap_size(pdevice) / ANV_SURFACE_STATE_SIZE;
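   /* A rough sanity check on the figures above, assuming a 20-bit handle and
    * roughly two descriptors per image view: 2^20 = 1,048,576 surface states,
    * and 1,048,576 / 2 = 524,288, which is where the "about 500K image views"
    * estimate in the comment comes from.
    */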
1052    p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
1053    p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
1054    p->shaderSampledImageArrayNonUniformIndexingNative    = false;
1055    p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
1056    p->shaderStorageImageArrayNonUniformIndexingNative    = false;
1057    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1058    p->robustBufferAccessUpdateAfterBind                  = true;
1059    p->quadDivergentImplicitLod                           = false;
1060    p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
1061    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
1062    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
1063    p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
1064    p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
1065    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
1066    p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
1067    p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
1068    p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
1069    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
1070    p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
1071    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
1072    p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
1073    p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
1074    p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
1075 
1076    /* We support all of the depth resolve modes */
1077    p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
1078                                       VK_RESOLVE_MODE_AVERAGE_BIT |
1079                                       VK_RESOLVE_MODE_MIN_BIT |
1080                                       VK_RESOLVE_MODE_MAX_BIT;
1081    /* Average doesn't make sense for stencil so we don't support that */
1082    p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
1083                                       VK_RESOLVE_MODE_MIN_BIT |
1084                                       VK_RESOLVE_MODE_MAX_BIT;
1085    p->independentResolveNone  = true;
1086    p->independentResolve      = true;
1087 
1088    p->filterMinmaxSingleComponentFormats  = true;
1089    p->filterMinmaxImageComponentMapping   = true;
1090 
1091    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1092 
1093    p->framebufferIntegerColorSampleCounts =
1094       isl_device_get_sample_counts(&pdevice->isl_dev);
1095 }
1096 
1097 static void
1098 get_properties_1_3(const struct anv_physical_device *pdevice,
1099                    struct vk_properties *p)
1100 {
1101    if (pdevice->info.ver >= 20)
1102       p->minSubgroupSize = 16;
1103    else
1104       p->minSubgroupSize = 8;
1105    p->maxSubgroupSize = 32;
1106    p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
1107    p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
1108                                    VK_SHADER_STAGE_TASK_BIT_EXT |
1109                                    VK_SHADER_STAGE_MESH_BIT_EXT;
1110 
1111    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1112    p->maxPerStageDescriptorInlineUniformBlocks =
1113       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1114    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
1115       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1116    p->maxDescriptorSetInlineUniformBlocks =
1117       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1118    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
1119       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1120    p->maxInlineUniformTotalSize = UINT16_MAX;
1121 
1122    p->integerDotProduct8BitUnsignedAccelerated = false;
1123    p->integerDotProduct8BitSignedAccelerated = false;
1124    p->integerDotProduct8BitMixedSignednessAccelerated = false;
1125    p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
1126    p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
1127    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
1128    p->integerDotProduct16BitUnsignedAccelerated = false;
1129    p->integerDotProduct16BitSignedAccelerated = false;
1130    p->integerDotProduct16BitMixedSignednessAccelerated = false;
1131    p->integerDotProduct32BitUnsignedAccelerated = false;
1132    p->integerDotProduct32BitSignedAccelerated = false;
1133    p->integerDotProduct32BitMixedSignednessAccelerated = false;
1134    p->integerDotProduct64BitUnsignedAccelerated = false;
1135    p->integerDotProduct64BitSignedAccelerated = false;
1136    p->integerDotProduct64BitMixedSignednessAccelerated = false;
1137    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
1138    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
1139    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
1140    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
1141    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
1142    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
1143    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
1144    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
1145    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1146    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1147    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1148    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1149    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1150    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1151    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1152 
1153    /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
1154     * Base Address:
1155     *
1156     *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
1157     *    specifies the base address of the first element of the surface,
1158     *    computed in software by adding the surface base address to the
1159     *    byte offset of the element in the buffer. The base address must
1160     *    be aligned to element size."
1161     *
1162     * The typed dataport messages require that things be texel aligned.
1163     * Otherwise, we may just load/store the wrong data or, in the worst
1164     * case, there may be hangs.
1165     */
1166    p->storageTexelBufferOffsetAlignmentBytes = 16;
1167    p->storageTexelBufferOffsetSingleTexelAlignment = true;
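   /* For instance, the largest texel formats (e.g. four-component 32-bit) are
    * 16 bytes, so a 16-byte offset alignment combined with single-texel
    * alignment satisfies the element-size requirement quoted above for every
    * supported texel format.
    */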
1168 
1169    /* The sampler, however, is much more forgiving and it can handle
1170     * arbitrary byte alignment for linear and buffer surfaces.  It's
1171     * hard to find a good PRM citation for this but years of empirical
1172     * experience demonstrate that this is true.
1173     */
1174    p->uniformTexelBufferOffsetAlignmentBytes = 1;
1175    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1176 
1177    p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
1178 }
1179 
1180 static void
1181 get_properties(const struct anv_physical_device *pdevice,
1182                struct vk_properties *props)
1183 {
1184 
1185    const struct intel_device_info *devinfo = &pdevice->info;
1186 
1187    const uint32_t max_ssbos = UINT16_MAX;
1188    const uint32_t max_textures = UINT16_MAX;
1189    const uint32_t max_samplers = UINT16_MAX;
1190    const uint32_t max_images = UINT16_MAX;
1191    const VkDeviceSize max_heap_size = anv_get_physical_device_max_heap_size(pdevice);
1192 
1193    /* Claim a high per-stage limit since we have bindless. */
1194    const uint32_t max_per_stage = UINT32_MAX;
1195 
1196    const uint32_t max_workgroup_size =
1197       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1198 
1199    const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse ||
1200                                    pdevice->has_sparse;
1201 
1202    uint64_t sparse_addr_space_size =
1203       !has_sparse_or_fake ? 0 :
1204       pdevice->sparse_uses_trtt ? pdevice->va.trtt.size :
1205       pdevice->va.high_heap.size;
1206 
1207    VkSampleCountFlags sample_counts =
1208       isl_device_get_sample_counts(&pdevice->isl_dev);
1209 
1210 
1211    *props = (struct vk_properties) {
1212       .apiVersion = ANV_API_VERSION,
1213       .driverVersion = vk_get_driver_version(),
1214       .vendorID = pdevice->instance->force_vk_vendor != 0 ?
1215                   pdevice->instance->force_vk_vendor : 0x8086,
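           /* 0x8086 is the Intel PCI vendor ID; force_vk_vendor is an
            * instance-level override for applications that key behavior off
            * the reported vendor.
            */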
1216       .deviceID = pdevice->info.pci_device_id,
1217       .deviceType = pdevice->info.has_local_mem ?
1218                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
1219                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1220 
1221       /* Limits: */
1222       .maxImageDimension1D                      = (1 << 14),
1223       .maxImageDimension2D                      = (1 << 14),
1224       .maxImageDimension3D                      = (1 << 11),
1225       .maxImageDimensionCube                    = (1 << 14),
1226       .maxImageArrayLayers                      = (1 << 11),
1227       .maxTexelBufferElements                   = 128 * 1024 * 1024,
1228       .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1229       .maxStorageBufferRange                    = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
1230       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1231       .maxMemoryAllocationCount                 = UINT32_MAX,
1232       .maxSamplerAllocationCount                = 64 * 1024,
1233       .bufferImageGranularity                   = 1,
1234       .sparseAddressSpaceSize                   = sparse_addr_space_size,
1235       .maxBoundDescriptorSets                   = MAX_SETS,
1236       .maxPerStageDescriptorSamplers            = max_samplers,
1237       .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1238       .maxPerStageDescriptorStorageBuffers      = max_ssbos,
1239       .maxPerStageDescriptorSampledImages       = max_textures,
1240       .maxPerStageDescriptorStorageImages       = max_images,
1241       .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1242       .maxPerStageResources                     = max_per_stage,
1243       .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1244       .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
1245       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1246       .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
1247       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1248       .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1249       .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
1250       .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1251       .maxVertexInputAttributes                 = MAX_VES,
1252       .maxVertexInputBindings                   = MAX_VBS,
1253       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1254        *
1255        * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1256        */
1257       .maxVertexInputAttributeOffset            = 2047,
1258       /* Skylake PRMs: Volume 2d: Command Reference: Structures:
1259        *
1260        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1261        */
1262       .maxVertexInputBindingStride              = 4095,
1263       .maxVertexOutputComponents                = 128,
1264       .maxTessellationGenerationLevel           = 64,
1265       .maxTessellationPatchSize                 = 32,
1266       .maxTessellationControlPerVertexInputComponents = 128,
1267       .maxTessellationControlPerVertexOutputComponents = 128,
1268       .maxTessellationControlPerPatchOutputComponents = 128,
1269       .maxTessellationControlTotalOutputComponents = 2048,
1270       .maxTessellationEvaluationInputComponents = 128,
1271       .maxTessellationEvaluationOutputComponents = 128,
1272       .maxGeometryShaderInvocations             = 32,
1273       .maxGeometryInputComponents               = 128,
1274       .maxGeometryOutputComponents              = 128,
1275       .maxGeometryOutputVertices                = 256,
1276       .maxGeometryTotalOutputComponents         = 1024,
1277       .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1278       .maxFragmentOutputAttachments             = 8,
1279       .maxFragmentDualSrcAttachments            = 1,
1280       .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
1281       .maxComputeSharedMemorySize               = 64 * 1024,
1282       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1283       .maxComputeWorkGroupInvocations           = max_workgroup_size,
1284       .maxComputeWorkGroupSize = {
1285          max_workgroup_size,
1286          max_workgroup_size,
1287          max_workgroup_size,
1288       },
1289       .subPixelPrecisionBits                    = 8,
1290       .subTexelPrecisionBits                    = 8,
1291       .mipmapPrecisionBits                      = 8,
1292       .maxDrawIndexedIndexValue                 = UINT32_MAX,
1293       .maxDrawIndirectCount                     = UINT32_MAX,
1294       .maxSamplerLodBias                        = 16,
1295       .maxSamplerAnisotropy                     = 16,
1296       .maxViewports                             = MAX_VIEWPORTS,
1297       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1298       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1299       .viewportSubPixelBits                     = 13, /* We take a float? */
1300       .minMemoryMapAlignment                    = 4096, /* A page */
1301       /* The dataport requires texel alignment so we need to assume a worst
1302        * case of R32G32B32A32 which is 16 bytes.
1303        */
1304       .minTexelBufferOffsetAlignment            = 16,
1305       .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
1306       .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
1307       .minTexelOffset                           = -8,
1308       .maxTexelOffset                           = 7,
1309       .minTexelGatherOffset                     = -32,
1310       .maxTexelGatherOffset                     = 31,
1311       .minInterpolationOffset                   = -0.5,
1312       .maxInterpolationOffset                   = 0.4375,
1313       .subPixelInterpolationOffsetBits          = 4,
1314       .maxFramebufferWidth                      = (1 << 14),
1315       .maxFramebufferHeight                     = (1 << 14),
1316       .maxFramebufferLayers                     = (1 << 11),
1317       .framebufferColorSampleCounts             = sample_counts,
1318       .framebufferDepthSampleCounts             = sample_counts,
1319       .framebufferStencilSampleCounts           = sample_counts,
1320       .framebufferNoAttachmentsSampleCounts     = sample_counts,
1321       .maxColorAttachments                      = MAX_RTS,
1322       .sampledImageColorSampleCounts            = sample_counts,
1323       .sampledImageIntegerSampleCounts          = sample_counts,
1324       .sampledImageDepthSampleCounts            = sample_counts,
1325       .sampledImageStencilSampleCounts          = sample_counts,
1326       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
1327       .maxSampleMaskWords                       = 1,
1328       .timestampComputeAndGraphics              = true,
1329       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
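           /* timestampPeriod is expressed in nanoseconds per tick; for
            * example, a 12MHz timestamp counter gives 1e9 / 12e6 = ~83.3ns
            * per tick.
            */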
1330       .maxClipDistances                         = 8,
1331       .maxCullDistances                         = 8,
1332       .maxCombinedClipAndCullDistances          = 8,
1333       .discreteQueuePriorities                  = 2,
1334       .pointSizeRange                           = { 0.125, 255.875 },
1335       /* While SKL and up support much wider lines than we are setting here,
1336        * in practice we run into conformance issues if we go past this limit.
1337        * Since the Windows driver does the same, it's probably fair to assume
1338        * that no one needs more than this.
1339        */
1340       .lineWidthRange                           = { 0.0, 8.0 },
1341       .pointSizeGranularity                     = (1.0 / 8.0),
1342       .lineWidthGranularity                     = (1.0 / 128.0),
1343       .strictLines                              = false,
1344       .standardSampleLocations                  = true,
1345       .optimalBufferCopyOffsetAlignment         = 128,
1346       .optimalBufferCopyRowPitchAlignment       = 128,
1347       .nonCoherentAtomSize                      = 64,
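           /* 64 bytes matches the CPU cache line size, which is the
            * granularity that matters when flushing/invalidating
            * non-coherent mappings.
            */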
1348 
1349       /* Sparse: */
1350       .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
1351       .sparseResidencyStandard2DMultisampleBlockShape = false,
1352       .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
1353       .sparseResidencyAlignedMipSize = false,
1354       .sparseResidencyNonResidentStrict = has_sparse_or_fake,
1355 
1356       /* VK_KHR_cooperative_matrix */
1357       .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1358    };
1359 
1360    snprintf(props->deviceName, sizeof(props->deviceName),
1361             "%s", pdevice->info.name);
1362    memcpy(props->pipelineCacheUUID,
1363           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1364 
1365    get_properties_1_1(pdevice, props);
1366    get_properties_1_2(pdevice, props);
1367    get_properties_1_3(pdevice, props);
1368 
1369    /* VK_KHR_acceleration_structure */
1370    {
1371       props->maxGeometryCount = (1u << 24) - 1;
1372       props->maxInstanceCount = (1u << 24) - 1;
1373       props->maxPrimitiveCount = (1u << 29) - 1;
1374       props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
1375       props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
1376       props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
1377       props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
1378       props->minAccelerationStructureScratchOffsetAlignment = 64;
1379    }
1380 
1381    /* VK_KHR_fragment_shading_rate */
1382    {
1383       props->primitiveFragmentShadingRateWithMultipleViewports =
1384          pdevice->info.has_coarse_pixel_primitive_and_cb;
1385       props->layeredShadingRateAttachments =
1386          pdevice->info.has_coarse_pixel_primitive_and_cb;
1387       props->fragmentShadingRateNonTrivialCombinerOps =
1388          pdevice->info.has_coarse_pixel_primitive_and_cb;
1389       props->maxFragmentSize = (VkExtent2D) { 4, 4 };
1390       props->maxFragmentSizeAspectRatio =
1391          pdevice->info.has_coarse_pixel_primitive_and_cb ?
1392          2 : 4;
1393       props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1394          (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
1395       props->maxFragmentShadingRateRasterizationSamples =
1396          pdevice->info.has_coarse_pixel_primitive_and_cb ?
1397          VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
1398       props->fragmentShadingRateWithShaderDepthStencilWrites = false;
1399       props->fragmentShadingRateWithSampleMask = true;
1400       props->fragmentShadingRateWithShaderSampleMask = false;
1401       props->fragmentShadingRateWithConservativeRasterization = true;
1402       props->fragmentShadingRateWithFragmentShaderInterlock = true;
1403       props->fragmentShadingRateWithCustomSampleLocations = true;
1404       props->fragmentShadingRateStrictMultiplyCombiner = true;
1405 
1406       if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
1407          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1408          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1409          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1410       } else {
1411          /* Those must be 0 if attachmentFragmentShadingRate is not supported. */
1412          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1413          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1414          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
1415       }
1416    }
1417 
1418    /* VK_KHR_maintenance5 */
1419    {
1420       props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1421       props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1422       props->depthStencilSwizzleOneSupport = true;
1423       props->polygonModePointSize = true;
1424       props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1425       props->nonStrictWideLinesUseParallelogram = false;
1426    }
1427 
1428    /* VK_KHR_maintenance6 */
1429    {
1430       props->blockTexelViewCompatibleMultipleLayers = true;
1431       props->maxCombinedImageSamplerDescriptorCount = 3;
1432       props->fragmentShadingRateClampCombinerInputs = true;
1433    }
1434 
1435    /* VK_KHR_performance_query */
1436    {
1437       props->allowCommandBufferQueryCopies = false;
1438    }
1439 
1440    /* VK_KHR_push_descriptor */
1441    {
1442       props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1443    }
1444 
1445    /* VK_KHR_ray_tracing_pipeline */
1446    {
1447       /* TODO */
1448       props->shaderGroupHandleSize = 32;
1449       props->maxRayRecursionDepth = 31;
1450       /* MemRay::hitGroupSRStride is 16 bits */
1451       props->maxShaderGroupStride = UINT16_MAX;
1452       /* MemRay::hitGroupSRBasePtr requires 16B alignment */
1453       props->shaderGroupBaseAlignment = 16;
1454       props->shaderGroupHandleAlignment = 16;
1455       props->shaderGroupHandleCaptureReplaySize = 32;
1456       props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
1457       props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
1458    }
1459 
1460    /* VK_KHR_vertex_attribute_divisor */
1461    {
1462       props->maxVertexAttribDivisor = UINT32_MAX / 16;
1463       props->supportsNonZeroFirstInstance = true;
1464    }
1465 
1466    /* VK_EXT_conservative_rasterization */
1467    {
1468       /* There's nothing in the public docs about this value as far as I can
1469        * tell. However, this is the value the Windows driver reports and
1470        * there's a comment on a rejected HW feature in the internal docs that
1471        * says:
1472        *
1473        *    "This is similar to conservative rasterization, except the
1474        *    primitive area is not extended by 1/512 and..."
1475        *
1476        * That's a bit of an obtuse reference but it's the best we've got for
1477        * now.
1478        */
1479       props->primitiveOverestimationSize = 1.0f / 512.0f;
1480       props->maxExtraPrimitiveOverestimationSize = 0.0f;
1481       props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
1482       props->primitiveUnderestimation = false;
1483       props->conservativePointAndLineRasterization = false;
1484       props->degenerateTrianglesRasterized = true;
1485       props->degenerateLinesRasterized = false;
1486       props->fullyCoveredFragmentShaderInputVariable = false;
1487       props->conservativeRasterizationPostDepthCoverage = true;
1488    }
1489 
1490    /* VK_EXT_custom_border_color */
1491    {
1492       props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1493    }
1494 
1495    /* VK_EXT_extended_dynamic_state3 */
1496    {
1497       props->dynamicPrimitiveTopologyUnrestricted = true;
1498    }
1499 
1500    /* VK_EXT_external_memory_host */
1501    {
1502       props->minImportedHostPointerAlignment = 4096;
1503    }
1504 
1505    /* VK_EXT_graphics_pipeline_library */
1506    {
1507       props->graphicsPipelineLibraryFastLinking = true;
1508       props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1509    }
1510 
1511    /* VK_EXT_line_rasterization */
1512    {
1513       /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1514        * Rules - Legacy Mode", it says the following:
1515        *
1516        *    "Note that the device divides a pixel into a 16x16 array of
1517        *     subpixels, referenced by their upper left corners."
1518        *
1519        * This is the only known reference in the PRMs to the subpixel
1520        * precision of line rasterization and a "16x16 array of subpixels"
1521        * implies 4 subpixel precision bits. Empirical testing has shown that 4
1522        * subpixel precision bits applies to all line rasterization types.
1523        */
1524       props->lineSubPixelPrecisionBits = 4;
1525    }
1526 
1527    /* VK_EXT_mesh_shader */
1528    {
1529       /* Bounded by the maximum representable size in
1530        * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
1531        */
1532       const uint32_t max_slm_size = 64 * 1024;
1533 
1534       /* Bounded by the maximum representable size in
1535        * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
1536        */
1537       const uint32_t max_workgroup_size = 1 << 10;
1538 
1539       /* 3DMESH_3D limitation. */
1540       const uint32_t max_threadgroup_count = 1 << 22;
1541 
1542       /* 3DMESH_3D limitation. */
1543       const uint32_t max_threadgroup_xyz = 65535;
1544 
1545       const uint32_t max_urb_size = 64 * 1024;
1546 
1547       props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
1548       props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
1549       props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
1550       props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
1551 
1552       props->maxTaskWorkGroupInvocations = max_workgroup_size;
1553       props->maxTaskWorkGroupSize[0] = max_workgroup_size;
1554       props->maxTaskWorkGroupSize[1] = max_workgroup_size;
1555       props->maxTaskWorkGroupSize[2] = max_workgroup_size;
1556 
1557       /* TUE header with padding */
1558       const uint32_t task_payload_reserved = 32;
1559 
1560       props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
1561       props->maxTaskSharedMemorySize = max_slm_size;
1562       props->maxTaskPayloadAndSharedMemorySize =
1563          props->maxTaskPayloadSize +
1564          props->maxTaskSharedMemorySize;
1565 
1566       props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
1567       props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
1568       props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
1569       props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
1570 
1571       props->maxMeshWorkGroupInvocations = max_workgroup_size;
1572       props->maxMeshWorkGroupSize[0] = max_workgroup_size;
1573       props->maxMeshWorkGroupSize[1] = max_workgroup_size;
1574       props->maxMeshWorkGroupSize[2] = max_workgroup_size;
1575 
1576       props->maxMeshSharedMemorySize = max_slm_size;
1577       props->maxMeshPayloadAndSharedMemorySize =
1578          props->maxTaskPayloadSize +
1579          props->maxMeshSharedMemorySize;
1580 
1581       /* Unfortunately, the spec's formula for the max output size doesn't match our
1582        * hardware (some per-primitive and per-vertex attributes have alignment restrictions),
1583        * so we advertise the minimum value mandated by the spec to avoid overflowing it.
1584        */
1585       props->maxMeshOutputPrimitives = 256;
1586       props->maxMeshOutputVertices = 256;
1587 
1588       /* NumPrim + Primitive Data List */
1589       const uint32_t max_indices_memory =
1590          ALIGN(sizeof(uint32_t) +
1591                sizeof(uint32_t) * props->maxMeshOutputVertices, 32);
1592 
1593       props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
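           /* With maxMeshOutputVertices = 256, max_indices_memory works out to
            * ALIGN(4 + 4 * 256, 32) = 1056 bytes, so the MIN2() above currently
            * resolves to the 32768 spec minimum rather than the URB-derived
            * bound.
            */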
1594 
1595       props->maxMeshPayloadAndOutputMemorySize =
1596          props->maxTaskPayloadSize +
1597          props->maxMeshOutputMemorySize;
1598 
1599       props->maxMeshOutputComponents = 128;
1600 
1601       /* RTAIndex is 11-bits wide */
1602       props->maxMeshOutputLayers = 1 << 11;
1603 
1604       props->maxMeshMultiviewViewCount = 1;
1605 
1606       /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
1607       props->meshOutputPerVertexGranularity = 8;
1608       /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
1609       props->meshOutputPerPrimitiveGranularity = 8;
1610 
1611       /* SIMD16 */
1612       props->maxPreferredTaskWorkGroupInvocations = 16;
1613       props->maxPreferredMeshWorkGroupInvocations = 16;
1614 
1615       props->prefersLocalInvocationVertexOutput = false;
1616       props->prefersLocalInvocationPrimitiveOutput = false;
1617       props->prefersCompactVertexOutput = false;
1618       props->prefersCompactPrimitiveOutput = false;
1619 
1620       /* Spec minimum values */
1621       assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
1622       assert(props->maxTaskWorkGroupCount[0] >= 65535);
1623       assert(props->maxTaskWorkGroupCount[1] >= 65535);
1624       assert(props->maxTaskWorkGroupCount[2] >= 65535);
1625 
1626       assert(props->maxTaskWorkGroupInvocations >= 128);
1627       assert(props->maxTaskWorkGroupSize[0] >= 128);
1628       assert(props->maxTaskWorkGroupSize[1] >= 128);
1629       assert(props->maxTaskWorkGroupSize[2] >= 128);
1630 
1631       assert(props->maxTaskPayloadSize >= 16384);
1632       assert(props->maxTaskSharedMemorySize >= 32768);
1633       assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);
1634 
1635 
1636       assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
1637       assert(props->maxMeshWorkGroupCount[0] >= 65535);
1638       assert(props->maxMeshWorkGroupCount[1] >= 65535);
1639       assert(props->maxMeshWorkGroupCount[2] >= 65535);
1640 
1641       assert(props->maxMeshWorkGroupInvocations >= 128);
1642       assert(props->maxMeshWorkGroupSize[0] >= 128);
1643       assert(props->maxMeshWorkGroupSize[1] >= 128);
1644       assert(props->maxMeshWorkGroupSize[2] >= 128);
1645 
1646       assert(props->maxMeshSharedMemorySize >= 28672);
1647       assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
1648       assert(props->maxMeshOutputMemorySize >= 32768);
1649       assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);
1650 
1651       assert(props->maxMeshOutputComponents >= 128);
1652 
1653       assert(props->maxMeshOutputVertices >= 256);
1654       assert(props->maxMeshOutputPrimitives >= 256);
1655       assert(props->maxMeshOutputLayers >= 8);
1656       assert(props->maxMeshMultiviewViewCount >= 1);
1657    }
1658 
1659    /* VK_EXT_multi_draw */
1660    {
1661       props->maxMultiDrawCount = 2048;
1662    }
1663 
1664    /* VK_EXT_nested_command_buffer */
1665    {
1666       props->maxCommandBufferNestingLevel = UINT32_MAX;
1667    }
1668 
1669    /* VK_EXT_pci_bus_info */
1670    {
1671       props->pciDomain = pdevice->info.pci_domain;
1672       props->pciBus = pdevice->info.pci_bus;
1673       props->pciDevice = pdevice->info.pci_dev;
1674       props->pciFunction = pdevice->info.pci_func;
1675    }
1676 
1677    /* VK_EXT_physical_device_drm */
1678    {
1679       props->drmHasPrimary = pdevice->has_master;
1680       props->drmPrimaryMajor = pdevice->master_major;
1681       props->drmPrimaryMinor = pdevice->master_minor;
1682       props->drmHasRender = pdevice->has_local;
1683       props->drmRenderMajor = pdevice->local_major;
1684       props->drmRenderMinor = pdevice->local_minor;
1685    }
1686 
1687    /* VK_EXT_pipeline_robustness */
1688    {
1689       props->defaultRobustnessStorageBuffers =
1690          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1691       props->defaultRobustnessUniformBuffers =
1692          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1693       props->defaultRobustnessVertexInputs =
1694          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
1695       props->defaultRobustnessImages =
1696          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1697    }
1698 
1699    /* VK_EXT_provoking_vertex */
1700    {
1701       props->provokingVertexModePerPipeline = true;
1702       props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1703    }
1704 
1705    /* VK_EXT_robustness2 */
1706    {
1707       props->robustStorageBufferAccessSizeAlignment =
1708          ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1709       props->robustUniformBufferAccessSizeAlignment =
1710          ANV_UBO_ALIGNMENT;
1711    }
1712 
1713    /* VK_EXT_sample_locations */
1714    {
1715       props->sampleLocationSampleCounts =
1716          isl_device_get_sample_counts(&pdevice->isl_dev);
1717 
1718       /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1719       props->maxSampleLocationGridSize.width = 1;
1720       props->maxSampleLocationGridSize.height = 1;
1721 
1722       props->sampleLocationCoordinateRange[0] = 0;
1723       props->sampleLocationCoordinateRange[1] = 0.9375;
1724       props->sampleLocationSubPixelBits = 4;
1725 
1726       props->variableSampleLocations = true;
1727    }
1728 
1729    /* VK_EXT_shader_module_identifier */
1730    {
1731       STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1732                     sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1733       memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1734              vk_shaderModuleIdentifierAlgorithmUUID,
1735              sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1736    }
1737 
1738    /* VK_EXT_transform_feedback */
1739    {
1740       props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1741       props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1742       props->maxTransformFeedbackBufferSize = (1ull << 32);
1743       props->maxTransformFeedbackStreamDataSize = 128 * 4;
1744       props->maxTransformFeedbackBufferDataSize = 128 * 4;
1745       props->maxTransformFeedbackBufferDataStride = 2048;
1746       props->transformFeedbackQueries = true;
1747       props->transformFeedbackStreamsLinesTriangles = false;
1748       props->transformFeedbackRasterizationStreamSelect = false;
1749       props->transformFeedbackDraw = true;
1750    }
1751 }
1752 
1753 static VkResult MUST_CHECK
1754 anv_init_meminfo(struct anv_physical_device *device, int fd)
1755 {
1756    const struct intel_device_info *devinfo = &device->info;
1757 
1758    device->sys.region = &devinfo->mem.sram.mem;
1759    device->sys.size = devinfo->mem.sram.mappable.size;
1760    device->sys.available = devinfo->mem.sram.mappable.free;
1761 
1762    device->vram_mappable.region = &devinfo->mem.vram.mem;
1763    device->vram_mappable.size = devinfo->mem.vram.mappable.size;
1764    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1765 
1766    device->vram_non_mappable.region = &devinfo->mem.vram.mem;
1767    device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
1768    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1769 
1770    return VK_SUCCESS;
1771 }
1772 
1773 static void
1774 anv_update_meminfo(struct anv_physical_device *device, int fd)
1775 {
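        /* Refresh the cached availability numbers; if the kernel query fails,
         * keep the values from the last successful update.
         */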
1776    if (!intel_device_info_update_memory_info(&device->info, fd))
1777       return;
1778 
1779    const struct intel_device_info *devinfo = &device->info;
1780    device->sys.available = devinfo->mem.sram.mappable.free;
1781    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1782    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1783 }
1784 
1785 static VkResult
1786 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
1787 {
1788    VkResult result = anv_init_meminfo(device, fd);
1789    if (result != VK_SUCCESS)
1790       return result;
1791 
1792    assert(device->sys.size != 0);
1793 
1794    if (anv_physical_device_has_vram(device)) {
1795       /* We can create 2 or 3 different heaps when we have local memory
1796        * support: the first heap has the local memory size, the second has the
1797        * system memory size, and the third is added only when not all of the
1798        * vram is mappable by the host.
1799        */
1800       device->memory.heap_count = 2;
1801       device->memory.heaps[0] = (struct anv_memory_heap) {
1802          /* If there is a vram_non_mappable, use that for the device only
1803           * heap. Otherwise use the vram_mappable.
1804           */
1805          .size = device->vram_non_mappable.size != 0 ?
1806                  device->vram_non_mappable.size : device->vram_mappable.size,
1807          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1808          .is_local_mem = true,
1809       };
1810       device->memory.heaps[1] = (struct anv_memory_heap) {
1811          .size = device->sys.size,
1812          .flags = 0,
1813          .is_local_mem = false,
1814       };
1815       /* Add an additional smaller vram mappable heap if we can't map all the
1816        * vram to the host.
1817        */
1818       if (device->vram_non_mappable.size > 0) {
1819          device->memory.heap_count++;
1820          device->memory.heaps[2] = (struct anv_memory_heap) {
1821             .size = device->vram_mappable.size,
1822             .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1823             .is_local_mem = true,
1824          };
1825       }
1826    } else {
1827       device->memory.heap_count = 1;
1828       device->memory.heaps[0] = (struct anv_memory_heap) {
1829          .size = device->sys.size,
1830          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1831          .is_local_mem = false,
1832       };
1833    }
1834 
1835    switch (device->info.kmd_type) {
1836    case INTEL_KMD_TYPE_XE:
1837       result = anv_xe_physical_device_init_memory_types(device);
1838       break;
1839    case INTEL_KMD_TYPE_I915:
1840    default:
1841       result = anv_i915_physical_device_init_memory_types(device);
1842       break;
1843    }
1844 
1845    if (result != VK_SUCCESS)
1846       return result;
1847 
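        /* If a memory type is host-visible but not host-coherent, the driver
         * has to flush/invalidate CPU caches around mappings itself; that
         * path is only compiled in for integrated GPUs.
         */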
1848    for (unsigned i = 0; i < device->memory.type_count; i++) {
1849       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
1850       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
1851           !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
1852 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1853          device->memory.need_flush = true;
1854 #else
1855          return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1856                           "Memory configuration requires flushing, but it's not implemented for this architecture");
1857 #endif
1858    }
1859 
1860    return VK_SUCCESS;
1861 }
1862 
1863 static VkResult
1864 anv_physical_device_init_uuids(struct anv_physical_device *device)
1865 {
1866    const struct build_id_note *note =
1867       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
1868    if (!note) {
1869       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1870                        "Failed to find build-id");
1871    }
1872 
1873    unsigned build_id_len = build_id_length(note);
1874    if (build_id_len < 20) {
1875       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1876                        "build-id too short.  It needs to be a SHA");
1877    }
1878 
1879    memcpy(device->driver_build_sha1, build_id_data(note), 20);
1880 
1881    struct mesa_sha1 sha1_ctx;
1882    uint8_t sha1[20];
1883    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
1884 
1885    /* The pipeline cache UUID is used for determining when a pipeline cache is
1886     * invalid.  It needs both a driver build and the PCI ID of the device.
1887     */
1888    _mesa_sha1_init(&sha1_ctx);
1889    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
1890    brw_device_sha1_update(&sha1_ctx, &device->info);
1891    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
1892                      sizeof(device->always_use_bindless));
1893    _mesa_sha1_final(&sha1_ctx, sha1);
1894    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
1895 
1896    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
1897    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
1898 
1899    return VK_SUCCESS;
1900 }
1901 
1902 static void
1903 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
1904 {
1905 #ifdef ENABLE_SHADER_CACHE
1906    char renderer[10];
1907    ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
1908                                device->info.pci_device_id);
1909    assert(len == sizeof(renderer) - 2);
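        /* renderer is "anv_" followed by the 4-digit hex PCI device ID, so
         * on-disk shader cache entries are keyed per device.
         */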
1910 
1911    char timestamp[41];
1912    _mesa_sha1_format(timestamp, device->driver_build_sha1);
1913 
1914    const uint64_t driver_flags =
1915       brw_get_compiler_config_value(device->compiler);
1916    device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1917 #endif
1918 }
1919 
1920 static void
1921 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
1922 {
1923 #ifdef ENABLE_SHADER_CACHE
1924    if (device->vk.disk_cache) {
1925       disk_cache_destroy(device->vk.disk_cache);
1926       device->vk.disk_cache = NULL;
1927    }
1928 #else
1929    assert(device->vk.disk_cache == NULL);
1930 #endif
1931 }
1932 
1933 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
1934  * queue overrides.
1935  *
1936  * To override the number of queues:
1937  *  * "gc" is for graphics queues with compute support
1938  *  * "g" is for graphics queues with no compute support
1939  *  * "c" is for compute queues with no graphics support
1940  *  * "v" is for video queues with no graphics support
1941  *
1942  * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
1943  * advertised queues to be 2 queues with graphics+compute support, and 1 queue
1944  * with compute-only support.
1945  *
1946  * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
1947  * include 1 queue with compute-only support, but it will not change the
1948  * number of graphics+compute queues.
1949  *
1950  * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
1951  * to include 1 queue with compute-only support, and it would override the
1952  * number of graphics+compute queues to be 0.
1953  */
1954 static void
1955 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count)
1956 {
1957    int gc_override = -1;
1958    int g_override = -1;
1959    int c_override = -1;
1960    int v_override = -1;
1961    char *env = getenv("ANV_QUEUE_OVERRIDE");
1962 
1963    if (env == NULL)
1964       return;
1965 
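        /* strtok_r() modifies the string it scans, so parse a private copy of
         * the environment value.
         */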
1966    env = strdup(env);
1967    char *save = NULL;
1968    char *next = strtok_r(env, ",", &save);
1969    while (next != NULL) {
1970       if (strncmp(next, "gc=", 3) == 0) {
1971          gc_override = strtol(next + 3, NULL, 0);
1972       } else if (strncmp(next, "g=", 2) == 0) {
1973          g_override = strtol(next + 2, NULL, 0);
1974       } else if (strncmp(next, "c=", 2) == 0) {
1975          c_override = strtol(next + 2, NULL, 0);
1976       } else if (strncmp(next, "v=", 2) == 0) {
1977          v_override = strtol(next + 2, NULL, 0);
1978       } else {
1979          mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
1980       }
1981       next = strtok_r(NULL, ",", &save);
1982    }
1983    free(env);
1984    if (gc_override >= 0)
1985       *gc_count = gc_override;
1986    if (g_override >= 0)
1987       *g_count = g_override;
1988    if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
1989       mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
1990                 "Vulkan specification");
1991    if (c_override >= 0)
1992       *c_count = c_override;
1993    if (v_override >= 0)
1994       *v_count = v_override;
1995 }
1996 
1997 static void
1998 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
1999 {
2000    uint32_t family_count = 0;
2001    VkQueueFlags sparse_flags = (pdevice->instance->has_fake_sparse ||
2002                                 pdevice->has_sparse) ?
2003                                VK_QUEUE_SPARSE_BINDING_BIT : 0;
2004 
2005    if (pdevice->engine_info) {
2006       int gc_count =
2007          intel_engines_count(pdevice->engine_info,
2008                              INTEL_ENGINE_CLASS_RENDER);
2009       int v_count =
2010          intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
2011       int g_count = 0;
2012       int c_count = 0;
2013       const bool kernel_supports_non_render_engines =
2014          pdevice->info.kmd_type == INTEL_KMD_TYPE_XE || pdevice->has_vm_control;
2015       const bool sparse_supports_non_render_engines =
2016          !pdevice->has_sparse || !pdevice->sparse_uses_trtt;
2017       const bool can_use_non_render_engines =
2018          kernel_supports_non_render_engines &&
2019          sparse_supports_non_render_engines;
2020 
2021       if (can_use_non_render_engines) {
2022          c_count = intel_engines_supported_count(pdevice->local_fd,
2023                                                  &pdevice->info,
2024                                                  pdevice->engine_info,
2025                                                  INTEL_ENGINE_CLASS_COMPUTE);
2026       }
2027       enum intel_engine_class compute_class =
2028          c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE;
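           /* compute_class is chosen before the override below, so a
            * compute-only family forced via ANV_QUEUE_OVERRIDE when no CCS
            * engine is usable will be backed by the render engine.
            */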
2029 
2030       int blit_count = 0;
2031       if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
2032          blit_count = intel_engines_supported_count(pdevice->local_fd,
2033                                                     &pdevice->info,
2034                                                     pdevice->engine_info,
2035                                                     INTEL_ENGINE_CLASS_COPY);
2036       }
2037 
2038       anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count);
2039 
2040       if (gc_count > 0) {
2041          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2042             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2043                           VK_QUEUE_COMPUTE_BIT |
2044                           VK_QUEUE_TRANSFER_BIT |
2045                           sparse_flags,
2046             .queueCount = gc_count,
2047             .engine_class = INTEL_ENGINE_CLASS_RENDER,
2048          };
2049       }
2050       if (g_count > 0) {
2051          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2052             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2053                           VK_QUEUE_TRANSFER_BIT |
2054                           sparse_flags,
2055             .queueCount = g_count,
2056             .engine_class = INTEL_ENGINE_CLASS_RENDER,
2057          };
2058       }
2059       if (c_count > 0) {
2060          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2061             .queueFlags = VK_QUEUE_COMPUTE_BIT |
2062                           VK_QUEUE_TRANSFER_BIT |
2063                           sparse_flags,
2064             .queueCount = c_count,
2065             .engine_class = compute_class,
2066          };
2067       }
2068       if (v_count > 0 && pdevice->video_decode_enabled) {
2069          /* HEVC support on Gfx9 is only available on VCS0. So limit the number of video queues
2070           * to the first VCS engine instance.
2071           *
2072           * We should be able to query HEVC support from the kernel using the engine query uAPI,
2073           * but this appears to be broken :
2074        * but this appears to be broken:
2075           *
2076           * When this bug is fixed we should be able to check HEVC support to determine the
2077           * correct number of queues.
2078           */
2079          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2080             .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
2081             .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
2082             .engine_class = INTEL_ENGINE_CLASS_VIDEO,
2083          };
2084       }
2085       if (blit_count > 0) {
2086          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2087             .queueFlags = VK_QUEUE_TRANSFER_BIT,
2088             .queueCount = blit_count,
2089             .engine_class = INTEL_ENGINE_CLASS_COPY,
2090          };
2091       }
2092    } else {
2093       /* Default to a single render queue */
2094       pdevice->queue.families[family_count++] = (struct anv_queue_family) {
2095          .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2096                        VK_QUEUE_COMPUTE_BIT |
2097                        VK_QUEUE_TRANSFER_BIT |
2098                        sparse_flags,
2099          .queueCount = 1,
2100          .engine_class = INTEL_ENGINE_CLASS_RENDER,
2101       };
2102       family_count = 1;
2103    }
2104    assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
2105    pdevice->queue.family_count = family_count;
2106 }
2107 
2108 static VkResult
2109 anv_physical_device_get_parameters(struct anv_physical_device *device)
2110 {
2111    switch (device->info.kmd_type) {
2112    case INTEL_KMD_TYPE_I915:
2113       return anv_i915_physical_device_get_parameters(device);
2114    case INTEL_KMD_TYPE_XE:
2115       return anv_xe_physical_device_get_parameters(device);
2116    default:
2117       unreachable("Missing");
2118       return VK_ERROR_UNKNOWN;
2119    }
2120 }
2121 
2122 static VkResult
2123 anv_physical_device_try_create(struct vk_instance *vk_instance,
2124                                struct _drmDevice *drm_device,
2125                                struct vk_physical_device **out)
2126 {
2127    struct anv_instance *instance =
2128       container_of(vk_instance, struct anv_instance, vk);
2129 
2130    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
2131        drm_device->bustype != DRM_BUS_PCI ||
2132        drm_device->deviceinfo.pci->vendor_id != 0x8086)
2133       return VK_ERROR_INCOMPATIBLE_DRIVER;
2134 
2135    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
2136    const char *path = drm_device->nodes[DRM_NODE_RENDER];
2137    VkResult result;
2138    int fd;
2139    int master_fd = -1;
2140 
2141    process_intel_debug_variable();
2142 
2143    fd = open(path, O_RDWR | O_CLOEXEC);
2144    if (fd < 0) {
2145       if (errno == ENOMEM) {
2146          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2147                           "Unable to open device %s: out of memory", path);
2148       }
2149       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2150                        "Unable to open device %s: %m", path);
2151    }
2152 
2153    struct intel_device_info devinfo;
2154    if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
2155       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2156       goto fail_fd;
2157    }
2158 
2159    if (devinfo.ver == 20) {
2160       mesa_logw("Vulkan not yet supported on %s", devinfo.name);
2161    } else if (devinfo.ver > 12) {
2162       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2163                          "Vulkan not yet supported on %s", devinfo.name);
2164       goto fail_fd;
2165    } else if (devinfo.ver < 9) {
2166       /* Silently fail here, hasvk should pick up this device. */
2167       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2168       goto fail_fd;
2169    }
2170 
2171    /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
2172     * need to always disable preemption:
2173     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
2174     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
2175     */
2176    if (devinfo.verx10 == 120)
2177       BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);
2178 
2179    if (!devinfo.has_context_isolation) {
2180       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2181                          "Vulkan requires context isolation for %s", devinfo.name);
2182       goto fail_fd;
2183    }
2184 
2185    struct anv_physical_device *device =
2186       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
2187                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2188    if (device == NULL) {
2189       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2190       goto fail_fd;
2191    }
2192 
2193    struct vk_physical_device_dispatch_table dispatch_table;
2194    vk_physical_device_dispatch_table_from_entrypoints(
2195       &dispatch_table, &anv_physical_device_entrypoints, true);
2196    vk_physical_device_dispatch_table_from_entrypoints(
2197       &dispatch_table, &wsi_physical_device_entrypoints, false);
2198 
2199    result = vk_physical_device_init(&device->vk, &instance->vk,
2200                                     NULL, NULL, NULL, /* We set up extensions later */
2201                                     &dispatch_table);
2202    if (result != VK_SUCCESS) {
2203       vk_error(instance, result);
2204       goto fail_alloc;
2205    }
2206    device->instance = instance;
2207 
2208    assert(strlen(path) < ARRAY_SIZE(device->path));
2209    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
2210 
2211    device->info = devinfo;
2212 
2213    device->local_fd = fd;
2214    result = anv_physical_device_get_parameters(device);
2215    if (result != VK_SUCCESS)
2216       goto fail_base;
2217 
2218    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
2219                                               device->info.aperture_bytes;
2220 
2221    if (device->gtt_size < (4ULL << 30 /* GiB */)) {
2222       vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2223                 "GTT size too small: 0x%016"PRIx64, device->gtt_size);
2224       goto fail_base;
2225    }
2226 
2227    /* We currently only have the right bits for instructions in Gen12+. If the
2228     * kernel ever starts supporting that feature on previous generations,
2229     * we'll need to edit genxml prior to enabling here.
2230     */
2231    device->has_protected_contexts = device->info.ver >= 12 &&
2232       intel_gem_supports_protected_context(fd, device->info.kmd_type);
2233 
2234    /* Just pick one; they're all the same */
2235    device->has_astc_ldr =
2236       isl_format_supports_sampling(&device->info,
2237                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
2238    if (!device->has_astc_ldr &&
2239        driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
2240       device->emu_astc_ldr = true;
2241    if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
2242       device->flush_astc_ldr_void_extent_denorms =
2243          device->has_astc_ldr && !device->emu_astc_ldr;
2244    }
2245    device->disable_fcv = intel_device_info_is_mtl_or_arl(&device->info) ||
2246                          instance->disable_fcv;
2247 
2248    result = anv_physical_device_init_heaps(device, fd);
2249    if (result != VK_SUCCESS)
2250       goto fail_base;
2251 
2252    if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
2253       device->has_exec_timeline = false;
2254 
2255    device->has_cooperative_matrix =
2256       device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;
2257 
2258    unsigned st_idx = 0;
2259 
2260    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
2261    if (!device->has_exec_timeline)
2262       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
2263    device->sync_types[st_idx++] = &device->sync_syncobj_type;
2264 
2265    /* anv_bo_sync_type is only supported with i915 for now  */
2266    if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
2267       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
2268          device->sync_types[st_idx++] = &anv_bo_sync_type;
2269 
2270       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
2271          device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
2272          device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
2273       }
2274    } else {
2275       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
2276       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
2277    }
2278 
2279    device->sync_types[st_idx++] = NULL;
2280    assert(st_idx <= ARRAY_SIZE(device->sync_types));
2281    device->vk.supported_sync_types = device->sync_types;
2282 
2283    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
2284 
2285    device->always_use_bindless =
2286       debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
2287 
2288    device->use_call_secondary =
2289       !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
2290 
2291    device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
2292 
2293    device->uses_ex_bso = device->info.verx10 >= 125;
2294 
2295    /* For now always use indirect descriptors. We'll update this
2296     * to !uses_ex_bso when all the infrastructure is built up.
2297     */
2298    device->indirect_descriptors =
2299       !device->uses_ex_bso ||
2300       driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
2301 
2302    device->alloc_aux_tt_mem =
2303       device->info.has_aux_map && device->info.verx10 >= 125;
2304    /* Check if we can read the GPU timestamp register from the CPU */
2305    uint64_t u64_ignore;
2306    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
2307                                                                device->info.kmd_type,
2308                                                                &u64_ignore);
2309 
2310    device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;
2311 
2312    /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
2313    if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
2314       device->has_sparse = true;
2315       device->sparse_uses_trtt =
2316          debug_get_bool_option("ANV_SPARSE_USE_TRTT", false);
2317    } else {
2318       device->has_sparse =
2319          device->info.ver >= 12 &&
2320          device->has_exec_timeline &&
2321          debug_get_bool_option("ANV_SPARSE", true);
2322       device->sparse_uses_trtt = true;
2323    }
2324 
2325    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
2326       driQueryOptionb(&instance->dri_options, "always_flush_cache");
2327 
2328    device->compiler = brw_compiler_create(NULL, &device->info);
2329    if (device->compiler == NULL) {
2330       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2331       goto fail_base;
2332    }
2333    device->compiler->shader_debug_log = compiler_debug_log;
2334    device->compiler->shader_perf_log = compiler_perf_log;
2335    device->compiler->constant_buffer_0_is_relative = false;
2336    device->compiler->supports_shader_constants = true;
2337    device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
2338    device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
2339    device->compiler->use_bindless_sampler_offset = false;
2340    device->compiler->spilling_rate =
2341       driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
2342 
2343    isl_device_init(&device->isl_dev, &device->info);
2344    device->isl_dev.buffer_length_in_aux_addr = true;
2345 
2346    result = anv_physical_device_init_uuids(device);
2347    if (result != VK_SUCCESS)
2348       goto fail_compiler;
2349 
2350    anv_physical_device_init_va_ranges(device);
2351 
2352    anv_physical_device_init_disk_cache(device);
2353 
2354    if (instance->vk.enabled_extensions.KHR_display) {
2355       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
2356       if (master_fd >= 0) {
2357          /* fail if we don't have permission to even render on this device */
2358          if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
2359             close(master_fd);
2360             master_fd = -1;
2361          }
2362       }
2363    }
2364    device->master_fd = master_fd;
2365 
2366    device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
2367    device->info.has_compute_engine = device->engine_info &&
2368                                      intel_engines_count(device->engine_info,
2369                                                          INTEL_ENGINE_CLASS_COMPUTE);
2370    anv_physical_device_init_queue_families(device);
2371 
2372    anv_physical_device_init_perf(device, fd);
2373 
2374    /* Gather major/minor before WSI. */
2375    struct stat st;
2376 
2377    if (stat(primary_path, &st) == 0) {
2378       device->has_master = true;
2379       device->master_major = major(st.st_rdev);
2380       device->master_minor = minor(st.st_rdev);
2381    } else {
2382       device->has_master = false;
2383       device->master_major = 0;
2384       device->master_minor = 0;
2385    }
2386 
2387    if (stat(path, &st) == 0) {
2388       device->has_local = true;
2389       device->local_major = major(st.st_rdev);
2390       device->local_minor = minor(st.st_rdev);
2391    } else {
2392       device->has_local = false;
2393       device->local_major = 0;
2394       device->local_minor = 0;
2395    }
2396 
2397    get_device_extensions(device, &device->vk.supported_extensions);
2398    get_features(device, &device->vk.supported_features);
2399    get_properties(device, &device->vk.properties);
2400 
2401    result = anv_init_wsi(device);
2402    if (result != VK_SUCCESS)
2403       goto fail_perf;
2404 
2405    anv_measure_device_init(device);
2406 
2407    anv_genX(&device->info, init_physical_device_state)(device);
2408 
2409    *out = &device->vk;
2410 
2411    return VK_SUCCESS;
2412 
2413 fail_perf:
2414    ralloc_free(device->perf);
2415    free(device->engine_info);
2416    anv_physical_device_free_disk_cache(device);
2417 fail_compiler:
2418    ralloc_free(device->compiler);
2419 fail_base:
2420    vk_physical_device_finish(&device->vk);
2421 fail_alloc:
2422    vk_free(&instance->vk.alloc, device);
2423 fail_fd:
2424    close(fd);
2425    if (master_fd != -1)
2426       close(master_fd);
2427    return result;
2428 }
2429 
2430 static void
2431 anv_physical_device_destroy(struct vk_physical_device *vk_device)
2432 {
2433    struct anv_physical_device *device =
2434       container_of(vk_device, struct anv_physical_device, vk);
2435 
2436    anv_finish_wsi(device);
2437    anv_measure_device_destroy(device);
2438    free(device->engine_info);
2439    anv_physical_device_free_disk_cache(device);
2440    ralloc_free(device->compiler);
2441    ralloc_free(device->perf);
2442    close(device->local_fd);
2443    if (device->master_fd >= 0)
2444       close(device->master_fd);
2445    vk_physical_device_finish(&device->vk);
2446    vk_free(&device->instance->vk.alloc, device);
2447 }
2448 
2449 VkResult anv_EnumerateInstanceExtensionProperties(
2450     const char*                                 pLayerName,
2451     uint32_t*                                   pPropertyCount,
2452     VkExtensionProperties*                      pProperties)
2453 {
2454    if (pLayerName)
2455       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2456 
2457    return vk_enumerate_instance_extension_properties(
2458       &instance_extensions, pPropertyCount, pProperties);
2459 }
2460 
2461 static void
2462 anv_init_dri_options(struct anv_instance *instance)
2463 {
2464    driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
2465                       ARRAY_SIZE(anv_dri_options));
2466    driParseConfigFiles(&instance->dri_options,
2467                        &instance->available_dri_options, 0, "anv", NULL, NULL,
2468                        instance->vk.app_info.app_name,
2469                        instance->vk.app_info.app_version,
2470                        instance->vk.app_info.engine_name,
2471                        instance->vk.app_info.engine_version);
2472 
2473     instance->assume_full_subgroups =
2474             driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
2475     instance->limit_trig_input_range =
2476             driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
2477     instance->sample_mask_out_opengl_behaviour =
2478             driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
2479     instance->force_filter_addr_rounding =
2480             driQueryOptionb(&instance->dri_options, "anv_force_filter_addr_rounding");
2481     instance->lower_depth_range_rate =
2482             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
2483     instance->no_16bit =
2484             driQueryOptionb(&instance->dri_options, "no_16bit");
2485     instance->intel_enable_wa_14018912822 =
2486             driQueryOptionb(&instance->dri_options, "intel_enable_wa_14018912822");
2487     instance->mesh_conv_prim_attrs_to_vert_attrs =
2488             driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs");
2489     instance->fp64_workaround_enabled =
2490             driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
2491     instance->generated_indirect_threshold =
2492             driQueryOptioni(&instance->dri_options, "generated_indirect_threshold");
2493     instance->generated_indirect_ring_threshold =
2494             driQueryOptioni(&instance->dri_options, "generated_indirect_ring_threshold");
2495     instance->query_clear_with_blorp_threshold =
2496        driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold");
2497     instance->query_copy_with_shader_threshold =
2498        driQueryOptioni(&instance->dri_options, "query_copy_with_shader_threshold");
2499     instance->force_vk_vendor =
2500        driQueryOptioni(&instance->dri_options, "force_vk_vendor");
2501     instance->has_fake_sparse =
2502        driQueryOptionb(&instance->dri_options, "fake_sparse");
2503     instance->enable_tbimr = driQueryOptionb(&instance->dri_options, "intel_tbimr");
2504     instance->disable_fcv =
2505             driQueryOptionb(&instance->dri_options, "anv_disable_fcv");
2506     instance->external_memory_implicit_sync =
2507             driQueryOptionb(&instance->dri_options, "anv_external_memory_implicit_sync");
2508 }
2509 
2510 VkResult anv_CreateInstance(
2511     const VkInstanceCreateInfo*                 pCreateInfo,
2512     const VkAllocationCallbacks*                pAllocator,
2513     VkInstance*                                 pInstance)
2514 {
2515    struct anv_instance *instance;
2516    VkResult result;
2517 
2518    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
2519 
2520    if (pAllocator == NULL)
2521       pAllocator = vk_default_allocator();
2522 
2523    instance = vk_alloc(pAllocator, sizeof(*instance), 8,
2524                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2525    if (!instance)
2526       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2527 
2528    struct vk_instance_dispatch_table dispatch_table;
2529    vk_instance_dispatch_table_from_entrypoints(
2530       &dispatch_table, &anv_instance_entrypoints, true);
2531    vk_instance_dispatch_table_from_entrypoints(
2532       &dispatch_table, &wsi_instance_entrypoints, false);
2533 
2534    result = vk_instance_init(&instance->vk, &instance_extensions,
2535                              &dispatch_table, pCreateInfo, pAllocator);
2536    if (result != VK_SUCCESS) {
2537       vk_free(pAllocator, instance);
2538       return vk_error(NULL, result);
2539    }
2540 
2541    instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
2542    instance->vk.physical_devices.destroy = anv_physical_device_destroy;
2543 
2544    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
2545 
2546    anv_init_dri_options(instance);
2547 
2548    intel_driver_ds_init();
2549 
2550    *pInstance = anv_instance_to_handle(instance);
2551 
2552    return VK_SUCCESS;
2553 }
2554 
2555 void anv_DestroyInstance(
2556     VkInstance                                  _instance,
2557     const VkAllocationCallbacks*                pAllocator)
2558 {
2559    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2560 
2561    if (!instance)
2562       return;
2563 
2564    VG(VALGRIND_DESTROY_MEMPOOL(instance));
2565 
2566    driDestroyOptionCache(&instance->dri_options);
2567    driDestroyOptionInfo(&instance->available_dri_options);
2568 
2569    vk_instance_finish(&instance->vk);
2570    vk_free(&instance->vk.alloc, instance);
2571 }
2572 
2573 void anv_GetPhysicalDeviceProperties2(
2574     VkPhysicalDevice                            physicalDevice,
2575     VkPhysicalDeviceProperties2*                pProperties)
2576 {
2577    vk_common_GetPhysicalDeviceProperties2(physicalDevice, pProperties);
2578 
2579    /* Unfortunately the runtime isn't handling ANDROID extensions. */
2580    vk_foreach_struct(ext, pProperties->pNext) {
2581       switch (ext->sType) {
2582 #if DETECT_OS_ANDROID
2583 #pragma GCC diagnostic push
2584 #pragma GCC diagnostic ignored "-Wswitch"
2585       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
2586          VkPhysicalDevicePresentationPropertiesANDROID *props =
2587             (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
2588          uint64_t front_rendering_usage = 0;
2589          struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
2590          if (gralloc != NULL) {
2591             u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
2592             u_gralloc_destroy(&gralloc);
2593          }
2594          props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
2595          break;
2596       }
2597 #pragma GCC diagnostic pop
2598 #endif
2599 
2600       default:
2601          break;
2602       }
2603    }
2604 }
2605 
2606 static const VkQueueFamilyProperties
2607 get_anv_queue_family_properties_template(const struct anv_physical_device *device)
2608 {
2609 
2610    /*
2611     * For Xe2+:
2612     * Bspec 60411: Timestamp register can hold 64-bit value
2613     *
2614     * Platforms < Xe2:
2615     * Bspec 46111: Timestamp register can hold only a 36-bit
2616     *              value
2617     */
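   /* Illustrative note (not from the Bspec text above): timestampValidBits
    * tells the application how many low bits of a written timestamp are
    * meaningful. With 36 valid bits the counter wraps after 2^36 ticks, so a
    * comparison would typically mask first, e.g.
    *    uint64_t t = raw_timestamp & ((1ull << 36) - 1);
    * On Xe2+ all 64 bits are valid and no masking is needed.
    */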
2618    const VkQueueFamilyProperties anv_queue_family_properties_template =
2619    {
2620       .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
2621       .minImageTransferGranularity = { 1, 1, 1 },
2622    };
2623 
2624    return anv_queue_family_properties_template;
2625 }
2626 
2627 static VkQueueFamilyProperties
2628 anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
2629                                          uint32_t family_index)
2630 {
2631    const struct anv_queue_family *family = &device->queue.families[family_index];
2632    VkQueueFamilyProperties properties =
2633       get_anv_queue_family_properties_template(device);
2634 
2635    properties.queueFlags = family->queueFlags;
2636    properties.queueCount = family->queueCount;
2637    /* TODO: enable protected content on video queue */
2638    if (device->has_protected_contexts &&
2639        (family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) == 0)
2640       properties.queueFlags |= VK_QUEUE_PROTECTED_BIT;
2641    return properties;
2642 }
2643 
2644 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2645     VkPhysicalDevice                            physicalDevice,
2646     uint32_t*                                   pQueueFamilyPropertyCount,
2647     VkQueueFamilyProperties2*                   pQueueFamilyProperties)
2648 {
2649    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2650    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2651                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
2652 
2653    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2654       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2655       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2656          p->queueFamilyProperties =
2657             anv_device_physical_get_queue_properties(pdevice, i);
2658 
2659          vk_foreach_struct(ext, p->pNext) {
2660             switch (ext->sType) {
2661             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2662                VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2663                   (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2664 
2665                /* Deliberately sorted low to high */
2666                VkQueueGlobalPriorityKHR all_priorities[] = {
2667                   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2668                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2669                   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2670                   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2671                };
2672 
2673                uint32_t count = 0;
2674                for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2675                   if (all_priorities[i] > pdevice->max_context_priority)
2676                      break;
2677 
2678                   properties->priorities[count++] = all_priorities[i];
2679                }
2680                properties->priorityCount = count;
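               /* Example (assumed value, for illustration): if
                * pdevice->max_context_priority is
                * VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR, the loop above stops
                * before HIGH, so priorities[] holds { LOW, MEDIUM } and
                * priorityCount is 2.
                */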
2681                break;
2682             }
2683             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
2684                VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
2685                   (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
2686                prop->queryResultStatusSupport = VK_TRUE;
2687                break;
2688             }
2689             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
2690                VkQueueFamilyVideoPropertiesKHR *prop =
2691                   (VkQueueFamilyVideoPropertiesKHR *)ext;
2692                if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
2693                   prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
2694                                                VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
2695                }
2696                break;
2697             }
2698             default:
2699                anv_debug_ignored_stype(ext->sType);
2700             }
2701          }
2702       }
2703    }
2704 }
2705 
2706 void anv_GetPhysicalDeviceMemoryProperties(
2707     VkPhysicalDevice                            physicalDevice,
2708     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
2709 {
2710    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2711 
2712    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2713    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2714       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2715          .propertyFlags = physical_device->memory.types[i].propertyFlags,
2716          .heapIndex     = physical_device->memory.types[i].heapIndex,
2717       };
2718    }
2719 
2720    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2721    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2722       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2723          .size    = physical_device->memory.heaps[i].size,
2724          .flags   = physical_device->memory.heaps[i].flags,
2725       };
2726    }
2727 }
2728 
2729 static void
2730 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2731                       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2732 {
2733    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2734 
2735    if (!device->vk.supported_extensions.EXT_memory_budget)
2736       return;
2737 
2738    anv_update_meminfo(device, device->local_fd);
2739 
2740    VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2741    for (size_t i = 0; i < device->memory.heap_count; i++) {
2742       if (device->memory.heaps[i].is_local_mem) {
2743          total_vram_heaps_size += device->memory.heaps[i].size;
2744       } else {
2745          total_sys_heaps_size += device->memory.heaps[i].size;
2746       }
2747    }
2748 
2749    for (size_t i = 0; i < device->memory.heap_count; i++) {
2750       VkDeviceSize heap_size = device->memory.heaps[i].size;
2751       VkDeviceSize heap_used = device->memory.heaps[i].used;
2752       VkDeviceSize heap_budget, total_heaps_size;
2753       uint64_t mem_available = 0;
2754 
2755       if (device->memory.heaps[i].is_local_mem) {
2756          total_heaps_size = total_vram_heaps_size;
2757          if (device->vram_non_mappable.size > 0 && i == 0) {
2758             mem_available = device->vram_non_mappable.available;
2759          } else {
2760             mem_available = device->vram_mappable.available;
2761          }
2762       } else {
2763          total_heaps_size = total_sys_heaps_size;
2764          mem_available = MIN2(device->sys.available, total_heaps_size);
2765       }
2766 
2767       double heap_proportion = (double) heap_size / total_heaps_size;
2768       VkDeviceSize available_prop = mem_available * heap_proportion;
2769 
2770       /*
2771        * Let's not incite the app to starve the system: report at most 90% of
2772        * the available heap memory.
2773        */
2774       uint64_t heap_available = available_prop * 9 / 10;
2775       heap_budget = MIN2(heap_size, heap_used + heap_available);
2776 
2777       /*
2778        * Round down to the nearest MB
2779        */
2780       heap_budget &= ~((1ull << 20) - 1);
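      /* Worked example (assumed numbers, illustration only): a single 8 GiB
       * system heap (heap_proportion == 1.0) with 6 GiB reported available
       * and 1 GiB already used gets
       *    heap_budget = MIN2(8 GiB, 1 GiB + 0.9 * 6 GiB) = 6.4 GiB,
       * then rounded down to a 1 MiB boundary by the mask above.
       */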
2781 
2782       /*
2783        * The heapBudget value must be non-zero for array elements less than
2784        * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2785        * value must be less than or equal to VkMemoryHeap::size for each heap.
2786        */
2787       assert(0 < heap_budget && heap_budget <= heap_size);
2788 
2789       memoryBudget->heapUsage[i] = heap_used;
2790       memoryBudget->heapBudget[i] = heap_budget;
2791    }
2792 
2793    /* The heapBudget and heapUsage values must be zero for array elements
2794     * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2795     */
2796    for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2797       memoryBudget->heapBudget[i] = 0;
2798       memoryBudget->heapUsage[i] = 0;
2799    }
2800 }
2801 
2802 void anv_GetPhysicalDeviceMemoryProperties2(
2803     VkPhysicalDevice                            physicalDevice,
2804     VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
2805 {
2806    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2807                                          &pMemoryProperties->memoryProperties);
2808 
2809    vk_foreach_struct(ext, pMemoryProperties->pNext) {
2810       switch (ext->sType) {
2811       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2812          anv_get_memory_budget(physicalDevice, (void*)ext);
2813          break;
2814       default:
2815          anv_debug_ignored_stype(ext->sType);
2816          break;
2817       }
2818    }
2819 }
2820 
2821 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2822     VkInstance                                  _instance,
2823     const char*                                 pName)
2824 {
2825    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2826    return vk_instance_get_proc_addr(&instance->vk,
2827                                     &anv_instance_entrypoints,
2828                                     pName);
2829 }
2830 
2831 /* With version 1+ of the loader interface the ICD should expose
2832  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2833  */
2834 PUBLIC
2835 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2836     VkInstance                                  instance,
2837     const char*                                 pName)
2838 {
2839    return anv_GetInstanceProcAddr(instance, pName);
2840 }
2841 
2842 static struct anv_state
2843 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2844 {
2845    struct anv_state state;
2846 
2847    state = anv_state_pool_alloc(pool, size, align);
2848    memcpy(state.map, p, size);
2849 
2850    return state;
2851 }
2852 
2853 static void
2854 anv_device_init_border_colors(struct anv_device *device)
2855 {
2856    static const struct gfx8_border_color border_colors[] = {
2857       [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2858       [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2859       [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2860       [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2861       [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2862       [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2863    };
2864 
2865    device->border_colors =
2866       anv_state_pool_emit_data(&device->dynamic_state_pool,
2867                                sizeof(border_colors), 64, border_colors);
2868 }
2869 
2870 static VkResult
2871 anv_device_init_trivial_batch(struct anv_device *device)
2872 {
2873    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2874                                          ANV_BO_ALLOC_MAPPED |
2875                                          ANV_BO_ALLOC_HOST_COHERENT |
2876                                          ANV_BO_ALLOC_INTERNAL,
2877                                          0 /* explicit_address */,
2878                                          &device->trivial_batch_bo);
2879    if (result != VK_SUCCESS)
2880       return result;
2881 
2882    struct anv_batch batch = {
2883       .start = device->trivial_batch_bo->map,
2884       .next = device->trivial_batch_bo->map,
2885       .end = device->trivial_batch_bo->map + 4096,
2886    };
2887 
2888    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2889    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2890 
2891    return VK_SUCCESS;
2892 }
2893 
2894 static bool
2895 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2896                  struct anv_block_pool *pool,
2897                  uint64_t address)
2898 {
2899    anv_block_pool_foreach_bo(bo, pool) {
2900       uint64_t bo_address = intel_48b_address(bo->offset);
2901       if (address >= bo_address && address < (bo_address + bo->size)) {
2902          *ret = (struct intel_batch_decode_bo) {
2903             .addr = bo_address,
2904             .size = bo->size,
2905             .map = bo->map,
2906          };
2907          return true;
2908       }
2909    }
2910    return false;
2911 }
2912 
2913 /* Finding a buffer for batch decoding */
2914 static struct intel_batch_decode_bo
2915 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2916 {
2917    struct anv_device *device = v_batch;
2918    struct intel_batch_decode_bo ret_bo = {};
2919 
2920    assert(ppgtt);
2921 
2922    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2923       return ret_bo;
2924    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2925       return ret_bo;
2926    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2927       return ret_bo;
2928    if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
2929       return ret_bo;
2930    if (device->physical->indirect_descriptors &&
2931        get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
2932       return ret_bo;
2933    if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
2934       return ret_bo;
2935    if (device->physical->indirect_descriptors &&
2936        get_bo_from_pool(&ret_bo, &device->indirect_push_descriptor_pool.block_pool, address))
2937       return ret_bo;
2938 
2939    if (!device->cmd_buffer_being_decoded)
2940       return (struct intel_batch_decode_bo) { };
2941 
2942    struct anv_batch_bo **bbo;
2943    u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
2944       /* The decoder zeroes out the top 16 bits, so we need to as well */
2945       uint64_t bo_address = (*bbo)->bo->offset & (~0ull >> 16);
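      /* Illustration: (~0ull >> 16) == 0x0000ffffffffffff, so a canonical
       * 64-bit address such as 0xffff800000001000 becomes 0x0000800000001000,
       * matching the 48-bit addresses the decoder asks about.
       */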
2946 
2947       if (address >= bo_address && address < bo_address + (*bbo)->bo->size) {
2948          return (struct intel_batch_decode_bo) {
2949             .addr = bo_address,
2950             .size = (*bbo)->bo->size,
2951             .map = (*bbo)->bo->map,
2952          };
2953       }
2954 
2955       uint32_t dep_words = (*bbo)->relocs.dep_words;
2956       BITSET_WORD *deps = (*bbo)->relocs.deps;
2957       for (uint32_t w = 0; w < dep_words; w++) {
2958          BITSET_WORD mask = deps[w];
2959          while (mask) {
2960             int i = u_bit_scan(&mask);
2961             uint32_t gem_handle = w * BITSET_WORDBITS + i;
2962             struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
2963             assert(bo->refcount > 0);
2964             bo_address = bo->offset & (~0ull >> 16);
2965             if (address >= bo_address && address < bo_address + bo->size) {
2966                return (struct intel_batch_decode_bo) {
2967                   .addr = bo_address,
2968                   .size = bo->size,
2969                   .map = bo->map,
2970                };
2971             }
2972          }
2973       }
2974    }
2975 
2976    return (struct intel_batch_decode_bo) { };
2977 }
2978 
2979 struct intel_aux_map_buffer {
2980    struct intel_buffer base;
2981    struct anv_state state;
2982 };
2983 
2984 static struct intel_buffer *
2985 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2986 {
2987    struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
2988    if (!buf)
2989       return NULL;
2990 
2991    struct anv_device *device = (struct anv_device*)driver_ctx;
2992 
2993    struct anv_state_pool *pool = &device->dynamic_state_pool;
2994    buf->state = anv_state_pool_alloc(pool, size, size);
2995 
2996    buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
2997    buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
2998    buf->base.map = buf->state.map;
2999    buf->base.driver_bo = &buf->state;
3000    return &buf->base;
3001 }
3002 
3003 static void
3004 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
3005 {
3006    struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
3007    struct anv_device *device = (struct anv_device*)driver_ctx;
3008    struct anv_state_pool *pool = &device->dynamic_state_pool;
3009    anv_state_pool_free(pool, buf->state);
3010    free(buf);
3011 }
3012 
3013 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
3014    .alloc = intel_aux_map_buffer_alloc,
3015    .free = intel_aux_map_buffer_free,
3016 };
3017 
3018 static VkResult
3019 anv_device_setup_context_or_vm(struct anv_device *device,
3020                                const VkDeviceCreateInfo *pCreateInfo,
3021                                const uint32_t num_queues)
3022 {
3023    switch (device->info->kmd_type) {
3024    case INTEL_KMD_TYPE_I915:
3025       return anv_i915_device_setup_context(device, pCreateInfo, num_queues);
3026    case INTEL_KMD_TYPE_XE:
3027       return anv_xe_device_setup_vm(device);
3028    default:
3029       unreachable("Missing");
3030       return VK_ERROR_UNKNOWN;
3031    }
3032 }
3033 
3034 static bool
3035 anv_device_destroy_context_or_vm(struct anv_device *device)
3036 {
3037    switch (device->info->kmd_type) {
3038    case INTEL_KMD_TYPE_I915:
3039       if (device->physical->has_vm_control)
3040          return anv_i915_device_destroy_vm(device);
3041       else
3042          return intel_gem_destroy_context(device->fd, device->context_id);
3043    case INTEL_KMD_TYPE_XE:
3044       return anv_xe_device_destroy_vm(device);
3045    default:
3046       unreachable("Missing");
3047       return false;
3048    }
3049 }
3050 
3051 static VkResult
3052 anv_device_init_trtt(struct anv_device *device)
3053 {
3054    struct anv_trtt *trtt = &device->trtt;
3055 
3056    if (pthread_mutex_init(&trtt->mutex, NULL) != 0)
3057       return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3058 
3059    list_inithead(&trtt->in_flight_batches);
3060 
3061    return VK_SUCCESS;
3062 }
3063 
3064 static void
3065 anv_device_finish_trtt(struct anv_device *device)
3066 {
3067    struct anv_trtt *trtt = &device->trtt;
3068 
3069    if (trtt->timeline_val > 0) {
3070       struct drm_syncobj_timeline_wait wait = {
3071          .handles = (uintptr_t)&trtt->timeline_handle,
3072          .points = (uintptr_t)&trtt->timeline_val,
3073          .timeout_nsec = INT64_MAX,
3074          .count_handles = 1,
3075          .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
3076          .first_signaled = false,
3077       };
3078       if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait))
3079          fprintf(stderr, "TR-TT syncobj wait failed!\n");
3080 
3081       list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo,
3082                                &trtt->in_flight_batches, link)
3083          anv_trtt_batch_bo_free(device, trtt_bbo);
3084 
3085    }
3086 
3087    if (trtt->timeline_handle > 0) {
3088       struct drm_syncobj_destroy destroy = {
3089          .handle = trtt->timeline_handle,
3090       };
3091       if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy))
3092          fprintf(stderr, "TR-TT syncobj destroy failed!\n");
3093    }
3094 
3095    pthread_mutex_destroy(&trtt->mutex);
3096 
3097    vk_free(&device->vk.alloc, trtt->l3_mirror);
3098    vk_free(&device->vk.alloc, trtt->l2_mirror);
3099 
3100    for (int i = 0; i < trtt->num_page_table_bos; i++)
3101       anv_device_release_bo(device, trtt->page_table_bos[i]);
3102 
3103    vk_free(&device->vk.alloc, trtt->page_table_bos);
3104 }
3105 
3106 VkResult anv_CreateDevice(
3107     VkPhysicalDevice                            physicalDevice,
3108     const VkDeviceCreateInfo*                   pCreateInfo,
3109     const VkAllocationCallbacks*                pAllocator,
3110     VkDevice*                                   pDevice)
3111 {
3112    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3113    VkResult result;
3114    struct anv_device *device;
3115 
3116    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
3117 
3118    /* Check requested queues and fail if we are requested to create any
3119     * queues with flags we don't support.
3120     */
3121    assert(pCreateInfo->queueCreateInfoCount > 0);
3122    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3123       if (pCreateInfo->pQueueCreateInfos[i].flags & ~VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT)
3124          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
3125    }
3126 
3127    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
3128                        sizeof(*device), 8,
3129                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3130    if (!device)
3131       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
3132 
3133    struct vk_device_dispatch_table dispatch_table;
3134 
3135    bool override_initial_entrypoints = true;
3136    if (physical_device->instance->vk.app_info.app_name &&
3137        !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
3138       vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3139                                                 &anv_hitman3_device_entrypoints,
3140                                                 true);
3141       override_initial_entrypoints = false;
3142    }
3143    if (physical_device->info.ver < 12 &&
3144        physical_device->instance->vk.app_info.app_name &&
3145        !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
3146       vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3147                                                 &anv_doom64_device_entrypoints,
3148                                                 true);
3149       override_initial_entrypoints = false;
3150    }
3151 #if DETECT_OS_ANDROID
3152    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3153                                              &anv_android_device_entrypoints,
3154                                              true);
3155    override_initial_entrypoints = false;
3156 #endif
3157    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
3158       vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3159                                                 &anv_rmv_device_entrypoints,
3160                                                 true);
3161       override_initial_entrypoints = false;
3162    }
3163    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3164       anv_genX(&physical_device->info, device_entrypoints),
3165       override_initial_entrypoints);
3166    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3167       &anv_device_entrypoints, false);
3168    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3169       &wsi_device_entrypoints, false);
3170 
3171 
3172    result = vk_device_init(&device->vk, &physical_device->vk,
3173                            &dispatch_table, pCreateInfo, pAllocator);
3174    if (result != VK_SUCCESS)
3175       goto fail_alloc;
3176 
3177    if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
3178       for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
3179          struct intel_batch_decode_ctx *decoder = &device->decoder[i];
3180 
3181          const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
3182 
3183          intel_batch_decode_ctx_init_brw(decoder,
3184                                          &physical_device->compiler->isa,
3185                                          &physical_device->info,
3186                                          stderr, decode_flags, NULL,
3187                                          decode_get_bo, NULL, device);
3188          intel_batch_stats_reset(decoder);
3189 
3190          decoder->engine = physical_device->queue.families[i].engine_class;
3191          decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
3192          decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
3193          decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
3194       }
3195    }
3196 
3197    anv_device_set_physical(device, physical_device);
3198    device->kmd_backend = anv_kmd_backend_get(device->info->kmd_type);
3199 
3200    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
3201    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
3202    if (device->fd == -1) {
3203       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3204       goto fail_device;
3205    }
3206 
3207    switch (device->info->kmd_type) {
3208    case INTEL_KMD_TYPE_I915:
3209       device->vk.check_status = anv_i915_device_check_status;
3210       break;
3211    case INTEL_KMD_TYPE_XE:
3212       device->vk.check_status = anv_xe_device_check_status;
3213       break;
3214    default:
3215       unreachable("Missing");
3216    }
3217 
3218    device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
3219    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
3220    if (physical_device->info.kmd_type == INTEL_KMD_TYPE_I915)
3221       device->vk.create_sync_for_memory = anv_create_sync_for_memory;
3222    vk_device_set_drm_fd(&device->vk, device->fd);
3223 
3224    uint32_t num_queues = 0;
3225    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
3226       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
3227 
3228    result = anv_device_setup_context_or_vm(device, pCreateInfo, num_queues);
3229    if (result != VK_SUCCESS)
3230       goto fail_fd;
3231 
3232    device->queues =
3233       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3234                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3235    if (device->queues == NULL) {
3236       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3237       goto fail_context_id;
3238    }
3239 
3240    device->queue_count = 0;
3241    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3242       const VkDeviceQueueCreateInfo *queueCreateInfo =
3243          &pCreateInfo->pQueueCreateInfos[i];
3244 
3245       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3246          result = anv_queue_init(device, &device->queues[device->queue_count],
3247                                  queueCreateInfo, j);
3248          if (result != VK_SUCCESS)
3249             goto fail_queues;
3250 
3251          device->queue_count++;
3252       }
3253    }
3254 
3255    if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3256       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3257       goto fail_queues;
3258    }
3259 
3260    /* keep the page with address zero out of the allocator */
3261    util_vma_heap_init(&device->vma_lo,
3262                       device->physical->va.low_heap.addr,
3263                       device->physical->va.low_heap.size);
3264 
3265    util_vma_heap_init(&device->vma_hi,
3266                       device->physical->va.high_heap.addr,
3267                       device->physical->va.high_heap.size);
3268 
3269    if (device->physical->indirect_descriptors) {
3270       util_vma_heap_init(&device->vma_desc,
3271                          device->physical->va.indirect_descriptor_pool.addr,
3272                          device->physical->va.indirect_descriptor_pool.size);
3273    } else {
3274       util_vma_heap_init(&device->vma_desc,
3275                          device->physical->va.bindless_surface_state_pool.addr,
3276                          device->physical->va.bindless_surface_state_pool.size);
3277    }
3278 
3279    util_vma_heap_init(&device->vma_samplers,
3280                       device->physical->va.sampler_state_pool.addr,
3281                       device->physical->va.sampler_state_pool.size);
3282    util_vma_heap_init(&device->vma_trtt,
3283                       device->physical->va.trtt.addr,
3284                       device->physical->va.trtt.size);
3285 
3286    list_inithead(&device->memory_objects);
3287    list_inithead(&device->image_private_objects);
3288 
3289    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3290       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3291       goto fail_vmas;
3292    }
3293 
3294    pthread_condattr_t condattr;
3295    if (pthread_condattr_init(&condattr) != 0) {
3296       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3297       goto fail_mutex;
3298    }
3299    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3300       pthread_condattr_destroy(&condattr);
3301       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3302       goto fail_mutex;
3303    }
3304    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3305       pthread_condattr_destroy(&condattr);
3306       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3307       goto fail_mutex;
3308    }
3309    pthread_condattr_destroy(&condattr);
3310 
3311    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
3312       anv_memory_trace_init(device);
3313 
3314    result = anv_bo_cache_init(&device->bo_cache, device);
3315    if (result != VK_SUCCESS)
3316       goto fail_queue_cond;
3317 
3318    anv_bo_pool_init(&device->batch_bo_pool, device, "batch",
3319                     ANV_BO_ALLOC_MAPPED |
3320                     ANV_BO_ALLOC_HOST_CACHED_COHERENT |
3321                     ANV_BO_ALLOC_CAPTURE);
3322    if (device->vk.enabled_extensions.KHR_acceleration_structure) {
3323       anv_bo_pool_init(&device->bvh_bo_pool, device, "bvh build",
3324                        0 /* alloc_flags */);
3325    }
3326 
3327    /* Because scratch is also relative to General State Base Address, we leave
3328     * the base address 0 and start the pool memory at an offset.  This way we
3329     * get the correct offsets in the anv_states that get allocated from it.
3330     */
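   /* Sketch of the effect (illustrative): with base_address == 0 and
    * start_offset == va.general_state_pool.addr, a state allocated at pool
    * offset X reports anv_state::offset == va.general_state_pool.addr + X,
    * i.e. exactly the offset scratch needs relative to a zero General State
    * Base Address.
    */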
3331    result = anv_state_pool_init(&device->general_state_pool, device,
3332                                 &(struct anv_state_pool_params) {
3333                                    .name         = "general pool",
3334                                    .base_address = 0,
3335                                    .start_offset = device->physical->va.general_state_pool.addr,
3336                                    .block_size   = 16384,
3337                                    .max_size     = device->physical->va.general_state_pool.size
3338                                 });
3339    if (result != VK_SUCCESS)
3340       goto fail_batch_bo_pool;
3341 
3342    result = anv_state_pool_init(&device->dynamic_state_pool, device,
3343                                 &(struct anv_state_pool_params) {
3344                                    .name         = "dynamic pool",
3345                                    .base_address = device->physical->va.dynamic_state_pool.addr,
3346                                    .block_size   = 16384,
3347                                    .max_size     = device->physical->va.dynamic_state_pool.size,
3348                                 });
3349    if (result != VK_SUCCESS)
3350       goto fail_general_state_pool;
3351 
3352    /* The border color pointer is limited to 24 bits, so we need to make
3353     * sure that any such color used at any point in the program doesn't
3354     * exceed that limit.
3355     * We achieve that by reserving all the custom border colors we support
3356     * right off the bat, so they are close to the base address.
3357     */
3358    anv_state_reserved_pool_init(&device->custom_border_colors,
3359                                 &device->dynamic_state_pool,
3360                                 MAX_CUSTOM_BORDER_COLORS,
3361                                 sizeof(struct gfx8_border_color), 64);
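   /* Note (illustrative): a 24-bit pointer can only address the first
    * 1 << 24 bytes (16 MiB) above dynamic state base, which is why all
    * MAX_CUSTOM_BORDER_COLORS entries are reserved up front, close to the
    * base address.
    */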
3362 
3363    result = anv_state_pool_init(&device->instruction_state_pool, device,
3364                                 &(struct anv_state_pool_params) {
3365                                    .name         = "instruction pool",
3366                                    .base_address = device->physical->va.instruction_state_pool.addr,
3367                                    .block_size   = 16384,
3368                                    .max_size     = device->physical->va.instruction_state_pool.size,
3369                                 });
3370    if (result != VK_SUCCESS)
3371       goto fail_dynamic_state_pool;
3372 
3373    if (device->info->verx10 >= 125) {
3374       /* Put the scratch surface states at the beginning of the internal
3375        * surface state pool.
3376        */
3377       result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
3378                                    &(struct anv_state_pool_params) {
3379                                       .name         = "scratch surface state pool",
3380                                       .base_address = device->physical->va.scratch_surface_state_pool.addr,
3381                                       .block_size   = 4096,
3382                                       .max_size     = device->physical->va.scratch_surface_state_pool.size,
3383                                    });
3384       if (result != VK_SUCCESS)
3385          goto fail_instruction_state_pool;
3386 
3387       result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3388                                    &(struct anv_state_pool_params) {
3389                                       .name         = "internal surface state pool",
3390                                       .base_address = device->physical->va.internal_surface_state_pool.addr,
3391                                       .start_offset = device->physical->va.scratch_surface_state_pool.size,
3392                                       .block_size   = 4096,
3393                                       .max_size     = device->physical->va.internal_surface_state_pool.size,
3394                                    });
3395    } else {
3396       result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3397                                    &(struct anv_state_pool_params) {
3398                                       .name         = "internal surface state pool",
3399                                       .base_address = device->physical->va.internal_surface_state_pool.addr,
3400                                       .block_size   = 4096,
3401                                       .max_size     = device->physical->va.internal_surface_state_pool.size,
3402                                    });
3403    }
3404    if (result != VK_SUCCESS)
3405       goto fail_scratch_surface_state_pool;
3406 
3407    if (device->physical->indirect_descriptors) {
3408       result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
3409                                    &(struct anv_state_pool_params) {
3410                                       .name         = "bindless surface state pool",
3411                                       .base_address = device->physical->va.bindless_surface_state_pool.addr,
3412                                       .block_size   = 4096,
3413                                       .max_size     = device->physical->va.bindless_surface_state_pool.size,
3414                                    });
3415       if (result != VK_SUCCESS)
3416          goto fail_internal_surface_state_pool;
3417    }
3418 
3419    if (device->info->verx10 >= 125) {
3420       /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
3421        * table its own base address separately from surface state base.
3422        */
3423       result = anv_state_pool_init(&device->binding_table_pool, device,
3424                                    &(struct anv_state_pool_params) {
3425                                       .name         = "binding table pool",
3426                                       .base_address = device->physical->va.binding_table_pool.addr,
3427                                       .block_size   = BINDING_TABLE_POOL_BLOCK_SIZE,
3428                                       .max_size     = device->physical->va.binding_table_pool.size,
3429                                    });
3430    } else {
3431       /* The binding table should be in front of the surface states in virtual
3432        * address space so that all surface states can be expressed as relative
3433        * offsets from the binding table location.
3434        */
3435       assert(device->physical->va.binding_table_pool.addr <
3436              device->physical->va.internal_surface_state_pool.addr);
3437       int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
3438                                (int64_t)device->physical->va.internal_surface_state_pool.addr;
3439       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
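      /* Worked example (assumed addresses, illustration only): with the
       * binding table pool at 0x0000900000000000 and the internal surface
       * state pool at 0x0000900040000000, bt_pool_offset is -0x40000000
       * (-1 GiB), which satisfies INT32_MIN < bt_pool_offset < 0 and places
       * the binding table pool 1 GiB below the surface state base address.
       */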
3440       result = anv_state_pool_init(&device->binding_table_pool, device,
3441                                    &(struct anv_state_pool_params) {
3442                                       .name         = "binding table pool",
3443                                       .base_address = device->physical->va.internal_surface_state_pool.addr,
3444                                       .start_offset = bt_pool_offset,
3445                                       .block_size   = BINDING_TABLE_POOL_BLOCK_SIZE,
3446                                       .max_size     = device->physical->va.internal_surface_state_pool.size,
3447                                    });
3448    }
3449    if (result != VK_SUCCESS)
3450       goto fail_bindless_surface_state_pool;
3451 
3452    if (device->physical->indirect_descriptors) {
3453       result = anv_state_pool_init(&device->indirect_push_descriptor_pool, device,
3454                                    &(struct anv_state_pool_params) {
3455                                       .name         = "indirect push descriptor pool",
3456                                       .base_address = device->physical->va.indirect_push_descriptor_pool.addr,
3457                                       .block_size   = 4096,
3458                                       .max_size     = device->physical->va.indirect_push_descriptor_pool.size,
3459                                    });
3460       if (result != VK_SUCCESS)
3461          goto fail_binding_table_pool;
3462    }
3463 
3464    if (device->info->has_aux_map) {
3465       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3466                                                &physical_device->info);
3467       if (!device->aux_map_ctx)
3468          goto fail_indirect_push_descriptor_pool;
3469    }
3470 
3471    result = anv_device_alloc_bo(device, "workaround", 8192,
3472                                 ANV_BO_ALLOC_CAPTURE |
3473                                 ANV_BO_ALLOC_HOST_COHERENT |
3474                                 ANV_BO_ALLOC_MAPPED |
3475                                 ANV_BO_ALLOC_INTERNAL,
3476                                 0 /* explicit_address */,
3477                                 &device->workaround_bo);
3478    if (result != VK_SUCCESS)
3479       goto fail_surface_aux_map_pool;
3480 
3481    device->workaround_address = (struct anv_address) {
3482       .bo = device->workaround_bo,
3483       .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
3484                                                     device->workaround_bo->size,
3485                                                     "Anv"), 32),
3486    };
3487 
3488    device->workarounds.doom64_images = NULL;
3489 
3490    device->rt_uuid_addr = anv_address_add(device->workaround_address, 8);
3491    memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
3492           physical_device->rt_uuid,
3493           sizeof(physical_device->rt_uuid));
3494 
3495    device->debug_frame_desc =
3496       intel_debug_get_identifier_block(device->workaround_bo->map,
3497                                        device->workaround_bo->size,
3498                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
3499 
3500    if (device->vk.enabled_extensions.KHR_ray_query) {
3501       uint32_t ray_queries_size =
3502          align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
3503 
3504       result = anv_device_alloc_bo(device, "ray queries",
3505                                    ray_queries_size,
3506                                    ANV_BO_ALLOC_INTERNAL,
3507                                    0 /* explicit_address */,
3508                                    &device->ray_query_bo);
3509       if (result != VK_SUCCESS)
3510          goto fail_workaround_bo;
3511    }
3512 
3513    result = anv_device_init_trivial_batch(device);
3514    if (result != VK_SUCCESS)
3515       goto fail_ray_query_bo;
3516 
3517    /* Emit the CPS states before running the initialization batch as those
3518     * structures are referenced.
3519     */
3520    if (device->info->ver >= 12) {
3521       uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
3522 
3523       if (device->info->has_coarse_pixel_primitive_and_cb)
3524          n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
3525 
3526       n_cps_states += 1; /* Disable CPS */
3527 
3528       /* Each combination must be replicated on all viewports */
3529       n_cps_states *= MAX_VIEWPORTS;
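      /* Arithmetic sketch (illustrative): with coarse-pixel primitive/CB
       * support this is (3 * 3) * (5 * 5) + 1 = 226 CPS states, each
       * replicated MAX_VIEWPORTS times; without it, 3 * 3 + 1 = 10 states
       * per viewport.
       */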
3530 
3531       device->cps_states =
3532          anv_state_pool_alloc(&device->dynamic_state_pool,
3533                               n_cps_states * CPS_STATE_length(device->info) * 4,
3534                               32);
3535       if (device->cps_states.map == NULL)
3536          goto fail_trivial_batch;
3537 
3538       anv_genX(device->info, init_cps_device_state)(device);
3539    }
3540 
3541    if (device->physical->indirect_descriptors) {
3542       /* Allocate a null surface state at surface state offset 0. This makes
3543        * NULL descriptor handling trivial because we can just memset
3544        * structures to zero and they have a valid descriptor.
3545        */
3546       device->null_surface_state =
3547          anv_state_pool_alloc(&device->bindless_surface_state_pool,
3548                               device->isl_dev.ss.size,
3549                               device->isl_dev.ss.align);
3550       isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3551                           .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3552       assert(device->null_surface_state.offset == 0);
3553    } else {
3554       /* When using direct descriptors, those can hold the null surface state
3555        * directly. We still need a null surface for the binding table entries
3556        * though, but this one can live anywhere in the internal surface state
3557        * pool.
3558        */
3559       device->null_surface_state =
3560          anv_state_pool_alloc(&device->internal_surface_state_pool,
3561                               device->isl_dev.ss.size,
3562                               device->isl_dev.ss.align);
3563       isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3564                           .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3565    }
3566 
3567    isl_null_fill_state(&device->isl_dev, &device->host_null_surface_state,
3568                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3569 
3570    anv_scratch_pool_init(device, &device->scratch_pool);
3571 
3572    /* TODO(RT): Do we want some sort of data structure for this? */
3573    memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3574 
3575    if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
3576       /* The docs say to always allocate 128KB per DSS */
3577       const uint32_t btd_fifo_bo_size =
3578          128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
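      /* Illustration (assumed value): a part whose dual-subslice id bound is
       * 32 would get 32 * 128 KiB = 4 MiB for the BTD FIFO here.
       */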
3579       result = anv_device_alloc_bo(device,
3580                                    "rt-btd-fifo",
3581                                    btd_fifo_bo_size,
3582                                    ANV_BO_ALLOC_INTERNAL,
3583                                    0 /* explicit_address */,
3584                                    &device->btd_fifo_bo);
3585       if (result != VK_SUCCESS)
3586          goto fail_trivial_batch_bo_and_scratch_pool;
3587    }
3588 
3589    result = anv_device_init_trtt(device);
3590    if (result != VK_SUCCESS)
3591       goto fail_btd_fifo_bo;
3592 
3593    result = anv_genX(device->info, init_device_state)(device);
3594    if (result != VK_SUCCESS)
3595       goto fail_trtt;
3596 
3597    struct vk_pipeline_cache_create_info pcc_info = { };
3598    device->default_pipeline_cache =
3599       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3600    if (!device->default_pipeline_cache) {
3601       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3602       goto fail_trtt;
3603    }
3604 
3605    /* Internal shaders need their own pipeline cache because, unlike the rest
3606     * of ANV, it won't work at all without the cache. It depends on it for
3607     * shaders to remain resident while it runs. Therefore, we need a special
3608     * cache just for BLORP/RT that's forced to always be enabled.
3609     */
3610    pcc_info.force_enable = true;
3611    device->internal_cache =
3612       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3613    if (device->internal_cache == NULL) {
3614       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3615       goto fail_default_pipeline_cache;
3616    }
3617 
3618    /* The device (currently ICL/TGL) does not have float64 support. */
3619    if (!device->info->has_64bit_float &&
3620       device->physical->instance->fp64_workaround_enabled)
3621       anv_load_fp64_shader(device);
3622 
3623    result = anv_device_init_rt_shaders(device);
3624    if (result != VK_SUCCESS) {
3625       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3626       goto fail_internal_cache;
3627    }
3628 
3629 #if DETECT_OS_ANDROID
3630    device->u_gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
3631 #endif
3632 
3633    device->robust_buffer_access =
3634       device->vk.enabled_features.robustBufferAccess ||
3635       device->vk.enabled_features.nullDescriptor;
3636 
3637    device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
3638                                              4);
3639    p_atomic_set(&device->draw_call_count, 0);
3640 
3641    /* Create a separate command pool for companion RCS command buffer. */
3642    if (device->info->verx10 >= 125) {
3643       VkCommandPoolCreateInfo pool_info = {
3644          .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
3645          .queueFamilyIndex =
3646              anv_get_first_render_queue_index(device->physical),
3647       };
3648 
3649       result = vk_common_CreateCommandPool(anv_device_to_handle(device),
3650                                            &pool_info, NULL,
3651                                            &device->companion_rcs_cmd_pool);
3652       if (result != VK_SUCCESS) {
3653          goto fail_internal_cache;
3654       }
3655    }
3656 
3657    anv_device_init_blorp(device);
3658 
3659    anv_device_init_border_colors(device);
3660 
3661    anv_device_init_internal_kernels(device);
3662 
3663    anv_device_init_astc_emu(device);
3664 
3665    anv_device_perf_init(device);
3666 
3667    anv_device_utrace_init(device);
3668 
3669    BITSET_ONES(device->gfx_dirty_state);
3670    BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
3671    BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
3672    if (device->info->ver < 11)
3673       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
3674    if (device->info->ver < 12) {
3675       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
3676       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
3677    }
3678    if (!device->vk.enabled_extensions.EXT_sample_locations)
3679       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
3680    if (!device->vk.enabled_extensions.KHR_fragment_shading_rate)
3681       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
3682    if (!device->vk.enabled_extensions.EXT_mesh_shader) {
3683       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
3684       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
3685       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
3686       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
3687       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
3688       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
3689       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
3690       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
3691    }
3692    if (!intel_needs_workaround(device->info, 18019816803))
3693       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
3694    if (device->info->ver > 9)
3695       BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);
3696 
3697    *pDevice = anv_device_to_handle(device);
3698 
3699    return VK_SUCCESS;
3700 
3701  fail_internal_cache:
3702    vk_pipeline_cache_destroy(device->internal_cache, NULL);
3703  fail_default_pipeline_cache:
3704    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3705  fail_trtt:
3706    anv_device_finish_trtt(device);
3707  fail_btd_fifo_bo:
3708    if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
3709       anv_device_release_bo(device, device->btd_fifo_bo);
3710  fail_trivial_batch_bo_and_scratch_pool:
3711    anv_scratch_pool_finish(device, &device->scratch_pool);
3712  fail_trivial_batch:
3713    anv_device_release_bo(device, device->trivial_batch_bo);
3714  fail_ray_query_bo:
3715    if (device->ray_query_bo)
3716       anv_device_release_bo(device, device->ray_query_bo);
3717  fail_workaround_bo:
3718    anv_device_release_bo(device, device->workaround_bo);
3719  fail_surface_aux_map_pool:
3720    if (device->info->has_aux_map) {
3721       intel_aux_map_finish(device->aux_map_ctx);
3722       device->aux_map_ctx = NULL;
3723    }
3724  fail_indirect_push_descriptor_pool:
3725    if (device->physical->indirect_descriptors)
3726       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
3727  fail_binding_table_pool:
3728    anv_state_pool_finish(&device->binding_table_pool);
3729  fail_bindless_surface_state_pool:
3730    if (device->physical->indirect_descriptors)
3731       anv_state_pool_finish(&device->bindless_surface_state_pool);
3732  fail_internal_surface_state_pool:
3733    anv_state_pool_finish(&device->internal_surface_state_pool);
3734  fail_scratch_surface_state_pool:
3735    if (device->info->verx10 >= 125)
3736       anv_state_pool_finish(&device->scratch_surface_state_pool);
3737  fail_instruction_state_pool:
3738    anv_state_pool_finish(&device->instruction_state_pool);
3739  fail_dynamic_state_pool:
3740    anv_state_reserved_pool_finish(&device->custom_border_colors);
3741    anv_state_pool_finish(&device->dynamic_state_pool);
3742  fail_general_state_pool:
3743    anv_state_pool_finish(&device->general_state_pool);
3744  fail_batch_bo_pool:
3745    if (device->vk.enabled_extensions.KHR_acceleration_structure)
3746       anv_bo_pool_finish(&device->bvh_bo_pool);
3747    anv_bo_pool_finish(&device->batch_bo_pool);
3748    anv_bo_cache_finish(&device->bo_cache);
3749  fail_queue_cond:
3750    pthread_cond_destroy(&device->queue_submit);
3751  fail_mutex:
3752    pthread_mutex_destroy(&device->mutex);
3753  fail_vmas:
3754    util_vma_heap_finish(&device->vma_trtt);
3755    if (!device->physical->indirect_descriptors)
3756       util_vma_heap_finish(&device->vma_samplers);
3757    util_vma_heap_finish(&device->vma_desc);
3758    util_vma_heap_finish(&device->vma_hi);
3759    util_vma_heap_finish(&device->vma_lo);
3760    pthread_mutex_destroy(&device->vma_mutex);
3761  fail_queues:
3762    for (uint32_t i = 0; i < device->queue_count; i++)
3763       anv_queue_finish(&device->queues[i]);
3764    vk_free(&device->vk.alloc, device->queues);
3765  fail_context_id:
3766    anv_device_destroy_context_or_vm(device);
3767  fail_fd:
3768    close(device->fd);
3769  fail_device:
3770    vk_device_finish(&device->vk);
3771  fail_alloc:
3772    vk_free(&device->vk.alloc, device);
3773 
3774    return result;
3775 }
3776 
3777 void anv_DestroyDevice(
3778     VkDevice                                    _device,
3779     const VkAllocationCallbacks*                pAllocator)
3780 {
3781    ANV_FROM_HANDLE(anv_device, device, _device);
3782 
3783    if (!device)
3784       return;
3785 
3786 #if DETECT_OS_ANDROID
3787    u_gralloc_destroy(&device->u_gralloc);
3788 #endif
3789 
3790    anv_memory_trace_finish(device);
3791 
3792    struct anv_physical_device *pdevice = device->physical;
3793 
3794    for (uint32_t i = 0; i < device->queue_count; i++)
3795       anv_queue_finish(&device->queues[i]);
3796    vk_free(&device->vk.alloc, device->queues);
3797 
3798    anv_device_utrace_finish(device);
3799 
3800    anv_device_finish_blorp(device);
3801 
3802    anv_device_finish_rt_shaders(device);
3803 
3804    anv_device_finish_astc_emu(device);
3805 
3806    anv_device_finish_internal_kernels(device);
3807 
3808    vk_pipeline_cache_destroy(device->internal_cache, NULL);
3809    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3810 
3811    anv_device_finish_trtt(device);
3812 
3813    if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
3814       anv_device_release_bo(device, device->btd_fifo_bo);
3815 
3816    if (device->info->verx10 >= 125) {
3817       vk_common_DestroyCommandPool(anv_device_to_handle(device),
3818                                    device->companion_rcs_cmd_pool, NULL);
3819    }
3820 
3821 #ifdef HAVE_VALGRIND
3822    /* We only need to free these to prevent valgrind errors.  The backing
3823     * BO will go away in a couple of lines so we don't actually leak.
3824     */
3825    anv_state_reserved_pool_finish(&device->custom_border_colors);
3826    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3827    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3828    anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
3829    anv_state_pool_free(&device->dynamic_state_pool, device->breakpoint);
3830 #endif
3831 
3832    for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3833       if (device->rt_scratch_bos[i] != NULL)
3834          anv_device_release_bo(device, device->rt_scratch_bos[i]);
3835    }
3836 
3837    anv_scratch_pool_finish(device, &device->scratch_pool);
3838 
3839    if (device->vk.enabled_extensions.KHR_ray_query) {
3840       for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
3841          if (device->ray_query_shadow_bos[i] != NULL)
3842             anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
3843       }
3844       anv_device_release_bo(device, device->ray_query_bo);
3845    }
3846    anv_device_release_bo(device, device->workaround_bo);
3847    anv_device_release_bo(device, device->trivial_batch_bo);
3848 
3849    if (device->info->has_aux_map) {
3850       intel_aux_map_finish(device->aux_map_ctx);
3851       device->aux_map_ctx = NULL;
3852    }
3853 
3854    if (device->physical->indirect_descriptors)
3855       anv_state_pool_finish(&device->indirect_push_descriptor_pool);
3856    anv_state_pool_finish(&device->binding_table_pool);
3857    if (device->info->verx10 >= 125)
3858       anv_state_pool_finish(&device->scratch_surface_state_pool);
3859    anv_state_pool_finish(&device->internal_surface_state_pool);
3860    if (device->physical->indirect_descriptors)
3861       anv_state_pool_finish(&device->bindless_surface_state_pool);
3862    anv_state_pool_finish(&device->instruction_state_pool);
3863    anv_state_pool_finish(&device->dynamic_state_pool);
3864    anv_state_pool_finish(&device->general_state_pool);
3865 
3866    if (device->vk.enabled_extensions.KHR_acceleration_structure)
3867       anv_bo_pool_finish(&device->bvh_bo_pool);
3868    anv_bo_pool_finish(&device->batch_bo_pool);
3869 
3870    anv_bo_cache_finish(&device->bo_cache);
3871 
3872    util_vma_heap_finish(&device->vma_trtt);
3873    if (!device->physical->indirect_descriptors)
3874       util_vma_heap_finish(&device->vma_samplers);
3875    util_vma_heap_finish(&device->vma_desc);
3876    util_vma_heap_finish(&device->vma_hi);
3877    util_vma_heap_finish(&device->vma_lo);
3878    pthread_mutex_destroy(&device->vma_mutex);
3879 
3880    pthread_cond_destroy(&device->queue_submit);
3881    pthread_mutex_destroy(&device->mutex);
3882 
3883    ralloc_free(device->fp64_nir);
3884 
3885    anv_device_destroy_context_or_vm(device);
3886 
3887    if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
3888       for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
3889          if (INTEL_DEBUG(DEBUG_BATCH_STATS))
3890             intel_batch_print_stats(&device->decoder[i]);
3891          intel_batch_decode_ctx_finish(&device->decoder[i]);
3892       }
3893    }
3894 
3895    close(device->fd);
3896 
3897    vk_device_finish(&device->vk);
3898    vk_free(&device->vk.alloc, device);
3899 }
3900 
3901 VkResult anv_EnumerateInstanceLayerProperties(
3902     uint32_t*                                   pPropertyCount,
3903     VkLayerProperties*                          pProperties)
3904 {
3905    if (pProperties == NULL) {
3906       *pPropertyCount = 0;
3907       return VK_SUCCESS;
3908    }
3909 
3910    /* None supported at this time */
3911    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3912 }
3913 
3914 VkResult
3915 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3916                 int64_t timeout)
3917 {
3918    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3919    if (ret == -1 && errno == ETIME) {
3920       return VK_TIMEOUT;
3921    } else if (ret == -1) {
3922       /* We don't know the real error. */
3923       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
3924    } else {
3925       return VK_SUCCESS;
3926    }
3927 }
3928 
3929 static struct util_vma_heap *
3930 anv_vma_heap_for_flags(struct anv_device *device,
3931                        enum anv_bo_alloc_flags alloc_flags)
3932 {
3933    if (alloc_flags & ANV_BO_ALLOC_TRTT)
3934       return &device->vma_trtt;
3935 
3936    if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
3937       return &device->vma_lo;
3938 
3939    if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
3940       return &device->vma_desc;
3941 
3942    if (alloc_flags & ANV_BO_ALLOC_SAMPLER_POOL)
3943       return &device->vma_samplers;
3944 
3945    return &device->vma_hi;
3946 }
3947 
3948 uint64_t
3949 anv_vma_alloc(struct anv_device *device,
3950               uint64_t size, uint64_t align,
3951               enum anv_bo_alloc_flags alloc_flags,
3952               uint64_t client_address,
3953               struct util_vma_heap **out_vma_heap)
3954 {
3955    pthread_mutex_lock(&device->vma_mutex);
3956 
3957    uint64_t addr = 0;
3958    *out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
3959 
3960    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3961       assert(*out_vma_heap == &device->vma_hi ||
3962              *out_vma_heap == &device->vma_trtt);
3963 
3964       if (client_address) {
3965          if (util_vma_heap_alloc_addr(*out_vma_heap,
3966                                       client_address, size)) {
3967             addr = client_address;
3968          }
3969       } else {
3970          (*out_vma_heap)->alloc_high = false;
3971          addr = util_vma_heap_alloc(*out_vma_heap, size, align);
3972          (*out_vma_heap)->alloc_high = true;
3973       }
3974       /* We don't want to fall back to other heaps */
3975       goto done;
3976    }
3977 
3978    assert(client_address == 0);
3979 
3980    addr = util_vma_heap_alloc(*out_vma_heap, size, align);
3981 
3982 done:
3983    pthread_mutex_unlock(&device->vma_mutex);
3984 
3985    assert(addr == intel_48b_address(addr));
3986    return intel_canonical_address(addr);
3987 }
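
/* The address returned above is a 48-bit GPU virtual address in canonical
 * form, i.e. bit 47 is sign-extended into the upper bits. A minimal sketch of
 * that conversion, for illustration only (the driver's real helper is
 * intel_canonical_address()):
 *
 *    uint64_t canonicalize_48b(uint64_t addr)
 *    {
 *       return (uint64_t)(((int64_t)addr << 16) >> 16); // sign-extend bit 47
 *    }
 *
 * e.g. 0x0000800000000000 canonicalizes to 0xffff800000000000, while
 * addresses with bit 47 clear come back unchanged.
 */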
3988 
3989 void
3990 anv_vma_free(struct anv_device *device,
3991              struct util_vma_heap *vma_heap,
3992              uint64_t address, uint64_t size)
3993 {
3994    assert(vma_heap == &device->vma_lo ||
3995           vma_heap == &device->vma_hi ||
3996           vma_heap == &device->vma_desc ||
3997           vma_heap == &device->vma_samplers ||
3998           vma_heap == &device->vma_trtt);
3999 
4000    const uint64_t addr_48b = intel_48b_address(address);
4001 
4002    pthread_mutex_lock(&device->vma_mutex);
4003 
4004    util_vma_heap_free(vma_heap, addr_48b, size);
4005 
4006    pthread_mutex_unlock(&device->vma_mutex);
4007 }
4008 
4009 VkResult anv_AllocateMemory(
4010     VkDevice                                    _device,
4011     const VkMemoryAllocateInfo*                 pAllocateInfo,
4012     const VkAllocationCallbacks*                pAllocator,
4013     VkDeviceMemory*                             pMem)
4014 {
4015    ANV_FROM_HANDLE(anv_device, device, _device);
4016    struct anv_physical_device *pdevice = device->physical;
4017    struct anv_device_memory *mem;
4018    VkResult result = VK_SUCCESS;
4019 
4020    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
4021 
4022    VkDeviceSize aligned_alloc_size =
4023       align64(pAllocateInfo->allocationSize, 4096);
4024 
4025    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
4026    const struct anv_memory_type *mem_type =
4027       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
4028    assert(mem_type->heapIndex < pdevice->memory.heap_count);
4029    struct anv_memory_heap *mem_heap =
4030       &pdevice->memory.heaps[mem_type->heapIndex];
4031 
4032    if (aligned_alloc_size > mem_heap->size)
4033       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4034 
4035    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
4036    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
4037       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4038 
4039    mem = vk_device_memory_create(&device->vk, pAllocateInfo,
4040                                  pAllocator, sizeof(*mem));
4041    if (mem == NULL)
4042       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4043 
4044    mem->type = mem_type;
4045    mem->map = NULL;
4046    mem->map_size = 0;
4047    mem->map_delta = 0;
4048 
4049    enum anv_bo_alloc_flags alloc_flags = 0;
4050 
4051    const VkImportMemoryFdInfoKHR *fd_info = NULL;
4052    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
4053    const struct wsi_memory_allocate_info *wsi_info = NULL;
4054    uint64_t client_address = 0;
4055 
4056    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
4057       /* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real enum
4058        * value, so use a cast to avoid a compiler warning.
4059        */
4060       switch ((uint32_t)ext->sType) {
4061       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
4062       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
4063       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
4064       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
4065       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
4066          /* handled by vk_device_memory_create */
4067          break;
4068 
4069       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
4070          fd_info = (void *)ext;
4071          break;
4072 
4073       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
4074          dedicated_info = (void *)ext;
4075          break;
4076 
4077       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
4078          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
4079             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
4080          client_address = addr_info->opaqueCaptureAddress;
4081          break;
4082       }
4083 
4084       case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
4085          wsi_info = (void *)ext;
4086          break;
4087 
4088       default:
4089          anv_debug_ignored_stype(ext->sType);
4090          break;
4091       }
4092    }
4093 
4094    /* If i915 reported mappable/non-mappable vram regions and the
4095     * application wants lmem to be mappable, then we need to use the
4096     * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
4097     */
4098    if (pdevice->vram_mappable.size > 0 &&
4099        pdevice->vram_non_mappable.size > 0 &&
4100        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
4101        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
4102       alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
4103 
4104    if (!mem_heap->is_local_mem)
4105       alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
4106 
4107    if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
4108       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
4109 
4110    if (mem->vk.alloc_flags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
4111       alloc_flags |= ANV_BO_ALLOC_PROTECTED;
4112 
4113    /* For now, always allocate AUX-TT aligned memory, regardless of dedicated
4114     * allocations. An application can, for example, suballocate a large
4115     * VkDeviceMemory and try to bind an image created with a CCS modifier. In
4116     * that case we cannot disable CCS if the alignment doesn't meet the AUX-TT
4117     * requirements, so we need to ensure both the VkDeviceMemory and the
4118     * alignment reported through vkGetImageMemoryRequirements() meet the
4119     * AUX-TT requirement.
4120     *
4121     * TODO: when we enable EXT_descriptor_buffer, we'll be able to drop the
4122     * AUX-TT alignment for that type of allocation.
4123     */
4124    if (device->info->has_aux_map)
4125       alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED;
4126 
4127    /* If the allocation is not dedicated nor a host pointer, allocate
4128     * additional CCS space.
4129     *
4130     * TODO: If we ever ship VK_EXT_descriptor_buffer (ahahah... :() we could
4131     * drop this flag in the descriptor buffer case as we don't need any
4132     * compression there.
4133     *
4134     * TODO: We could also create new memory types for allocations that don't
4135     * need any compression.
4136     */
4137    if (device->physical->alloc_aux_tt_mem &&
4138        dedicated_info == NULL &&
4139        mem->vk.host_ptr == NULL)
4140       alloc_flags |= ANV_BO_ALLOC_AUX_CCS;
4141 
4142    /* TODO: Android, ChromeOS and other applications may need another way to
4143     * allocate buffers that can be scanned out to display, but it should be
4144     * pretty easy to catch those as the Xe KMD driver will print warnings in
4145     * dmesg when scanning out buffers allocated without the proper flag set.
4146     */
4147    if (wsi_info)
4148       alloc_flags |= ANV_BO_ALLOC_SCANOUT;
4149 
4150    /* Anything imported or exported is EXTERNAL */
4151    if (mem->vk.export_handle_types || mem->vk.import_handle_type) {
4152       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
4153 
4154       /* wsi has its own way of synchronizing with the compositor */
4155       if (pdevice->instance->external_memory_implicit_sync &&
4156           !wsi_info && dedicated_info &&
4157           dedicated_info->image != VK_NULL_HANDLE) {
4158          ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
4159 
4160          /* Apply implicit sync to be compatible with clients relying on
4161           * implicit fencing. This matches the behavior in iris i915_batch
4162           * submit. An example client is VA-API (iHD), so only the dedicated
4163           * image scenario has to be covered.
4164           */
4165          alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC;
4166 
4167          /* For color attachment, apply IMPLICIT_WRITE so a client on the
4168           * consumer side relying on implicit fencing can have a fence to
4169           * wait for render complete.
4170           */
4171          if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
4172             alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE;
4173       }
4174    }
4175 
4176    if (mem->vk.ahardware_buffer) {
4177       result = anv_import_ahw_memory(_device, mem);
4178       if (result != VK_SUCCESS)
4179          goto fail;
4180 
4181       goto success;
4182    }
4183 
4184    /* The Vulkan spec permits handleType to be 0, in which case the struct is
4185     * ignored.
4186     */
4187    if (fd_info && fd_info->handleType) {
4188       /* At the moment, we support only the below handle types. */
4189       assert(fd_info->handleType ==
4190                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4191              fd_info->handleType ==
4192                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4193 
4194       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
4195                                     client_address, &mem->bo);
4196       if (result != VK_SUCCESS)
4197          goto fail;
4198 
4199       /* For security purposes, we reject importing the bo if it's smaller
4200        * than the requested allocation size.  This prevents a malicious client
4201        * from passing a buffer to a trusted client, lying about the size, and
4202        * telling the trusted client to try and texture from an image that goes
4203        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
4204        * in the trusted client.  The trusted client can protect itself against
4205        * this sort of attack but only if it can trust the buffer size.
4206        */
4207       if (mem->bo->size < aligned_alloc_size) {
4208          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
4209                             "aligned allocationSize too large for "
4210                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
4211                             "%"PRIu64"B > %"PRIu64"B",
4212                             aligned_alloc_size, mem->bo->size);
4213          anv_device_release_bo(device, mem->bo);
4214          goto fail;
4215       }
4216 
4217       /* From the Vulkan spec:
4218        *
4219        *    "Importing memory from a file descriptor transfers ownership of
4220        *    the file descriptor from the application to the Vulkan
4221        *    implementation. The application must not perform any operations on
4222        *    the file descriptor after a successful import."
4223        *
4224        * If the import fails, we leave the file descriptor open.
4225        */
4226       close(fd_info->fd);
4227       goto success;
4228    }
4229 
4230    if (mem->vk.host_ptr) {
4231       if (mem->vk.import_handle_type ==
4232           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
4233          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4234          goto fail;
4235       }
4236 
4237       assert(mem->vk.import_handle_type ==
4238              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
4239 
4240       result = anv_device_import_bo_from_host_ptr(device,
4241                                                   mem->vk.host_ptr,
4242                                                   mem->vk.size,
4243                                                   alloc_flags,
4244                                                   client_address,
4245                                                   &mem->bo);
4246       if (result != VK_SUCCESS)
4247          goto fail;
4248 
4249       goto success;
4250    }
4251 
4252    if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT)) {
4253       alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
4254    } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
4255       if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4256          alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
4257       if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
4258          alloc_flags |= ANV_BO_ALLOC_HOST_CACHED;
4259    } else {
4260       /* We still need to set some host mode so that a valid PAT index is chosen. */
4261       alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
4262    }
4263 
4264    /* Regular allocate (not importing memory). */
4265 
4266    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
4267                                 alloc_flags, client_address, &mem->bo);
4268    if (result != VK_SUCCESS)
4269       goto fail;
4270 
4271    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
4272       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
4273 
4274       /* Some legacy (non-modifiers) consumers need the tiling to be set on
4275        * the BO.  In this case, we have a dedicated allocation.
4276        */
4277       if (image->vk.wsi_legacy_scanout) {
4278          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
4279          result = anv_device_set_bo_tiling(device, mem->bo,
4280                                            surf->row_pitch_B,
4281                                            surf->tiling);
4282          if (result != VK_SUCCESS) {
4283             anv_device_release_bo(device, mem->bo);
4284             goto fail;
4285          }
4286       }
4287    }
4288 
4289  success:
4290    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
4291    if (mem_heap_used > mem_heap->size) {
4292       p_atomic_add(&mem_heap->used, -mem->bo->size);
4293       anv_device_release_bo(device, mem->bo);
4294       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
4295                          "Out of heap memory");
4296       goto fail;
4297    }
4298 
4299    pthread_mutex_lock(&device->mutex);
4300    list_addtail(&mem->link, &device->memory_objects);
4301    pthread_mutex_unlock(&device->mutex);
4302 
4303    ANV_RMV(heap_create, device, mem, false, 0);
4304 
4305    *pMem = anv_device_memory_to_handle(mem);
4306 
4307    return VK_SUCCESS;
4308 
4309  fail:
4310    vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
4311 
4312    return result;
4313 }
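
/* For reference, a minimal sketch of the kind of client-side allocation that
 * reaches the dedicated/exportable path handled above. Illustration only:
 * error handling is omitted and "device", "image", "reqs", "mem_type_index"
 * and "memory" are assumed to exist on the client side.
 *
 *    VkExportMemoryAllocateInfo export_info = {
 *       .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
 *       .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    VkMemoryDedicatedAllocateInfo dedicated = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
 *       .pNext = &export_info,
 *       .image = image,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &dedicated,
 *       .allocationSize = reqs.memoryRequirements.size,
 *       .memoryTypeIndex = mem_type_index,
 *    };
 *    vkAllocateMemory(device, &alloc_info, NULL, &memory);
 */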
4314 
4315 VkResult anv_GetMemoryFdKHR(
4316     VkDevice                                    device_h,
4317     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
4318     int*                                        pFd)
4319 {
4320    ANV_FROM_HANDLE(anv_device, dev, device_h);
4321    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
4322 
4323    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4324 
4325    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4326           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4327 
4328    return anv_device_export_bo(dev, mem->bo, pFd);
4329 }
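
/* A minimal sketch of the export/import round trip through this entry point,
 * for illustration only (error handling omitted; "device" and "memory" are
 * assumed client-side handles):
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 *    int fd;
 *    vkGetMemoryFdKHR(device, &get_fd, &fd);
 *
 *    VkImportMemoryFdInfoKHR import_fd = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *       .fd = fd,   // ownership moves to the implementation on success
 *    };
 *    // chain import_fd into a VkMemoryAllocateInfo passed to vkAllocateMemory
 */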
4330 
4331 VkResult anv_GetMemoryFdPropertiesKHR(
4332     VkDevice                                    _device,
4333     VkExternalMemoryHandleTypeFlagBits          handleType,
4334     int                                         fd,
4335     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
4336 {
4337    ANV_FROM_HANDLE(anv_device, device, _device);
4338 
4339    switch (handleType) {
4340    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4341       /* dma-buf can be imported as any memory type */
4342       pMemoryFdProperties->memoryTypeBits =
4343          (1 << device->physical->memory.type_count) - 1;
4344       return VK_SUCCESS;
4345 
4346    default:
4347       /* The valid usage section for this function says:
4348        *
4349        *    "handleType must not be one of the handle types defined as
4350        *    opaque."
4351        *
4352        * So opaque handle types fall into the default "unsupported" case.
4353        */
4354       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4355    }
4356 }
4357 
4358 VkResult anv_GetMemoryHostPointerPropertiesEXT(
4359    VkDevice                                    _device,
4360    VkExternalMemoryHandleTypeFlagBits          handleType,
4361    const void*                                 pHostPointer,
4362    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
4363 {
4364    ANV_FROM_HANDLE(anv_device, device, _device);
4365 
4366    assert(pMemoryHostPointerProperties->sType ==
4367           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
4368 
4369    switch (handleType) {
4370    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
4371       /* Host memory can be imported as any memory type. */
4372       pMemoryHostPointerProperties->memoryTypeBits =
4373          (1ull << device->physical->memory.type_count) - 1;
4374 
4375       return VK_SUCCESS;
4376 
4377    default:
4378       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
4379    }
4380 }
4381 
4382 void anv_FreeMemory(
4383     VkDevice                                    _device,
4384     VkDeviceMemory                              _mem,
4385     const VkAllocationCallbacks*                pAllocator)
4386 {
4387    ANV_FROM_HANDLE(anv_device, device, _device);
4388    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
4389 
4390    if (mem == NULL)
4391       return;
4392 
4393    pthread_mutex_lock(&device->mutex);
4394    list_del(&mem->link);
4395    pthread_mutex_unlock(&device->mutex);
4396 
4397    if (mem->map) {
4398       const VkMemoryUnmapInfoKHR unmap = {
4399          .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR,
4400          .memory = _mem,
4401       };
4402       anv_UnmapMemory2KHR(_device, &unmap);
4403    }
4404 
4405    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
4406                 -mem->bo->size);
4407 
4408    anv_device_release_bo(device, mem->bo);
4409 
4410    ANV_RMV(resource_destroy, device, mem);
4411 
4412    vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
4413 }
4414 
4415 VkResult anv_MapMemory2KHR(
4416     VkDevice                                    _device,
4417     const VkMemoryMapInfoKHR*                   pMemoryMapInfo,
4418     void**                                      ppData)
4419 {
4420    ANV_FROM_HANDLE(anv_device, device, _device);
4421    ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryMapInfo->memory);
4422 
4423    if (mem == NULL) {
4424       *ppData = NULL;
4425       return VK_SUCCESS;
4426    }
4427 
4428    if (mem->vk.host_ptr) {
4429       *ppData = mem->vk.host_ptr + pMemoryMapInfo->offset;
4430       return VK_SUCCESS;
4431    }
4432 
4433    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
4434     *
4435     *  * memory must have been created with a memory type that reports
4436     *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
4437     */
4438    if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
4439       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4440                        "Memory object not mappable.");
4441    }
4442 
4443    assert(pMemoryMapInfo->size > 0);
4444    const VkDeviceSize offset = pMemoryMapInfo->offset;
4445    const VkDeviceSize size =
4446       vk_device_memory_range(&mem->vk, pMemoryMapInfo->offset,
4447                                        pMemoryMapInfo->size);
4448 
4449    if (size != (size_t)size) {
4450       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4451                        "requested size 0x%"PRIx64" does not fit in %u bits",
4452                        size, (unsigned)(sizeof(size_t) * 8));
4453    }
4454 
4455    /* From the Vulkan 1.2.194 spec:
4456     *
4457     *    "memory must not be currently host mapped"
4458     */
4459    if (mem->map != NULL) {
4460       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4461                        "Memory object already mapped.");
4462    }
4463 
4464    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
4465    uint64_t map_offset;
4466    if (!device->physical->info.has_mmap_offset)
4467       map_offset = offset & ~4095ull;
4468    else
4469       map_offset = 0;
4470    assert(offset >= map_offset);
4471    uint64_t map_size = (offset + size) - map_offset;
4472 
4473    /* Let's map whole pages */
4474    map_size = align64(map_size, 4096);
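   /* Worked example (assuming the legacy, non-mmap_offset path above): a
    * client mapping offset 0x1234 with size 0x100 gives map_offset = 0x1000,
    * map_size = align(0x334, 4096) = 0x1000 and map_delta = 0x234, so the
    * pointer returned below is map + 0x234.
    */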
4475 
4476    void *map;
4477    VkResult result = anv_device_map_bo(device, mem->bo, map_offset, map_size, &map);
4478    if (result != VK_SUCCESS)
4479       return result;
4480 
4481    mem->map = map;
4482    mem->map_size = map_size;
4483    mem->map_delta = (offset - map_offset);
4484    *ppData = mem->map + mem->map_delta;
4485 
4486    return VK_SUCCESS;
4487 }
4488 
4489 VkResult anv_UnmapMemory2KHR(
4490     VkDevice                                    _device,
4491     const VkMemoryUnmapInfoKHR*                 pMemoryUnmapInfo)
4492 {
4493    ANV_FROM_HANDLE(anv_device, device, _device);
4494    ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryUnmapInfo->memory);
4495 
4496    if (mem == NULL || mem->vk.host_ptr)
4497       return VK_SUCCESS;
4498 
4499    anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
4500 
4501    mem->map = NULL;
4502    mem->map_size = 0;
4503    mem->map_delta = 0;
4504 
4505    return VK_SUCCESS;
4506 }
4507 
4508 VkResult anv_FlushMappedMemoryRanges(
4509     VkDevice                                    _device,
4510     uint32_t                                    memoryRangeCount,
4511     const VkMappedMemoryRange*                  pMemoryRanges)
4512 {
4513 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
4514    ANV_FROM_HANDLE(anv_device, device, _device);
4515 
4516    if (!device->physical->memory.need_flush)
4517       return VK_SUCCESS;
4518 
4519    /* Make sure the writes we're flushing have landed. */
4520    __builtin_ia32_mfence();
4521 
4522    for (uint32_t i = 0; i < memoryRangeCount; i++) {
4523       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4524       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4525          continue;
4526 
4527       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4528       if (map_offset >= mem->map_size)
4529          continue;
4530 
4531       intel_flush_range(mem->map + map_offset,
4532                         MIN2(pMemoryRanges[i].size,
4533                              mem->map_size - map_offset));
4534    }
4535 #endif
4536    return VK_SUCCESS;
4537 }
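
/* For reference, the client-side pattern that makes this flush necessary on
 * host-visible but non-coherent memory types. Illustration only; "mapped_ptr",
 * "data", "size", "device" and "memory" are assumed client-side names:
 *
 *    memcpy(mapped_ptr, data, size);               // CPU writes
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(device, 1, &range); // make writes GPU-visible
 */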
4538 
4539 VkResult anv_InvalidateMappedMemoryRanges(
4540     VkDevice                                    _device,
4541     uint32_t                                    memoryRangeCount,
4542     const VkMappedMemoryRange*                  pMemoryRanges)
4543 {
4544 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
4545    ANV_FROM_HANDLE(anv_device, device, _device);
4546 
4547    if (!device->physical->memory.need_flush)
4548       return VK_SUCCESS;
4549 
4550    for (uint32_t i = 0; i < memoryRangeCount; i++) {
4551       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4552       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4553          continue;
4554 
4555       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4556       if (map_offset >= mem->map_size)
4557          continue;
4558 
4559       intel_invalidate_range(mem->map + map_offset,
4560                              MIN2(pMemoryRanges[i].size,
4561                                   mem->map_size - map_offset));
4562    }
4563 
4564    /* Make sure no reads get moved up above the invalidate. */
4565    __builtin_ia32_mfence();
4566 #endif
4567    return VK_SUCCESS;
4568 }
4569 
4570 void anv_GetDeviceMemoryCommitment(
4571     VkDevice                                    device,
4572     VkDeviceMemory                              memory,
4573     VkDeviceSize*                               pCommittedMemoryInBytes)
4574 {
4575    *pCommittedMemoryInBytes = 0;
4576 }
4577 
4578 static void
4579 anv_bind_buffer_memory(struct anv_device *device,
4580                        const VkBindBufferMemoryInfo *pBindInfo)
4581 {
4582    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
4583    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
4584 
4585    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
4586    assert(!anv_buffer_is_sparse(buffer));
4587 
4588    const VkBindMemoryStatusKHR *bind_status =
4589       vk_find_struct_const(pBindInfo->pNext, BIND_MEMORY_STATUS_KHR);
4590 
4591    if (mem) {
4592       assert(pBindInfo->memoryOffset < mem->vk.size);
4593       assert(mem->vk.size - pBindInfo->memoryOffset >= buffer->vk.size);
4594       buffer->address = (struct anv_address) {
4595          .bo = mem->bo,
4596          .offset = pBindInfo->memoryOffset,
4597       };
4598    } else {
4599       buffer->address = ANV_NULL_ADDRESS;
4600    }
4601 
4602    ANV_RMV(buffer_bind, device, buffer);
4603 
4604    if (bind_status)
4605       *bind_status->pResult = VK_SUCCESS;
4606 }
4607 
4608 VkResult anv_BindBufferMemory2(
4609     VkDevice                                    _device,
4610     uint32_t                                    bindInfoCount,
4611     const VkBindBufferMemoryInfo*               pBindInfos)
4612 {
4613    ANV_FROM_HANDLE(anv_device, device, _device);
4614 
4615    for (uint32_t i = 0; i < bindInfoCount; i++)
4616       anv_bind_buffer_memory(device, &pBindInfos[i]);
4617 
4618    return VK_SUCCESS;
4619 }
4620 
4621 // Event functions
4622 
4623 VkResult anv_CreateEvent(
4624     VkDevice                                    _device,
4625     const VkEventCreateInfo*                    pCreateInfo,
4626     const VkAllocationCallbacks*                pAllocator,
4627     VkEvent*                                    pEvent)
4628 {
4629    ANV_FROM_HANDLE(anv_device, device, _device);
4630    struct anv_event *event;
4631 
4632    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
4633 
4634    event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
4635                            VK_OBJECT_TYPE_EVENT);
4636    if (event == NULL)
4637       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4638 
4639    event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
4640                                        sizeof(uint64_t), 8);
4641    *(uint64_t *)event->state.map = VK_EVENT_RESET;
4642 
4643    ANV_RMV(event_create, device, event, pCreateInfo->flags, false);
4644 
4645    *pEvent = anv_event_to_handle(event);
4646 
4647    return VK_SUCCESS;
4648 }
4649 
4650 void anv_DestroyEvent(
4651     VkDevice                                    _device,
4652     VkEvent                                     _event,
4653     const VkAllocationCallbacks*                pAllocator)
4654 {
4655    ANV_FROM_HANDLE(anv_device, device, _device);
4656    ANV_FROM_HANDLE(anv_event, event, _event);
4657 
4658    if (!event)
4659       return;
4660 
4661    ANV_RMV(resource_destroy, device, event);
4662 
4663    anv_state_pool_free(&device->dynamic_state_pool, event->state);
4664 
4665    vk_object_free(&device->vk, pAllocator, event);
4666 }
4667 
4668 VkResult anv_GetEventStatus(
4669     VkDevice                                    _device,
4670     VkEvent                                     _event)
4671 {
4672    ANV_FROM_HANDLE(anv_device, device, _device);
4673    ANV_FROM_HANDLE(anv_event, event, _event);
4674 
4675    if (vk_device_is_lost(&device->vk))
4676       return VK_ERROR_DEVICE_LOST;
4677 
4678    return *(uint64_t *)event->state.map;
4679 }
4680 
4681 VkResult anv_SetEvent(
4682     VkDevice                                    _device,
4683     VkEvent                                     _event)
4684 {
4685    ANV_FROM_HANDLE(anv_event, event, _event);
4686 
4687    *(uint64_t *)event->state.map = VK_EVENT_SET;
4688 
4689    return VK_SUCCESS;
4690 }
4691 
4692 VkResult anv_ResetEvent(
4693     VkDevice                                    _device,
4694     VkEvent                                     _event)
4695 {
4696    ANV_FROM_HANDLE(anv_event, event, _event);
4697 
4698    *(uint64_t *)event->state.map = VK_EVENT_RESET;
4699 
4700    return VK_SUCCESS;
4701 }
4702 
4703 // Buffer functions
4704 
4705 static void
4706 anv_get_buffer_memory_requirements(struct anv_device *device,
4707                                    VkBufferCreateFlags flags,
4708                                    VkDeviceSize size,
4709                                    VkBufferUsageFlags usage,
4710                                    bool is_sparse,
4711                                    VkMemoryRequirements2* pMemoryRequirements)
4712 {
4713    /* The Vulkan spec (git aaed022) says:
4714     *
4715     *    memoryTypeBits is a bitfield and contains one bit set for every
4716     *    supported memory type for the resource. The bit `1<<i` is set if and
4717     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
4718     *    structure for the physical device is supported.
4719     */
4720    uint32_t memory_types = 0;
4721    for (uint32_t i = 0; i < device->physical->memory.type_count; i++) {
4722       /* Have the protected buffer bit match only the memory types with the
4723        * equivalent bit.
4724        */
4725       if (!!(flags & VK_BUFFER_CREATE_PROTECTED_BIT) !=
4726           !!(device->physical->memory.types[i].propertyFlags &
4727              VK_MEMORY_PROPERTY_PROTECTED_BIT))
4728          continue;
4729 
4730       memory_types |= 1ull << i;
4731    }
4732 
4733    /* The GPU appears to write back to main memory in cachelines. Writes to a
4734     * buffer should not clobber writes to another buffer, so make sure
4735     * those end up in different cachelines.
4736     */
4737    uint32_t alignment = 64;
4738 
4739    /* From the spec, section "Sparse Buffer and Fully-Resident Image Block
4740     * Size":
4741     *   "The sparse block size in bytes for sparse buffers and fully-resident
4742     *    images is reported as VkMemoryRequirements::alignment. alignment
4743     *    represents both the memory alignment requirement and the binding
4744     *    granularity (in bytes) for sparse resources."
4745     */
4746    if (is_sparse) {
4747       alignment = ANV_SPARSE_BLOCK_SIZE;
4748       size = align64(size, alignment);
4749    }
4750 
4751    pMemoryRequirements->memoryRequirements.size = size;
4752    pMemoryRequirements->memoryRequirements.alignment = alignment;
4753 
4754    /* Storage and Uniform buffers should have their size aligned to
4755     * 32 bits to avoid boundary checks when the last DWord is not complete.
4756     * This ensures that no internal padding is needed for
4757     * 16-bit types.
4758     */
4759    if (device->robust_buffer_access &&
4760        (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
4761         usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
4762       pMemoryRequirements->memoryRequirements.size = align64(size, 4);
4763 
4764    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
4765 
4766    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
4767       switch (ext->sType) {
4768       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
4769          VkMemoryDedicatedRequirements *requirements = (void *)ext;
4770          requirements->prefersDedicatedAllocation = false;
4771          requirements->requiresDedicatedAllocation = false;
4772          break;
4773       }
4774 
4775       default:
4776          anv_debug_ignored_stype(ext->sType);
4777          break;
4778       }
4779    }
4780 }
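
/* For reference, clients typically consume the memoryTypeBits computed above
 * like this (illustration only, not driver code):
 *
 *    uint32_t find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
 *                              uint32_t type_bits, VkMemoryPropertyFlags want)
 *    {
 *       for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
 *          if ((type_bits & (1u << i)) &&
 *              (props->memoryTypes[i].propertyFlags & want) == want)
 *             return i;
 *       }
 *       return UINT32_MAX; // no suitable memory type
 *    }
 */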
4781 
4782 void anv_GetDeviceBufferMemoryRequirements(
4783     VkDevice                                    _device,
4784     const VkDeviceBufferMemoryRequirements*     pInfo,
4785     VkMemoryRequirements2*                      pMemoryRequirements)
4786 {
4787    ANV_FROM_HANDLE(anv_device, device, _device);
4788    const bool is_sparse =
4789       pInfo->pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
4790 
4791    if (!device->physical->has_sparse &&
4792        INTEL_DEBUG(DEBUG_SPARSE) &&
4793        pInfo->pCreateInfo->flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
4794                                     VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT |
4795                                     VK_BUFFER_CREATE_SPARSE_ALIASED_BIT))
4796       fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__,
4797               __LINE__, pInfo->pCreateInfo->flags);
4798 
4799    anv_get_buffer_memory_requirements(device,
4800                                       pInfo->pCreateInfo->flags,
4801                                       pInfo->pCreateInfo->size,
4802                                       pInfo->pCreateInfo->usage,
4803                                       is_sparse,
4804                                       pMemoryRequirements);
4805 }
4806 
4807 VkResult anv_CreateBuffer(
4808     VkDevice                                    _device,
4809     const VkBufferCreateInfo*                   pCreateInfo,
4810     const VkAllocationCallbacks*                pAllocator,
4811     VkBuffer*                                   pBuffer)
4812 {
4813    ANV_FROM_HANDLE(anv_device, device, _device);
4814    struct anv_buffer *buffer;
4815 
4816    if (!device->physical->has_sparse &&
4817        INTEL_DEBUG(DEBUG_SPARSE) &&
4818        pCreateInfo->flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
4819                              VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT |
4820                              VK_BUFFER_CREATE_SPARSE_ALIASED_BIT))
4821       fprintf(stderr, "=== %s %s:%d flags:0x%08x\n", __func__, __FILE__,
4822               __LINE__, pCreateInfo->flags);
4823 
4824    /* Don't allow creating buffers bigger than our address space.  The real
4825     * issue here is that we may align up the buffer size and we don't want
4826     * that to cause roll-over.  However, no one has any business
4827     * allocating a buffer larger than our GTT size.
4828     */
4829    if (pCreateInfo->size > device->physical->gtt_size)
4830       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4831 
4832    buffer = vk_buffer_create(&device->vk, pCreateInfo,
4833                              pAllocator, sizeof(*buffer));
4834    if (buffer == NULL)
4835       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4836 
4837    buffer->address = ANV_NULL_ADDRESS;
4838    if (anv_buffer_is_sparse(buffer)) {
4839       const VkBufferOpaqueCaptureAddressCreateInfo *opaque_addr_info =
4840          vk_find_struct_const(pCreateInfo->pNext,
4841                               BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
4842       enum anv_bo_alloc_flags alloc_flags = 0;
4843       uint64_t client_address = 0;
4844 
4845       if (opaque_addr_info) {
4846          alloc_flags = ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
4847          client_address = opaque_addr_info->opaqueCaptureAddress;
4848       }
4849 
4850       VkResult result = anv_init_sparse_bindings(device, buffer->vk.size,
4851                                                  &buffer->sparse_data,
4852                                                  alloc_flags, client_address,
4853                                                  &buffer->address);
4854       if (result != VK_SUCCESS) {
4855          vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
4856          return result;
4857       }
4858    }
4859 
4860    ANV_RMV(buffer_create, device, false, buffer);
4861 
4862    *pBuffer = anv_buffer_to_handle(buffer);
4863 
4864    return VK_SUCCESS;
4865 }
4866 
4867 void anv_DestroyBuffer(
4868     VkDevice                                    _device,
4869     VkBuffer                                    _buffer,
4870     const VkAllocationCallbacks*                pAllocator)
4871 {
4872    ANV_FROM_HANDLE(anv_device, device, _device);
4873    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
4874 
4875    if (!buffer)
4876       return;
4877 
4878    ANV_RMV(buffer_destroy, device, buffer);
4879 
4880    if (anv_buffer_is_sparse(buffer)) {
4881       assert(buffer->address.offset == buffer->sparse_data.address);
4882       anv_free_sparse_bindings(device, &buffer->sparse_data);
4883    }
4884 
4885    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
4886 }
4887 
4888 VkDeviceAddress anv_GetBufferDeviceAddress(
4889     VkDevice                                    device,
4890     const VkBufferDeviceAddressInfo*            pInfo)
4891 {
4892    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4893 
4894    assert(!anv_address_is_null(buffer->address));
4895 
4896    return anv_address_physical(buffer->address);
4897 }
4898 
4899 uint64_t anv_GetBufferOpaqueCaptureAddress(
4900     VkDevice                                    device,
4901     const VkBufferDeviceAddressInfo*            pInfo)
4902 {
4903    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4904 
4905    return anv_address_physical(buffer->address);
4906 }
4907 
4908 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
4909     VkDevice                                    device,
4910     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
4911 {
4912    ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
4913 
4914    assert(memory->bo->alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS);
4915 
4916    return intel_48b_address(memory->bo->offset);
4917 }
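
/* For reference, the capture/replay pairing this supports, sketched as a
 * client would use it (illustration only; "captured_address" is a value
 * recorded from this entry point during capture):
 *
 *    VkMemoryOpaqueCaptureAddressAllocateInfo replay_addr = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,
 *       .opaqueCaptureAddress = captured_address,
 *    };
 *    // chain replay_addr into VkMemoryAllocateInfo::pNext together with a
 *    // VkMemoryAllocateFlagsInfo using VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
 *    // VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT
 */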
4918 
4919 void
4920 anv_fill_buffer_surface_state(struct anv_device *device,
4921                               void *surface_state_ptr,
4922                               enum isl_format format,
4923                               struct isl_swizzle swizzle,
4924                               isl_surf_usage_flags_t usage,
4925                               struct anv_address address,
4926                               uint32_t range, uint32_t stride)
4927 {
4928    isl_buffer_fill_state(&device->isl_dev, surface_state_ptr,
4929                          .address = anv_address_physical(address),
4930                          .mocs = isl_mocs(&device->isl_dev, usage,
4931                                           address.bo && anv_bo_is_external(address.bo)),
4932                          .size_B = range,
4933                          .format = format,
4934                          .swizzle = swizzle,
4935                          .stride_B = stride);
4936 }
4937 
4938 void anv_DestroySampler(
4939     VkDevice                                    _device,
4940     VkSampler                                   _sampler,
4941     const VkAllocationCallbacks*                pAllocator)
4942 {
4943    ANV_FROM_HANDLE(anv_device, device, _device);
4944    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
4945 
4946    if (!sampler)
4947       return;
4948 
4949    if (sampler->bindless_state.map) {
4950       anv_state_pool_free(&device->dynamic_state_pool,
4951                           sampler->bindless_state);
4952    }
4953 
4954    if (sampler->custom_border_color.map) {
4955       anv_state_reserved_pool_free(&device->custom_border_colors,
4956                                    sampler->custom_border_color);
4957    }
4958 
4959    vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
4960 }
4961 
4962 static const VkTimeDomainKHR anv_time_domains[] = {
4963    VK_TIME_DOMAIN_DEVICE_KHR,
4964    VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
4965 #ifdef CLOCK_MONOTONIC_RAW
4966    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
4967 #endif
4968 };
4969 
4970 VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(
4971    VkPhysicalDevice                             physicalDevice,
4972    uint32_t                                     *pTimeDomainCount,
4973    VkTimeDomainKHR                              *pTimeDomains)
4974 {
4975    int d;
4976    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
4977 
4978    for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
4979       vk_outarray_append_typed(VkTimeDomainKHR, &out, i) {
4980          *i = anv_time_domains[d];
4981       }
4982    }
4983 
4984    return vk_outarray_status(&out);
4985 }
4986 
4987 static inline clockid_t
4988 anv_get_default_cpu_clock_id(void)
4989 {
4990 #ifdef CLOCK_MONOTONIC_RAW
4991    return CLOCK_MONOTONIC_RAW;
4992 #else
4993    return CLOCK_MONOTONIC;
4994 #endif
4995 }
4996 
4997 static inline clockid_t
4998 vk_time_domain_to_clockid(VkTimeDomainKHR domain)
4999 {
5000    switch (domain) {
5001 #ifdef CLOCK_MONOTONIC_RAW
5002    case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
5003       return CLOCK_MONOTONIC_RAW;
5004 #endif
5005    case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
5006       return CLOCK_MONOTONIC;
5007    default:
5008       unreachable("Missing");
5009       return CLOCK_MONOTONIC;
5010    }
5011 }
5012 
5013 static inline bool
5014 is_cpu_time_domain(VkTimeDomainKHR domain)
5015 {
5016    return domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR ||
5017           domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
5018 }
5019 
5020 static inline bool
5021 is_gpu_time_domain(VkTimeDomainKHR domain)
5022 {
5023    return domain == VK_TIME_DOMAIN_DEVICE_KHR;
5024 }
5025 
5026 VkResult anv_GetCalibratedTimestampsKHR(
5027    VkDevice                                     _device,
5028    uint32_t                                     timestampCount,
5029    const VkCalibratedTimestampInfoKHR           *pTimestampInfos,
5030    uint64_t                                     *pTimestamps,
5031    uint64_t                                     *pMaxDeviation)
5032 {
5033    ANV_FROM_HANDLE(anv_device, device, _device);
5034    const uint64_t timestamp_frequency = device->info->timestamp_frequency;
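   /* GPU timestamp tick period in nanoseconds (rounded up); used below as a
    * lower bound on the clock granularity when computing *pMaxDeviation.
    */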
5035    const uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
5036    uint32_t d, increment;
5037    uint64_t begin, end;
5038    uint64_t max_clock_period = 0;
5039    const enum intel_kmd_type kmd_type = device->physical->info.kmd_type;
5040    const bool has_correlate_timestamp = kmd_type == INTEL_KMD_TYPE_XE;
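   /* Only the Xe KMD exposes a combined CPU/GPU timestamp read here; with
    * other KMDs each time domain is sampled separately below.
    */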
5041    clockid_t cpu_clock_id = -1;
5042 
5043    begin = end = vk_clock_gettime(anv_get_default_cpu_clock_id());
5044 
5045    for (d = 0, increment = 1; d < timestampCount; d += increment) {
5046       const VkTimeDomainKHR current = pTimestampInfos[d].timeDomain;
5047       /* If we have a request pattern like this:
5048        * - domain0 = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR or VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR
5049        * - domain1 = VK_TIME_DOMAIN_DEVICE_KHR
5050        * - domain2 = domain0 (optional)
5051        *
5052        * We can combine all of those into a single ioctl for maximum accuracy.
5053        */
5054       if (has_correlate_timestamp && (d + 1) < timestampCount) {
5055          const VkTimeDomainKHR next = pTimestampInfos[d + 1].timeDomain;
5056 
5057          if ((is_cpu_time_domain(current) && is_gpu_time_domain(next)) ||
5058              (is_gpu_time_domain(current) && is_cpu_time_domain(next))) {
5059             /* We'll consume at least 2 elements. */
5060             increment = 2;
5061 
5062             if (is_cpu_time_domain(current))
5063                cpu_clock_id = vk_time_domain_to_clockid(current);
5064             else
5065                cpu_clock_id = vk_time_domain_to_clockid(next);
5066 
5067             uint64_t cpu_timestamp, gpu_timestamp, cpu_delta_timestamp, cpu_end_timestamp;
5068             if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
5069                                                             kmd_type,
5070                                                             INTEL_ENGINE_CLASS_RENDER,
5071                                                             0 /* engine_instance */,
5072                                                             cpu_clock_id,
5073                                                             &cpu_timestamp,
5074                                                             &gpu_timestamp,
5075                                                             &cpu_delta_timestamp))
5076                return vk_device_set_lost(&device->vk, "Failed to read correlate timestamp %m");
5077 
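            /* cpu_delta_timestamp is assumed to be the CPU time that elapsed
             * while the GPU timestamp was captured, so cpu_end_timestamp
             * approximates the CPU clock just after the GPU read.
             */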
5078             cpu_end_timestamp = cpu_timestamp + cpu_delta_timestamp;
5079             if (is_cpu_time_domain(current)) {
5080                pTimestamps[d] = cpu_timestamp;
5081                pTimestamps[d + 1] = gpu_timestamp;
5082             } else {
5083                pTimestamps[d] = gpu_timestamp;
5084                pTimestamps[d + 1] = cpu_end_timestamp;
5085             }
5086             max_clock_period = MAX2(max_clock_period, device_period);
5087 
5088             /* If we can consume a third element */
5089             if ((d + 2) < timestampCount &&
5090                 is_cpu_time_domain(current) &&
5091                 current == pTimestampInfos[d + 2].timeDomain) {
5092                pTimestamps[d + 2] = cpu_end_timestamp;
5093                increment++;
5094             }
5095 
5096             /* If this is the first element, we can replace begin. */
5097             if (d == 0 && cpu_clock_id == anv_get_default_cpu_clock_id())
5098                begin = cpu_timestamp;
5099 
5100             /* If we're in the same clock domain as begin/end, we can set the end. */
5101             if (cpu_clock_id == anv_get_default_cpu_clock_id())
5102                end = cpu_end_timestamp;
5103 
5104             continue;
5105          }
5106       }
5107 
5108       /* Fall back to the regular method. */
5109       increment = 1;
5110       switch (current) {
5111       case VK_TIME_DOMAIN_DEVICE_KHR:
5112          if (!intel_gem_read_render_timestamp(device->fd,
5113                                               device->info->kmd_type,
5114                                               &pTimestamps[d])) {
5115             return vk_device_set_lost(&device->vk, "Failed to read the "
5116                                       "TIMESTAMP register: %m");
5117          }
5118          max_clock_period = MAX2(max_clock_period, device_period);
5119          break;
5120       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
5121          pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
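         /* vk_clock_gettime() reports nanoseconds, so use a 1ns period. */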
5122          max_clock_period = MAX2(max_clock_period, 1);
5123          break;
5124 
5125 #ifdef CLOCK_MONOTONIC_RAW
5126       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
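         /* begin was sampled with the default CPU clock, which is
          * CLOCK_MONOTONIC_RAW when available, so it can be reused here.
          */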
5127          pTimestamps[d] = begin;
5128          break;
5129 #endif
5130       default:
5131          pTimestamps[d] = 0;
5132          break;
5133       }
5134    }
5135 
5136    /* If the last timestamp was not obtained through the has_correlate_timestamp
5137     * path, or if it was but its CPU clock is not the default one, read the time again.
5138     */
5139    if (increment == 1 || cpu_clock_id != anv_get_default_cpu_clock_id())
5140       end = vk_clock_gettime(anv_get_default_cpu_clock_id());
5141 
5142    *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
5143 
5144    return VK_SUCCESS;
5145 }
5146 
5147 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
5148     VkPhysicalDevice                            physicalDevice,
5149     VkSampleCountFlagBits                       samples,
5150     VkMultisamplePropertiesEXT*                 pMultisampleProperties)
5151 {
5152    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
5153 
5154    assert(pMultisampleProperties->sType ==
5155           VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
5156 
5157    VkExtent2D grid_size;
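   /* Only a 1x1 sample-location grid is supported; unsupported sample counts
    * report a 0x0 grid.
    */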
5158    if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
5159       grid_size.width = 1;
5160       grid_size.height = 1;
5161    } else {
5162       grid_size.width = 0;
5163       grid_size.height = 0;
5164    }
5165    pMultisampleProperties->maxSampleLocationGridSize = grid_size;
5166 
5167    vk_foreach_struct(ext, pMultisampleProperties->pNext)
5168       anv_debug_ignored_stype(ext->sType);
5169 }
5170 
5171 VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
5172     VkPhysicalDevice                            physicalDevice,
5173     uint32_t*                                   pFragmentShadingRateCount,
5174     VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
5175 {
5176    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
5177    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
5178                           pFragmentShadingRates, pFragmentShadingRateCount);
5179 
5180 #define append_rate(_samples, _width, _height)                                      \
5181    do {                                                                             \
5182       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
5183          __r->sampleCounts = _samples;                                              \
5184          __r->fragmentSize = (VkExtent2D) {                                         \
5185             .width = _width,                                                        \
5186             .height = _height,                                                      \
5187          };                                                                         \
5188       }                                                                             \
5189    } while (0)
5190 
5191    VkSampleCountFlags sample_counts =
5192       isl_device_get_sample_counts(&physical_device->isl_dev);
5193 
5194    /* BSpec 47003: There are a number of restrictions on the sample count
5195     * based on the coarse pixel size.
5196     */
5197    static const VkSampleCountFlags cp_size_sample_limits[] = {
5198       [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
5199              ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
5200       [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
5201       [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
5202       [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
5203       [16] = ISL_SAMPLE_COUNT_1_BIT,
5204    };
5205 
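   /* Enumerate coarse pixel sizes {4,2,1}x{4,2,1} from largest to smallest;
    * cp_size_sample_limits above is indexed by the coarse pixel area (x * y).
    */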
5206    for (uint32_t x = 4; x >= 1; x /= 2) {
5207        for (uint32_t y = 4; y >= 1; y /= 2) {
5208           if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
5209              /* BSpec 47003:
5210               *   "CPsize 1x4 and 4x1 are not supported"
5211               */
5212              if ((x == 1 && y == 4) || (x == 4 && y == 1))
5213                 continue;
5214 
5215              /* For size {1, 1}, the sample count must be ~0
5216               *
5217              * 4x2 is also a special case.
5218               */
5219              if (x == 1 && y == 1)
5220                 append_rate(~0, x, y);
5221              else if (x == 4 && y == 2)
5222                 append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
5223              else
5224                 append_rate(cp_size_sample_limits[x * y], x, y);
5225           } else {
5226              /* For size {1, 1}, the sample count must be ~0 */
5227              if (x == 1 && y == 1)
5228                 append_rate(~0, x, y);
5229              else
5230                 append_rate(sample_counts, x, y);
5231           }
5232        }
5233    }
5234 
5235 #undef append_rate
5236 
5237    return vk_outarray_status(&out);
5238 }
5239 
5240 const struct intel_device_info_pat_entry *
5241 anv_device_get_pat_entry(struct anv_device *device,
5242                          enum anv_bo_alloc_flags alloc_flags)
5243 {
5244    if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
5245       return &device->info->pat.cached_coherent;
5246 
5247    /* PAT indices have no actual effect on DG2 and DG1: smem caches will always
5248     * be snooped by the GPU and lmem will always be WC.
5249     * This might change on future discrete platforms.
5250     */
5251    if (anv_physical_device_has_vram(device->physical)) {
5252       if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
5253          return &device->info->pat.cached_coherent;
5254       return &device->info->pat.writecombining;
5255    }
5256 
5257    if ((alloc_flags & (ANV_BO_ALLOC_HOST_CACHED_COHERENT)) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
5258       return &device->info->pat.cached_coherent;
5259    else if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT))
5260       return &device->info->pat.scanout;
5261    else if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
5262       return &device->info->pat.writeback_incoherent;
5263    else
5264       return &device->info->pat.writecombining;
5265 }
5266 
5267 static VkComponentTypeKHR
5268 convert_component_type(enum intel_cooperative_matrix_component_type t)
5269 {
5270    switch (t) {
5271    case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
5272    case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
5273    case INTEL_CMAT_SINT32:  return VK_COMPONENT_TYPE_SINT32_KHR;
5274    case INTEL_CMAT_SINT8:   return VK_COMPONENT_TYPE_SINT8_KHR;
5275    case INTEL_CMAT_UINT32:  return VK_COMPONENT_TYPE_UINT32_KHR;
5276    case INTEL_CMAT_UINT8:   return VK_COMPONENT_TYPE_UINT8_KHR;
5277    }
5278    unreachable("invalid cooperative matrix component type in configuration");
5279 }
5280 
5281 static VkScopeKHR
5282 convert_scope(enum intel_cmat_scope scope)
5283 {
5284    switch (scope) {
5285    case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
5286    default:
5287       unreachable("invalid cooperative matrix scope in configuration");
5288    }
5289 }
5290 
5291 VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
5292    VkPhysicalDevice                            physicalDevice,
5293    uint32_t*                                   pPropertyCount,
5294    VkCooperativeMatrixPropertiesKHR*           pProperties)
5295 {
5296    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
5297    const struct intel_device_info *devinfo = &pdevice->info;
5298 
5299    assert(anv_has_cooperative_matrix(pdevice));
5300 
5301    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
5302 
5303    for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
5304       const struct intel_cooperative_matrix_configuration *cfg =
5305          &devinfo->cooperative_matrix_configurations[i];
5306 
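      /* The configuration table is terminated by an entry with scope NONE. */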
5307       if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
5308          break;
5309 
5310       vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
5311          prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
5312 
5313          prop->MSize = cfg->m;
5314          prop->NSize = cfg->n;
5315          prop->KSize = cfg->k;
5316 
5317          prop->AType      = convert_component_type(cfg->a);
5318          prop->BType      = convert_component_type(cfg->b);
5319          prop->CType      = convert_component_type(cfg->c);
5320          prop->ResultType = convert_component_type(cfg->result);
5321 
5322          prop->saturatingAccumulation = VK_FALSE;
5323          prop->scope = convert_scope(cfg->scope);
5324       }
5325 
5326       /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
5327        *
5328        *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
5329        *    cooperative matrix operand must be present if and only if
5330        *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
5331        *    VK_TRUE.
5332        *
5333        * As a result, we have to advertise integer configs both with and
5334        * without this flag set.
5335        *
5336        * The DPAS instruction does not support the .sat modifier, so only
5337        * advertise the configurations when the DPAS would be lowered.
5338        *
5339        * FINISHME: It should be possible to do better than full lowering on
5340        * platforms that support DPAS. Emit a DPAS with a NULL accumulator
5341        * argument, then perform the correct sequence of saturating add
5342        * instructions.
5343        */
5344       if (cfg->a != INTEL_CMAT_FLOAT16 &&
5345           (devinfo->verx10 < 125 || debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
5346          vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
5347             prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
5348 
5349             prop->MSize = cfg->m;
5350             prop->NSize = cfg->n;
5351             prop->KSize = cfg->k;
5352 
5353             prop->AType      = convert_component_type(cfg->a);
5354             prop->BType      = convert_component_type(cfg->b);
5355             prop->CType      = convert_component_type(cfg->c);
5356             prop->ResultType = convert_component_type(cfg->result);
5357 
5358             prop->saturatingAccumulation = VK_TRUE;
5359             prop->scope = convert_scope(cfg->scope);
5360          }
5361       }
5362    }
5363 
5364    return vk_outarray_status(&out);
5365 }
5366