/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/u_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/i915/intel_defines.h"
#include "common/intel_debug_identifier.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen70_pack.h"
#include "genxml/genX_bits.h"

static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_KHR_PRESENT_WAIT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
      DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
      DRI_CONF_NO_16BIT(false)
      DRI_CONF_HASVK_OVERRIDE_API_VERSION(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_QUALITY
      DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
   DRI_CONF_SECTION_END
};

/* This is probably far too big but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH    4096

/* Render engine timestamp register */
#define TIMESTAMP 0x2358

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID_STRICT
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#define ANV_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif
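/* Note: VK_MAKE_VERSION(major, minor, patch) packs the version as
 * (major << 22) | (minor << 12) | patch, so these encode Vulkan 1.3/1.2
 * (or 1.1 for the ANDROID_STRICT build) with the current VK_HEADER_VERSION
 * as the patch level.
 */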

VkResult anv_EnumerateInstanceVersion(
    uint32_t*                                   pApiVersion)
{
#ifdef ANDROID_STRICT
   *pApiVersion = ANV_API_VERSION;
#else
   *pApiVersion = ANV_API_VERSION_1_3;
#endif
   return VK_SUCCESS;
}
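
/* Illustrative application-side usage (not part of the driver):
 *
 *    uint32_t version = 0;
 *    vkEnumerateInstanceVersion(&version);
 *    assert(VK_API_VERSION_MAJOR(version) == 1);
 *    // VK_API_VERSION_MINOR(version) is 3, or 1 for the ANDROID_STRICT build.
 */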

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation                = true,
   .KHR_external_fence_capabilities          = true,
   .KHR_external_memory_capabilities         = true,
   .KHR_external_semaphore_capabilities      = true,
   .KHR_get_physical_device_properties2      = true,
   .EXT_debug_report                         = true,
   .EXT_debug_utils                          = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2            = true,
   .KHR_surface                              = true,
   .KHR_surface_protected_capabilities       = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                      = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                          = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                         = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display                 = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                              = true,
   .KHR_get_display_properties2              = true,
   .EXT_direct_mode_display                  = true,
   .EXT_display_surface_counter              = true,
   .EXT_acquire_drm_display                  = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface                     = true,
#endif
};

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = device->info.ver >= 8,
      .KHR_16bit_storage                     = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = device->has_a64_buffer_access,
      .KHR_copy_commands2                    = true,
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_dynamic_rendering                 = true,
      .KHR_external_fence                    = has_syncobj_wait,
      .KHR_external_fence_fd                 = has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_get_memory_requirements2          = true,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_multiview                         = true,
      .KHR_performance_query =
         !anv_use_relocations(device) && device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way. */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor                   = true,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_expect_assume              = true,
      .KHR_shader_float16_int8               = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_shader_float_controls             = true,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types    = device->info.ver >= 8,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_border_color_swizzle              = device->info.ver >= 8,
      .EXT_buffer_device_address             = device->has_a64_buffer_access,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = device->info.verx10 >= 75,
      .EXT_custom_border_color               = device->info.ver >= 8,
      .EXT_depth_clamp_zero_one              = true,
      .EXT_depth_clamp_control               = true,
      .EXT_depth_clip_control                = true,
      .EXT_depth_clip_enable                 = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_global_priority                   = device->max_context_priority >=
                                               INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_global_priority_query             = device->max_context_priority >=
                                               INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_host_query_reset                  = true,
      .EXT_image_2d_view_of_3d               = true,
      .EXT_image_robustness                  = true,
      .EXT_image_drm_format_modifier         = true,
      .EXT_image_view_min_lod                = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_line_rasterization                = true,
      /* Enable the extension only if we have support for both local &
       * system memory.
       */
      .EXT_memory_budget                     = device->sys.available,
      .EXT_non_seamless_cube_map             = true,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_primitives_generated_query        = true,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier          = true,
      .EXT_shader_replicated_composites      = true,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
      .EXT_texel_buffer_alignment            = true,
      .EXT_tooling_info                      = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_ycbcr_image_arrays                = true,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      .INTEL_performance_query               = device->perf &&
                                               intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2       = device->info.ver >= 8,
      .EXT_multi_draw                        = true,
      .NV_compute_shader_derivatives         = true,
      .VALVE_mutable_descriptor_type         = true,
   };
}

static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   /* Just pick one; they're all the same */
   const bool has_astc_ldr =
      isl_format_supports_sampling(&pdevice->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess                       = true,
      .fullDrawIndexUint32                      = true,
      .imageCubeArray                           = true,
      .independentBlend                         = true,
      .geometryShader                           = true,
      .tessellationShader                       = true,
      .sampleRateShading                        = true,
      .dualSrcBlend                             = true,
      .logicOp                                  = true,
      .multiDrawIndirect                        = true,
      .drawIndirectFirstInstance                = true,
      .depthClamp                               = true,
      .depthBiasClamp                           = true,
      .fillModeNonSolid                         = true,
      .depthBounds                              = pdevice->info.ver >= 12,
      .wideLines                                = true,
      .largePoints                              = true,
      .alphaToOne                               = true,
      .multiViewport                            = true,
      .samplerAnisotropy                        = true,
      .textureCompressionETC2                   = pdevice->info.ver >= 8 ||
                                                  pdevice->info.platform == INTEL_PLATFORM_BYT,
      .textureCompressionASTC_LDR               = has_astc_ldr,
      .textureCompressionBC                     = true,
      .occlusionQueryPrecise                    = true,
      .pipelineStatisticsQuery                  = true,
      .fragmentStoresAndAtomics                 = true,
      .shaderTessellationAndGeometryPointSize   = true,
      .shaderImageGatherExtended                = true,
      .shaderStorageImageExtendedFormats        = true,
      .shaderStorageImageMultisample            = false,
      .shaderStorageImageReadWithoutFormat      = false,
      .shaderStorageImageWriteWithoutFormat     = true,
      .shaderUniformBufferArrayDynamicIndexing  = true,
      .shaderSampledImageArrayDynamicIndexing   = true,
      .shaderStorageBufferArrayDynamicIndexing  = true,
      .shaderStorageImageArrayDynamicIndexing   = true,
      .shaderClipDistance                       = true,
      .shaderCullDistance                       = true,
      .shaderFloat64                            = pdevice->info.ver >= 8 &&
                                                  pdevice->info.has_64bit_float,
      .shaderInt64                              = pdevice->info.ver >= 8,
      .shaderInt16                              = pdevice->info.ver >= 8,
      .shaderResourceMinLod                     = false,
      .variableMultisampleRate                  = true,
      .inheritedQueries                         = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess            = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .storagePushConstant16               = pdevice->info.ver >= 8,
      .storageInputOutput16                = false,
      .multiview                           = true,
      .multiviewGeometryShader             = true,
      .multiviewTessellationShader         = true,
      .variablePointersStorageBuffer       = true,
      .variablePointers                    = true,
      .protectedMemory                     = false,
      .samplerYcbcrConversion              = true,
      .shaderDrawParameters                = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge            = true,
      .drawIndirectCount                   = true,
      .storageBuffer8BitAccess             = pdevice->info.ver >= 8,
      .uniformAndStorageBuffer8BitAccess   = pdevice->info.ver >= 8,
      .storagePushConstant8                = pdevice->info.ver >= 8,
      .shaderBufferInt64Atomics            = false,
      .shaderSharedInt64Atomics            = false,
      .shaderFloat16                       = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .shaderInt8                          = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,

      .descriptorIndexing                                 = false,
      .shaderInputAttachmentArrayDynamicIndexing          = false,
      .shaderUniformTexelBufferArrayDynamicIndexing       = false,
      .shaderStorageTexelBufferArrayDynamicIndexing       = false,
      .shaderUniformBufferArrayNonUniformIndexing         = false,
      .shaderSampledImageArrayNonUniformIndexing          = false,
      .shaderStorageBufferArrayNonUniformIndexing         = false,
      .shaderStorageImageArrayNonUniformIndexing          = false,
      .shaderInputAttachmentArrayNonUniformIndexing       = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing    = false,
      .shaderStorageTexelBufferArrayNonUniformIndexing    = false,
      .descriptorBindingUniformBufferUpdateAfterBind      = false,
      .descriptorBindingSampledImageUpdateAfterBind       = false,
      .descriptorBindingStorageImageUpdateAfterBind       = false,
      .descriptorBindingStorageBufferUpdateAfterBind      = false,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
      .descriptorBindingUpdateUnusedWhilePending          = false,
      .descriptorBindingPartiallyBound                    = false,
      .descriptorBindingVariableDescriptorCount           = false,
      .runtimeDescriptorArray                             = false,

      .samplerFilterMinmax                 = false,
      .scalarBlockLayout                   = true,
      .imagelessFramebuffer                = true,
      .uniformBufferStandardLayout         = true,
      .shaderSubgroupExtendedTypes         = true,
      .separateDepthStencilLayouts         = true,
      .hostQueryReset                      = true,
      .timelineSemaphore                   = true,
      .bufferDeviceAddress                 = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressCaptureReplay    = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressMultiDevice      = false,
      .vulkanMemoryModel                   = true,
      .vulkanMemoryModelDeviceScope        = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex           = true,
      .shaderOutputLayer                   = true,
      .subgroupBroadcastDynamicId          = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = false,

      /* VK_NV_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = pdevice->info.verx10 >= 75,
      .inheritedConditionalRendering = pdevice->info.verx10 >= 75,

      /* VK_EXT_custom_border_color */
      .customBorderColors = pdevice->info.ver >= 8,
      .customBorderColorWithoutFormat = pdevice->info.ver >= 8,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_KHR_global_priority */
      .globalPriorityQuery = true,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL.  See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = false,
      .bresenhamLines = true,
      /* Support for Smooth lines with MSAA was removed on gfx11.  From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:########    NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics =    true,
      .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd =  false,
      .shaderSharedFloat32Atomics =    true,
      .shaderSharedFloat32AtomicAdd =  false,
      .shaderSharedFloat64Atomics =    false,
      .shaderSharedFloat64AtomicAdd =  false,
      .shaderImageFloat32Atomics =     true,
      .shaderImageFloat32AtomicAdd =   false,
      .sparseImageFloat32Atomics =     false,
      .sparseImageFloat32AtomicAdd =   false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_EXT_shader_replicated_composites */
      .shaderReplicatedComposites = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clamp_control */
      .depthClampControl = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,
   };

   /* We can't do image stores in vec4 shaders */
   features->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it.  They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
}


#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256

#define MAX_CUSTOM_BORDER_COLORS                   4096

static void
get_properties_1_1(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = ELK_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT;
   if (pdevice->info.ver >= 8) {
      /* TODO: There's no technical reason why these can't be made to
       * work on gfx7 but they don't at the moment so it's best to leave
       * the feature disabled than enabled and broken.
       */
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                        VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
   }
   p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;

   p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount      = 16;
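   /* anv lowers multiview to instanced draws on these platforms, so the
    * instance index space is effectively shared with the (up to 16) views;
    * hence the division below.
    */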
   p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
   p->protectedNoFault           = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors       = 1024;
   p->maxMemoryAllocationSize    = MAX_MEMORY_ALLOCATION_SIZE;
}

static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* Don't advertise conformance with a particular version if the hardware's
    * support is incomplete/alpha.
    */
   if (pdevice->is_alpha) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      };
   }
   else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = pdevice->use_softpin ? 3 : 2,
         .subminor = 0,
         .patch = 0,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = pdevice->info.ver > 8;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = pdevice->info.ver >= 8;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan.  The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address.  This means that we can have at most 1M surface states
    * allocated at any given time.  Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time.  The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
   const unsigned max_bindless_views = 1 << 20;
   p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_AVERAGE_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   if (pdevice->info.ver >= 8) {
      /* The advanced stencil resolve modes currently require stencil
       * sampling be supported by the hardware.
       */
      p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
                                         VK_RESOLVE_MODE_MAX_BIT;
   }
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = false;
   p->filterMinmaxImageComponentMapping   = false;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev);
}

static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
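   /* 16B is the size of the largest texel format (e.g. R32G32B32A32). */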
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces.  It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

static void
get_properties(const struct anv_physical_device *pdevice,
               struct vk_properties *props)
{
   const struct intel_device_info *devinfo = &pdevice->info;

   const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
   const uint32_t max_textures = 128;
   const uint32_t max_samplers =
      pdevice->has_bindless_samplers ? UINT16_MAX :
      (devinfo->verx10 >= 75) ? 128 : 16;
   const uint32_t max_images = MAX_IMAGES;

   /* If we can use bindless for everything, claim a high per-stage limit,
    * otherwise use the binding table size, minus the slots reserved for
    * render targets and one slot for the descriptor buffer. */
   const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;

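   /* Up to 32 invocations per HW thread (SIMD32) times the number of threads
    * available to a single workgroup, clamped to 1024.
    */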
   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   *props = (struct vk_properties) {
#if DETECT_OS_ANDROID
      .apiVersion = ANV_API_VERSION,
#else
      .apiVersion = pdevice->use_softpin ? ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2,
#endif /* DETECT_OS_ANDROID */
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Limits: */
      .maxImageDimension1D                      = (1 << 14),
      /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their width
       * is greater than 8192 pixels. */
      .maxImageDimension2D                      = devinfo->ver == 7 ? (1 << 13) : (1 << 14),
      .maxImageDimension3D                      = (1 << 11),
      .maxImageDimensionCube                    = (1 << 14),
      .maxImageArrayLayers                      = (1 << 11),
      .maxTexelBufferElements                   = 128 * 1024 * 1024,
      .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange                    = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount                 = UINT32_MAX,
      .maxSamplerAllocationCount                = 64 * 1024,
      .bufferImageGranularity                   = 1,
      .sparseAddressSpaceSize                   = 0,
      .maxBoundDescriptorSets                   = MAX_SETS,
      .maxPerStageDescriptorSamplers            = max_samplers,
      .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers      = max_ssbos,
      .maxPerStageDescriptorSampledImages       = max_textures,
      .maxPerStageDescriptorStorageImages       = max_images,
      .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources                     = max_per_stage,
      .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes                 = MAX_VES,
      .maxVertexInputBindings                   = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset            = 2047,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
       *
       * Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride              = devinfo->ver < 9 ? 2048 : 4095,
      .maxVertexOutputComponents                = 128,
      .maxTessellationGenerationLevel           = 64,
      .maxTessellationPatchSize                 = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations             = 32,
      .maxGeometryInputComponents               = devinfo->ver >= 8 ? 128 : 64,
      .maxGeometryOutputComponents              = 128,
      .maxGeometryOutputVertices                = 256,
      .maxGeometryTotalOutputComponents         = 1024,
      .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments             = 8,
      .maxFragmentDualSrcAttachments            = 1,
      .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
      .maxComputeSharedMemorySize               = 64 * 1024,
      .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations           = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits                    = 8,
      .subTexelPrecisionBits                    = 8,
      .mipmapPrecisionBits                      = 8,
      .maxDrawIndexedIndexValue                 = UINT32_MAX,
      .maxDrawIndirectCount                     = UINT32_MAX,
      .maxSamplerLodBias                        = 16,
      .maxSamplerAnisotropy                     = 16,
      .maxViewports                             = MAX_VIEWPORTS,
      .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
      .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits                     = 13, /* We take a float? */
      .minMemoryMapAlignment                    = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment            = 16,
      .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
      .minTexelOffset                           = -8,
      .maxTexelOffset                           = 7,
      .minTexelGatherOffset                     = -32,
      .maxTexelGatherOffset                     = 31,
      .minInterpolationOffset                   = -0.5,
      .maxInterpolationOffset                   = 0.4375,
      .subPixelInterpolationOffsetBits          = 4,
      .maxFramebufferWidth                      = (1 << 14),
      .maxFramebufferHeight                     = (1 << 14),
      .maxFramebufferLayers                     = (1 << 11),
      .framebufferColorSampleCounts             = sample_counts,
      .framebufferDepthSampleCounts             = sample_counts,
      .framebufferStencilSampleCounts           = sample_counts,
      .framebufferNoAttachmentsSampleCounts     = sample_counts,
      .maxColorAttachments                      = MAX_RTS,
      .sampledImageColorSampleCounts            = sample_counts,
      /* Multisampling with SINT formats is not supported on gfx7 */
      .sampledImageIntegerSampleCounts          = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts,
      .sampledImageDepthSampleCounts            = sample_counts,
      .sampledImageStencilSampleCounts          = sample_counts,
      .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords                       = 1,
      .timestampComputeAndGraphics              = true,
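      /* Expressed in nanoseconds per timestamp tick; e.g. a 12.5 MHz
       * timestamp clock gives a period of 80 ns.
       */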
1096       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
1097       .maxClipDistances                         = 8,
1098       .maxCullDistances                         = 8,
1099       .maxCombinedClipAndCullDistances          = 8,
1100       .discreteQueuePriorities                  = 2,
1101       .pointSizeRange                           = { 0.125, 255.875 },
1102       /* While SKL and up support much wider lines than we are setting here,
1103        * in practice we run into conformance issues if we go past this limit.
1104        * Since the Windows driver does the same, it's probably fair to assume
1105        * that no one needs more than this.
1106        */
1107       .lineWidthRange                           = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
1108       .pointSizeGranularity                     = (1.0 / 8.0),
1109       .lineWidthGranularity                     = (1.0 / 128.0),
1110       .strictLines                              = false,
1111       .standardSampleLocations                  = true,
1112       .optimalBufferCopyOffsetAlignment         = 128,
1113       .optimalBufferCopyRowPitchAlignment       = 128,
1114       .nonCoherentAtomSize                      = 64,
1115 
1116       /* Broadwell doesn't do sparse. */
1117       .sparseResidencyStandard2DBlockShape = false,
1118       .sparseResidencyStandard2DMultisampleBlockShape = false,
1119       .sparseResidencyStandard3DBlockShape = false,
1120       .sparseResidencyAlignedMipSize = false,
1121       .sparseResidencyNonResidentStrict = false,
1122    };
1123 
1124    snprintf(props->deviceName, sizeof(props->deviceName),
1125             "%s", pdevice->info.name);
1126    memcpy(props->pipelineCacheUUID,
1127           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1128 
1129    get_properties_1_1(pdevice, props);
1130    get_properties_1_2(pdevice, props);
1131    get_properties_1_3(pdevice, props);
1132 
1133    /* VK_KHR_performance_query */
1134    {
1135       /* We could support this by spawning a shader to do the equation normalization. */
1136       props->allowCommandBufferQueryCopies = false;
1137    }
1138 
1139    /* VK_KHR_push_descriptor */
1140    {
1141       props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1142    }
1143 
1144    /* VK_KHR_vertex_attribute_divisor */
1145    {
1146       /* We have to restrict this a bit for multiview */
1147       props->maxVertexAttribDivisor = UINT32_MAX / 16;
1148    }
1149 
1150    /* VK_EXT_custom_border_color */
1151    {
1152       props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1153    }
1154 
1155    /* VK_EXT_external_memory_host */
1156    {
1157       /* Userptr needs page-aligned memory. */
1158       props->minImportedHostPointerAlignment = 4096;
1159    }
1160 
1161    /* VK_EXT_line_rasterization */
1162    {
1163       /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1164        * Rules - Legacy Mode", it says the following:
1165        *
1166        *    "Note that the device divides a pixel into a 16x16 array of
1167        *     subpixels, referenced by their upper left corners."
1168        *
1169        * This is the only known reference in the PRMs to the subpixel
1170        * precision of line rasterization and a "16x16 array of subpixels"
1171        * implies 4 subpixel precision bits. Empirical testing has shown that 4
1172        * subpixel precision bits applies to all line rasterization types.
1173        */
1174       props->lineSubPixelPrecisionBits = 4;
1175    }
1176 
1177    /* VK_EXT_multi_draw */
1178    {
1179       props->maxMultiDrawCount = 2048;
1180    }
1181 
1182    /* VK_EXT_pci_bus_info */
1183    {
1184       props->pciDomain = pdevice->info.pci_domain;
1185       props->pciBus = pdevice->info.pci_bus;
1186       props->pciDevice = pdevice->info.pci_dev;
1187       props->pciFunction = pdevice->info.pci_func;
1188    }
1189 
1190    /* VK_EXT_physical_device_drm */
1191    {
1192       props->drmHasPrimary = pdevice->has_master;
1193       props->drmPrimaryMajor = pdevice->master_major;
1194       props->drmPrimaryMinor = pdevice->master_minor;
1195       props->drmHasRender = pdevice->has_local;
1196       props->drmRenderMajor = pdevice->local_major;
1197       props->drmRenderMinor = pdevice->local_minor;
1198    }
1199 
1200    /* VK_EXT_provoking_vertex */
1201    {
1202       props->provokingVertexModePerPipeline = true;
1203       props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1204    }
1205 
1206    /* VK_EXT_robustness2 */
1207    {
1208       props->robustStorageBufferAccessSizeAlignment =
1209          ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1210       props->robustUniformBufferAccessSizeAlignment =
1211          ANV_UBO_ALIGNMENT;
1212    }
1213 
1214    /* VK_EXT_sample_locations */
1215    {
1216       props->sampleLocationSampleCounts =
1217          isl_device_get_sample_counts(&pdevice->isl_dev);
1218 
1219       /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1220       props->maxSampleLocationGridSize.width = 1;
1221       props->maxSampleLocationGridSize.height = 1;
1222 
1223       props->sampleLocationCoordinateRange[0] = 0;
1224       props->sampleLocationCoordinateRange[1] = 0.9375;
1225       props->sampleLocationSubPixelBits = 4;
1226 
1227       props->variableSampleLocations = true;
1228    }
1229 
1230    /* VK_EXT_shader_module_identifier */
1231    {
1232       STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1233                     sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1234       memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1235              vk_shaderModuleIdentifierAlgorithmUUID,
1236              sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1237    }
1238 
1239    /* VK_EXT_transform_feedback */
1240    {
1241       props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1242       props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1243       props->maxTransformFeedbackBufferSize = (1ull << 32);
1244       props->maxTransformFeedbackStreamDataSize = 128 * 4;
1245       props->maxTransformFeedbackBufferDataSize = 128 * 4;
1246       props->maxTransformFeedbackBufferDataStride = 2048;
1247       props->transformFeedbackQueries = true;
1248       props->transformFeedbackStreamsLinesTriangles = false;
1249       props->transformFeedbackRasterizationStreamSelect = false;
1250       /* This requires MI_MATH */
1251       props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
1252    }
1253 
1254    /* VK_ANDROID_native_buffer */
1255 #if DETECT_OS_ANDROID
1256    {
1257       props->sharedImage = VK_FALSE;
1258    }
1259 #endif /* DETECT_OS_ANDROID */
1260 
1261 }
1262 
1263 static uint64_t
1264 anv_compute_sys_heap_size(struct anv_physical_device *device,
1265                           uint64_t available_ram)
1266 {
1267    /* We want to leave some padding for things we allocate in the driver,
1268     * so don't go over 3/4 of the GTT.
1269     */
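   /* For example (hypothetical numbers): with 16 GiB of mappable system RAM
    * and an 8 GiB GTT, this clamps the heap to
    * MIN2(16 GiB, 8 GiB * 3 / 4) = 6 GiB.
    */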
1270    available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
1271 
1272    if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
1273       /* When running with an overridden PCI ID, we may get a GTT size from
1274        * the kernel that is greater than 2 GiB but the execbuf check for 48bit
1275        * address support can still fail.  Just clamp the address space size to
1276        * 2 GiB if we don't have 48-bit support.
1277        */
1278       mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
1279                 "no support for 48-bit addresses",
1280                 __FILE__, __LINE__);
1281       available_ram = 2ull << 30;
1282    }
1283 
1284    return available_ram;
1285 }
1286 
1287 static VkResult MUST_CHECK
1288 anv_init_meminfo(struct anv_physical_device *device, int fd)
1289 {
1290    const struct intel_device_info *devinfo = &device->info;
1291 
1292    device->sys.size =
1293       anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
1294    device->sys.available = devinfo->mem.sram.mappable.free;
1295 
1296    return VK_SUCCESS;
1297 }
1298 
1299 static void
1300 anv_update_meminfo(struct anv_physical_device *device, int fd)
1301 {
1302    if (!intel_device_info_update_memory_info(&device->info, fd))
1303       return;
1304 
1305    const struct intel_device_info *devinfo = &device->info;
1306    device->sys.available = devinfo->mem.sram.mappable.free;
1307 }
1308 
1309 static VkResult
1310 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
1311 {
1312    VkResult result = anv_init_meminfo(device, fd);
1313    if (result != VK_SUCCESS)
1314       return result;
1315 
1316    assert(device->sys.size != 0);
1317 
1318    if (device->info.has_llc) {
1319       device->memory.heap_count = 1;
1320       device->memory.heaps[0] = (struct anv_memory_heap) {
1321          .size = device->sys.size,
1322          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1323       };
1324 
1325       /* Big core GPUs share LLC with the CPU and thus one memory type can be
1326        * both cached and coherent at the same time.
1327        *
1328        * But some game engines can't handle a single type well:
1329        * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
1330        *
1331        * And the Intel Windows driver uses 3 types, so it's better to add an extra one here.
1332        */
1333       device->memory.type_count = 2;
1334       device->memory.types[0] = (struct anv_memory_type) {
1335           .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1336           .heapIndex = 0,
1337       };
1338       device->memory.types[1] = (struct anv_memory_type) {
1339           .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1340                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1341                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1342                            VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1343           .heapIndex = 0,
1344       };
1345    } else {
1346       device->memory.heap_count = 1;
1347       device->memory.heaps[0] = (struct anv_memory_heap) {
1348          .size = device->sys.size,
1349          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1350       };
1351 
1352       /* The spec requires that we expose a host-visible, coherent memory
1353        * type, but Atom GPUs don't share LLC. Thus we offer two memory types
1354        * to give the application a choice between cached but not coherent, and
1355        * coherent but uncached (write-combined).
1356        */
1357       device->memory.type_count = 2;
1358       device->memory.types[0] = (struct anv_memory_type) {
1359          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1360                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1361                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1362          .heapIndex = 0,
1363       };
1364       device->memory.types[1] = (struct anv_memory_type) {
1365          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1366                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1367                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1368          .heapIndex = 0,
1369       };
1370    }
1371 
1372    device->memory.need_flush = false;
1373    for (unsigned i = 0; i < device->memory.type_count; i++) {
1374       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
1375       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
1376           !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
1377          device->memory.need_flush = true;
1378    }
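   /* If any host-visible type is non-coherent, CPU writes have to be flushed
    * explicitly before the GPU reads them; see the need_flush check around
    * intel_flush_range() in anv_device_init_trivial_batch() below.
    */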
1379 
1380    return VK_SUCCESS;
1381 }
1382 
1383 static VkResult
1384 anv_physical_device_init_uuids(struct anv_physical_device *device)
1385 {
1386    const struct build_id_note *note =
1387       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
1388    if (!note) {
1389       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1390                        "Failed to find build-id");
1391    }
1392 
1393    unsigned build_id_len = build_id_length(note);
1394    if (build_id_len < 20) {
1395       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1396                        "build-id too short.  It needs to be a SHA");
1397    }
1398 
1399    memcpy(device->driver_build_sha1, build_id_data(note), 20);
1400 
1401    struct mesa_sha1 sha1_ctx;
1402    uint8_t sha1[20];
1403    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
1404 
1405    /* The pipeline cache UUID is used for determining when a pipeline cache is
1406     * invalid.  It needs both a driver build and the PCI ID of the device.
1407     */
1408    _mesa_sha1_init(&sha1_ctx);
1409    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
1410    _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
1411                      sizeof(device->info.pci_device_id));
1412    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
1413                      sizeof(device->always_use_bindless));
1414    _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
1415                      sizeof(device->has_a64_buffer_access));
1416    _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
1417                      sizeof(device->has_bindless_samplers));
1418    _mesa_sha1_final(&sha1_ctx, sha1);
1419    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
1420 
1421    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
1422    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
1423 
1424    return VK_SUCCESS;
1425 }
1426 
1427 static void
1428 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
1429 {
1430 #ifdef ENABLE_SHADER_CACHE
1431    char renderer[10];
1432    ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
1433                                device->info.pci_device_id);
1434    assert(len == sizeof(renderer) - 2);
1435 
1436    char timestamp[41];
1437    _mesa_sha1_format(timestamp, device->driver_build_sha1);
1438 
1439    const uint64_t driver_flags =
1440       elk_get_compiler_config_value(device->compiler);
1441    device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1442 #endif
1443 }
1444 
1445 static void
1446 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
1447 {
1448 #ifdef ENABLE_SHADER_CACHE
1449    if (device->vk.disk_cache) {
1450       disk_cache_destroy(device->vk.disk_cache);
1451       device->vk.disk_cache = NULL;
1452    }
1453 #else
1454    assert(device->vk.disk_cache == NULL);
1455 #endif
1456 }
1457 
1458 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
1459  * queue overrides.
1460  *
1461  * To override the number of queues:
1462  *  * "gc" is for graphics queues with compute support
1463  *  * "g" is for graphics queues with no compute support
1464  *  * "c" is for compute queues with no graphics support
1465  *
1466  * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
1467  * advertised queues to be 2 queues with graphics+compute support, and 1 queue
1468  * with compute-only support.
1469  *
1470  * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
1471  * include 1 queue with compute-only support, but it will not change the
1472  * number of graphics+compute queues.
1473  *
1474  * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
1475  * to include 1 queue with compute-only support, and it would override the
1476  * number of graphics+compute queues to be 0.
1477  */
1478 static void
1479 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
1480 {
1481    int gc_override = -1;
1482    int g_override = -1;
1483    int c_override = -1;
1484    const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");
1485 
1486    if (env_ == NULL)
1487       return;
1488 
1489    char *env = strdup(env_);
1490    char *save = NULL;
1491    char *next = strtok_r(env, ",", &save);
1492    while (next != NULL) {
1493       if (strncmp(next, "gc=", 3) == 0) {
1494          gc_override = strtol(next + 3, NULL, 0);
1495       } else if (strncmp(next, "g=", 2) == 0) {
1496          g_override = strtol(next + 2, NULL, 0);
1497       } else if (strncmp(next, "c=", 2) == 0) {
1498          c_override = strtol(next + 2, NULL, 0);
1499       } else {
1500          mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
1501       }
1502       next = strtok_r(NULL, ",", &save);
1503    }
1504    free(env);
1505    if (gc_override >= 0)
1506       *gc_count = gc_override;
1507    if (g_override >= 0)
1508       *g_count = g_override;
1509    if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
1510       mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
1511                 "Vulkan specification");
1512    if (c_override >= 0)
1513       *c_count = c_override;
1514 }
1515 
1516 static void
1517 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
1518 {
1519    uint32_t family_count = 0;
1520 
1521    if (pdevice->engine_info) {
1522       int gc_count =
1523          intel_engines_count(pdevice->engine_info,
1524                              INTEL_ENGINE_CLASS_RENDER);
1525       int g_count = 0;
1526       int c_count = 0;
1527 
1528       anv_override_engine_counts(&gc_count, &g_count, &c_count);
1529 
1530       if (gc_count > 0) {
1531          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1532             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1533                           VK_QUEUE_COMPUTE_BIT |
1534                           VK_QUEUE_TRANSFER_BIT,
1535             .queueCount = gc_count,
1536             .engine_class = INTEL_ENGINE_CLASS_RENDER,
1537          };
1538       }
1539       if (g_count > 0) {
1540          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1541             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1542                           VK_QUEUE_TRANSFER_BIT,
1543             .queueCount = g_count,
1544             .engine_class = INTEL_ENGINE_CLASS_RENDER,
1545          };
1546       }
1547       if (c_count > 0) {
1548          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1549             .queueFlags = VK_QUEUE_COMPUTE_BIT |
1550                           VK_QUEUE_TRANSFER_BIT,
1551             .queueCount = c_count,
1552             .engine_class = INTEL_ENGINE_CLASS_RENDER,
1553          };
1554       }
1555       /* When other queue families are added, bump the count in the assert
1556        * below as a reminder to increase the ANV_MAX_QUEUE_FAMILIES value.
1557        */
1558       STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
1559    } else {
1560       /* Default to a single render queue */
1561       pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1562          .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1563                        VK_QUEUE_COMPUTE_BIT |
1564                        VK_QUEUE_TRANSFER_BIT,
1565          .queueCount = 1,
1566          .engine_class = INTEL_ENGINE_CLASS_RENDER,
1567       };
1568       family_count = 1;
1569    }
1570    assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
1571    pdevice->queue.family_count = family_count;
1572 }
1573 
1574 static VkResult
1575 anv_physical_device_try_create(struct vk_instance *vk_instance,
1576                                struct _drmDevice *drm_device,
1577                                struct vk_physical_device **out)
1578 {
1579    struct anv_instance *instance =
1580       container_of(vk_instance, struct anv_instance, vk);
1581 
1582    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1583        drm_device->bustype != DRM_BUS_PCI ||
1584        drm_device->deviceinfo.pci->vendor_id != 0x8086)
1585       return VK_ERROR_INCOMPATIBLE_DRIVER;
1586 
1587    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
1588    const char *path = drm_device->nodes[DRM_NODE_RENDER];
1589    VkResult result;
1590    int fd;
1591    int master_fd = -1;
1592 
1593    process_intel_debug_variable();
1594 
1595    fd = open(path, O_RDWR | O_CLOEXEC);
1596    if (fd < 0) {
1597       if (errno == ENOMEM) {
1598          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1599                           "Unable to open device %s: out of memory", path);
1600       }
1601       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1602                        "Unable to open device %s: %m", path);
1603    }
1604 
1605    struct intel_device_info devinfo;
1606    if (!intel_get_device_info_from_fd(fd, &devinfo, 7, 8)) {
1607       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1608       goto fail_fd;
1609    }
1610 
1611    bool is_alpha = true;
1612    bool warn = !debug_get_bool_option("MESA_VK_IGNORE_CONFORMANCE_WARNING", false);
1613    if (devinfo.platform == INTEL_PLATFORM_HSW) {
1614       if (warn)
1615          mesa_logw("Haswell Vulkan support is incomplete");
1616    } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
1617       if (warn)
1618          mesa_logw("Ivy Bridge Vulkan support is incomplete");
1619    } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
1620       if (warn)
1621          mesa_logw("Bay Trail Vulkan support is incomplete");
1622    } else if (devinfo.ver == 8) {
1623       /* Gfx8 fully supported */
1624       is_alpha = false;
1625    } else {
1626       /* Silently fail here; anv will either pick up this device or display an
1627        * error message.
1628        */
1629       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1630       goto fail_fd;
1631    }
1632 
1633    struct anv_physical_device *device =
1634       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1635                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1636    if (device == NULL) {
1637       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1638       goto fail_fd;
1639    }
1640 
1641    struct vk_physical_device_dispatch_table dispatch_table;
1642    vk_physical_device_dispatch_table_from_entrypoints(
1643       &dispatch_table, &anv_physical_device_entrypoints, true);
1644    vk_physical_device_dispatch_table_from_entrypoints(
1645       &dispatch_table, &wsi_physical_device_entrypoints, false);
1646 
1647    result = vk_physical_device_init(&device->vk, &instance->vk,
1648                                     NULL, NULL, NULL, /* We set up extensions later */
1649                                     &dispatch_table);
1650    if (result != VK_SUCCESS) {
1651       vk_error(instance, result);
1652       goto fail_alloc;
1653    }
1654    device->instance = instance;
1655 
1656    assert(strlen(path) < ARRAY_SIZE(device->path));
1657    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
1658 
1659    device->info = devinfo;
1660    device->is_alpha = is_alpha;
1661 
1662    device->cmd_parser_version = -1;
1663    if (device->info.ver == 7) {
1664       if (!intel_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION, &device->cmd_parser_version) ||
1665           device->cmd_parser_version == -1) {
1666          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1667                             "failed to get command parser version");
1668          goto fail_base;
1669       }
1670    }
1671 
1672    int val;
1673    if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
1674       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1675                          "kernel missing gem wait");
1676       goto fail_base;
1677    }
1678 
1679    if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
1680       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1681                          "kernel missing execbuf2");
1682       goto fail_base;
1683    }
1684 
1685    if (!device->info.has_llc &&
1686        (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
1687        result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1688                           "kernel missing wc mmap");
1689       goto fail_base;
1690    }
1691 
1692    device->use_relocations = device->info.ver < 8 ||
1693                              device->info.platform == INTEL_PLATFORM_CHV;
1694 
1695    if (!device->use_relocations &&
1696        (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val)) {
1697       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1698                          "kernel missing softpin");
1699       goto fail_base;
1700    }
1701 
1702    if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
1703       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1704                          "kernel missing syncobj support");
1705       goto fail_base;
1706    }
1707 
1708    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
1709       device->has_exec_async = val;
1710    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
1711       device->has_exec_capture = val;
1712 
1713    /* Start with medium; sorted low to high */
1714    const int priorities[] = {
1715       INTEL_CONTEXT_MEDIUM_PRIORITY,
1716       INTEL_CONTEXT_HIGH_PRIORITY,
1717       INTEL_CONTEXT_REALTIME_PRIORITY,
1718    };
1719    device->max_context_priority = INT_MIN;
1720    for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
1721       if (!anv_gem_has_context_priority(fd, priorities[i]))
1722          break;
1723       device->max_context_priority = priorities[i];
1724    }
1725 
1726    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
1727                                               device->info.aperture_bytes;
1728 
1729    /* We only allow 48-bit addresses with softpin because knowing the actual
1730     * address is required for the vertex cache flush workaround.
1731     */
1732    device->supports_48bit_addresses = (device->info.ver >= 8) &&
1733                                       device->gtt_size > (4ULL << 30 /* GiB */);
1734 
1735    result = anv_physical_device_init_heaps(device, fd);
1736    if (result != VK_SUCCESS)
1737       goto fail_base;
1738 
1739    assert(device->supports_48bit_addresses == !device->use_relocations);
1740    device->use_softpin = !device->use_relocations;
1741 
1742    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
1743       device->has_exec_timeline = val;
1744    if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
1745       device->has_exec_timeline = false;
1746 
1747    unsigned st_idx = 0;
1748 
1749    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
1750    if (!device->has_exec_timeline)
1751       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
1752    device->sync_types[st_idx++] = &device->sync_syncobj_type;
1753 
1754    if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
1755       device->sync_types[st_idx++] = &anv_bo_sync_type;
1756 
1757    if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
1758       device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
1759       device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
1760    }
1761 
1762    device->sync_types[st_idx++] = NULL;
1763    assert(st_idx <= ARRAY_SIZE(device->sync_types));
1764    device->vk.supported_sync_types = device->sync_types;
1765 
1766    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
1767 
1768    device->always_use_bindless =
1769       debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
1770 
1771    device->use_call_secondary =
1772       device->use_softpin &&
1773       !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
1774 
1775    /* We first got the A64 messages on Broadwell, and we can only use them if
1776     * we can pass addresses directly into the shader, which requires softpin.
1777     */
1778    device->has_a64_buffer_access = device->info.ver >= 8 &&
1779                                    device->use_softpin;
1780 
1781    /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
1782     * because it's just a matter of setting the sampler address in the sample
1783     * message header.  However, we've not bothered to wire it up for vec4 so
1784     * we leave it disabled on gfx7.
1785     */
1786    device->has_bindless_samplers = device->info.ver >= 8;
1787 
1788    /* Check if we can read the GPU timestamp register from the CPU */
1789    uint64_t u64_ignore;
1790    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
1791                                                                device->info.kmd_type,
1792                                                                &u64_ignore);
1793 
1794    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
1795       driQueryOptionb(&instance->dri_options, "always_flush_cache");
1796 
1797    device->compiler = elk_compiler_create(NULL, &device->info);
1798    if (device->compiler == NULL) {
1799       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1800       goto fail_base;
1801    }
1802    device->compiler->shader_debug_log = compiler_debug_log;
1803    device->compiler->shader_perf_log = compiler_perf_log;
1804    device->compiler->constant_buffer_0_is_relative =
1805       device->info.ver < 8 || !device->info.has_context_isolation;
1806    device->compiler->supports_shader_constants = true;
1807 
1808    isl_device_init(&device->isl_dev, &device->info);
1809 
1810    result = anv_physical_device_init_uuids(device);
1811    if (result != VK_SUCCESS)
1812       goto fail_compiler;
1813 
1814    anv_physical_device_init_disk_cache(device);
1815 
1816    if (instance->vk.enabled_extensions.KHR_display) {
1817       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
1818       if (master_fd >= 0) {
1819          /* fail if we don't have permission to even render on this device */
1820          if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
1821             close(master_fd);
1822             master_fd = -1;
1823          }
1824       }
1825    }
1826    device->master_fd = master_fd;
1827 
1828    device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
1829    anv_physical_device_init_queue_families(device);
1830 
1831    device->local_fd = fd;
1832 
1833    anv_physical_device_init_perf(device, fd);
1834 
1835    /* Gather major/minor before WSI. */
1836    struct stat st;
1837 
1838    if (stat(primary_path, &st) == 0) {
1839       device->has_master = true;
1840       device->master_major = major(st.st_rdev);
1841       device->master_minor = minor(st.st_rdev);
1842    } else {
1843       device->has_master = false;
1844       device->master_major = 0;
1845       device->master_minor = 0;
1846    }
1847 
1848    if (stat(path, &st) == 0) {
1849       device->has_local = true;
1850       device->local_major = major(st.st_rdev);
1851       device->local_minor = minor(st.st_rdev);
1852    } else {
1853       device->has_local = false;
1854       device->local_major = 0;
1855       device->local_minor = 0;
1856    }
1857 
1858    get_device_extensions(device, &device->vk.supported_extensions);
1859    get_features(device, &device->vk.supported_features);
1860    get_properties(device, &device->vk.properties);
1861 
1862    result = anv_init_wsi(device);
1863    if (result != VK_SUCCESS)
1864       goto fail_perf;
1865 
1866    anv_measure_device_init(device);
1867 
1868    anv_genX(&device->info, init_physical_device_state)(device);
1869 
1870    *out = &device->vk;
1871 
1872    return VK_SUCCESS;
1873 
1874 fail_perf:
1875    intel_perf_free(device->perf);
1876    free(device->engine_info);
1877    anv_physical_device_free_disk_cache(device);
1878 fail_compiler:
1879    ralloc_free(device->compiler);
1880 fail_base:
1881    vk_physical_device_finish(&device->vk);
1882 fail_alloc:
1883    vk_free(&instance->vk.alloc, device);
1884 fail_fd:
1885    close(fd);
1886    if (master_fd != -1)
1887       close(master_fd);
1888    return result;
1889 }
1890 
1891 static void
1892 anv_physical_device_destroy(struct vk_physical_device *vk_device)
1893 {
1894    struct anv_physical_device *device =
1895       container_of(vk_device, struct anv_physical_device, vk);
1896 
1897    anv_finish_wsi(device);
1898    anv_measure_device_destroy(device);
1899    free(device->engine_info);
1900    anv_physical_device_free_disk_cache(device);
1901    ralloc_free(device->compiler);
1902    intel_perf_free(device->perf);
1903    close(device->local_fd);
1904    if (device->master_fd >= 0)
1905       close(device->master_fd);
1906    vk_physical_device_finish(&device->vk);
1907    vk_free(&device->instance->vk.alloc, device);
1908 }
1909 
1910 VkResult anv_EnumerateInstanceExtensionProperties(
1911     const char*                                 pLayerName,
1912     uint32_t*                                   pPropertyCount,
1913     VkExtensionProperties*                      pProperties)
1914 {
1915    if (pLayerName)
1916       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1917 
1918    return vk_enumerate_instance_extension_properties(
1919       &instance_extensions, pPropertyCount, pProperties);
1920 }
1921 
1922 static void
1923 anv_init_dri_options(struct anv_instance *instance)
1924 {
1925    driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1926                       ARRAY_SIZE(anv_dri_options));
1927    driParseConfigFiles(&instance->dri_options,
1928                        &instance->available_dri_options, 0, "anv", NULL, NULL,
1929                        instance->vk.app_info.app_name,
1930                        instance->vk.app_info.app_version,
1931                        instance->vk.app_info.engine_name,
1932                        instance->vk.app_info.engine_version);
1933 
1934     instance->assume_full_subgroups =
1935             driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
1936     instance->limit_trig_input_range =
1937             driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1938     instance->sample_mask_out_opengl_behaviour =
1939             driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1940     instance->lower_depth_range_rate =
1941             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1942     instance->no_16bit =
1943             driQueryOptionb(&instance->dri_options, "no_16bit");
1944     instance->report_vk_1_3 =
1945             driQueryOptionb(&instance->dri_options, "hasvk_report_vk_1_3_version");
1946 }
1947 
1948 VkResult anv_CreateInstance(
1949     const VkInstanceCreateInfo*                 pCreateInfo,
1950     const VkAllocationCallbacks*                pAllocator,
1951     VkInstance*                                 pInstance)
1952 {
1953    struct anv_instance *instance;
1954    VkResult result;
1955 
1956    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1957 
1958    if (pAllocator == NULL)
1959       pAllocator = vk_default_allocator();
1960 
1961    instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1962                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1963    if (!instance)
1964       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1965 
1966    struct vk_instance_dispatch_table dispatch_table;
1967    vk_instance_dispatch_table_from_entrypoints(
1968       &dispatch_table, &anv_instance_entrypoints, true);
1969    vk_instance_dispatch_table_from_entrypoints(
1970       &dispatch_table, &wsi_instance_entrypoints, false);
1971 
1972    result = vk_instance_init(&instance->vk, &instance_extensions,
1973                              &dispatch_table, pCreateInfo, pAllocator);
1974    if (result != VK_SUCCESS) {
1975       vk_free(pAllocator, instance);
1976       return vk_error(NULL, result);
1977    }
1978 
1979    instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1980    instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1981 
1982    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1983 
1984    anv_init_dri_options(instance);
1985 
1986    intel_driver_ds_init();
1987 
1988    *pInstance = anv_instance_to_handle(instance);
1989 
1990    return VK_SUCCESS;
1991 }
1992 
1993 void anv_DestroyInstance(
1994     VkInstance                                  _instance,
1995     const VkAllocationCallbacks*                pAllocator)
1996 {
1997    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1998 
1999    if (!instance)
2000       return;
2001 
2002    VG(VALGRIND_DESTROY_MEMPOOL(instance));
2003 
2004    driDestroyOptionCache(&instance->dri_options);
2005    driDestroyOptionInfo(&instance->available_dri_options);
2006 
2007    vk_instance_finish(&instance->vk);
2008    vk_free(&instance->vk.alloc, instance);
2009 }
2010 
2011 static int
2012 vk_priority_to_gen(int priority)
2013 {
2014    switch (priority) {
2015    case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
2016       return INTEL_CONTEXT_LOW_PRIORITY;
2017    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
2018       return INTEL_CONTEXT_MEDIUM_PRIORITY;
2019    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
2020       return INTEL_CONTEXT_HIGH_PRIORITY;
2021    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
2022       return INTEL_CONTEXT_REALTIME_PRIORITY;
2023    default:
2024       unreachable("Invalid priority");
2025    }
2026 }
2027 
2028 static const VkQueueFamilyProperties
2029 anv_queue_family_properties_template = {
2030    .timestampValidBits = 36, /* XXX: Real value here */
2031    .minImageTransferGranularity = { 1, 1, 1 },
2032 };
2033 
2034 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2035     VkPhysicalDevice                            physicalDevice,
2036     uint32_t*                                   pQueueFamilyPropertyCount,
2037     VkQueueFamilyProperties2*                   pQueueFamilyProperties)
2038 {
2039    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2040    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2041                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
2042 
2043    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2044       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2045       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2046          p->queueFamilyProperties = anv_queue_family_properties_template;
2047          p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2048          p->queueFamilyProperties.queueCount = queue_family->queueCount;
2049 
2050          vk_foreach_struct(ext, p->pNext) {
2051             switch (ext->sType) {
2052             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2053                VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2054                   (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2055 
2056                /* Deliberately sorted low to high */
2057                VkQueueGlobalPriorityKHR all_priorities[] = {
2058                   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2059                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2060                   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2061                   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2062                };
2063 
2064                uint32_t count = 0;
2065                for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2066                   if (vk_priority_to_gen(all_priorities[i]) >
2067                       pdevice->max_context_priority)
2068                      break;
2069 
2070                   properties->priorities[count++] = all_priorities[i];
2071                }
2072                properties->priorityCount = count;
2073                break;
2074             }
2075 
2076             default:
2077                vk_debug_ignored_stype(ext->sType);
2078             }
2079          }
2080       }
2081    }
2082 }
2083 
2084 void anv_GetPhysicalDeviceMemoryProperties(
2085     VkPhysicalDevice                            physicalDevice,
2086     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
2087 {
2088    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2089 
2090    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2091    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2092       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2093          .propertyFlags = physical_device->memory.types[i].propertyFlags,
2094          .heapIndex     = physical_device->memory.types[i].heapIndex,
2095       };
2096    }
2097 
2098    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2099    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2100       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2101          .size    = physical_device->memory.heaps[i].size,
2102          .flags   = physical_device->memory.heaps[i].flags,
2103       };
2104    }
2105 }
2106 
2107 static void
2108 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2109                       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2110 {
2111    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2112 
2113    if (!device->vk.supported_extensions.EXT_memory_budget)
2114       return;
2115 
2116    anv_update_meminfo(device, device->local_fd);
2117 
2118    VkDeviceSize total_sys_heaps_size = 0;
2119    for (size_t i = 0; i < device->memory.heap_count; i++)
2120       total_sys_heaps_size += device->memory.heaps[i].size;
2121 
2122    for (size_t i = 0; i < device->memory.heap_count; i++) {
2123       VkDeviceSize heap_size = device->memory.heaps[i].size;
2124       VkDeviceSize heap_used = device->memory.heaps[i].used;
2125       VkDeviceSize heap_budget, total_heaps_size;
2126       uint64_t mem_available = 0;
2127 
2128       total_heaps_size = total_sys_heaps_size;
2129       mem_available = device->sys.available;
2130 
2131       double heap_proportion = (double) heap_size / total_heaps_size;
2132       VkDeviceSize available_prop = mem_available * heap_proportion;
2133 
2134       /*
2135        * Let's not incite the app to starve the system: report at most 90% of
2136        * the available heap memory.
2137        */
2138       uint64_t heap_available = available_prop * 9 / 10;
2139       heap_budget = MIN2(heap_size, heap_used + heap_available);
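      /* Worked example (hypothetical numbers): with a single 8 GiB heap,
       * heap_used = 1 GiB and mem_available = 4 GiB, heap_available is
       * 4 GiB * 9 / 10 = 3.6 GiB and heap_budget becomes
       * MIN2(8 GiB, 1 GiB + 3.6 GiB) = 4.6 GiB before the MiB round-down
       * below.
       */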
2140 
2141       /*
2142        * Round down to the nearest MB
2143        */
2144       heap_budget &= ~((1ull << 20) - 1);
2145 
2146       /*
2147        * The heapBudget value must be non-zero for array elements less than
2148        * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2149        * value must be less than or equal to VkMemoryHeap::size for each heap.
2150        */
2151       assert(0 < heap_budget && heap_budget <= heap_size);
2152 
2153       memoryBudget->heapUsage[i] = heap_used;
2154       memoryBudget->heapBudget[i] = heap_budget;
2155    }
2156 
2157    /* The heapBudget and heapUsage values must be zero for array elements
2158     * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2159     */
2160    for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2161       memoryBudget->heapBudget[i] = 0;
2162       memoryBudget->heapUsage[i] = 0;
2163    }
2164 }
2165 
2166 void anv_GetPhysicalDeviceMemoryProperties2(
2167     VkPhysicalDevice                            physicalDevice,
2168     VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
2169 {
2170    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2171                                          &pMemoryProperties->memoryProperties);
2172 
2173    vk_foreach_struct(ext, pMemoryProperties->pNext) {
2174       switch (ext->sType) {
2175       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2176          anv_get_memory_budget(physicalDevice, (void*)ext);
2177          break;
2178       default:
2179          vk_debug_ignored_stype(ext->sType);
2180          break;
2181       }
2182    }
2183 }
2184 
2185 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2186     VkInstance                                  _instance,
2187     const char*                                 pName)
2188 {
2189    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2190    return vk_instance_get_proc_addr(&instance->vk,
2191                                     &anv_instance_entrypoints,
2192                                     pName);
2193 }
2194 
2195 /* With version 1+ of the loader interface the ICD should expose
2196  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2197  */
2198 PUBLIC
2199 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2200     VkInstance                                  instance,
2201     const char*                                 pName)
2202 {
2203    return anv_GetInstanceProcAddr(instance, pName);
2204 }
2205 static struct anv_state
2206 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2207 {
2208    struct anv_state state;
2209 
2210    state = anv_state_pool_alloc(pool, size, align);
2211    memcpy(state.map, p, size);
2212 
2213    return state;
2214 }
2215 
2216 static void
2217 anv_device_init_border_colors(struct anv_device *device)
2218 {
2219    if (device->info->platform == INTEL_PLATFORM_HSW) {
2220       static const struct hsw_border_color border_colors[] = {
2221          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2222          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2223          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2224          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2225          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2226          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2227       };
2228 
2229       device->border_colors =
2230          anv_state_pool_emit_data(&device->dynamic_state_pool,
2231                                   sizeof(border_colors), 512, border_colors);
2232    } else {
2233       static const struct gfx8_border_color border_colors[] = {
2234          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2235          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2236          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2237          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2238          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2239          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2240       };
2241 
2242       device->border_colors =
2243          anv_state_pool_emit_data(&device->dynamic_state_pool,
2244                                   sizeof(border_colors), 64, border_colors);
2245    }
2246 }
2247 
2248 static VkResult
2249 anv_device_init_trivial_batch(struct anv_device *device)
2250 {
2251    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2252                                          ANV_BO_ALLOC_MAPPED,
2253                                          0 /* explicit_address */,
2254                                          &device->trivial_batch_bo);
2255    if (result != VK_SUCCESS)
2256       return result;
2257 
2258    struct anv_batch batch = {
2259       .start = device->trivial_batch_bo->map,
2260       .next = device->trivial_batch_bo->map,
2261       .end = device->trivial_batch_bo->map + 4096,
2262    };
2263 
2264    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2265    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2266 
2267 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2268    if (device->physical->memory.need_flush)
2269       intel_flush_range(batch.start, batch.next - batch.start);
2270 #endif
2271 
2272    return VK_SUCCESS;
2273 }
2274 
2275 static bool
2276 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2277                  struct anv_block_pool *pool,
2278                  uint64_t address)
2279 {
2280    anv_block_pool_foreach_bo(bo, pool) {
2281       uint64_t bo_address = intel_48b_address(bo->offset);
2282       if (address >= bo_address && address < (bo_address + bo->size)) {
2283          *ret = (struct intel_batch_decode_bo) {
2284             .addr = bo_address,
2285             .size = bo->size,
2286             .map = bo->map,
2287          };
2288          return true;
2289       }
2290    }
2291    return false;
2292 }
2293 
2294 /* Finding a buffer for batch decoding */
2295 static struct intel_batch_decode_bo
2296 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2297 {
2298    struct anv_device *device = v_batch;
2299    struct intel_batch_decode_bo ret_bo = {};
2300 
2301    assert(ppgtt);
2302 
2303    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2304       return ret_bo;
2305    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2306       return ret_bo;
2307    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2308       return ret_bo;
2309    if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2310       return ret_bo;
2311 
2312    if (!device->cmd_buffer_being_decoded)
2313       return (struct intel_batch_decode_bo) { };
2314 
2315    struct anv_batch_bo **bo;
2316 
2317    u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2318       /* The decoder zeroes out the top 16 bits, so we need to as well */
2319       uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
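      /* i.e. keep only the low 48 bits: offset & 0x0000ffffffffffffull */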
2320 
2321       if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2322          return (struct intel_batch_decode_bo) {
2323             .addr = bo_address,
2324             .size = (*bo)->bo->size,
2325             .map = (*bo)->bo->map,
2326          };
2327       }
2328    }
2329 
2330    return (struct intel_batch_decode_bo) { };
2331 }
2332 
2333 static VkResult anv_device_check_status(struct vk_device *vk_device);
2334 
2335 static VkResult anv_device_get_timestamp(struct vk_device *vk_device, uint64_t *timestamp)
2336 {
2337    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2338 
2339    if (!intel_gem_read_render_timestamp(device->fd,
2340                                         device->info->kmd_type,
2341                                         timestamp)) {
2342       return vk_device_set_lost(&device->vk,
2343                                 "Failed to read the TIMESTAMP register: %m");
2344    }
2345 
2346    return VK_SUCCESS;
2347 }
2348 
2349 static VkResult
2350 anv_device_setup_context(struct anv_device *device,
2351                          const VkDeviceCreateInfo *pCreateInfo,
2352                          const uint32_t num_queues)
2353 {
2354    struct anv_physical_device *physical_device = device->physical;
2355    VkResult result = VK_SUCCESS;
2356 
2357    if (device->physical->engine_info) {
2358       /* The kernel API supports at most 64 engines */
2359       assert(num_queues <= 64);
2360       enum intel_engine_class engine_classes[64];
2361       int engine_count = 0;
2362       for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2363          const VkDeviceQueueCreateInfo *queueCreateInfo =
2364             &pCreateInfo->pQueueCreateInfos[i];
2365 
2366          assert(queueCreateInfo->queueFamilyIndex <
2367                 physical_device->queue.family_count);
2368          struct anv_queue_family *queue_family =
2369             &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
2370 
2371          for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
2372             engine_classes[engine_count++] = queue_family->engine_class;
2373       }
2374       if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
2375                                             physical_device->engine_info,
2376                                             engine_count, engine_classes,
2377                                             0 /* vm_id */,
2378                                             (uint32_t *)&device->context_id))
2379          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2380                             "kernel context creation failed");
2381    } else {
2382       assert(num_queues == 1);
2383       if (!intel_gem_create_context(device->fd, &device->context_id))
2384          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2385    }
2386 
2387    if (result != VK_SUCCESS)
2388       return result;
2389 
2390    /* Here we tell the kernel not to attempt to recover our context but
2391     * immediately (on the next batchbuffer submission) report that the
2392     * context is lost, and we will do the recovery ourselves.  In the case
2393     * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
2394     * the client clean up the pieces.
2395     */
2396    anv_gem_set_context_param(device->fd, device->context_id,
2397                              I915_CONTEXT_PARAM_RECOVERABLE, false);
2398 
2399    /* Check if client specified queue priority. */
2400    const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
2401       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2402                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
2403 
2404    VkQueueGlobalPriorityKHR priority =
2405       queue_priority ? queue_priority->globalPriority :
2406          VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
2407 
2408    /* As per spec, the driver implementation may deny requests to acquire
2409     * a priority above the default priority (MEDIUM) if the caller does not
2410     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
2411     * is returned.
2412     */
2413    if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
2414       int err = anv_gem_set_context_param(device->fd, device->context_id,
2415                                           I915_CONTEXT_PARAM_PRIORITY,
2416                                           vk_priority_to_gen(priority));
2417       if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
2418          result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
2419          goto fail_context;
2420       }
2421    }
2422 
2423    return result;
2424 
2425 fail_context:
2426    intel_gem_destroy_context(device->fd, device->context_id);
2427    return result;
2428 }
2429 
2430 VkResult anv_CreateDevice(
2431     VkPhysicalDevice                            physicalDevice,
2432     const VkDeviceCreateInfo*                   pCreateInfo,
2433     const VkAllocationCallbacks*                pAllocator,
2434     VkDevice*                                   pDevice)
2435 {
2436    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2437    VkResult result;
2438    struct anv_device *device;
2439 
2440    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2441 
2442    /* Check requested queues and fail if we are requested to create any
2443     * queues with flags we don't support.
2444     */
2445    assert(pCreateInfo->queueCreateInfoCount > 0);
2446    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2447       if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2448          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2449    }
2450 
2451    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2452                        sizeof(*device), 8,
2453                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2454    if (!device)
2455       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2456 
2457    struct vk_device_dispatch_table dispatch_table;
2458 
2459    bool override_initial_entrypoints = true;
2460    if (physical_device->instance->vk.app_info.app_name &&
2461        !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
2462       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &doom64_device_entrypoints, true);
2463       override_initial_entrypoints = false;
2464    }
2465    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2466       anv_genX(&physical_device->info, device_entrypoints),
2467       override_initial_entrypoints);
2468    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2469       &anv_device_entrypoints, false);
2470    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2471       &wsi_device_entrypoints, false);
2472 
2473    result = vk_device_init(&device->vk, &physical_device->vk,
2474                            &dispatch_table, pCreateInfo, pAllocator);
2475    if (result != VK_SUCCESS)
2476       goto fail_alloc;
2477 
2478    if (INTEL_DEBUG(DEBUG_BATCH)) {
2479       const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
2480 
2481       intel_batch_decode_ctx_init_elk(&device->decoder_ctx,
2482                                       &physical_device->compiler->isa,
2483                                       &physical_device->info,
2484                                       stderr, decode_flags, NULL,
2485                                       decode_get_bo, NULL, device);
2486 
2487       device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
2488       device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
2489       device->decoder_ctx.instruction_base =
2490          INSTRUCTION_STATE_POOL_MIN_ADDRESS;
2491    }
2492 
2493    anv_device_set_physical(device, physical_device);
2494 
2495    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2496    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2497    if (device->fd == -1) {
2498       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2499       goto fail_device;
2500    }
2501 
2502    device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
2503    device->vk.check_status = anv_device_check_status;
2504    device->vk.get_timestamp = anv_device_get_timestamp;
2505    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
2506    vk_device_set_drm_fd(&device->vk, device->fd);
2507 
2508    uint32_t num_queues = 0;
2509    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2510       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2511 
2512    result = anv_device_setup_context(device, pCreateInfo, num_queues);
2513    if (result != VK_SUCCESS)
2514       goto fail_fd;
2515 
2516    device->queues =
2517       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
2518                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2519    if (device->queues == NULL) {
2520       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2521       goto fail_context_id;
2522    }
2523 
2524    device->queue_count = 0;
2525    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2526       const VkDeviceQueueCreateInfo *queueCreateInfo =
2527          &pCreateInfo->pQueueCreateInfos[i];
2528 
2529       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
2530          /* When using legacy contexts, we use I915_EXEC_RENDER but, with
2531           * engine-based contexts, the bottom 6 bits of exec_flags are used
2532           * for the engine ID.
2533           */
2534          uint32_t exec_flags = device->physical->engine_info ?
2535                                device->queue_count : I915_EXEC_RENDER;
2536 
2537          result = anv_queue_init(device, &device->queues[device->queue_count],
2538                                  exec_flags, queueCreateInfo, j);
2539          if (result != VK_SUCCESS)
2540             goto fail_queues;
2541 
2542          device->queue_count++;
2543       }
2544    }
2545 
2546    if (!anv_use_relocations(physical_device)) {
2547       if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
2548          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2549          goto fail_queues;
2550       }
2551 
2552       /* keep the page with address zero out of the allocator */
2553       util_vma_heap_init(&device->vma_lo,
2554                          LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
2555 
2556       util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
2557                          CLIENT_VISIBLE_HEAP_SIZE);
2558 
2559       /* Leave the last 4GiB out of the high vma range, so that no state
2560        * base address + size can overflow 48 bits. For more information see
2561        * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
2562        */
2563       util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
2564                          physical_device->gtt_size - (1ull << 32) -
2565                          HIGH_HEAP_MIN_ADDRESS);
2566    }
2567 
2568    list_inithead(&device->memory_objects);
2569 
2570    /* On Broadwell and later, we can use batch chaining to more efficiently
2571     * implement growing command buffers.  On Haswell and earlier, the kernel
2572     * command parser gets in the way and we have to fall back to growing
2573     * the batch.
2574     */
2575    device->can_chain_batches = device->info->ver >= 8;
2576 
2577    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
2578       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2579       goto fail_vmas;
2580    }
2581 
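   /* The queue_submit condition variable is timed against CLOCK_MONOTONIC so
    * that timed waits are unaffected by wall-clock adjustments.
    */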
2582    pthread_condattr_t condattr;
2583    if (pthread_condattr_init(&condattr) != 0) {
2584       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2585       goto fail_mutex;
2586    }
2587    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2588       pthread_condattr_destroy(&condattr);
2589       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2590       goto fail_mutex;
2591    }
2592    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
2593       pthread_condattr_destroy(&condattr);
2594       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2595       goto fail_mutex;
2596    }
2597    pthread_condattr_destroy(&condattr);
2598 
2599    result = anv_bo_cache_init(&device->bo_cache, device);
2600    if (result != VK_SUCCESS)
2601       goto fail_queue_cond;
2602 
2603    anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
2604 
2605    /* Because scratch is also relative to General State Base Address, we leave
2606     * the base address 0 and start the pool memory at an offset.  This way we
2607     * get the correct offsets in the anv_states that get allocated from it.
2608     */
2609    result = anv_state_pool_init(&device->general_state_pool, device,
2610                                 "general pool",
2611                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
2612    if (result != VK_SUCCESS)
2613       goto fail_batch_bo_pool;
2614 
2615    result = anv_state_pool_init(&device->dynamic_state_pool, device,
2616                                 "dynamic pool",
2617                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
2618    if (result != VK_SUCCESS)
2619       goto fail_general_state_pool;
2620 
2621    if (device->info->ver >= 8) {
2622       /* The border color pointer is limited to 24 bits, so we need to make
2623        * sure that any such color used at any point in the program doesn't
2624        * exceed that limit.
2625        * We achieve that by reserving all the custom border colors we support
2626        * right off the bat, so they are close to the base address.
2627        */
2628       anv_state_reserved_pool_init(&device->custom_border_colors,
2629                                    &device->dynamic_state_pool,
2630                                    MAX_CUSTOM_BORDER_COLORS,
2631                                    sizeof(struct gfx8_border_color), 64);
2632    }
2633 
2634    result = anv_state_pool_init(&device->instruction_state_pool, device,
2635                                 "instruction pool",
2636                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
2637    if (result != VK_SUCCESS)
2638       goto fail_dynamic_state_pool;
2639 
2640    result = anv_state_pool_init(&device->surface_state_pool, device,
2641                                 "surface state pool",
2642                                 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
2643    if (result != VK_SUCCESS)
2644       goto fail_instruction_state_pool;
2645 
2646    if (!anv_use_relocations(physical_device)) {
2647       int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
2648                                (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
2649       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
2650       result = anv_state_pool_init(&device->binding_table_pool, device,
2651                                    "binding table pool",
2652                                    SURFACE_STATE_POOL_MIN_ADDRESS,
2653                                    bt_pool_offset,
2654                                    BINDING_TABLE_POOL_BLOCK_SIZE);
2655    }
2656    if (result != VK_SUCCESS)
2657       goto fail_surface_state_pool;
2658 
2659    result = anv_device_alloc_bo(device, "workaround", 4096,
2660                                 ANV_BO_ALLOC_CAPTURE |
2661                                 ANV_BO_ALLOC_MAPPED,
2662                                 0 /* explicit_address */,
2663                                 &device->workaround_bo);
2664    if (result != VK_SUCCESS)
2665       goto fail_binding_table_pool;
2666 
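   /* intel_debug_write_identifiers() places a debug identifier block at the
    * start of the workaround BO and returns its size; workaround writes start
    * at the next 32-byte boundary after that block.
    */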
2667    device->workaround_address = (struct anv_address) {
2668       .bo = device->workaround_bo,
2669       .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
2670                                                     device->workaround_bo->size,
2671                                                     "hasvk"), 32),
2672    };
2673 
2674    device->workarounds.doom64_images = NULL;
2675 
2676    device->debug_frame_desc =
2677       intel_debug_get_identifier_block(device->workaround_bo->map,
2678                                        device->workaround_bo->size,
2679                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
2680 
2681    result = anv_device_init_trivial_batch(device);
2682    if (result != VK_SUCCESS)
2683       goto fail_workaround_bo;
2684 
2685    /* Allocate a null surface state at surface state offset 0.  This makes
2686     * NULL descriptor handling trivial because we can just memset structures
2687     * to zero and they have a valid descriptor.
2688     */
2689    device->null_surface_state =
2690       anv_state_pool_alloc(&device->surface_state_pool,
2691                            device->isl_dev.ss.size,
2692                            device->isl_dev.ss.align);
2693    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
2694                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
2695    assert(device->null_surface_state.offset == 0);
2696 
2697    anv_scratch_pool_init(device, &device->scratch_pool);
2698 
2699    result = anv_genX(device->info, init_device_state)(device);
2700    if (result != VK_SUCCESS)
2701       goto fail_trivial_batch_bo_and_scratch_pool;
2702 
2703    struct vk_pipeline_cache_create_info pcc_info = { };
2704    device->default_pipeline_cache =
2705       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2706    if (!device->default_pipeline_cache) {
2707       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2708       goto fail_trivial_batch_bo_and_scratch_pool;
2709    }
2710 
2711    /* Internal shaders need their own pipeline cache because, unlike the rest
2712     * of ANV, they won't work at all without a cache: they rely on it to keep
2713     * their shaders resident while they run. Therefore, we need a special
2714     * cache just for BLORP/RT that's forced to always be enabled.
2715     */
2716    pcc_info.force_enable = true;
2717    device->internal_cache =
2718       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2719    if (device->internal_cache == NULL) {
2720       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2721       goto fail_default_pipeline_cache;
2722    }
2723 
2724    device->robust_buffer_access =
2725       device->vk.enabled_features.robustBufferAccess ||
2726       device->vk.enabled_features.nullDescriptor;
2727 
2728    anv_device_init_blorp(device);
2729 
2730    anv_device_init_border_colors(device);
2731 
2732    anv_device_perf_init(device);
2733 
2734    anv_device_utrace_init(device);
2735 
2736    *pDevice = anv_device_to_handle(device);
2737 
2738    return VK_SUCCESS;
2739 
2740  fail_default_pipeline_cache:
2741    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2742  fail_trivial_batch_bo_and_scratch_pool:
2743    anv_scratch_pool_finish(device, &device->scratch_pool);
2744    anv_device_release_bo(device, device->trivial_batch_bo);
2745  fail_workaround_bo:
2746    anv_device_release_bo(device, device->workaround_bo);
2747  fail_binding_table_pool:
2748    if (!anv_use_relocations(physical_device))
2749       anv_state_pool_finish(&device->binding_table_pool);
2750  fail_surface_state_pool:
2751    anv_state_pool_finish(&device->surface_state_pool);
2752  fail_instruction_state_pool:
2753    anv_state_pool_finish(&device->instruction_state_pool);
2754  fail_dynamic_state_pool:
2755    if (device->info->ver >= 8)
2756       anv_state_reserved_pool_finish(&device->custom_border_colors);
2757    anv_state_pool_finish(&device->dynamic_state_pool);
2758  fail_general_state_pool:
2759    anv_state_pool_finish(&device->general_state_pool);
2760  fail_batch_bo_pool:
2761    anv_bo_pool_finish(&device->batch_bo_pool);
2762    anv_bo_cache_finish(&device->bo_cache);
2763  fail_queue_cond:
2764    pthread_cond_destroy(&device->queue_submit);
2765  fail_mutex:
2766    pthread_mutex_destroy(&device->mutex);
2767  fail_vmas:
2768    if (!anv_use_relocations(physical_device)) {
2769       util_vma_heap_finish(&device->vma_hi);
2770       util_vma_heap_finish(&device->vma_cva);
2771       util_vma_heap_finish(&device->vma_lo);
2772    }
2773  fail_queues:
2774    for (uint32_t i = 0; i < device->queue_count; i++)
2775       anv_queue_finish(&device->queues[i]);
2776    vk_free(&device->vk.alloc, device->queues);
2777  fail_context_id:
2778    intel_gem_destroy_context(device->fd, device->context_id);
2779  fail_fd:
2780    close(device->fd);
2781  fail_device:
2782    vk_device_finish(&device->vk);
2783  fail_alloc:
2784    vk_free(&device->vk.alloc, device);
2785 
2786    return result;
2787 }
2788 
2789 void anv_DestroyDevice(
2790     VkDevice                                    _device,
2791     const VkAllocationCallbacks*                pAllocator)
2792 {
2793    ANV_FROM_HANDLE(anv_device, device, _device);
2794 
2795    if (!device)
2796       return;
2797 
2798    anv_device_utrace_finish(device);
2799 
2800    anv_device_finish_blorp(device);
2801 
2802    vk_pipeline_cache_destroy(device->internal_cache, NULL);
2803    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2804 
2805 #ifdef HAVE_VALGRIND
2806    /* We only need to free these to prevent valgrind errors.  The backing
2807     * BO will go away in a couple of lines so we don't actually leak.
2808     */
2809    if (device->info->ver >= 8)
2810       anv_state_reserved_pool_finish(&device->custom_border_colors);
2811    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
2812    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
2813 #endif
2814 
2815    anv_scratch_pool_finish(device, &device->scratch_pool);
2816 
2817    anv_device_release_bo(device, device->workaround_bo);
2818    anv_device_release_bo(device, device->trivial_batch_bo);
2819 
2820    if (!anv_use_relocations(device->physical))
2821       anv_state_pool_finish(&device->binding_table_pool);
2822    anv_state_pool_finish(&device->surface_state_pool);
2823    anv_state_pool_finish(&device->instruction_state_pool);
2824    anv_state_pool_finish(&device->dynamic_state_pool);
2825    anv_state_pool_finish(&device->general_state_pool);
2826 
2827    anv_bo_pool_finish(&device->batch_bo_pool);
2828 
2829    anv_bo_cache_finish(&device->bo_cache);
2830 
2831    if (!anv_use_relocations(device->physical)) {
2832       util_vma_heap_finish(&device->vma_hi);
2833       util_vma_heap_finish(&device->vma_cva);
2834       util_vma_heap_finish(&device->vma_lo);
2835    }
2836 
2837    pthread_cond_destroy(&device->queue_submit);
2838    pthread_mutex_destroy(&device->mutex);
2839 
2840    for (uint32_t i = 0; i < device->queue_count; i++)
2841       anv_queue_finish(&device->queues[i]);
2842    vk_free(&device->vk.alloc, device->queues);
2843 
2844    intel_gem_destroy_context(device->fd, device->context_id);
2845 
2846    if (INTEL_DEBUG(DEBUG_BATCH))
2847       intel_batch_decode_ctx_finish(&device->decoder_ctx);
2848 
2849    close(device->fd);
2850 
2851    vk_device_finish(&device->vk);
2852    vk_free(&device->vk.alloc, device);
2853 }
2854 
2855 VkResult anv_EnumerateInstanceLayerProperties(
2856     uint32_t*                                   pPropertyCount,
2857     VkLayerProperties*                          pProperties)
2858 {
2859    if (pProperties == NULL) {
2860       *pPropertyCount = 0;
2861       return VK_SUCCESS;
2862    }
2863 
2864    /* None supported at this time */
2865    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2866 }
2867 
2868 static VkResult
2869 anv_device_check_status(struct vk_device *vk_device)
2870 {
2871    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2872 
2873    uint32_t active, pending;
2874    int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
2875                                              &active, &pending);
2876    if (ret == -1) {
2877       /* We don't know the real error. */
2878       return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
2879    }
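   /* "active" counts resets that hit a batch of ours that was executing on
    * the GPU (we were likely the guilty context); "pending" counts resets
    * that discarded batches we had queued but not yet started.
    */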
2880 
2881    if (active) {
2882       return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
2883    } else if (pending) {
2884       return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
2885    }
2886 
2887    return VK_SUCCESS;
2888 }
2889 
2890 VkResult
2891 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2892                 int64_t timeout)
2893 {
2894    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
2895    if (ret == -1 && errno == ETIME) {
2896       return VK_TIMEOUT;
2897    } else if (ret == -1) {
2898       /* We don't know the real error. */
2899       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
2900    } else {
2901       return VK_SUCCESS;
2902    }
2903 }
2904 
2905 uint64_t
2906 anv_vma_alloc(struct anv_device *device,
2907               uint64_t size, uint64_t align,
2908               enum anv_bo_alloc_flags alloc_flags,
2909               uint64_t client_address)
2910 {
2911    pthread_mutex_lock(&device->vma_mutex);
2912 
2913    uint64_t addr = 0;
2914 
2915    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
2916       if (client_address) {
2917          if (util_vma_heap_alloc_addr(&device->vma_cva,
2918                                       client_address, size)) {
2919             addr = client_address;
2920          }
2921       } else {
2922          addr = util_vma_heap_alloc(&device->vma_cva, size, align);
2923       }
2924       /* We don't want to fall back to other heaps */
2925       goto done;
2926    }
2927 
2928    assert(client_address == 0);
2929 
2930    if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
2931       addr = util_vma_heap_alloc(&device->vma_hi, size, align);
2932 
2933    if (addr == 0)
2934       addr = util_vma_heap_alloc(&device->vma_lo, size, align);
2935 
2936 done:
2937    pthread_mutex_unlock(&device->vma_mutex);
2938 
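   /* Addresses are returned in canonical (bit-47 sign-extended) form;
    * anv_vma_free() converts back with intel_48b_address() before handing
    * the address to the heaps.
    */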
2939    assert(addr == intel_48b_address(addr));
2940    return intel_canonical_address(addr);
2941 }
2942 
2943 void
2944 anv_vma_free(struct anv_device *device,
2945              uint64_t address, uint64_t size)
2946 {
2947    const uint64_t addr_48b = intel_48b_address(address);
2948 
2949    pthread_mutex_lock(&device->vma_mutex);
2950 
2951    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
2952        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
2953       util_vma_heap_free(&device->vma_lo, addr_48b, size);
2954    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
2955               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
2956       util_vma_heap_free(&device->vma_cva, addr_48b, size);
2957    } else {
2958       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
2959       util_vma_heap_free(&device->vma_hi, addr_48b, size);
2960    }
2961 
2962    pthread_mutex_unlock(&device->vma_mutex);
2963 }
2964 
2965 VkResult anv_AllocateMemory(
2966     VkDevice                                    _device,
2967     const VkMemoryAllocateInfo*                 pAllocateInfo,
2968     const VkAllocationCallbacks*                pAllocator,
2969     VkDeviceMemory*                             pMem)
2970 {
2971    ANV_FROM_HANDLE(anv_device, device, _device);
2972    struct anv_physical_device *pdevice = device->physical;
2973    struct anv_device_memory *mem;
2974    VkResult result = VK_SUCCESS;
2975 
2976    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2977 
2978    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
2979    assert(pAllocateInfo->allocationSize > 0);
2980 
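   /* BO allocations are page-granular, so do the size-limit and heap-budget
    * checks below against the 4 KiB-aligned size.
    */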
2981    VkDeviceSize aligned_alloc_size =
2982       align64(pAllocateInfo->allocationSize, 4096);
2983 
2984    if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
2985       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2986 
2987    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
2988    struct anv_memory_type *mem_type =
2989       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
2990    assert(mem_type->heapIndex < pdevice->memory.heap_count);
2991    struct anv_memory_heap *mem_heap =
2992       &pdevice->memory.heaps[mem_type->heapIndex];
2993 
2994    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2995    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
2996       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2997 
2998    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
2999                          VK_OBJECT_TYPE_DEVICE_MEMORY);
3000    if (mem == NULL)
3001       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3002 
3003    mem->type = mem_type;
3004    mem->map = NULL;
3005    mem->map_size = 0;
3006    mem->map_delta = 0;
3007    mem->ahw = NULL;
3008    mem->host_ptr = NULL;
3009 
3010    enum anv_bo_alloc_flags alloc_flags = 0;
3011 
3012    const VkExportMemoryAllocateInfo *export_info = NULL;
3013    const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3014    const VkImportMemoryFdInfoKHR *fd_info = NULL;
3015    const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3016    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3017    VkMemoryAllocateFlags vk_flags = 0;
3018    uint64_t client_address = 0;
3019 
3020    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3021       switch (ext->sType) {
3022       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3023          export_info = (void *)ext;
3024          break;
3025 
3026       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3027          ahw_import_info = (void *)ext;
3028          break;
3029 
3030       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3031          fd_info = (void *)ext;
3032          break;
3033 
3034       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3035          host_ptr_info = (void *)ext;
3036          break;
3037 
3038       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3039          const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3040          vk_flags = flags_info->flags;
3041          break;
3042       }
3043 
3044       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3045          dedicated_info = (void *)ext;
3046          break;
3047 
3048       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3049          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3050             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3051          client_address = addr_info->opaqueCaptureAddress;
3052          break;
3053       }
3054 
3055       default:
3056          if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3057             /* this isn't a real enum value,
3058              * so use a conditional to avoid a compiler warning
3059              */
3060             vk_debug_ignored_stype(ext->sType);
3061          break;
3062       }
3063    }
3064 
3065    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3066       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3067 
3068    if ((export_info && export_info->handleTypes) ||
3069        (fd_info && fd_info->handleType) ||
3070        (host_ptr_info && host_ptr_info->handleType)) {
3071       /* Anything imported or exported is EXTERNAL */
3072       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3073    }
3074 
3075    /* Check if we need to support Android HW buffer export. If so,
3076     * create AHardwareBuffer and import memory from it.
3077     */
3078    bool android_export = false;
3079    if (export_info && export_info->handleTypes &
3080        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3081       android_export = true;
3082 
3083    if (ahw_import_info) {
3084       result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3085       if (result != VK_SUCCESS)
3086          goto fail;
3087 
3088       goto success;
3089    } else if (android_export) {
3090       result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3091       if (result != VK_SUCCESS)
3092          goto fail;
3093 
3094       goto success;
3095    }
3096 
3097    /* The Vulkan spec permits handleType to be 0, in which case the struct is
3098     * ignored.
3099     */
3100    if (fd_info && fd_info->handleType) {
3101       /* At the moment, we support only the below handle types. */
3102       assert(fd_info->handleType ==
3103                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3104              fd_info->handleType ==
3105                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3106 
3107       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3108                                     client_address, &mem->bo);
3109       if (result != VK_SUCCESS)
3110          goto fail;
3111 
3112       /* For security purposes, we reject importing the bo if it's smaller
3113        * than the requested allocation size.  This prevents a malicious client
3114        * from passing a buffer to a trusted client, lying about the size, and
3115        * telling the trusted client to try and texture from an image that goes
3116        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
3117        * in the trusted client.  The trusted client can protect itself against
3118        * this sort of attack but only if it can trust the buffer size.
3119        */
3120       if (mem->bo->size < aligned_alloc_size) {
3121          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3122                             "aligned allocationSize too large for "
3123                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3124                             "%"PRIu64"B > %"PRIu64"B",
3125                             aligned_alloc_size, mem->bo->size);
3126          anv_device_release_bo(device, mem->bo);
3127          goto fail;
3128       }
3129 
3130       /* From the Vulkan spec:
3131        *
3132        *    "Importing memory from a file descriptor transfers ownership of
3133        *    the file descriptor from the application to the Vulkan
3134        *    implementation. The application must not perform any operations on
3135        *    the file descriptor after a successful import."
3136        *
3137        * If the import fails, we leave the file descriptor open.
3138        */
3139       close(fd_info->fd);
3140       goto success;
3141    }
3142 
3143    if (host_ptr_info && host_ptr_info->handleType) {
3144       if (host_ptr_info->handleType ==
3145           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3146          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3147          goto fail;
3148       }
3149 
3150       assert(host_ptr_info->handleType ==
3151              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3152 
3153       result = anv_device_import_bo_from_host_ptr(device,
3154                                                   host_ptr_info->pHostPointer,
3155                                                   pAllocateInfo->allocationSize,
3156                                                   alloc_flags,
3157                                                   client_address,
3158                                                   &mem->bo);
3159       if (result != VK_SUCCESS)
3160          goto fail;
3161 
3162       mem->host_ptr = host_ptr_info->pHostPointer;
3163       goto success;
3164    }
3165 
3166    /* Regular allocate (not importing memory). */
3167 
3168    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3169                                 alloc_flags, client_address, &mem->bo);
3170    if (result != VK_SUCCESS)
3171       goto fail;
3172 
3173    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
3174       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
3175 
3176       /* Some legacy (non-modifiers) consumers need the tiling to be set on
3177        * the BO.  In this case, we have a dedicated allocation.
3178        */
3179       if (image->vk.wsi_legacy_scanout) {
3180          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
3181          result = anv_device_set_bo_tiling(device, mem->bo,
3182                                            surf->row_pitch_B,
3183                                            surf->tiling);
3184          if (result != VK_SUCCESS) {
3185             anv_device_release_bo(device, mem->bo);
3186             goto fail;
3187          }
3188       }
3189    }
3190 
3191  success:
3192    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3193    if (mem_heap_used > mem_heap->size) {
3194       p_atomic_add(&mem_heap->used, -mem->bo->size);
3195       anv_device_release_bo(device, mem->bo);
3196       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3197                          "Out of heap memory");
3198       goto fail;
3199    }
3200 
3201    pthread_mutex_lock(&device->mutex);
3202    list_addtail(&mem->link, &device->memory_objects);
3203    pthread_mutex_unlock(&device->mutex);
3204 
3205    *pMem = anv_device_memory_to_handle(mem);
3206 
3207    return VK_SUCCESS;
3208 
3209  fail:
3210    vk_object_free(&device->vk, pAllocator, mem);
3211 
3212    return result;
3213 }
3214 
3215 VkResult anv_GetMemoryFdKHR(
3216     VkDevice                                    device_h,
3217     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
3218     int*                                        pFd)
3219 {
3220    ANV_FROM_HANDLE(anv_device, dev, device_h);
3221    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
3222 
3223    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3224 
3225    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3226           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3227 
3228    return anv_device_export_bo(dev, mem->bo, pFd);
3229 }
3230 
3231 VkResult anv_GetMemoryFdPropertiesKHR(
3232     VkDevice                                    _device,
3233     VkExternalMemoryHandleTypeFlagBits          handleType,
3234     int                                         fd,
3235     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
3236 {
3237    ANV_FROM_HANDLE(anv_device, device, _device);
3238 
3239    switch (handleType) {
3240    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
3241       /* dma-buf can be imported as any memory type */
3242       pMemoryFdProperties->memoryTypeBits =
3243          (1 << device->physical->memory.type_count) - 1;
3244       return VK_SUCCESS;
3245 
3246    default:
3247       /* The valid usage section for this function says:
3248        *
3249        *    "handleType must not be one of the handle types defined as
3250        *    opaque."
3251        *
3252        * So opaque handle types fall into the default "unsupported" case.
3253        */
3254       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3255    }
3256 }
3257 
3258 VkResult anv_GetMemoryHostPointerPropertiesEXT(
3259    VkDevice                                    _device,
3260    VkExternalMemoryHandleTypeFlagBits          handleType,
3261    const void*                                 pHostPointer,
3262    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
3263 {
3264    ANV_FROM_HANDLE(anv_device, device, _device);
3265 
3266    assert(pMemoryHostPointerProperties->sType ==
3267           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
3268 
3269    switch (handleType) {
3270    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
3271       /* Host memory can be imported as any memory type. */
3272       pMemoryHostPointerProperties->memoryTypeBits =
3273          (1ull << device->physical->memory.type_count) - 1;
3274 
3275       return VK_SUCCESS;
3276 
3277    default:
3278       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
3279    }
3280 }
3281 
3282 void anv_FreeMemory(
3283     VkDevice                                    _device,
3284     VkDeviceMemory                              _mem,
3285     const VkAllocationCallbacks*                pAllocator)
3286 {
3287    ANV_FROM_HANDLE(anv_device, device, _device);
3288    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
3289 
3290    if (mem == NULL)
3291       return;
3292 
3293    pthread_mutex_lock(&device->mutex);
3294    list_del(&mem->link);
3295    pthread_mutex_unlock(&device->mutex);
3296 
3297    if (mem->map)
3298       anv_UnmapMemory(_device, _mem);
3299 
3300    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
3301                 -mem->bo->size);
3302 
3303    anv_device_release_bo(device, mem->bo);
3304 
3305 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
3306    if (mem->ahw)
3307       AHardwareBuffer_release(mem->ahw);
3308 #endif
3309 
3310    vk_object_free(&device->vk, pAllocator, mem);
3311 }
3312 
3313 VkResult anv_MapMemory(
3314     VkDevice                                    _device,
3315     VkDeviceMemory                              _memory,
3316     VkDeviceSize                                offset,
3317     VkDeviceSize                                size,
3318     VkMemoryMapFlags                            flags,
3319     void**                                      ppData)
3320 {
3321    ANV_FROM_HANDLE(anv_device, device, _device);
3322    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3323 
3324    if (mem == NULL) {
3325       *ppData = NULL;
3326       return VK_SUCCESS;
3327    }
3328 
3329    if (mem->host_ptr) {
3330       *ppData = mem->host_ptr + offset;
3331       return VK_SUCCESS;
3332    }
3333 
3334    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3335     *
3336     *  * memory must have been created with a memory type that reports
3337     *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
3338     */
3339    if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
3340       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3341                        "Memory object not mappable.");
3342    }
3343 
3344    if (size == VK_WHOLE_SIZE)
3345       size = mem->bo->size - offset;
3346 
3347    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3348     *
3349     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
3351     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
3352     *    equal to the size of the memory minus offset
3353     */
3354    assert(size > 0);
3355    assert(offset + size <= mem->bo->size);
3356 
3357    if (size != (size_t)size) {
3358       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3359                        "requested size 0x%"PRIx64" does not fit in %u bits",
3360                        size, (unsigned)(sizeof(size_t) * 8));
3361    }
3362 
3363    /* From the Vulkan 1.2.194 spec:
3364     *
3365     *    "memory must not be currently host mapped"
3366     */
3367    if (mem->map != NULL) {
3368       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3369                        "Memory object already mapped.");
3370    }
3371 
3372    uint32_t gem_flags = 0;
3373 
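   /* On non-LLC platforms, coherent memory types get a write-combining CPU
    * mapping so that host writes don't linger in the CPU caches.
    */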
3374    if (!device->info->has_llc &&
3375        (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
3376       gem_flags |= I915_MMAP_WC;
3377 
3378    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
3379    uint64_t map_offset;
3380    if (!device->physical->info.has_mmap_offset)
3381       map_offset = offset & ~4095ull;
3382    else
3383       map_offset = 0;
3384    assert(offset >= map_offset);
3385    uint64_t map_size = (offset + size) - map_offset;
3386 
3387    /* Let's map whole pages */
3388    map_size = align64(map_size, 4096);
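   /* Worked example (kernel without mmap_offset): offset = 5000, size = 100
    * gives map_offset = 4096, map_size = align64(1004, 4096) = 4096, and the
    * pointer returned below is map + map_delta = map + 904.
    */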
3389 
3390    void *map;
3391    VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
3392                                        map_size, gem_flags, &map);
3393    if (result != VK_SUCCESS)
3394       return result;
3395 
3396    mem->map = map;
3397    mem->map_size = map_size;
3398    mem->map_delta = (offset - map_offset);
3399    *ppData = mem->map + mem->map_delta;
3400 
3401    return VK_SUCCESS;
3402 }
3403 
3404 void anv_UnmapMemory(
3405     VkDevice                                    _device,
3406     VkDeviceMemory                              _memory)
3407 {
3408    ANV_FROM_HANDLE(anv_device, device, _device);
3409    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3410 
3411    if (mem == NULL || mem->host_ptr)
3412       return;
3413 
3414    anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
3415 
3416    mem->map = NULL;
3417    mem->map_size = 0;
3418    mem->map_delta = 0;
3419 }
3420 
3421 VkResult anv_FlushMappedMemoryRanges(
3422     VkDevice                                    _device,
3423     uint32_t                                    memoryRangeCount,
3424     const VkMappedMemoryRange*                  pMemoryRanges)
3425 {
3426    ANV_FROM_HANDLE(anv_device, device, _device);
3427 
3428    if (!device->physical->memory.need_flush)
3429       return VK_SUCCESS;
3430 
3431 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3432    /* Make sure the writes we're flushing have landed. */
3433    __builtin_ia32_mfence();
3434 #endif
3435 
3436    for (uint32_t i = 0; i < memoryRangeCount; i++) {
3437       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3438       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3439          continue;
3440 
3441       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3442       if (map_offset >= mem->map_size)
3443          continue;
3444 
3445 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3446       intel_flush_range(mem->map + map_offset,
3447                         MIN2(pMemoryRanges[i].size,
3448                              mem->map_size - map_offset));
3449 #endif
3450    }
3451 
3452    return VK_SUCCESS;
3453 }
3454 
3455 VkResult anv_InvalidateMappedMemoryRanges(
3456     VkDevice                                    _device,
3457     uint32_t                                    memoryRangeCount,
3458     const VkMappedMemoryRange*                  pMemoryRanges)
3459 {
3460    ANV_FROM_HANDLE(anv_device, device, _device);
3461 
3462    if (!device->physical->memory.need_flush)
3463       return VK_SUCCESS;
3464 
3465    for (uint32_t i = 0; i < memoryRangeCount; i++) {
3466       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3467       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3468          continue;
3469 
3470       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3471       if (map_offset >= mem->map_size)
3472          continue;
3473 
3474 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3475       intel_invalidate_range(mem->map + map_offset,
3476                              MIN2(pMemoryRanges[i].size,
3477                                   mem->map_size - map_offset));
3478 #endif
3479    }
3480 
3481 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3482    /* Make sure no reads get moved up above the invalidate. */
3483    __builtin_ia32_mfence();
3484 #endif
3485 
3486    return VK_SUCCESS;
3487 }
3488 
3489 void anv_GetDeviceMemoryCommitment(
3490     VkDevice                                    device,
3491     VkDeviceMemory                              memory,
3492     VkDeviceSize*                               pCommittedMemoryInBytes)
3493 {
3494    *pCommittedMemoryInBytes = 0;
3495 }
3496 
3497 static void
3498 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
3499 {
3500    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
3501    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
3502 
3503    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
3504 
3505    if (mem) {
3506       assert(pBindInfo->memoryOffset < mem->bo->size);
3507       assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
3508       buffer->address = (struct anv_address) {
3509          .bo = mem->bo,
3510          .offset = pBindInfo->memoryOffset,
3511       };
3512    } else {
3513       buffer->address = ANV_NULL_ADDRESS;
3514    }
3515 }
3516 
3517 VkResult anv_BindBufferMemory2(
3518     VkDevice                                    device,
3519     uint32_t                                    bindInfoCount,
3520     const VkBindBufferMemoryInfo*               pBindInfos)
3521 {
3522    for (uint32_t i = 0; i < bindInfoCount; i++)
3523       anv_bind_buffer_memory(&pBindInfos[i]);
3524 
3525    return VK_SUCCESS;
3526 }
3527 
3528 VkResult anv_QueueBindSparse(
3529     VkQueue                                     _queue,
3530     uint32_t                                    bindInfoCount,
3531     const VkBindSparseInfo*                     pBindInfo,
3532     VkFence                                     fence)
3533 {
3534    ANV_FROM_HANDLE(anv_queue, queue, _queue);
3535    if (vk_device_is_lost(&queue->device->vk))
3536       return VK_ERROR_DEVICE_LOST;
3537 
3538    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
3539 }
3540 
3541 // Event functions
3542 
3543 VkResult anv_CreateEvent(
3544     VkDevice                                    _device,
3545     const VkEventCreateInfo*                    pCreateInfo,
3546     const VkAllocationCallbacks*                pAllocator,
3547     VkEvent*                                    pEvent)
3548 {
3549    ANV_FROM_HANDLE(anv_device, device, _device);
3550    struct anv_event *event;
3551 
3552    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
3553 
3554    event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
3555                            VK_OBJECT_TYPE_EVENT);
3556    if (event == NULL)
3557       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3558 
3559    event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
3560                                        sizeof(uint64_t), 8);
3561    *(uint64_t *)event->state.map = VK_EVENT_RESET;
3562 
3563    *pEvent = anv_event_to_handle(event);
3564 
3565    return VK_SUCCESS;
3566 }
3567 
3568 void anv_DestroyEvent(
3569     VkDevice                                    _device,
3570     VkEvent                                     _event,
3571     const VkAllocationCallbacks*                pAllocator)
3572 {
3573    ANV_FROM_HANDLE(anv_device, device, _device);
3574    ANV_FROM_HANDLE(anv_event, event, _event);
3575 
3576    if (!event)
3577       return;
3578 
3579    anv_state_pool_free(&device->dynamic_state_pool, event->state);
3580 
3581    vk_object_free(&device->vk, pAllocator, event);
3582 }
3583 
3584 VkResult anv_GetEventStatus(
3585     VkDevice                                    _device,
3586     VkEvent                                     _event)
3587 {
3588    ANV_FROM_HANDLE(anv_device, device, _device);
3589    ANV_FROM_HANDLE(anv_event, event, _event);
3590 
3591    if (vk_device_is_lost(&device->vk))
3592       return VK_ERROR_DEVICE_LOST;
3593 
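   /* The event's state word holds the VkResult value directly (VK_EVENT_SET
    * or VK_EVENT_RESET), so it can be returned as-is.
    */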
3594    return *(uint64_t *)event->state.map;
3595 }
3596 
3597 VkResult anv_SetEvent(
3598     VkDevice                                    _device,
3599     VkEvent                                     _event)
3600 {
3601    ANV_FROM_HANDLE(anv_event, event, _event);
3602 
3603    *(uint64_t *)event->state.map = VK_EVENT_SET;
3604 
3605    return VK_SUCCESS;
3606 }
3607 
3608 VkResult anv_ResetEvent(
3609     VkDevice                                    _device,
3610     VkEvent                                     _event)
3611 {
3612    ANV_FROM_HANDLE(anv_event, event, _event);
3613 
3614    *(uint64_t *)event->state.map = VK_EVENT_RESET;
3615 
3616    return VK_SUCCESS;
3617 }
3618 
3619 // Buffer functions
3620 
3621 static void
3622 anv_get_buffer_memory_requirements(struct anv_device *device,
3623                                    VkDeviceSize size,
3624                                    VkBufferUsageFlags usage,
3625                                    VkMemoryRequirements2* pMemoryRequirements)
3626 {
3627    /* The Vulkan spec (git aaed022) says:
3628     *
3629     *    memoryTypeBits is a bitfield and contains one bit set for every
3630     *    supported memory type for the resource. The bit `1<<i` is set if and
3631     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
3632     *    structure for the physical device is supported.
3633     */
3634    uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
3635 
3636    /* Base alignment requirement of a cache line */
3637    uint32_t alignment = 16;
3638 
3639    if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
3640       alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
3641 
3642    pMemoryRequirements->memoryRequirements.size = size;
3643    pMemoryRequirements->memoryRequirements.alignment = alignment;
3644 
3645    /* Storage and Uniform buffers should have their size aligned to
3646     * 32 bits to avoid boundary checks when the last DWord is not complete.
3647     * This ensures that no internal padding is needed for
3648     * 16-bit types.
3649     */
3650    if (device->vk.enabled_features.robustBufferAccess &&
3651        (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
3652         usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
3653       pMemoryRequirements->memoryRequirements.size = align64(size, 4);
3654 
3655    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3656 
3657    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3658       switch (ext->sType) {
3659       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3660          VkMemoryDedicatedRequirements *requirements = (void *)ext;
3661          requirements->prefersDedicatedAllocation = false;
3662          requirements->requiresDedicatedAllocation = false;
3663          break;
3664       }
3665 
3666       default:
3667          vk_debug_ignored_stype(ext->sType);
3668          break;
3669       }
3670    }
3671 }
3672 
3673 void anv_GetBufferMemoryRequirements2(
3674     VkDevice                                    _device,
3675     const VkBufferMemoryRequirementsInfo2*      pInfo,
3676     VkMemoryRequirements2*                      pMemoryRequirements)
3677 {
3678    ANV_FROM_HANDLE(anv_device, device, _device);
3679    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3680 
3681    anv_get_buffer_memory_requirements(device,
3682                                       buffer->vk.size,
3683                                       buffer->vk.usage,
3684                                       pMemoryRequirements);
3685 }
3686 
3687 void anv_GetDeviceBufferMemoryRequirements(
3688     VkDevice                                    _device,
3689     const VkDeviceBufferMemoryRequirements*     pInfo,
3690     VkMemoryRequirements2*                      pMemoryRequirements)
3691 {
3692    ANV_FROM_HANDLE(anv_device, device, _device);
3693 
3694    anv_get_buffer_memory_requirements(device,
3695                                       pInfo->pCreateInfo->size,
3696                                       pInfo->pCreateInfo->usage,
3697                                       pMemoryRequirements);
3698 }
3699 
3700 VkResult anv_CreateBuffer(
3701     VkDevice                                    _device,
3702     const VkBufferCreateInfo*                   pCreateInfo,
3703     const VkAllocationCallbacks*                pAllocator,
3704     VkBuffer*                                   pBuffer)
3705 {
3706    ANV_FROM_HANDLE(anv_device, device, _device);
3707    struct anv_buffer *buffer;
3708 
3709    /* Don't allow creating buffers bigger than our address space.  The real
3710     * issue here is that we may align up the buffer size and we don't want
3711     * doing so to cause roll-over.  However, no one has any business
3712     * allocating a buffer larger than our GTT size.
3713     */
3714    if (pCreateInfo->size > device->physical->gtt_size)
3715       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3716 
3717    buffer = vk_buffer_create(&device->vk, pCreateInfo,
3718                              pAllocator, sizeof(*buffer));
3719    if (buffer == NULL)
3720       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3721 
3722    buffer->address = ANV_NULL_ADDRESS;
3723 
3724    *pBuffer = anv_buffer_to_handle(buffer);
3725 
3726    return VK_SUCCESS;
3727 }
3728 
3729 void anv_DestroyBuffer(
3730     VkDevice                                    _device,
3731     VkBuffer                                    _buffer,
3732     const VkAllocationCallbacks*                pAllocator)
3733 {
3734    ANV_FROM_HANDLE(anv_device, device, _device);
3735    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3736 
3737    if (!buffer)
3738       return;
3739 
3740    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
3741 }
3742 
3743 VkDeviceAddress anv_GetBufferDeviceAddress(
3744     VkDevice                                    device,
3745     const VkBufferDeviceAddressInfo*            pInfo)
3746 {
3747    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3748 
3749    assert(!anv_address_is_null(buffer->address));
3750    assert(anv_bo_is_pinned(buffer->address.bo));
3751 
3752    return anv_address_physical(buffer->address);
3753 }
3754 
3755 uint64_t anv_GetBufferOpaqueCaptureAddress(
3756     VkDevice                                    device,
3757     const VkBufferDeviceAddressInfo*            pInfo)
3758 {
3759    return 0;
3760 }
3761 
3762 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
3763     VkDevice                                    device,
3764     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3765 {
3766    ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
3767 
3768    assert(anv_bo_is_pinned(memory->bo));
3769    assert(memory->bo->has_client_visible_address);
3770 
3771    return intel_48b_address(memory->bo->offset);
3772 }
3773 
3774 void
3775 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
3776                               enum isl_format format,
3777                               struct isl_swizzle swizzle,
3778                               isl_surf_usage_flags_t usage,
3779                               struct anv_address address,
3780                               uint32_t range, uint32_t stride)
3781 {
3782    isl_buffer_fill_state(&device->isl_dev, state.map,
3783                          .address = anv_address_physical(address),
3784                          .mocs = isl_mocs(&device->isl_dev, usage,
3785                                           address.bo && address.bo->is_external),
3786                          .size_B = range,
3787                          .format = format,
3788                          .swizzle = swizzle,
3789                          .stride_B = stride);
3790 }
3791 
3792 void anv_DestroySampler(
3793     VkDevice                                    _device,
3794     VkSampler                                   _sampler,
3795     const VkAllocationCallbacks*                pAllocator)
3796 {
3797    ANV_FROM_HANDLE(anv_device, device, _device);
3798    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
3799 
3800    if (!sampler)
3801       return;
3802 
3803    if (sampler->bindless_state.map) {
3804       anv_state_pool_free(&device->dynamic_state_pool,
3805                           sampler->bindless_state);
3806    }
3807 
3808    if (sampler->custom_border_color.map) {
3809       anv_state_reserved_pool_free(&device->custom_border_colors,
3810                                    sampler->custom_border_color);
3811    }
3812 
3813    vk_object_free(&device->vk, pAllocator, sampler);
3814 }
3815 
3816 static uint64_t
3817 anv_clock_gettime(clockid_t clock_id)
3818 {
3819    struct timespec current;
3820    int ret;
3821 
3822    ret = clock_gettime(clock_id, &current);
3823 #ifdef CLOCK_MONOTONIC_RAW
3824    if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
3825       ret = clock_gettime(CLOCK_MONOTONIC, &current);
3826 #endif
3827    if (ret < 0)
3828       return 0;
3829 
3830    return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
3831 }
3832 
3833 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
3834     VkPhysicalDevice                            physicalDevice,
3835     VkSampleCountFlagBits                       samples,
3836     VkMultisamplePropertiesEXT*                 pMultisampleProperties)
3837 {
3838    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3839 
3840    assert(pMultisampleProperties->sType ==
3841           VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
3842 
3843    VkExtent2D grid_size;
3844    if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
3845       grid_size.width = 1;
3846       grid_size.height = 1;
3847    } else {
3848       grid_size.width = 0;
3849       grid_size.height = 0;
3850    }
3851    pMultisampleProperties->maxSampleLocationGridSize = grid_size;
3852 
3853    vk_foreach_struct(ext, pMultisampleProperties->pNext)
3854       vk_debug_ignored_stype(ext->sType);
3855 }
3856