/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/u_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/i915/intel_defines.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen7_pack.h"
#include "genxml/genX_bits.h"

static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
      DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
      DRI_CONF_NO_16BIT(false)
      DRI_CONF_ANV_HASVK_OVERRIDE_API_VERSION(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_QUALITY
      DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
   DRI_CONF_SECTION_END
};

/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH    4096

/* Render engine timestamp register */
#define TIMESTAMP 0x2358

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}
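
/* Illustrative note (not from this file): the perf log above is only emitted
 * when the "perf" flag is set in the INTEL_DEBUG environment variable, e.g.
 * a hypothetical run:
 *
 *    INTEL_DEBUG=perf ./my_vulkan_app
 */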

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID_STRICT
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#define ANV_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult anv_EnumerateInstanceVersion(
    uint32_t*                                   pApiVersion)
{
#ifdef ANDROID_STRICT
   *pApiVersion = ANV_API_VERSION;
#else
   *pApiVersion = ANV_API_VERSION_1_3;
#endif
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation                = true,
   .KHR_external_fence_capabilities          = true,
   .KHR_external_memory_capabilities         = true,
   .KHR_external_semaphore_capabilities      = true,
   .KHR_get_physical_device_properties2      = true,
   .EXT_debug_report                         = true,
   .EXT_debug_utils                          = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2            = true,
   .KHR_surface                              = true,
   .KHR_surface_protected_capabilities       = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                      = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                          = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                         = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display                 = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                              = true,
   .KHR_get_display_properties2              = true,
   .EXT_direct_mode_display                  = true,
   .EXT_display_surface_counter              = true,
   .EXT_acquire_drm_display                  = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface                     = true,
#endif
};
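
/* Illustrative client-side usage (not part of this driver): the table above
 * is what an application enumerates with the standard two-call idiom, e.g.:
 *
 *    uint32_t count = 0;
 *    vkEnumerateInstanceExtensionProperties(NULL, &count, NULL);
 *    VkExtensionProperties *props = malloc(count * sizeof(*props));
 *    vkEnumerateInstanceExtensionProperties(NULL, &count, props);
 */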

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = device->info.ver >= 8,
      .KHR_16bit_storage                     = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = device->has_a64_buffer_access,
      .KHR_copy_commands2                    = true,
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_dynamic_rendering                 = true,
      .KHR_external_fence                    = has_syncobj_wait,
      .KHR_external_fence_fd                 = has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_get_memory_requirements2          = true,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_multiview                         = true,
      .KHR_performance_query =
         !anv_use_relocations(device) && device->perf &&
         (device->perf->i915_perf_version >= 3 ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      .KHR_push_descriptor                   = true,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_expect_assume              = true,
      .KHR_shader_float16_int8               = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_shader_float_controls             = true,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_subgroup_extended_types    = device->info.ver >= 8,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_border_color_swizzle              = device->info.ver >= 8,
      .EXT_buffer_device_address             = device->has_a64_buffer_access,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = device->info.verx10 >= 75,
      .EXT_custom_border_color               = device->info.ver >= 8,
      .EXT_depth_clamp_zero_one              = true,
      .EXT_depth_clip_control                = true,
      .EXT_depth_clip_enable                 = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_global_priority                   = device->max_context_priority >=
                                               INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_global_priority_query             = device->max_context_priority >=
                                               INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_host_query_reset                  = true,
      .EXT_image_2d_view_of_3d               = true,
      .EXT_image_robustness                  = true,
      .EXT_image_drm_format_modifier         = true,
      .EXT_image_view_min_lod                = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_line_rasterization                = true,
      /* Enable the extension only if we have support on both the local &
       * system memory
       */
      .EXT_memory_budget                     = device->sys.available,
      .EXT_non_seamless_cube_map             = true,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_primitives_generated_query        = true,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier          = true,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
      .EXT_texel_buffer_alignment            = true,
      .EXT_tooling_info                      = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_ycbcr_image_arrays                = true,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      .INTEL_performance_query               = device->perf &&
                                               device->perf->i915_perf_version >= 3,
      .INTEL_shader_integer_functions2       = device->info.ver >= 8,
      .EXT_multi_draw                        = true,
      .NV_compute_shader_derivatives         = true,
      .VALVE_mutable_descriptor_type         = true,
   };
}

static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   /* Just pick one; they're all the same */
   const bool has_astc_ldr =
      isl_format_supports_sampling(&pdevice->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess                       = true,
      .fullDrawIndexUint32                      = true,
      .imageCubeArray                           = true,
      .independentBlend                         = true,
      .geometryShader                           = true,
      .tessellationShader                       = true,
      .sampleRateShading                        = true,
      .dualSrcBlend                             = true,
      .logicOp                                  = true,
      .multiDrawIndirect                        = true,
      .drawIndirectFirstInstance                = true,
      .depthClamp                               = true,
      .depthBiasClamp                           = true,
      .fillModeNonSolid                         = true,
      .depthBounds                              = pdevice->info.ver >= 12,
      .wideLines                                = true,
      .largePoints                              = true,
      .alphaToOne                               = true,
      .multiViewport                            = true,
      .samplerAnisotropy                        = true,
      .textureCompressionETC2                   = pdevice->info.ver >= 8 ||
                                                  pdevice->info.platform == INTEL_PLATFORM_BYT,
      .textureCompressionASTC_LDR               = has_astc_ldr,
      .textureCompressionBC                     = true,
      .occlusionQueryPrecise                    = true,
      .pipelineStatisticsQuery                  = true,
      .fragmentStoresAndAtomics                 = true,
      .shaderTessellationAndGeometryPointSize   = true,
      .shaderImageGatherExtended                = true,
      .shaderStorageImageExtendedFormats        = true,
      .shaderStorageImageMultisample            = false,
      .shaderStorageImageReadWithoutFormat      = false,
      .shaderStorageImageWriteWithoutFormat     = true,
      .shaderUniformBufferArrayDynamicIndexing  = true,
      .shaderSampledImageArrayDynamicIndexing   = true,
      .shaderStorageBufferArrayDynamicIndexing  = true,
      .shaderStorageImageArrayDynamicIndexing   = true,
      .shaderClipDistance                       = true,
      .shaderCullDistance                       = true,
      .shaderFloat64                            = pdevice->info.ver >= 8 &&
                                                  pdevice->info.has_64bit_float,
      .shaderInt64                              = pdevice->info.ver >= 8,
      .shaderInt16                              = pdevice->info.ver >= 8,
      .shaderResourceMinLod                     = false,
      .variableMultisampleRate                  = true,
      .inheritedQueries                         = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess            = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .storagePushConstant16               = pdevice->info.ver >= 8,
      .storageInputOutput16                = false,
      .multiview                           = true,
      .multiviewGeometryShader             = true,
      .multiviewTessellationShader         = true,
      .variablePointersStorageBuffer       = true,
      .variablePointers                    = true,
      .protectedMemory                     = false,
      .samplerYcbcrConversion              = true,
      .shaderDrawParameters                = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge            = true,
      .drawIndirectCount                   = true,
      .storageBuffer8BitAccess             = pdevice->info.ver >= 8,
      .uniformAndStorageBuffer8BitAccess   = pdevice->info.ver >= 8,
      .storagePushConstant8                = pdevice->info.ver >= 8,
      .shaderBufferInt64Atomics            = false,
      .shaderSharedInt64Atomics            = false,
      .shaderFloat16                       = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .shaderInt8                          = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,

      .descriptorIndexing                                 = false,
      .shaderInputAttachmentArrayDynamicIndexing          = false,
      .shaderUniformTexelBufferArrayDynamicIndexing       = false,
      .shaderStorageTexelBufferArrayDynamicIndexing       = false,
      .shaderUniformBufferArrayNonUniformIndexing         = false,
      .shaderSampledImageArrayNonUniformIndexing          = false,
      .shaderStorageBufferArrayNonUniformIndexing         = false,
      .shaderStorageImageArrayNonUniformIndexing          = false,
      .shaderInputAttachmentArrayNonUniformIndexing       = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing    = false,
      .shaderStorageTexelBufferArrayNonUniformIndexing    = false,
      .descriptorBindingUniformBufferUpdateAfterBind      = false,
      .descriptorBindingSampledImageUpdateAfterBind       = false,
      .descriptorBindingStorageImageUpdateAfterBind       = false,
      .descriptorBindingStorageBufferUpdateAfterBind      = false,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
      .descriptorBindingUpdateUnusedWhilePending          = false,
      .descriptorBindingPartiallyBound                    = false,
      .descriptorBindingVariableDescriptorCount           = false,
      .runtimeDescriptorArray                             = false,

      .samplerFilterMinmax                 = false,
      .scalarBlockLayout                   = true,
      .imagelessFramebuffer                = true,
      .uniformBufferStandardLayout         = true,
      .shaderSubgroupExtendedTypes         = true,
      .separateDepthStencilLayouts         = true,
      .hostQueryReset                      = true,
      .timelineSemaphore                   = true,
      .bufferDeviceAddress                 = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressCaptureReplay    = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressMultiDevice      = false,
      .vulkanMemoryModel                   = true,
      .vulkanMemoryModelDeviceScope        = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex           = true,
      .shaderOutputLayer                   = true,
      .subgroupBroadcastDynamicId          = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = false,

      /* VK_NV_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = pdevice->info.verx10 >= 75,
      .inheritedConditionalRendering = pdevice->info.verx10 >= 75,

      /* VK_EXT_custom_border_color */
      .customBorderColors = pdevice->info.ver >= 8,
      .customBorderColorWithoutFormat = pdevice->info.ver >= 8,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_KHR_global_priority */
      .globalPriorityQuery = true,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL.  See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = false,
      .bresenhamLines = true,
      /* Support for Smooth lines with MSAA was removed on gfx11.  From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:########  NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics =    true,
      .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd =  false,
      .shaderSharedFloat32Atomics =    true,
      .shaderSharedFloat32AtomicAdd =  false,
      .shaderSharedFloat64Atomics =    false,
      .shaderSharedFloat64AtomicAdd =  false,
      .shaderImageFloat32Atomics =     true,
      .shaderImageFloat32AtomicAdd =   false,
      .sparseImageFloat32Atomics =     false,
      .sparseImageFloat32AtomicAdd =   false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };

   /* We can't do image stores in vec4 shaders */
   features->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it.  They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
}

static uint64_t
anv_compute_sys_heap_size(struct anv_physical_device *device,
                          uint64_t available_ram)
{
   /* We want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);

   if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail.  Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
                "no support for 48-bit addresses",
                __FILE__, __LINE__);
      available_ram = 2ull << 30;
   }

   return available_ram;
}
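
/* A worked example with illustrative numbers (not taken from any particular
 * device): with 16 GiB of usable system RAM and an 8 GiB GTT, the heap size
 * above is MIN2(16 GiB, 8 GiB * 3 / 4) = 6 GiB.  Without 48-bit address
 * support, the result is then further clamped to 2ull << 30 = 2 GiB.
 */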

static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->sys.size =
      anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
   device->sys.available = devinfo->mem.sram.mappable.free;

   return VK_SUCCESS;
}

static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   if (!intel_device_info_update_memory_info(&device->info, fd))
      return;

   const struct intel_device_info *devinfo = &device->info;
   device->sys.available = devinfo->mem.sram.mappable.free;
}

static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (device->info.has_llc) {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       *
       * But some game engines can't handle a single type well:
       * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
       *
       * And Intel on Windows uses 3 types, so it's better to add an extra
       * one here.
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC.  Thus we offer two memory types
       * to give the application a choice between cached but not coherent,
       * and coherent but uncached (write-combined).
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   }

   device->memory.need_flush = false;
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
         device->memory.need_flush = true;
   }

   return VK_SUCCESS;
}
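
/* Illustrative client-side sketch (not part of this driver): when an
 * allocation comes from one of the HOST_VISIBLE but non-HOST_COHERENT types
 * above, the application must flush after CPU writes, e.g.:
 *
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = mem,            // hypothetical VkDeviceMemory handle
 *       .offset = 0,
 *       .size   = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(vk_device, 1, &range);
 *
 * The need_flush flag computed above tracks exactly this case.
 */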

static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short.  It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid.  It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
                     sizeof(device->info.pci_device_id));
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
                     sizeof(device->has_a64_buffer_access));
   _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
                     sizeof(device->has_bindless_samplers));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[10];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
                               device->info.pci_device_id);
   assert(len == sizeof(renderer) - 2);

   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   const uint64_t driver_flags =
      elk_get_compiler_config_value(device->compiler);
   device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->vk.disk_cache) {
      disk_cache_destroy(device->vk.disk_cache);
      device->vk.disk_cache = NULL;
   }
#else
   assert(device->vk.disk_cache == NULL);
#endif
}

/* The ANV_QUEUE_OVERRIDE environment variable is a comma-separated list of
 * queue overrides.
 *
 * To override the number of queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
{
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   char *env = getenv("ANV_QUEUE_OVERRIDE");

   if (env == NULL)
      return;

   env = strdup(env);
   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);
   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
}

static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int g_count = 0;
      int c_count = 0;

      anv_override_engine_counts(&gc_count, &g_count, &c_count);

      if (gc_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = c_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      /* Increase count below when other families are added as a reminder to
       * increase the ANV_MAX_QUEUE_FAMILIES value.
       */
      STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}

static VkResult
anv_physical_device_try_create(struct vk_instance *vk_instance,
                               struct _drmDevice *drm_device,
                               struct vk_physical_device **out)
{
   struct anv_instance *instance =
      container_of(vk_instance, struct anv_instance, vk);

   if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
       drm_device->bustype != DRM_BUS_PCI ||
       drm_device->deviceinfo.pci->vendor_id != 0x8086)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;

   process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (errno == ENOMEM) {
         return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "Unable to open device %s: out of memory", path);
      }
      return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Unable to open device %s: %m", path);
   }

   struct intel_device_info devinfo;
   if (!intel_get_device_info_from_fd(fd, &devinfo, 7, 8)) {
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   }

   bool is_alpha = true;
   bool warn = !debug_get_bool_option("MESA_VK_IGNORE_CONFORMANCE_WARNING", false);
   if (devinfo.platform == INTEL_PLATFORM_HSW) {
      if (warn)
         mesa_logw("Haswell Vulkan support is incomplete");
   } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
      if (warn)
         mesa_logw("Ivy Bridge Vulkan support is incomplete");
   } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
      if (warn)
         mesa_logw("Bay Trail Vulkan support is incomplete");
   } else if (devinfo.ver == 8) {
      /* Gfx8 fully supported */
      is_alpha = false;
   } else {
      /* Silently fail here, anv will either pick up this device or display an
       * error message.
       */
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   }

   struct anv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (device == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, NULL, NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_alloc;
   }
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);

   device->info = devinfo;
   device->is_alpha = is_alpha;

   device->cmd_parser_version = -1;
   if (device->info.ver == 7) {
      if (!intel_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION, &device->cmd_parser_version) ||
          device->cmd_parser_version == -1) {
         result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to get command parser version");
         goto fail_base;
      }
   }

   int val;
   if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing gem wait");
      goto fail_base;
   }

   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing execbuf2");
      goto fail_base;
   }

   if (!device->info.has_llc &&
       (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing wc mmap");
      goto fail_base;
   }

   device->use_relocations = device->info.ver < 8 ||
                             device->info.platform == INTEL_PLATFORM_CHV;

   if (!device->use_relocations &&
       (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing softpin");
      goto fail_base;
   }

   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing syncobj support");
      goto fail_base;
   }

   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
      device->has_exec_async = val;
   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
      device->has_exec_capture = val;

   /* Start with medium; sorted low to high */
   const int priorities[] = {
      INTEL_CONTEXT_MEDIUM_PRIORITY,
      INTEL_CONTEXT_HIGH_PRIORITY,
      INTEL_CONTEXT_REALTIME_PRIORITY,
   };
   device->max_context_priority = INT_MIN;
   for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
      if (!anv_gem_has_context_priority(fd, priorities[i]))
         break;
      device->max_context_priority = priorities[i];
   }

   device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
                                              device->info.aperture_bytes;

   /* We only allow 48-bit addresses with softpin because knowing the actual
    * address is required for the vertex cache flush workaround.
    */
   device->supports_48bit_addresses = (device->info.ver >= 8) &&
                                      device->gtt_size > (4ULL << 30 /* GiB */);

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail_base;

   assert(device->supports_48bit_addresses == !device->use_relocations);
   device->use_softpin = !device->use_relocations;

   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
      device->has_exec_timeline = val;
   if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
      device->has_exec_timeline = false;

   unsigned st_idx = 0;

   device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
   if (!device->has_exec_timeline)
      device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
   device->sync_types[st_idx++] = &device->sync_syncobj_type;

   if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
      device->sync_types[st_idx++] = &anv_bo_sync_type;

   if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
      device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
      device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
   }

   device->sync_types[st_idx++] = NULL;
   assert(st_idx <= ARRAY_SIZE(device->sync_types));
   device->vk.supported_sync_types = device->sync_types;

   device->vk.pipeline_cache_import_ops = anv_cache_import_ops;

   device->always_use_bindless =
      debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);

   device->use_call_secondary =
      device->use_softpin &&
      !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);

   /* We first got the A64 messages on broadwell and we can only use them if
    * we can pass addresses directly into the shader which requires softpin.
    */
   device->has_a64_buffer_access = device->info.ver >= 8 &&
                                   device->use_softpin;

   /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
    * because it's just a matter of setting the sampler address in the sample
    * message header.  However, we've not bothered to wire it up for vec4 so
    * we leave it disabled on gfx7.
    */
   device->has_bindless_samplers = device->info.ver >= 8;

   /* Check if we can read the GPU timestamp register from the CPU */
   uint64_t u64_ignore;
   device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
                                                               device->info.kmd_type,
                                                               &u64_ignore);

   device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
      driQueryOptionb(&instance->dri_options, "always_flush_cache");

   device->compiler = elk_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_base;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->constant_buffer_0_is_relative =
      device->info.ver < 8 || !device->info.has_context_isolation;
   device->compiler->supports_shader_constants = true;
   device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;

   isl_device_init(&device->isl_dev, &device->info);

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail_compiler;

   anv_physical_device_init_disk_cache(device);

   if (instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         /* fail if we don't have permission to even render on this device */
         if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
   device->master_fd = master_fd;

   device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
   anv_physical_device_init_queue_families(device);

   device->local_fd = fd;

   anv_physical_device_init_perf(device, fd);

   get_device_extensions(device, &device->vk.supported_extensions);
   get_features(device, &device->vk.supported_features);

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS)
      goto fail_perf;

   anv_measure_device_init(device);

   anv_genX(&device->info, init_physical_device_state)(device);

   *out = &device->vk;

   struct stat st;

   if (stat(primary_path, &st) == 0) {
      device->has_master = true;
      device->master_major = major(st.st_rdev);
      device->master_minor = minor(st.st_rdev);
   } else {
      device->has_master = false;
      device->master_major = 0;
      device->master_minor = 0;
   }

   if (stat(path, &st) == 0) {
      device->has_local = true;
      device->local_major = major(st.st_rdev);
      device->local_minor = minor(st.st_rdev);
   } else {
      device->has_local = false;
      device->local_major = 0;
      device->local_minor = 0;
   }

   return VK_SUCCESS;

fail_perf:
   ralloc_free(device->perf);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
fail_compiler:
   ralloc_free(device->compiler);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
anv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct anv_physical_device *device =
      container_of(vk_device, struct anv_physical_device, vk);

   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   ralloc_free(device->perf);
   close(device->local_fd);
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

VkResult anv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}
1313 
1314 static void
anv_init_dri_options(struct anv_instance * instance)1315 anv_init_dri_options(struct anv_instance *instance)
1316 {
1317    driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1318                       ARRAY_SIZE(anv_dri_options));
1319    driParseConfigFiles(&instance->dri_options,
1320                        &instance->available_dri_options, 0, "anv", NULL, NULL,
1321                        instance->vk.app_info.app_name,
1322                        instance->vk.app_info.app_version,
1323                        instance->vk.app_info.engine_name,
1324                        instance->vk.app_info.engine_version);
1325 
1326     instance->assume_full_subgroups =
1327             driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
1328     instance->limit_trig_input_range =
1329             driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1330     instance->sample_mask_out_opengl_behaviour =
1331             driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1332     instance->lower_depth_range_rate =
1333             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1334     instance->no_16bit =
1335             driQueryOptionb(&instance->dri_options, "no_16bit");
1336     instance->report_vk_1_3 =
1337             driQueryOptionb(&instance->dri_options, "hasvk_report_vk_1_3_version");
1338 }
1339 
anv_CreateInstance(const VkInstanceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkInstance * pInstance)1340 VkResult anv_CreateInstance(
1341     const VkInstanceCreateInfo*                 pCreateInfo,
1342     const VkAllocationCallbacks*                pAllocator,
1343     VkInstance*                                 pInstance)
1344 {
1345    struct anv_instance *instance;
1346    VkResult result;
1347 
1348    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1349 
1350    if (pAllocator == NULL)
1351       pAllocator = vk_default_allocator();
1352 
1353    instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1354                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1355    if (!instance)
1356       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1357 
1358    struct vk_instance_dispatch_table dispatch_table;
1359    vk_instance_dispatch_table_from_entrypoints(
1360       &dispatch_table, &anv_instance_entrypoints, true);
1361    vk_instance_dispatch_table_from_entrypoints(
1362       &dispatch_table, &wsi_instance_entrypoints, false);
1363 
1364    result = vk_instance_init(&instance->vk, &instance_extensions,
1365                              &dispatch_table, pCreateInfo, pAllocator);
1366    if (result != VK_SUCCESS) {
1367       vk_free(pAllocator, instance);
1368       return vk_error(NULL, result);
1369    }
1370 
1371    instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1372    instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1373 
1374    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1375 
1376    anv_init_dri_options(instance);
1377 
1378    intel_driver_ds_init();
1379 
1380    *pInstance = anv_instance_to_handle(instance);
1381 
1382    return VK_SUCCESS;
1383 }
1384 
anv_DestroyInstance(VkInstance _instance,const VkAllocationCallbacks * pAllocator)1385 void anv_DestroyInstance(
1386     VkInstance                                  _instance,
1387     const VkAllocationCallbacks*                pAllocator)
1388 {
1389    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1390 
1391    if (!instance)
1392       return;
1393 
1394    VG(VALGRIND_DESTROY_MEMPOOL(instance));
1395 
1396    driDestroyOptionCache(&instance->dri_options);
1397    driDestroyOptionInfo(&instance->available_dri_options);
1398 
1399    vk_instance_finish(&instance->vk);
1400    vk_free(&instance->vk.alloc, instance);
1401 }
1402 
1403 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
1404 
1405 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1406 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
1407 
1408 #define MAX_CUSTOM_BORDER_COLORS                   4096
1409 
anv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,VkPhysicalDeviceProperties * pProperties)1410 void anv_GetPhysicalDeviceProperties(
1411     VkPhysicalDevice                            physicalDevice,
1412     VkPhysicalDeviceProperties*                 pProperties)
1413 {
1414    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1415    const struct intel_device_info *devinfo = &pdevice->info;
1416 
1417    const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
1418    const uint32_t max_textures = 128;
1419    const uint32_t max_samplers =
1420       pdevice->has_bindless_samplers ? UINT16_MAX :
1421       (devinfo->verx10 >= 75) ? 128 : 16;
1422    const uint32_t max_images = MAX_IMAGES;
1423 
1424    /* If we can use bindless for everything, claim a high per-stage limit,
1425     * otherwise use the binding table size, minus the slots reserved for
1426     * render targets and one slot for the descriptor buffer. */
1427    const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
1428 
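   /* Each compute HW thread covers at most a SIMD32 subgroup's worth of
    * invocations, so e.g. with 64 threads per workgroup this would be
    * MIN2(1024, 32 * 64) = 1024 invocations (illustrative numbers only).
    */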
   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D                      = (1 << 14),
      /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their width
       * is greater than 8192 pixels. */
      .maxImageDimension2D                      = devinfo->ver == 7 ? (1 << 13) : (1 << 14),
      .maxImageDimension3D                      = (1 << 11),
      .maxImageDimensionCube                    = (1 << 14),
      .maxImageArrayLayers                      = (1 << 11),
      .maxTexelBufferElements                   = 128 * 1024 * 1024,
      .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange                    = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount                 = UINT32_MAX,
      .maxSamplerAllocationCount                = 64 * 1024,
      .bufferImageGranularity                   = 1,
      .sparseAddressSpaceSize                   = 0,
      .maxBoundDescriptorSets                   = MAX_SETS,
      .maxPerStageDescriptorSamplers            = max_samplers,
      .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers      = max_ssbos,
      .maxPerStageDescriptorSampledImages       = max_textures,
      .maxPerStageDescriptorStorageImages       = max_images,
      .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources                     = max_per_stage,
      .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes                 = MAX_VES,
      .maxVertexInputBindings                   = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset            = 2047,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
       *
       * Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride              = devinfo->ver < 9 ? 2048 : 4095,
      .maxVertexOutputComponents                = 128,
      .maxTessellationGenerationLevel           = 64,
      .maxTessellationPatchSize                 = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations             = 32,
      .maxGeometryInputComponents               = devinfo->ver >= 8 ? 128 : 64,
      .maxGeometryOutputComponents              = 128,
      .maxGeometryOutputVertices                = 256,
      .maxGeometryTotalOutputComponents         = 1024,
      .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments             = 8,
      .maxFragmentDualSrcAttachments            = 1,
      .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
      .maxComputeSharedMemorySize               = 64 * 1024,
      .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations           = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits                    = 8,
      .subTexelPrecisionBits                    = 8,
      .mipmapPrecisionBits                      = 8,
      .maxDrawIndexedIndexValue                 = UINT32_MAX,
      .maxDrawIndirectCount                     = UINT32_MAX,
      .maxSamplerLodBias                        = 16,
      .maxSamplerAnisotropy                     = 16,
      .maxViewports                             = MAX_VIEWPORTS,
      .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
      .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits                     = 13, /* We take a float? */
      .minMemoryMapAlignment                    = 4096, /* A page */
      /* The dataport requires texel alignment, so we need to assume a worst
       * case of R32G32B32A32, which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment            = 16,
      .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
      .minTexelOffset                           = -8,
      .maxTexelOffset                           = 7,
      .minTexelGatherOffset                     = -32,
      .maxTexelGatherOffset                     = 31,
      .minInterpolationOffset                   = -0.5,
      .maxInterpolationOffset                   = 0.4375,
      .subPixelInterpolationOffsetBits          = 4,
      .maxFramebufferWidth                      = (1 << 14),
      .maxFramebufferHeight                     = (1 << 14),
      .maxFramebufferLayers                     = (1 << 11),
      .framebufferColorSampleCounts             = sample_counts,
      .framebufferDepthSampleCounts             = sample_counts,
      .framebufferStencilSampleCounts           = sample_counts,
      .framebufferNoAttachmentsSampleCounts     = sample_counts,
      .maxColorAttachments                      = MAX_RTS,
      .sampledImageColorSampleCounts            = sample_counts,
      /* Multisampling with SINT formats is not supported on gfx7 */
      .sampledImageIntegerSampleCounts          = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts,
      .sampledImageDepthSampleCounts            = sample_counts,
      .sampledImageStencilSampleCounts          = sample_counts,
      .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords                       = 1,
      .timestampComputeAndGraphics              = true,
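      /* For example, a 12.5 MHz timestamp clock gives a period of
       * 1e9 / 12.5e6 = 80 ns per tick (illustrative frequency only).
       */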
      .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances                         = 8,
      .maxCullDistances                         = 8,
      .maxCombinedClipAndCullDistances          = 8,
      .discreteQueuePriorities                  = 2,
      .pointSizeRange                           = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange                           = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
      .pointSizeGranularity                     = (1.0 / 8.0),
      .lineWidthGranularity                     = (1.0 / 128.0),
      .strictLines                              = false,
      .standardSampleLocations                  = true,
      .optimalBufferCopyOffsetAlignment         = 128,
      .optimalBufferCopyRowPitchAlignment       = 128,
      .nonCoherentAtomSize                      = 64,
   };

   *pProperties = (VkPhysicalDeviceProperties) {
#ifdef ANDROID_STRICT
      .apiVersion = ANV_API_VERSION,
#else
      .apiVersion = (pdevice->use_softpin || pdevice->instance->report_vk_1_3) ?
                    ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2,
#endif
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
   };

   snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
            "%s", pdevice->info.name);
   memcpy(pProperties->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
}

static void
anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = ELK_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT;
   if (pdevice->info.ver >= 8) {
      /* TODO: There's no technical reason why these can't be made to
       * work on gfx7, but they don't at the moment, so it's better to
       * leave the feature disabled than enabled and broken.
       */
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                        VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
   }
   p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;

   p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount      = 16;
   p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
   p->protectedNoFault           = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors       = 1024;
   p->maxMemoryAllocationSize    = MAX_MEMORY_ALLOCATION_SIZE;
}

static void
anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* Don't advertise conformance with a particular version if the hardware's
    * support is incomplete/alpha.
    */
   if (pdevice->is_alpha) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      };
   } else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = pdevice->use_softpin ? 3 : 2,
         .subminor = 0,
         .patch = 0,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = pdevice->info.ver > 8;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = pdevice->info.ver >= 8;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan.  The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address.  This means that we can have at most 1M surface states
    * allocated at any given time.  Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time.  The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
   const unsigned max_bindless_views = 1 << 20;
   p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_AVERAGE_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   if (pdevice->info.ver >= 8) {
      /* The advanced stencil resolve modes currently require stencil
       * sampling be supported by the hardware.
       */
      p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
                                         VK_RESOLVE_MODE_MAX_BIT;
   }
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = false;
   p->filterMinmaxImageComponentMapping   = false;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev);
}

static void
anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan13Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);

   p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces.  It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

void anv_GetPhysicalDeviceProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties2*                pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   anv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   anv_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   anv_get_physical_device_properties_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pProperties->pNext) {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props =
            (VkPhysicalDeviceDrmPropertiesEXT *)ext;

         props->hasPrimary = pdevice->has_master;
         props->primaryMajor = pdevice->master_major;
         props->primaryMinor = pdevice->master_minor;

         props->hasRender = pdevice->has_local;
         props->renderMajor = pdevice->local_major;
         props->renderMinor = pdevice->local_minor;

         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
            (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
         /* Userptr needs page aligned memory. */
         props->minImportedHostPointerAlignment = 4096;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
          * Sampling Rules - Legacy Mode", it says the following:
          *
          *    "Note that the device divides a pixel into a 16x16 array of
          *    subpixels, referenced by their upper left corners."
          *
          * This is the only known reference in the PRMs to the subpixel
          * precision of line rasterization and a "16x16 array of subpixels"
          * implies 4 subpixel precision bits.  Empirical testing has shown
          * that 4 subpixel precision bits applies to all line rasterization
          * types.
          */
         props->lineSubPixelPrecisionBits = 4;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
         VkPhysicalDeviceMaintenance4Properties *properties =
            (VkPhysicalDeviceMaintenance4Properties *)ext;
         properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
         VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
            (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
         properties->pciDomain = pdevice->info.pci_domain;
         properties->pciBus = pdevice->info.pci_bus;
         properties->pciDevice = pdevice->info.pci_dev;
         properties->pciFunction = pdevice->info.pci_func;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         /* We could support this by spawning a shader to do the equation
          * normalization.
          */
         properties->allowCommandBufferQueryCopies = false;
         break;
      }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch"
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
         VkPhysicalDevicePresentationPropertiesANDROID *props =
            (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
         props->sharedImage = VK_FALSE;
         break;
      }
#pragma GCC diagnostic pop

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         properties->provokingVertexModePerPipeline = true;
         properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
         properties->robustStorageBufferAccessSizeAlignment =
            ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
         properties->robustUniformBufferAccessSizeAlignment =
            ANV_UBO_ALIGNMENT;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;

         props->sampleLocationSampleCounts =
            isl_device_get_sample_counts(&pdevice->isl_dev);

         /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
         props->maxSampleLocationGridSize.width = 1;
         props->maxSampleLocationGridSize.height = 1;

         props->sampleLocationCoordinateRange[0] = 0;
         props->sampleLocationCoordinateRange[1] = 0.9375;
         props->sampleLocationSubPixelBits = 4;

         props->variableSampleLocations = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
            (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
         STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                       sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         memcpy(props->shaderModuleIdentifierAlgorithmUUID,
                vk_shaderModuleIdentifierAlgorithmUUID,
                sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;

         props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
         props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
         props->maxTransformFeedbackBufferSize = (1ull << 32);
         props->maxTransformFeedbackStreamDataSize = 128 * 4;
         props->maxTransformFeedbackBufferDataSize = 128 * 4;
         props->maxTransformFeedbackBufferDataStride = 2048;
         props->transformFeedbackQueries = true;
         props->transformFeedbackStreamsLinesTriangles = false;
         props->transformFeedbackRasterizationStreamSelect = false;
         /* This requires MI_MATH */
         props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         /* We have to restrict this a bit for multiview */
         props->maxVertexAttribDivisor = UINT32_MAX / 16;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
         VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
         props->maxMultiDrawCount = 2048;
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

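/* Map a Vulkan global queue priority onto the kernel's context-priority
 * scale.
 */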
static int
vk_priority_to_gen(int priority)
{
   switch (priority) {
   case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
      return INTEL_CONTEXT_LOW_PRIORITY;
   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
      return INTEL_CONTEXT_MEDIUM_PRIORITY;
   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
      return INTEL_CONTEXT_HIGH_PRIORITY;
   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
      return INTEL_CONTEXT_REALTIME_PRIORITY;
   default:
      unreachable("Invalid priority");
   }
}

static const VkQueueFamilyProperties
anv_queue_family_properties_template = {
   .timestampValidBits = 36, /* XXX: Real value here */
   .minImageTransferGranularity = { 1, 1, 1 },
};

void anv_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2*                   pQueueFamilyProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
      struct anv_queue_family *queue_family = &pdevice->queue.families[i];
      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties = anv_queue_family_properties_template;
         p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
         p->queueFamilyProperties.queueCount = queue_family->queueCount;

         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
               VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
                  (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;

               /* Deliberately sorted low to high */
               VkQueueGlobalPriorityKHR all_priorities[] = {
                  VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
               };

               uint32_t count = 0;
               for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
                  if (vk_priority_to_gen(all_priorities[i]) >
                      pdevice->max_context_priority)
                     break;

                  properties->priorities[count++] = all_priorities[i];
               }
               properties->priorityCount = count;
               break;
            }

            default:
               anv_debug_ignored_stype(ext->sType);
            }
         }
      }
   }
}

void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
      pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
         .propertyFlags = physical_device->memory.types[i].propertyFlags,
         .heapIndex     = physical_device->memory.types[i].heapIndex,
      };
   }

   pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
   for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
      pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
         .size    = physical_device->memory.heaps[i].size,
         .flags   = physical_device->memory.heaps[i].flags,
      };
   }
}

static void
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   if (!device->vk.supported_extensions.EXT_memory_budget)
      return;

   anv_update_meminfo(device, device->local_fd);

   VkDeviceSize total_sys_heaps_size = 0;
   for (size_t i = 0; i < device->memory.heap_count; i++)
      total_sys_heaps_size += device->memory.heaps[i].size;

   for (size_t i = 0; i < device->memory.heap_count; i++) {
      VkDeviceSize heap_size = device->memory.heaps[i].size;
      VkDeviceSize heap_used = device->memory.heaps[i].used;
      VkDeviceSize heap_budget, total_heaps_size;
      uint64_t mem_available = 0;

      total_heaps_size = total_sys_heaps_size;
      mem_available = device->sys.available;

      double heap_proportion = (double) heap_size / total_heaps_size;
      VkDeviceSize available_prop = mem_available * heap_proportion;

      /*
       * Let's not incite the app to starve the system: report at most 90% of
       * the available heap memory.
       */
      uint64_t heap_available = available_prop * 9 / 10;
      heap_budget = MIN2(heap_size, heap_used + heap_available);

      /*
       * Round down to the nearest MB
       */
      heap_budget &= ~((1ull << 20) - 1);
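
      /* Worked example (hypothetical numbers): a single 8 GiB heap
       * (proportion 1.0) with 4 GiB of system memory available and 1 GiB
       * already used yields heap_available = 3.6 GiB and
       * heap_budget = MIN2(8 GiB, 1 GiB + 3.6 GiB) = 4.6 GiB, which the
       * mask above then rounds down to a whole MiB.
       */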

      /*
       * The heapBudget value must be non-zero for array elements less than
       * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
       * value must be less than or equal to VkMemoryHeap::size for each heap.
       */
      assert(0 < heap_budget && heap_budget <= heap_size);

      memoryBudget->heapUsage[i] = heap_used;
      memoryBudget->heapBudget[i] = heap_budget;
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
    */
   for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}

void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
         anv_get_memory_budget(physicalDevice, (void*)ext);
         break;
      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

PFN_vkVoidFunction anv_GetInstanceProcAddr(
    VkInstance                                  _instance,
    const char*                                 pName)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &anv_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance                                  instance,
    const char*                                 pName)
{
   return anv_GetInstanceProcAddr(instance, pName);
}
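
/* Allocate a slot of the given size/alignment from the state pool and copy
 * `size` bytes of `p` into it.
 */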
static struct anv_state
anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
{
   struct anv_state state;

   state = anv_state_pool_alloc(pool, size, align);
   memcpy(state.map, p, size);

   return state;
}

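/* Upload the default border-color tables.  Haswell has its own border-color
 * layout; everything else uses the gfx8 layout.
 */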
static void
anv_device_init_border_colors(struct anv_device *device)
{
   if (device->info->platform == INTEL_PLATFORM_HSW) {
      static const struct hsw_border_color border_colors[] = {
         [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
         [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
         [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
         [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
         [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
         [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
      };

      device->border_colors =
         anv_state_pool_emit_data(&device->dynamic_state_pool,
                                  sizeof(border_colors), 512, border_colors);
   } else {
      static const struct gfx8_border_color border_colors[] = {
         [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
         [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
         [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
         [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
         [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
         [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
      };

      device->border_colors =
         anv_state_pool_emit_data(&device->dynamic_state_pool,
                                  sizeof(border_colors), 64, border_colors);
   }
}

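/* Build a minimal 4 KiB batch that does nothing but end immediately:
 * MI_BATCH_BUFFER_END followed by a MI_NOOP.
 */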
static VkResult
anv_device_init_trivial_batch(struct anv_device *device)
{
   VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
                                         ANV_BO_ALLOC_MAPPED,
                                         0 /* explicit_address */,
                                         &device->trivial_batch_bo);
   if (result != VK_SUCCESS)
      return result;

   struct anv_batch batch = {
      .start = device->trivial_batch_bo->map,
      .next = device->trivial_batch_bo->map,
      .end = device->trivial_batch_bo->map + 4096,
   };

   anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
   anv_batch_emit(&batch, GFX7_MI_NOOP, noop);

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   if (device->physical->memory.need_flush)
      intel_flush_range(batch.start, batch.next - batch.start);
#endif

   return VK_SUCCESS;
}

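/* Check whether `address` falls inside one of the pool's BOs and, if so,
 * return its decoder view through `ret`.
 */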
static bool
get_bo_from_pool(struct intel_batch_decode_bo *ret,
                 struct anv_block_pool *pool,
                 uint64_t address)
{
   anv_block_pool_foreach_bo(bo, pool) {
      uint64_t bo_address = intel_48b_address(bo->offset);
      if (address >= bo_address && address < (bo_address + bo->size)) {
         *ret = (struct intel_batch_decode_bo) {
            .addr = bo_address,
            .size = bo->size,
            .map = bo->map,
         };
         return true;
      }
   }
   return false;
}

/* Finding a buffer for batch decoding */
static struct intel_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
   struct anv_device *device = v_batch;
   struct intel_batch_decode_bo ret_bo = {};

   assert(ppgtt);

   if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
      return ret_bo;
   if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
      return ret_bo;
   if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
      return ret_bo;
   if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
      return ret_bo;

   if (!device->cmd_buffer_being_decoded)
      return (struct intel_batch_decode_bo) { };

   struct anv_batch_bo **bo;

   u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
      /* The decoder zeroes out the top 16 bits (keeping the low 48 address
       * bits), so we need to as well.
       */
      uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
         return (struct intel_batch_decode_bo) {
            .addr = bo_address,
            .size = (*bo)->bo->size,
            .map = (*bo)->bo->map,
         };
      }
   }

   return (struct intel_batch_decode_bo) { };
}

static VkResult anv_device_check_status(struct vk_device *vk_device);

static VkResult
anv_device_setup_context(struct anv_device *device,
                         const VkDeviceCreateInfo *pCreateInfo,
                         const uint32_t num_queues)
{
   struct anv_physical_device *physical_device = device->physical;
   VkResult result = VK_SUCCESS;

   if (device->physical->engine_info) {
      /* The kernel API supports at most 64 engines */
      assert(num_queues <= 64);
      enum intel_engine_class engine_classes[64];
      int engine_count = 0;
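      /* Flatten the requested queues into one engine-class entry per queue. */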
2398       for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2399          const VkDeviceQueueCreateInfo *queueCreateInfo =
2400             &pCreateInfo->pQueueCreateInfos[i];
2401 
2402          assert(queueCreateInfo->queueFamilyIndex <
2403                 physical_device->queue.family_count);
2404          struct anv_queue_family *queue_family =
2405             &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
2406 
2407          for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
2408             engine_classes[engine_count++] = queue_family->engine_class;
2409       }
2410       if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
2411                                             physical_device->engine_info,
2412                                             engine_count, engine_classes,
2413                                             0 /* vm_id */,
2414                                             (uint32_t *)&device->context_id))
2415          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2416                             "kernel context creation failed");
2417    } else {
2418       assert(num_queues == 1);
2419       if (!intel_gem_create_context(device->fd, &device->context_id))
2420          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2421    }
2422 
2423    if (result != VK_SUCCESS)
2424       return result;
2425 
2426    /* Here we tell the kernel not to attempt to recover our context but
2427     * immediately (on the next batchbuffer submission) report that the
2428     * context is lost, and we will do the recovery ourselves.  In the case
2429     * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
2430     * the client clean up the pieces.
2431     */
2432    anv_gem_set_context_param(device->fd, device->context_id,
2433                              I915_CONTEXT_PARAM_RECOVERABLE, false);
2434 
2435    /* Check if client specified queue priority. */
2436    const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
2437       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2438                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
2439 
2440    VkQueueGlobalPriorityKHR priority =
2441       queue_priority ? queue_priority->globalPriority :
2442          VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
2443 
2444    /* As per spec, the driver implementation may deny requests to acquire
2445     * a priority above the default priority (MEDIUM) if the caller does not
2446     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
2447     * is returned.
2448     */
2449    if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
2450       int err = anv_gem_set_context_param(device->fd, device->context_id,
2451                                           I915_CONTEXT_PARAM_PRIORITY,
2452                                           vk_priority_to_gen(priority));
2453       if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
2454          result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
2455          goto fail_context;
2456       }
2457    }
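   /* For reference, a client requests a non-default priority by chaining a
    * struct like the sketch below into one of its VkDeviceQueueCreateInfo
    * pNext chains (field values here are illustrative only):
    *
    *    VkDeviceQueueGlobalPriorityCreateInfoKHR prio = {
    *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR,
    *       .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
    *    };
    *    queue_create_info.pNext = &prio;
    */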
2458 
2459    return result;
2460 
2461 fail_context:
2462    intel_gem_destroy_context(device->fd, device->context_id);
2463    return result;
2464 }
2465 
2466 VkResult anv_CreateDevice(
2467     VkPhysicalDevice                            physicalDevice,
2468     const VkDeviceCreateInfo*                   pCreateInfo,
2469     const VkAllocationCallbacks*                pAllocator,
2470     VkDevice*                                   pDevice)
2471 {
2472    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2473    VkResult result;
2474    struct anv_device *device;
2475 
2476    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2477 
2478    /* Check requested queues and fail if we are requested to create any
2479     * queues with flags we don't support.
2480     */
2481    assert(pCreateInfo->queueCreateInfoCount > 0);
2482    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2483       if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2484          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2485    }
2486 
2487    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2488                        sizeof(*device), 8,
2489                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2490    if (!device)
2491       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2492 
2493    struct vk_device_dispatch_table dispatch_table;
2494 
2495    bool override_initial_entrypoints = true;
2496    if (physical_device->instance->vk.app_info.app_name &&
2497        !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
2498       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &doom64_device_entrypoints, true);
2499       override_initial_entrypoints = false;
2500    }
2501    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2502       anv_genX(&physical_device->info, device_entrypoints),
2503       override_initial_entrypoints);
2504    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2505       &anv_device_entrypoints, false);
2506    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2507       &wsi_device_entrypoints, false);
2508 
2509    result = vk_device_init(&device->vk, &physical_device->vk,
2510                            &dispatch_table, pCreateInfo, pAllocator);
2511    if (result != VK_SUCCESS)
2512       goto fail_alloc;
2513 
2514    if (INTEL_DEBUG(DEBUG_BATCH)) {
2515       const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
2516 
2517       intel_batch_decode_ctx_init_elk(&device->decoder_ctx,
2518                                       &physical_device->compiler->isa,
2519                                       &physical_device->info,
2520                                       stderr, decode_flags, NULL,
2521                                       decode_get_bo, NULL, device);
2522 
2523       device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
2524       device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
2525       device->decoder_ctx.instruction_base =
2526          INSTRUCTION_STATE_POOL_MIN_ADDRESS;
2527    }
2528 
2529    anv_device_set_physical(device, physical_device);
2530 
2531    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2532    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2533    if (device->fd == -1) {
2534       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2535       goto fail_device;
2536    }
2537 
2538    device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
2539    device->vk.check_status = anv_device_check_status;
2540    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
2541    vk_device_set_drm_fd(&device->vk, device->fd);
2542 
2543    uint32_t num_queues = 0;
2544    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2545       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2546 
2547    result = anv_device_setup_context(device, pCreateInfo, num_queues);
2548    if (result != VK_SUCCESS)
2549       goto fail_fd;
2550 
2551    device->queues =
2552       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
2553                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2554    if (device->queues == NULL) {
2555       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2556       goto fail_context_id;
2557    }
2558 
2559    device->queue_count = 0;
2560    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2561       const VkDeviceQueueCreateInfo *queueCreateInfo =
2562          &pCreateInfo->pQueueCreateInfos[i];
2563 
2564       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
2565          /* When using legacy contexts, we use I915_EXEC_RENDER but, with
2566           * engine-based contexts, the bottom 6 bits of exec_flags are used
2567           * for the engine ID.
2568           */
2569          uint32_t exec_flags = device->physical->engine_info ?
2570                                device->queue_count : I915_EXEC_RENDER;
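         /* The kernel's I915_EXEC_RING_MASK covers the bottom 6 bits of
          * exec_flags (hence the 64-engine ceiling checked in
          * anv_device_setup_context()), so queue N simply submits with
          * exec_flags == N when engine-based contexts are in use.
          */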
2571 
2572          result = anv_queue_init(device, &device->queues[device->queue_count],
2573                                  exec_flags, queueCreateInfo, j);
2574          if (result != VK_SUCCESS)
2575             goto fail_queues;
2576 
2577          device->queue_count++;
2578       }
2579    }
2580 
2581    if (!anv_use_relocations(physical_device)) {
2582       if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
2583          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2584          goto fail_queues;
2585       }
2586 
2587       /* keep the page with address zero out of the allocator */
2588       util_vma_heap_init(&device->vma_lo,
2589                          LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
2590 
2591       util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
2592                          CLIENT_VISIBLE_HEAP_SIZE);
2593 
2594       /* Leave the last 4GiB out of the high vma range, so that no state
2595        * base address + size can overflow 48 bits. For more information see
2596        * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
2597        */
2598       util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
2599                          physical_device->gtt_size - (1ull << 32) -
2600                          HIGH_HEAP_MIN_ADDRESS);
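      /* Worked example: on a full 48-bit GTT (256 TiB) the heap tops out at
       * 256 TiB - 4 GiB, so any base address inside the heap plus a full
       * 32-bit (4 GiB) state size still fits in 48 bits without wrapping.
       */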
2601    }
2602 
2603    list_inithead(&device->memory_objects);
2604 
2605    /* On Broadwell and later, we can use batch chaining to more efficiently
2606     * implement growing command buffers.  On Haswell and earlier, the kernel
2607     * command parser gets in the way and we have to fall back to growing
2608     * the batch.
2609     */
2610    device->can_chain_batches = device->info->ver >= 8;
2611 
2612    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
2613       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2614       goto fail_vmas;
2615    }
2616 
2617    pthread_condattr_t condattr;
2618    if (pthread_condattr_init(&condattr) != 0) {
2619       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2620       goto fail_mutex;
2621    }
2622    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2623       pthread_condattr_destroy(&condattr);
2624       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2625       goto fail_mutex;
2626    }
2627    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
2628       pthread_condattr_destroy(&condattr);
2629       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2630       goto fail_mutex;
2631    }
2632    pthread_condattr_destroy(&condattr);
2633 
2634    result = anv_bo_cache_init(&device->bo_cache, device);
2635    if (result != VK_SUCCESS)
2636       goto fail_queue_cond;
2637 
2638    anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
2639 
2640    /* Because scratch is also relative to General State Base Address, we leave
2641     * the base address 0 and start the pool memory at an offset.  This way we
2642     * get the correct offsets in the anv_states that get allocated from it.
2643     */
2644    result = anv_state_pool_init(&device->general_state_pool, device,
2645                                 "general pool",
2646                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
2647    if (result != VK_SUCCESS)
2648       goto fail_batch_bo_pool;
2649 
2650    result = anv_state_pool_init(&device->dynamic_state_pool, device,
2651                                 "dynamic pool",
2652                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
2653    if (result != VK_SUCCESS)
2654       goto fail_general_state_pool;
2655 
2656    if (device->info->ver >= 8) {
2657       /* The border color pointer is limited to 24 bits, so we need to make
2658        * sure that any such color used at any point in the program doesn't
2659        * exceed that limit.
2660        * We achieve that by reserving all the custom border colors we support
2661        * right off the bat, so they are close to the base address.
2662        */
2663       anv_state_reserved_pool_init(&device->custom_border_colors,
2664                                    &device->dynamic_state_pool,
2665                                    MAX_CUSTOM_BORDER_COLORS,
2666                                    sizeof(struct gfx8_border_color), 64);
2667    }
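   /* A 24-bit pointer can address at most 16 MiB past Dynamic State Base
    * Address.  For illustration, assuming MAX_CUSTOM_BORDER_COLORS is 4096
    * (not guaranteed here), the reservation is 4096 64-byte entries, i.e.
    * 256 KiB, comfortably inside the 16 MiB window.
    */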
2668 
2669    result = anv_state_pool_init(&device->instruction_state_pool, device,
2670                                 "instruction pool",
2671                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
2672    if (result != VK_SUCCESS)
2673       goto fail_dynamic_state_pool;
2674 
2675    result = anv_state_pool_init(&device->surface_state_pool, device,
2676                                 "surface state pool",
2677                                 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
2678    if (result != VK_SUCCESS)
2679       goto fail_instruction_state_pool;
2680 
2681    if (!anv_use_relocations(physical_device)) {
2682       int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
2683                                (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
2684       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
2685       result = anv_state_pool_init(&device->binding_table_pool, device,
2686                                    "binding table pool",
2687                                    SURFACE_STATE_POOL_MIN_ADDRESS,
2688                                    bt_pool_offset,
2689                                    BINDING_TABLE_POOL_BLOCK_SIZE);
2690    }
2691    if (result != VK_SUCCESS)
2692       goto fail_surface_state_pool;
2693 
2694    result = anv_device_alloc_bo(device, "workaround", 4096,
2695                                 ANV_BO_ALLOC_CAPTURE |
2696                                 ANV_BO_ALLOC_MAPPED,
2697                                 0 /* explicit_address */,
2698                                 &device->workaround_bo);
2699    if (result != VK_SUCCESS)
2700       goto fail_binding_table_pool;
2701 
2702    device->workaround_address = (struct anv_address) {
2703       .bo = device->workaround_bo,
2704       .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
2705                                                     device->workaround_bo->size,
2706                                                     "hasvk"), 32),
2707    };
2708 
2709    device->workarounds.doom64_images = NULL;
2710 
2711    device->debug_frame_desc =
2712       intel_debug_get_identifier_block(device->workaround_bo->map,
2713                                        device->workaround_bo->size,
2714                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
2715 
2716    result = anv_device_init_trivial_batch(device);
2717    if (result != VK_SUCCESS)
2718       goto fail_workaround_bo;
2719 
2720    /* Allocate a null surface state at surface state offset 0.  This makes
2721     * NULL descriptor handling trivial because we can just memset structures
2722     * to zero and they have a valid descriptor.
2723     */
2724    device->null_surface_state =
2725       anv_state_pool_alloc(&device->surface_state_pool,
2726                            device->isl_dev.ss.size,
2727                            device->isl_dev.ss.align);
2728    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
2729                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
2730    assert(device->null_surface_state.offset == 0);
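   /* That zero offset is what makes sketches like the following valid for
    * NULL descriptors elsewhere in the driver:
    *
    *    memset(desc, 0, sizeof(*desc));   // surface offset 0 == null surface
    */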
2731 
2732    anv_scratch_pool_init(device, &device->scratch_pool);
2733 
2734    result = anv_genX(device->info, init_device_state)(device);
2735    if (result != VK_SUCCESS)
2736       goto fail_trivial_batch_bo_and_scratch_pool;
2737 
2738    struct vk_pipeline_cache_create_info pcc_info = { };
2739    device->default_pipeline_cache =
2740       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2741    if (!device->default_pipeline_cache) {
2742       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2743       goto fail_trivial_batch_bo_and_scratch_pool;
2744    }
2745 
2746    /* Internal shaders need their own pipeline cache because, unlike the
2747     * rest of ANV, BLORP won't work at all without a cache: it relies on the
2748     * cache to keep its shaders resident while it runs. Therefore, we need a
2749     * special cache just for BLORP/RT that's forced to always be enabled.
2750     */
2751    pcc_info.force_enable = true;
2752    device->internal_cache =
2753       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2754    if (device->internal_cache == NULL) {
2755       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2756       goto fail_default_pipeline_cache;
2757    }
2758 
2759    device->robust_buffer_access =
2760       device->vk.enabled_features.robustBufferAccess ||
2761       device->vk.enabled_features.nullDescriptor;
2762 
2763    anv_device_init_blorp(device);
2764 
2765    anv_device_init_border_colors(device);
2766 
2767    anv_device_perf_init(device);
2768 
2769    anv_device_utrace_init(device);
2770 
2771    *pDevice = anv_device_to_handle(device);
2772 
2773    return VK_SUCCESS;
2774 
2775  fail_default_pipeline_cache:
2776    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2777  fail_trivial_batch_bo_and_scratch_pool:
2778    anv_scratch_pool_finish(device, &device->scratch_pool);
2779    anv_device_release_bo(device, device->trivial_batch_bo);
2780  fail_workaround_bo:
2781    anv_device_release_bo(device, device->workaround_bo);
2782  fail_binding_table_pool:
2783    if (!anv_use_relocations(physical_device))
2784       anv_state_pool_finish(&device->binding_table_pool);
2785  fail_surface_state_pool:
2786    anv_state_pool_finish(&device->surface_state_pool);
2787  fail_instruction_state_pool:
2788    anv_state_pool_finish(&device->instruction_state_pool);
2789  fail_dynamic_state_pool:
2790    if (device->info->ver >= 8)
2791       anv_state_reserved_pool_finish(&device->custom_border_colors);
2792    anv_state_pool_finish(&device->dynamic_state_pool);
2793  fail_general_state_pool:
2794    anv_state_pool_finish(&device->general_state_pool);
2795  fail_batch_bo_pool:
2796    anv_bo_pool_finish(&device->batch_bo_pool);
2797    anv_bo_cache_finish(&device->bo_cache);
2798  fail_queue_cond:
2799    pthread_cond_destroy(&device->queue_submit);
2800  fail_mutex:
2801    pthread_mutex_destroy(&device->mutex);
2802  fail_vmas:
2803    if (!anv_use_relocations(physical_device)) {
2804       util_vma_heap_finish(&device->vma_hi);
2805       util_vma_heap_finish(&device->vma_cva);
2806       util_vma_heap_finish(&device->vma_lo);
2807    }
2808  fail_queues:
2809    for (uint32_t i = 0; i < device->queue_count; i++)
2810       anv_queue_finish(&device->queues[i]);
2811    vk_free(&device->vk.alloc, device->queues);
2812  fail_context_id:
2813    intel_gem_destroy_context(device->fd, device->context_id);
2814  fail_fd:
2815    close(device->fd);
2816  fail_device:
2817    vk_device_finish(&device->vk);
2818  fail_alloc:
2819    vk_free(&device->vk.alloc, device);
2820 
2821    return result;
2822 }
2823 
2824 void anv_DestroyDevice(
2825     VkDevice                                    _device,
2826     const VkAllocationCallbacks*                pAllocator)
2827 {
2828    ANV_FROM_HANDLE(anv_device, device, _device);
2829 
2830    if (!device)
2831       return;
2832 
2833    anv_device_utrace_finish(device);
2834 
2835    anv_device_finish_blorp(device);
2836 
2837    vk_pipeline_cache_destroy(device->internal_cache, NULL);
2838    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2839 
2840 #ifdef HAVE_VALGRIND
2841    /* We only need to free these to prevent valgrind errors.  The backing
2842     * BO will go away in a couple of lines so we don't actually leak.
2843     */
2844    if (device->info->ver >= 8)
2845       anv_state_reserved_pool_finish(&device->custom_border_colors);
2846    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
2847    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
2848 #endif
2849 
2850    anv_scratch_pool_finish(device, &device->scratch_pool);
2851 
2852    anv_device_release_bo(device, device->workaround_bo);
2853    anv_device_release_bo(device, device->trivial_batch_bo);
2854 
2855    if (!anv_use_relocations(device->physical))
2856       anv_state_pool_finish(&device->binding_table_pool);
2857    anv_state_pool_finish(&device->surface_state_pool);
2858    anv_state_pool_finish(&device->instruction_state_pool);
2859    anv_state_pool_finish(&device->dynamic_state_pool);
2860    anv_state_pool_finish(&device->general_state_pool);
2861 
2862    anv_bo_pool_finish(&device->batch_bo_pool);
2863 
2864    anv_bo_cache_finish(&device->bo_cache);
2865 
2866    if (!anv_use_relocations(device->physical)) {
2867       util_vma_heap_finish(&device->vma_hi);
2868       util_vma_heap_finish(&device->vma_cva);
2869       util_vma_heap_finish(&device->vma_lo);
2870    }
2871 
2872    pthread_cond_destroy(&device->queue_submit);
2873    pthread_mutex_destroy(&device->mutex);
2874 
2875    for (uint32_t i = 0; i < device->queue_count; i++)
2876       anv_queue_finish(&device->queues[i]);
2877    vk_free(&device->vk.alloc, device->queues);
2878 
2879    intel_gem_destroy_context(device->fd, device->context_id);
2880 
2881    if (INTEL_DEBUG(DEBUG_BATCH))
2882       intel_batch_decode_ctx_finish(&device->decoder_ctx);
2883 
2884    close(device->fd);
2885 
2886    vk_device_finish(&device->vk);
2887    vk_free(&device->vk.alloc, device);
2888 }
2889 
2890 VkResult anv_EnumerateInstanceLayerProperties(
2891     uint32_t*                                   pPropertyCount,
2892     VkLayerProperties*                          pProperties)
2893 {
2894    if (pProperties == NULL) {
2895       *pPropertyCount = 0;
2896       return VK_SUCCESS;
2897    }
2898 
2899    /* None supported at this time */
2900    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2901 }
2902 
2903 static VkResult
2904 anv_device_check_status(struct vk_device *vk_device)
2905 {
2906    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2907 
2908    uint32_t active, pending;
2909    int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
2910                                              &active, &pending);
2911    if (ret == -1) {
2912       /* We don't know the real error. */
2913       return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
2914    }
2915 
2916    if (active) {
2917       return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
2918    } else if (pending) {
2919       return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
2920    }
2921 
2922    return VK_SUCCESS;
2923 }
2924 
2925 VkResult
2926 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2927                 int64_t timeout)
2928 {
2929    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
2930    if (ret == -1 && errno == ETIME) {
2931       return VK_TIMEOUT;
2932    } else if (ret == -1) {
2933       /* We don't know the real error. */
2934       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
2935    } else {
2936       return VK_SUCCESS;
2937    }
2938 }
2939 
2940 uint64_t
2941 anv_vma_alloc(struct anv_device *device,
2942               uint64_t size, uint64_t align,
2943               enum anv_bo_alloc_flags alloc_flags,
2944               uint64_t client_address)
2945 {
2946    pthread_mutex_lock(&device->vma_mutex);
2947 
2948    uint64_t addr = 0;
2949 
2950    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
2951       if (client_address) {
2952          if (util_vma_heap_alloc_addr(&device->vma_cva,
2953                                       client_address, size)) {
2954             addr = client_address;
2955          }
2956       } else {
2957          addr = util_vma_heap_alloc(&device->vma_cva, size, align);
2958       }
2959       /* We don't want to fall back to other heaps */
2960       goto done;
2961    }
2962 
2963    assert(client_address == 0);
2964 
2965    if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
2966       addr = util_vma_heap_alloc(&device->vma_hi, size, align);
2967 
2968    if (addr == 0)
2969       addr = util_vma_heap_alloc(&device->vma_lo, size, align);
2970 
2971 done:
2972    pthread_mutex_unlock(&device->vma_mutex);
2973 
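   /* intel_canonical_address() sign-extends bit 47 into bits 63:48; e.g.
    * 0x0000800000000000 becomes 0xffff800000000000.  The hardware expects
    * 48-bit virtual addresses in this canonical form.
    */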
2974    assert(addr == intel_48b_address(addr));
2975    return intel_canonical_address(addr);
2976 }
2977 
2978 void
2979 anv_vma_free(struct anv_device *device,
2980              uint64_t address, uint64_t size)
2981 {
2982    const uint64_t addr_48b = intel_48b_address(address);
2983 
2984    pthread_mutex_lock(&device->vma_mutex);
2985 
2986    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
2987        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
2988       util_vma_heap_free(&device->vma_lo, addr_48b, size);
2989    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
2990               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
2991       util_vma_heap_free(&device->vma_cva, addr_48b, size);
2992    } else {
2993       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
2994       util_vma_heap_free(&device->vma_hi, addr_48b, size);
2995    }
2996 
2997    pthread_mutex_unlock(&device->vma_mutex);
2998 }
2999 
3000 VkResult anv_AllocateMemory(
3001     VkDevice                                    _device,
3002     const VkMemoryAllocateInfo*                 pAllocateInfo,
3003     const VkAllocationCallbacks*                pAllocator,
3004     VkDeviceMemory*                             pMem)
3005 {
3006    ANV_FROM_HANDLE(anv_device, device, _device);
3007    struct anv_physical_device *pdevice = device->physical;
3008    struct anv_device_memory *mem;
3009    VkResult result = VK_SUCCESS;
3010 
3011    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3012 
3013    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3014    assert(pAllocateInfo->allocationSize > 0);
3015 
3016    VkDeviceSize aligned_alloc_size =
3017       align64(pAllocateInfo->allocationSize, 4096);
3018 
3019    if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3020       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3021 
3022    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3023    struct anv_memory_type *mem_type =
3024       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3025    assert(mem_type->heapIndex < pdevice->memory.heap_count);
3026    struct anv_memory_heap *mem_heap =
3027       &pdevice->memory.heaps[mem_type->heapIndex];
3028 
3029    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3030    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3031       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3032 
3033    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3034                          VK_OBJECT_TYPE_DEVICE_MEMORY);
3035    if (mem == NULL)
3036       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3037 
3038    mem->type = mem_type;
3039    mem->map = NULL;
3040    mem->map_size = 0;
3041    mem->map_delta = 0;
3042    mem->ahw = NULL;
3043    mem->host_ptr = NULL;
3044 
3045    enum anv_bo_alloc_flags alloc_flags = 0;
3046 
3047    const VkExportMemoryAllocateInfo *export_info = NULL;
3048    const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3049    const VkImportMemoryFdInfoKHR *fd_info = NULL;
3050    const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3051    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3052    VkMemoryAllocateFlags vk_flags = 0;
3053    uint64_t client_address = 0;
3054 
3055    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3056       switch (ext->sType) {
3057       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3058          export_info = (void *)ext;
3059          break;
3060 
3061       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3062          ahw_import_info = (void *)ext;
3063          break;
3064 
3065       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3066          fd_info = (void *)ext;
3067          break;
3068 
3069       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3070          host_ptr_info = (void *)ext;
3071          break;
3072 
3073       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3074          const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3075          vk_flags = flags_info->flags;
3076          break;
3077       }
3078 
3079       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3080          dedicated_info = (void *)ext;
3081          break;
3082 
3083       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3084          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3085             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3086          client_address = addr_info->opaqueCaptureAddress;
3087          break;
3088       }
3089 
3090       default:
3091          if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3092             /* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real
3093              * enum value, so use a conditional to avoid a compiler
3094              * warning */
3095             anv_debug_ignored_stype(ext->sType);
3096          break;
3097       }
3098    }
3099 
3100    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3101       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3102 
3103    if ((export_info && export_info->handleTypes) ||
3104        (fd_info && fd_info->handleType) ||
3105        (host_ptr_info && host_ptr_info->handleType)) {
3106       /* Anything imported or exported is EXTERNAL */
3107       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3108    }
3109 
3110    /* Check if we need to support Android HW buffer export. If so,
3111     * create AHardwareBuffer and import memory from it.
3112     */
3113    bool android_export = false;
3114    if (export_info && export_info->handleTypes &
3115        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3116       android_export = true;
3117 
3118    if (ahw_import_info) {
3119       result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3120       if (result != VK_SUCCESS)
3121          goto fail;
3122 
3123       goto success;
3124    } else if (android_export) {
3125       result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3126       if (result != VK_SUCCESS)
3127          goto fail;
3128 
3129       goto success;
3130    }
3131 
3132    /* The Vulkan spec permits handleType to be 0, in which case the struct is
3133     * ignored.
3134     */
3135    if (fd_info && fd_info->handleType) {
3136       /* At the moment, we support only the below handle types. */
3137       assert(fd_info->handleType ==
3138                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3139              fd_info->handleType ==
3140                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3141 
3142       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3143                                     client_address, &mem->bo);
3144       if (result != VK_SUCCESS)
3145          goto fail;
3146 
3147       /* For security purposes, we reject importing the bo if it's smaller
3148        * than the requested allocation size.  This prevents a malicious client
3149        * from passing a buffer to a trusted client, lying about the size, and
3150        * telling the trusted client to try and texture from an image that goes
3151        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
3152        * in the trusted client.  The trusted client can protect itself against
3153        * this sort of attack but only if it can trust the buffer size.
3154        */
3155       if (mem->bo->size < aligned_alloc_size) {
3156          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3157                             "aligned allocationSize too large for "
3158                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3159                             "%"PRIu64"B > %"PRIu64"B",
3160                             aligned_alloc_size, mem->bo->size);
3161          anv_device_release_bo(device, mem->bo);
3162          goto fail;
3163       }
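      /* Concretely: a 4 KiB dma-buf imported with allocationSize = 1 MiB is
       * rejected by the check above, before a trusted client could be
       * tricked into addressing 1 MiB of a 4 KiB buffer.
       */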
3164 
3165       /* From the Vulkan spec:
3166        *
3167        *    "Importing memory from a file descriptor transfers ownership of
3168        *    the file descriptor from the application to the Vulkan
3169        *    implementation. The application must not perform any operations on
3170        *    the file descriptor after a successful import."
3171        *
3172        * If the import fails, we leave the file descriptor open.
3173        */
3174       close(fd_info->fd);
3175       goto success;
3176    }
3177 
3178    if (host_ptr_info && host_ptr_info->handleType) {
3179       if (host_ptr_info->handleType ==
3180           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3181          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3182          goto fail;
3183       }
3184 
3185       assert(host_ptr_info->handleType ==
3186              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3187 
3188       result = anv_device_import_bo_from_host_ptr(device,
3189                                                   host_ptr_info->pHostPointer,
3190                                                   pAllocateInfo->allocationSize,
3191                                                   alloc_flags,
3192                                                   client_address,
3193                                                   &mem->bo);
3194       if (result != VK_SUCCESS)
3195          goto fail;
3196 
3197       mem->host_ptr = host_ptr_info->pHostPointer;
3198       goto success;
3199    }
3200 
3201    /* Regular allocate (not importing memory). */
3202 
3203    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3204                                 alloc_flags, client_address, &mem->bo);
3205    if (result != VK_SUCCESS)
3206       goto fail;
3207 
3208    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
3209       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
3210 
3211       /* Some legacy (non-modifiers) consumers need the tiling to be set on
3212        * the BO.  In this case, we have a dedicated allocation.
3213        */
3214       if (image->vk.wsi_legacy_scanout) {
3215          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
3216          result = anv_device_set_bo_tiling(device, mem->bo,
3217                                            surf->row_pitch_B,
3218                                            surf->tiling);
3219          if (result != VK_SUCCESS) {
3220             anv_device_release_bo(device, mem->bo);
3221             goto fail;
3222          }
3223       }
3224    }
3225 
3226  success:
3227    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3228    if (mem_heap_used > mem_heap->size) {
3229       p_atomic_add(&mem_heap->used, -mem->bo->size);
3230       anv_device_release_bo(device, mem->bo);
3231       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3232                          "Out of heap memory");
3233       goto fail;
3234    }
3235 
3236    pthread_mutex_lock(&device->mutex);
3237    list_addtail(&mem->link, &device->memory_objects);
3238    pthread_mutex_unlock(&device->mutex);
3239 
3240    *pMem = anv_device_memory_to_handle(mem);
3241 
3242    return VK_SUCCESS;
3243 
3244  fail:
3245    vk_object_free(&device->vk, pAllocator, mem);
3246 
3247    return result;
3248 }
3249 
3250 VkResult anv_GetMemoryFdKHR(
3251     VkDevice                                    device_h,
3252     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
3253     int*                                        pFd)
3254 {
3255    ANV_FROM_HANDLE(anv_device, dev, device_h);
3256    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
3257 
3258    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3259 
3260    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3261           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3262 
3263    return anv_device_export_bo(dev, mem->bo, pFd);
3264 }
3265 
3266 VkResult anv_GetMemoryFdPropertiesKHR(
3267     VkDevice                                    _device,
3268     VkExternalMemoryHandleTypeFlagBits          handleType,
3269     int                                         fd,
3270     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
3271 {
3272    ANV_FROM_HANDLE(anv_device, device, _device);
3273 
3274    switch (handleType) {
3275    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
3276       /* dma-buf can be imported as any memory type */
3277       pMemoryFdProperties->memoryTypeBits =
3278          (1 << device->physical->memory.type_count) - 1;
3279       return VK_SUCCESS;
3280 
3281    default:
3282       /* The valid usage section for this function says:
3283        *
3284        *    "handleType must not be one of the handle types defined as
3285        *    opaque."
3286        *
3287        * So opaque handle types fall into the default "unsupported" case.
3288        */
3289       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3290    }
3291 }
3292 
3293 VkResult anv_GetMemoryHostPointerPropertiesEXT(
3294    VkDevice                                    _device,
3295    VkExternalMemoryHandleTypeFlagBits          handleType,
3296    const void*                                 pHostPointer,
3297    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
3298 {
3299    ANV_FROM_HANDLE(anv_device, device, _device);
3300 
3301    assert(pMemoryHostPointerProperties->sType ==
3302           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
3303 
3304    switch (handleType) {
3305    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
3306       /* Host memory can be imported as any memory type. */
3307       pMemoryHostPointerProperties->memoryTypeBits =
3308          (1ull << device->physical->memory.type_count) - 1;
3309 
3310       return VK_SUCCESS;
3311 
3312    default:
3313       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
3314    }
3315 }
3316 
3317 void anv_FreeMemory(
3318     VkDevice                                    _device,
3319     VkDeviceMemory                              _mem,
3320     const VkAllocationCallbacks*                pAllocator)
3321 {
3322    ANV_FROM_HANDLE(anv_device, device, _device);
3323    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
3324 
3325    if (mem == NULL)
3326       return;
3327 
3328    pthread_mutex_lock(&device->mutex);
3329    list_del(&mem->link);
3330    pthread_mutex_unlock(&device->mutex);
3331 
3332    if (mem->map)
3333       anv_UnmapMemory(_device, _mem);
3334 
3335    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
3336                 -mem->bo->size);
3337 
3338    anv_device_release_bo(device, mem->bo);
3339 
3340 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
3341    if (mem->ahw)
3342       AHardwareBuffer_release(mem->ahw);
3343 #endif
3344 
3345    vk_object_free(&device->vk, pAllocator, mem);
3346 }
3347 
3348 VkResult anv_MapMemory(
3349     VkDevice                                    _device,
3350     VkDeviceMemory                              _memory,
3351     VkDeviceSize                                offset,
3352     VkDeviceSize                                size,
3353     VkMemoryMapFlags                            flags,
3354     void**                                      ppData)
3355 {
3356    ANV_FROM_HANDLE(anv_device, device, _device);
3357    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3358 
3359    if (mem == NULL) {
3360       *ppData = NULL;
3361       return VK_SUCCESS;
3362    }
3363 
3364    if (mem->host_ptr) {
3365       *ppData = mem->host_ptr + offset;
3366       return VK_SUCCESS;
3367    }
3368 
3369    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3370     *
3371     *  * memory must have been created with a memory type that reports
3372     *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
3373     */
3374    if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
3375       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3376                        "Memory object not mappable.");
3377    }
3378 
3379    if (size == VK_WHOLE_SIZE)
3380       size = mem->bo->size - offset;
3381 
3382    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3383     *
3384     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
3386     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
3387     *    equal to the size of the memory minus offset
3388     */
3389    assert(size > 0);
3390    assert(offset + size <= mem->bo->size);
3391 
3392    if (size != (size_t)size) {
3393       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3394                        "requested size 0x%"PRIx64" does not fit in %u bits",
3395                        size, (unsigned)(sizeof(size_t) * 8));
3396    }
3397 
3398    /* From the Vulkan 1.2.194 spec:
3399     *
3400     *    "memory must not be currently host mapped"
3401     */
3402    if (mem->map != NULL) {
3403       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3404                        "Memory object already mapped.");
3405    }
3406 
3407    uint32_t gem_flags = 0;
3408 
3409    if (!device->info->has_llc &&
3410        (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
3411       gem_flags |= I915_MMAP_WC;
3412 
3413    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
3414    uint64_t map_offset;
3415    if (!device->physical->info.has_mmap_offset)
3416       map_offset = offset & ~4095ull;
3417    else
3418       map_offset = 0;
3419    assert(offset >= map_offset);
3420    uint64_t map_size = (offset + size) - map_offset;
3421 
3422    /* Let's map whole pages */
3423    map_size = align64(map_size, 4096);
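   /* Worked example on the legacy (!has_mmap_offset) path: offset = 0x1234,
    * size = 0x100 gives map_offset = 0x1000 and map_size = 0x334 rounded up
    * to 0x1000; map_delta below is then 0x234, so *ppData still points at
    * the exact byte the client asked for.
    */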
3424 
3425    void *map;
3426    VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
3427                                        map_size, gem_flags, &map);
3428    if (result != VK_SUCCESS)
3429       return result;
3430 
3431    mem->map = map;
3432    mem->map_size = map_size;
3433    mem->map_delta = (offset - map_offset);
3434    *ppData = mem->map + mem->map_delta;
3435 
3436    return VK_SUCCESS;
3437 }
3438 
3439 void anv_UnmapMemory(
3440     VkDevice                                    _device,
3441     VkDeviceMemory                              _memory)
3442 {
3443    ANV_FROM_HANDLE(anv_device, device, _device);
3444    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3445 
3446    if (mem == NULL || mem->host_ptr)
3447       return;
3448 
3449    anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
3450 
3451    mem->map = NULL;
3452    mem->map_size = 0;
3453    mem->map_delta = 0;
3454 }
3455 
3456 VkResult anv_FlushMappedMemoryRanges(
3457     VkDevice                                    _device,
3458     uint32_t                                    memoryRangeCount,
3459     const VkMappedMemoryRange*                  pMemoryRanges)
3460 {
3461    ANV_FROM_HANDLE(anv_device, device, _device);
3462 
3463    if (!device->physical->memory.need_flush)
3464       return VK_SUCCESS;
3465 
3466 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3467    /* Make sure the writes we're flushing have landed. */
3468    __builtin_ia32_mfence();
3469 #endif
3470 
3471    for (uint32_t i = 0; i < memoryRangeCount; i++) {
3472       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3473       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3474          continue;
3475 
3476       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3477       if (map_offset >= mem->map_size)
3478          continue;
3479 
3480 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3481       intel_flush_range(mem->map + map_offset,
3482                         MIN2(pMemoryRanges[i].size,
3483                              mem->map_size - map_offset));
3484 #endif
3485    }
3486 
3487    return VK_SUCCESS;
3488 }
3489 
3490 VkResult anv_InvalidateMappedMemoryRanges(
3491     VkDevice                                    _device,
3492     uint32_t                                    memoryRangeCount,
3493     const VkMappedMemoryRange*                  pMemoryRanges)
3494 {
3495    ANV_FROM_HANDLE(anv_device, device, _device);
3496 
3497    if (!device->physical->memory.need_flush)
3498       return VK_SUCCESS;
3499 
3500    for (uint32_t i = 0; i < memoryRangeCount; i++) {
3501       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3502       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3503          continue;
3504 
3505       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3506       if (map_offset >= mem->map_size)
3507          continue;
3508 
3509 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3510       intel_invalidate_range(mem->map + map_offset,
3511                              MIN2(pMemoryRanges[i].size,
3512                                   mem->map_size - map_offset));
3513 #endif
3514    }
3515 
3516 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3517    /* Make sure no reads get moved up above the invalidate. */
3518    __builtin_ia32_mfence();
3519 #endif
3520 
3521    return VK_SUCCESS;
3522 }
3523 
3524 void anv_GetDeviceMemoryCommitment(
3525     VkDevice                                    device,
3526     VkDeviceMemory                              memory,
3527     VkDeviceSize*                               pCommittedMemoryInBytes)
3528 {
3529    *pCommittedMemoryInBytes = 0;
3530 }
3531 
3532 static void
3533 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
3534 {
3535    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
3536    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
3537 
3538    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
3539 
3540    if (mem) {
3541       assert(pBindInfo->memoryOffset < mem->bo->size);
3542       assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
3543       buffer->address = (struct anv_address) {
3544          .bo = mem->bo,
3545          .offset = pBindInfo->memoryOffset,
3546       };
3547    } else {
3548       buffer->address = ANV_NULL_ADDRESS;
3549    }
3550 }
3551 
3552 VkResult anv_BindBufferMemory2(
3553     VkDevice                                    device,
3554     uint32_t                                    bindInfoCount,
3555     const VkBindBufferMemoryInfo*               pBindInfos)
3556 {
3557    for (uint32_t i = 0; i < bindInfoCount; i++)
3558       anv_bind_buffer_memory(&pBindInfos[i]);
3559 
3560    return VK_SUCCESS;
3561 }
3562 
3563 VkResult anv_QueueBindSparse(
3564     VkQueue                                     _queue,
3565     uint32_t                                    bindInfoCount,
3566     const VkBindSparseInfo*                     pBindInfo,
3567     VkFence                                     fence)
3568 {
3569    ANV_FROM_HANDLE(anv_queue, queue, _queue);
3570    if (vk_device_is_lost(&queue->device->vk))
3571       return VK_ERROR_DEVICE_LOST;
3572 
3573    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
3574 }
3575 
3576 // Event functions
3577 
3578 VkResult anv_CreateEvent(
3579     VkDevice                                    _device,
3580     const VkEventCreateInfo*                    pCreateInfo,
3581     const VkAllocationCallbacks*                pAllocator,
3582     VkEvent*                                    pEvent)
3583 {
3584    ANV_FROM_HANDLE(anv_device, device, _device);
3585    struct anv_event *event;
3586 
3587    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
3588 
3589    event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
3590                            VK_OBJECT_TYPE_EVENT);
3591    if (event == NULL)
3592       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3593 
3594    event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
3595                                        sizeof(uint64_t), 8);
3596    *(uint64_t *)event->state.map = VK_EVENT_RESET;
3597 
3598    *pEvent = anv_event_to_handle(event);
3599 
3600    return VK_SUCCESS;
3601 }
3602 
3603 void anv_DestroyEvent(
3604     VkDevice                                    _device,
3605     VkEvent                                     _event,
3606     const VkAllocationCallbacks*                pAllocator)
3607 {
3608    ANV_FROM_HANDLE(anv_device, device, _device);
3609    ANV_FROM_HANDLE(anv_event, event, _event);
3610 
3611    if (!event)
3612       return;
3613 
3614    anv_state_pool_free(&device->dynamic_state_pool, event->state);
3615 
3616    vk_object_free(&device->vk, pAllocator, event);
3617 }
3618 
3619 VkResult anv_GetEventStatus(
3620     VkDevice                                    _device,
3621     VkEvent                                     _event)
3622 {
3623    ANV_FROM_HANDLE(anv_device, device, _device);
3624    ANV_FROM_HANDLE(anv_event, event, _event);
3625 
3626    if (vk_device_is_lost(&device->vk))
3627       return VK_ERROR_DEVICE_LOST;
3628 
3629    return *(uint64_t *)event->state.map;
3630 }
3631 
3632 VkResult anv_SetEvent(
3633     VkDevice                                    _device,
3634     VkEvent                                     _event)
3635 {
3636    ANV_FROM_HANDLE(anv_event, event, _event);
3637 
3638    *(uint64_t *)event->state.map = VK_EVENT_SET;
3639 
3640    return VK_SUCCESS;
3641 }
3642 
3643 VkResult anv_ResetEvent(
3644     VkDevice                                    _device,
3645     VkEvent                                     _event)
3646 {
3647    ANV_FROM_HANDLE(anv_event, event, _event);
3648 
3649    *(uint64_t *)event->state.map = VK_EVENT_RESET;
3650 
3651    return VK_SUCCESS;
3652 }
3653 
3654 // Buffer functions
3655 
3656 static void
3657 anv_get_buffer_memory_requirements(struct anv_device *device,
3658                                    VkDeviceSize size,
3659                                    VkBufferUsageFlags usage,
3660                                    VkMemoryRequirements2* pMemoryRequirements)
3661 {
3662    /* The Vulkan spec (git aaed022) says:
3663     *
3664     *    memoryTypeBits is a bitfield and contains one bit set for every
3665     *    supported memory type for the resource. The bit `1<<i` is set if and
3666     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
3667     *    structure for the physical device is supported.
3668     */
3669    uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
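   /* E.g. with three memory types this is (1 << 3) - 1 = 0b111: buffers
    * place no restriction on memory type, so every type's bit is set.
    */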
3670 
3671    /* Base alignment requirement of a cache line */
3672    uint32_t alignment = 16;
3673 
3674    if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
3675       alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
3676 
3677    pMemoryRequirements->memoryRequirements.size = size;
3678    pMemoryRequirements->memoryRequirements.alignment = alignment;
3679 
3680    /* Storage and uniform buffers should have their size aligned to
3681     * 32 bits to avoid boundary checks when the last DWord is not
3682     * complete.  This ensures that no internal padding is needed for
3683     * 16-bit types.
3684     */
3685    if (device->vk.enabled_features.robustBufferAccess &&
3686        (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
3687         usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
3688       pMemoryRequirements->memoryRequirements.size = align64(size, 4);
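   /* E.g. an 18-byte robust storage buffer is reported as 20 bytes so a
    * 16-bit access to the final element can read a whole DWord in bounds.
    */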
3689 
3690    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3691 
3692    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3693       switch (ext->sType) {
3694       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3695          VkMemoryDedicatedRequirements *requirements = (void *)ext;
3696          requirements->prefersDedicatedAllocation = false;
3697          requirements->requiresDedicatedAllocation = false;
3698          break;
3699       }
3700 
3701       default:
3702          anv_debug_ignored_stype(ext->sType);
3703          break;
3704       }
3705    }
3706 }
3707 
3708 void anv_GetBufferMemoryRequirements2(
3709     VkDevice                                    _device,
3710     const VkBufferMemoryRequirementsInfo2*      pInfo,
3711     VkMemoryRequirements2*                      pMemoryRequirements)
3712 {
3713    ANV_FROM_HANDLE(anv_device, device, _device);
3714    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3715 
3716    anv_get_buffer_memory_requirements(device,
3717                                       buffer->vk.size,
3718                                       buffer->vk.usage,
3719                                       pMemoryRequirements);
3720 }
3721 
3722 void anv_GetDeviceBufferMemoryRequirements(
3723     VkDevice                                    _device,
3724     const VkDeviceBufferMemoryRequirements*     pInfo,
3725     VkMemoryRequirements2*                      pMemoryRequirements)
3726 {
3727    ANV_FROM_HANDLE(anv_device, device, _device);
3728 
3729    anv_get_buffer_memory_requirements(device,
3730                                       pInfo->pCreateInfo->size,
3731                                       pInfo->pCreateInfo->usage,
3732                                       pMemoryRequirements);
3733 }
3734 
3735 VkResult anv_CreateBuffer(
3736     VkDevice                                    _device,
3737     const VkBufferCreateInfo*                   pCreateInfo,
3738     const VkAllocationCallbacks*                pAllocator,
3739     VkBuffer*                                   pBuffer)
3740 {
3741    ANV_FROM_HANDLE(anv_device, device, _device);
3742    struct anv_buffer *buffer;
3743 
3744    /* Don't allow creating buffers bigger than our address space.  The real
3745     * issue here is that we may align up the buffer size and we don't want
3746     * doing so to cause roll-over.  However, no one has any business
3747     * allocating a buffer larger than our GTT size.
3748     */
3749    if (pCreateInfo->size > device->physical->gtt_size)
3750       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3751 
3752    buffer = vk_buffer_create(&device->vk, pCreateInfo,
3753                              pAllocator, sizeof(*buffer));
3754    if (buffer == NULL)
3755       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3756 
3757    buffer->address = ANV_NULL_ADDRESS;
3758 
3759    *pBuffer = anv_buffer_to_handle(buffer);
3760 
3761    return VK_SUCCESS;
3762 }
3763 
3764 void anv_DestroyBuffer(
3765     VkDevice                                    _device,
3766     VkBuffer                                    _buffer,
3767     const VkAllocationCallbacks*                pAllocator)
3768 {
3769    ANV_FROM_HANDLE(anv_device, device, _device);
3770    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3771 
3772    if (!buffer)
3773       return;
3774 
3775    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
3776 }
3777 
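/* vkGetBufferDeviceAddress returns the buffer's 64-bit GPU virtual
 * address for use with VK_KHR_buffer_device_address (e.g. SPIR-V
 * PhysicalStorageBuffer pointers). A minimal application-side sketch,
 * not part of this driver, assuming the buffer was created with
 * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT:
 *
 *    VkBufferDeviceAddressInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
 *       .buffer = buffer,
 *    };
 *    VkDeviceAddress addr = vkGetBufferDeviceAddress(device, &info);
 *
 * This only works because the underlying BO is pinned: its address must
 * not move for as long as shaders may hold the pointer.
 */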
VkDeviceAddress anv_GetBufferDeviceAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfo*            pInfo)
{
   ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);

   assert(!anv_address_is_null(buffer->address));
   assert(anv_bo_is_pinned(buffer->address.bo));

   return anv_address_physical(buffer->address);
}

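/* Returning 0 is valid here: the opaque capture address only matters for
 * capture/replay of device addresses, and this driver handles that at
 * memory-allocation granularity (see
 * anv_GetDeviceMemoryOpaqueCaptureAddress below) rather than per buffer.
 */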
uint64_t anv_GetBufferOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfo*            pInfo)
{
   return 0;
}

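/* Memory allocated with VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT
 * gets a client-visible address (which is what the asserts below check);
 * a replaying trace feeds the value returned here back through
 * VkMemoryOpaqueCaptureAddressAllocateInfo to land the allocation at the
 * same GPU address.
 */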
uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
   ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);

   assert(anv_bo_is_pinned(memory->bo));
   assert(memory->bo->has_client_visible_address);

   return intel_48b_address(memory->bo->offset);
}

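/* Pack a buffer RENDER_SURFACE_STATE through isl. The MOCS (memory
 * object control state, i.e. caching behavior) is chosen from the usage
 * and from whether the BO is externally shared, since external consumers
 * are not part of the GPU's cache domain.
 */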
void
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
                              enum isl_format format,
                              struct isl_swizzle swizzle,
                              isl_surf_usage_flags_t usage,
                              struct anv_address address,
                              uint32_t range, uint32_t stride)
{
   isl_buffer_fill_state(&device->isl_dev, state.map,
                         .address = anv_address_physical(address),
                         .mocs = isl_mocs(&device->isl_dev, usage,
                                          address.bo && address.bo->is_external),
                         .size_B = range,
                         .format = format,
                         .swizzle = swizzle,
                         .stride_B = stride);
}

void anv_DestroySampler(
    VkDevice                                    _device,
    VkSampler                                   _sampler,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->bindless_state.map) {
      anv_state_pool_free(&device->dynamic_state_pool,
                          sampler->bindless_state);
   }

   if (sampler->custom_border_color.map) {
      anv_state_reserved_pool_free(&device->custom_border_colors,
                                   sampler->custom_border_color);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}

static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

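/* A minimal application-side sketch (not part of this driver) of the
 * standard two-call idiom used against this entry point:
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(pdev, &count, NULL);
 *    VkTimeDomainEXT *domains = malloc(count * sizeof(*domains));
 *    vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(pdev, &count, domains);
 *
 * vk_outarray below implements exactly this contract: with a NULL array
 * it only reports the count; otherwise it clamps writes to the caller's
 * count and returns VK_INCOMPLETE if anything was dropped.
 */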
VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
   VkPhysicalDevice                             physicalDevice,
   uint32_t                                     *pTimeDomainCount,
   VkTimeDomainEXT                              *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult anv_GetCalibratedTimestampsEXT(
   VkDevice                                     _device,
   uint32_t                                     timestampCount,
   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
   uint64_t                                     *pTimestamps,
   uint64_t                                     *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info->timestamp_frequency;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         if (!intel_gem_read_render_timestamp(device->fd,
                                              device->info->kmd_type,
                                              &pTimestamps[d])) {
            return vk_device_set_lost(&device->vk, "Failed to read the "
                                      "TIMESTAMP register: %m");
         }
         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

    /*
     * The maximum deviation is the sum of the interval over which we
     * perform the sampling and the maximum period of any sampled
     * clock. That's because the maximum skew between any two sampled
     * clock edges is when the sampled clock with the largest period is
     * sampled at the end of that period but right at the beginning of the
     * sampling interval and some other clock is sampled right at the
     * beginning of its sampling period and right at the end of the
     * sampling interval. Let's assume the GPU has the longest clock
     * period and that the application is sampling GPU and monotonic:
     *
     *                               s                 e
     *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
     *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
     *
     *                               g
     *		  0         1         2         3
     *	GPU       -----_____-----_____-----_____-----_____
     *
     *                                                m
     *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
     *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
     *
     *	Interval                     <----------------->
     *	Deviation           <-------------------------->
     *
     *		s  = read(raw)       2
     *		g  = read(GPU)       1
     *		m  = read(monotonic) 2
     *		e  = read(raw)       b
     *
     * We round the sample interval up by one tick to cover sampling error
     * in the interval clock.
     */

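   /* Worked example (numbers are illustrative, not from the original
    * source): with a 12.5 MHz GPU timestamp, device_period =
    * DIV_ROUND_UP(1000000000, 12500000) = 80ns, so max_clock_period is
    * 80. If the two CLOCK_MONOTONIC_RAW reads were 500ns apart,
    * sample_interval = 501 and *pMaxDeviation = 581ns.
    */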
   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}

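/* VK_EXT_sample_locations: a 1x1 grid means custom sample locations can
 * only be specified at per-pixel granularity; {0,0} is reported for
 * sample counts the hardware does not support at all.
 */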
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      anv_debug_ignored_stype(ext->sType);
}