1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #ifdef MAJOR_IN_MKDEV
29 #include <sys/mkdev.h>
30 #endif
31 #ifdef MAJOR_IN_SYSMACROS
32 #include <sys/sysmacros.h>
33 #endif
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <unistd.h>
37 #include <fcntl.h>
38 #include "drm-uapi/drm_fourcc.h"
39 #include "drm-uapi/drm.h"
40 #include <xf86drm.h>
41
42 #include "anv_private.h"
43 #include "anv_measure.h"
44 #include "util/u_debug.h"
45 #include "util/build_id.h"
46 #include "util/disk_cache.h"
47 #include "util/mesa-sha1.h"
48 #include "util/os_file.h"
49 #include "util/os_misc.h"
50 #include "util/u_atomic.h"
51 #include "util/u_string.h"
52 #include "util/driconf.h"
53 #include "git_sha1.h"
54 #include "vk_util.h"
55 #include "vk_deferred_operation.h"
56 #include "vk_drm_syncobj.h"
57 #include "common/i915/intel_defines.h"
58 #include "common/intel_uuid.h"
59 #include "perf/intel_perf.h"
60
61 #include "genxml/gen7_pack.h"
62 #include "genxml/genX_bits.h"
63
64 static const driOptionDescription anv_dri_options[] = {
65 DRI_CONF_SECTION_PERFORMANCE
66 DRI_CONF_ADAPTIVE_SYNC(true)
67 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
68 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
69 DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
70 DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
71 DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
72 DRI_CONF_NO_16BIT(false)
73 DRI_CONF_ANV_HASVK_OVERRIDE_API_VERSION(false)
74 DRI_CONF_SECTION_END
75
76 DRI_CONF_SECTION_DEBUG
77 DRI_CONF_ALWAYS_FLUSH_CACHE(false)
78 DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
79 DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
80 DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
81 DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
82 DRI_CONF_SECTION_END
83
84 DRI_CONF_SECTION_QUALITY
85 DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
86 DRI_CONF_SECTION_END
87 };
88
89 /* This is probably far too big, but it reflects the max size used for
90  * messages in OpenGL's KHR_debug.
91  */
92 #define MAX_DEBUG_MESSAGE_LENGTH 4096
93
94 /* Render engine timestamp register */
95 #define TIMESTAMP 0x2358
96
97 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
98 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
99 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
100 #endif
101
102 static void
103 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
104 {
105 char str[MAX_DEBUG_MESSAGE_LENGTH];
106 struct anv_device *device = (struct anv_device *)data;
107 UNUSED struct anv_instance *instance = device->physical->instance;
108
109 va_list args;
110 va_start(args, fmt);
111 (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
112 va_end(args);
113
114 //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
115 }
116
117 static void
118 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
119 {
120 va_list args;
121 va_start(args, fmt);
122
123 if (INTEL_DEBUG(DEBUG_PERF))
124 mesa_logd_v(fmt, args);
125
126 va_end(args);
127 }
128
129 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
130 defined(VK_USE_PLATFORM_XCB_KHR) || \
131 defined(VK_USE_PLATFORM_XLIB_KHR) || \
132 defined(VK_USE_PLATFORM_DISPLAY_KHR)
133 #define ANV_USE_WSI_PLATFORM
134 #endif
135
136 #ifdef ANDROID_STRICT
137 #define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
138 #else
139 #define ANV_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
140 #define ANV_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
141 #endif
142
143 VkResult anv_EnumerateInstanceVersion(
144 uint32_t* pApiVersion)
145 {
146 #ifdef ANDROID_STRICT
147 *pApiVersion = ANV_API_VERSION;
148 #else
149 *pApiVersion = ANV_API_VERSION_1_3;
150 #endif
151 return VK_SUCCESS;
152 }
153
154 static const struct vk_instance_extension_table instance_extensions = {
155 .KHR_device_group_creation = true,
156 .KHR_external_fence_capabilities = true,
157 .KHR_external_memory_capabilities = true,
158 .KHR_external_semaphore_capabilities = true,
159 .KHR_get_physical_device_properties2 = true,
160 .EXT_debug_report = true,
161 .EXT_debug_utils = true,
162
163 #ifdef ANV_USE_WSI_PLATFORM
164 .KHR_get_surface_capabilities2 = true,
165 .KHR_surface = true,
166 .KHR_surface_protected_capabilities = true,
167 #endif
168 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
169 .KHR_wayland_surface = true,
170 #endif
171 #ifdef VK_USE_PLATFORM_XCB_KHR
172 .KHR_xcb_surface = true,
173 #endif
174 #ifdef VK_USE_PLATFORM_XLIB_KHR
175 .KHR_xlib_surface = true,
176 #endif
177 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
178 .EXT_acquire_xlib_display = true,
179 #endif
180 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
181 .KHR_display = true,
182 .KHR_get_display_properties2 = true,
183 .EXT_direct_mode_display = true,
184 .EXT_display_surface_counter = true,
185 .EXT_acquire_drm_display = true,
186 #endif
187 #ifndef VK_USE_PLATFORM_WIN32_KHR
188 .EXT_headless_surface = true,
189 #endif
190 };
191
192 static void
193 get_device_extensions(const struct anv_physical_device *device,
194 struct vk_device_extension_table *ext)
195 {
196 const bool has_syncobj_wait =
197 (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
198
199 *ext = (struct vk_device_extension_table) {
200 .KHR_8bit_storage = device->info.ver >= 8,
201 .KHR_16bit_storage = device->info.ver >= 8 && !device->instance->no_16bit,
202 .KHR_bind_memory2 = true,
203 .KHR_buffer_device_address = device->has_a64_buffer_access,
204 .KHR_copy_commands2 = true,
205 .KHR_create_renderpass2 = true,
206 .KHR_dedicated_allocation = true,
207 .KHR_deferred_host_operations = true,
208 .KHR_depth_stencil_resolve = true,
209 .KHR_descriptor_update_template = true,
210 .KHR_device_group = true,
211 .KHR_draw_indirect_count = true,
212 .KHR_driver_properties = true,
213 .KHR_dynamic_rendering = true,
214 .KHR_external_fence = has_syncobj_wait,
215 .KHR_external_fence_fd = has_syncobj_wait,
216 .KHR_external_memory = true,
217 .KHR_external_memory_fd = true,
218 .KHR_external_semaphore = true,
219 .KHR_external_semaphore_fd = true,
220 .KHR_format_feature_flags2 = true,
221 .KHR_get_memory_requirements2 = true,
222 .KHR_image_format_list = true,
223 .KHR_imageless_framebuffer = true,
224 #ifdef ANV_USE_WSI_PLATFORM
225 .KHR_incremental_present = true,
226 #endif
227 .KHR_maintenance1 = true,
228 .KHR_maintenance2 = true,
229 .KHR_maintenance3 = true,
230 .KHR_maintenance4 = true,
231 .KHR_multiview = true,
232 .KHR_performance_query =
233 !anv_use_relocations(device) && device->perf &&
234 (device->perf->i915_perf_version >= 3 ||
235 INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
236 device->use_call_secondary,
237 .KHR_pipeline_executable_properties = true,
238 .KHR_push_descriptor = true,
239 .KHR_relaxed_block_layout = true,
240 .KHR_sampler_mirror_clamp_to_edge = true,
241 .KHR_sampler_ycbcr_conversion = true,
242 .KHR_separate_depth_stencil_layouts = true,
243 .KHR_shader_clock = true,
244 .KHR_shader_draw_parameters = true,
245 .KHR_shader_expect_assume = true,
246 .KHR_shader_float16_int8 = device->info.ver >= 8 && !device->instance->no_16bit,
247 .KHR_shader_float_controls = true,
248 .KHR_shader_integer_dot_product = true,
249 .KHR_shader_non_semantic_info = true,
250 .KHR_shader_subgroup_extended_types = device->info.ver >= 8,
251 .KHR_shader_subgroup_uniform_control_flow = true,
252 .KHR_shader_terminate_invocation = true,
253 .KHR_spirv_1_4 = true,
254 .KHR_storage_buffer_storage_class = true,
255 #ifdef ANV_USE_WSI_PLATFORM
256 .KHR_swapchain = true,
257 .KHR_swapchain_mutable_format = true,
258 #endif
259 .KHR_synchronization2 = true,
260 .KHR_timeline_semaphore = true,
261 .KHR_uniform_buffer_standard_layout = true,
262 .KHR_variable_pointers = true,
263 .KHR_vulkan_memory_model = true,
264 .KHR_workgroup_memory_explicit_layout = true,
265 .KHR_zero_initialize_workgroup_memory = true,
266 .EXT_4444_formats = true,
267 .EXT_border_color_swizzle = device->info.ver >= 8,
268 .EXT_buffer_device_address = device->has_a64_buffer_access,
269 .EXT_calibrated_timestamps = device->has_reg_timestamp,
270 .EXT_color_write_enable = true,
271 .EXT_conditional_rendering = device->info.verx10 >= 75,
272 .EXT_custom_border_color = device->info.ver >= 8,
273 .EXT_depth_clamp_zero_one = true,
274 .EXT_depth_clip_control = true,
275 .EXT_depth_clip_enable = true,
276 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
277 .EXT_display_control = true,
278 #endif
279 .EXT_extended_dynamic_state = true,
280 .EXT_extended_dynamic_state2 = true,
281 .EXT_external_memory_dma_buf = true,
282 .EXT_external_memory_host = true,
283 .EXT_global_priority = device->max_context_priority >=
284 INTEL_CONTEXT_MEDIUM_PRIORITY,
285 .EXT_global_priority_query = device->max_context_priority >=
286 INTEL_CONTEXT_MEDIUM_PRIORITY,
287 .EXT_host_query_reset = true,
288 .EXT_image_2d_view_of_3d = true,
289 .EXT_image_robustness = true,
290 .EXT_image_drm_format_modifier = true,
291 .EXT_image_view_min_lod = true,
292 .EXT_index_type_uint8 = true,
293 .EXT_inline_uniform_block = true,
294 .EXT_line_rasterization = true,
295 /* Enable the extension only if we have support on both local and
296  * system memory.
297  */
298 .EXT_memory_budget = device->sys.available,
299 .EXT_non_seamless_cube_map = true,
300 .EXT_pci_bus_info = true,
301 .EXT_physical_device_drm = true,
302 .EXT_pipeline_creation_cache_control = true,
303 .EXT_pipeline_creation_feedback = true,
304 .EXT_primitives_generated_query = true,
305 .EXT_primitive_topology_list_restart = true,
306 .EXT_private_data = true,
307 .EXT_provoking_vertex = true,
308 .EXT_queue_family_foreign = true,
309 .EXT_robustness2 = true,
310 .EXT_sample_locations = true,
311 .EXT_scalar_block_layout = true,
312 .EXT_separate_stencil_usage = true,
313 .EXT_shader_atomic_float = true,
314 .EXT_shader_demote_to_helper_invocation = true,
315 .EXT_shader_module_identifier = true,
316 .EXT_shader_subgroup_ballot = true,
317 .EXT_shader_subgroup_vote = true,
318 .EXT_shader_viewport_index_layer = true,
319 .EXT_subgroup_size_control = true,
320 .EXT_texel_buffer_alignment = true,
321 .EXT_tooling_info = true,
322 .EXT_transform_feedback = true,
323 .EXT_vertex_attribute_divisor = true,
324 .EXT_ycbcr_image_arrays = true,
325 #if DETECT_OS_ANDROID
326 .ANDROID_external_memory_android_hardware_buffer = true,
327 .ANDROID_native_buffer = true,
328 #endif
329 .GOOGLE_decorate_string = true,
330 .GOOGLE_hlsl_functionality1 = true,
331 .GOOGLE_user_type = true,
332 .INTEL_performance_query = device->perf &&
333 device->perf->i915_perf_version >= 3,
334 .INTEL_shader_integer_functions2 = device->info.ver >= 8,
335 .EXT_multi_draw = true,
336 .NV_compute_shader_derivatives = true,
337 .VALVE_mutable_descriptor_type = true,
338 };
339 }
340
341 static void
342 get_features(const struct anv_physical_device *pdevice,
343 struct vk_features *features)
344 {
345 /* Just pick one; they're all the same */
346 const bool has_astc_ldr =
347 isl_format_supports_sampling(&pdevice->info,
348 ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
349
350 *features = (struct vk_features) {
351 /* Vulkan 1.0 */
352 .robustBufferAccess = true,
353 .fullDrawIndexUint32 = true,
354 .imageCubeArray = true,
355 .independentBlend = true,
356 .geometryShader = true,
357 .tessellationShader = true,
358 .sampleRateShading = true,
359 .dualSrcBlend = true,
360 .logicOp = true,
361 .multiDrawIndirect = true,
362 .drawIndirectFirstInstance = true,
363 .depthClamp = true,
364 .depthBiasClamp = true,
365 .fillModeNonSolid = true,
366 .depthBounds = pdevice->info.ver >= 12,
367 .wideLines = true,
368 .largePoints = true,
369 .alphaToOne = true,
370 .multiViewport = true,
371 .samplerAnisotropy = true,
372 .textureCompressionETC2 = pdevice->info.ver >= 8 ||
373 pdevice->info.platform == INTEL_PLATFORM_BYT,
374 .textureCompressionASTC_LDR = has_astc_ldr,
375 .textureCompressionBC = true,
376 .occlusionQueryPrecise = true,
377 .pipelineStatisticsQuery = true,
378 .fragmentStoresAndAtomics = true,
379 .shaderTessellationAndGeometryPointSize = true,
380 .shaderImageGatherExtended = true,
381 .shaderStorageImageExtendedFormats = true,
382 .shaderStorageImageMultisample = false,
383 .shaderStorageImageReadWithoutFormat = false,
384 .shaderStorageImageWriteWithoutFormat = true,
385 .shaderUniformBufferArrayDynamicIndexing = true,
386 .shaderSampledImageArrayDynamicIndexing = true,
387 .shaderStorageBufferArrayDynamicIndexing = true,
388 .shaderStorageImageArrayDynamicIndexing = true,
389 .shaderClipDistance = true,
390 .shaderCullDistance = true,
391 .shaderFloat64 = pdevice->info.ver >= 8 &&
392 pdevice->info.has_64bit_float,
393 .shaderInt64 = pdevice->info.ver >= 8,
394 .shaderInt16 = pdevice->info.ver >= 8,
395 .shaderResourceMinLod = false,
396 .variableMultisampleRate = true,
397 .inheritedQueries = true,
398
399 /* Vulkan 1.1 */
400 .storageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
401 .uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
402 .storagePushConstant16 = pdevice->info.ver >= 8,
403 .storageInputOutput16 = false,
404 .multiview = true,
405 .multiviewGeometryShader = true,
406 .multiviewTessellationShader = true,
407 .variablePointersStorageBuffer = true,
408 .variablePointers = true,
409 .protectedMemory = false,
410 .samplerYcbcrConversion = true,
411 .shaderDrawParameters = true,
412
413 /* Vulkan 1.2 */
414 .samplerMirrorClampToEdge = true,
415 .drawIndirectCount = true,
416 .storageBuffer8BitAccess = pdevice->info.ver >= 8,
417 .uniformAndStorageBuffer8BitAccess = pdevice->info.ver >= 8,
418 .storagePushConstant8 = pdevice->info.ver >= 8,
419 .shaderBufferInt64Atomics = false,
420 .shaderSharedInt64Atomics = false,
421 .shaderFloat16 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
422 .shaderInt8 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
423
424 .descriptorIndexing = false,
425 .shaderInputAttachmentArrayDynamicIndexing = false,
426 .shaderUniformTexelBufferArrayDynamicIndexing = false,
427 .shaderStorageTexelBufferArrayDynamicIndexing = false,
428 .shaderUniformBufferArrayNonUniformIndexing = false,
429 .shaderSampledImageArrayNonUniformIndexing = false,
430 .shaderStorageBufferArrayNonUniformIndexing = false,
431 .shaderStorageImageArrayNonUniformIndexing = false,
432 .shaderInputAttachmentArrayNonUniformIndexing = false,
433 .shaderUniformTexelBufferArrayNonUniformIndexing = false,
434 .shaderStorageTexelBufferArrayNonUniformIndexing = false,
435 .descriptorBindingUniformBufferUpdateAfterBind = false,
436 .descriptorBindingSampledImageUpdateAfterBind = false,
437 .descriptorBindingStorageImageUpdateAfterBind = false,
438 .descriptorBindingStorageBufferUpdateAfterBind = false,
439 .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
440 .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
441 .descriptorBindingUpdateUnusedWhilePending = false,
442 .descriptorBindingPartiallyBound = false,
443 .descriptorBindingVariableDescriptorCount = false,
444 .runtimeDescriptorArray = false,
445
446 .samplerFilterMinmax = false,
447 .scalarBlockLayout = true,
448 .imagelessFramebuffer = true,
449 .uniformBufferStandardLayout = true,
450 .shaderSubgroupExtendedTypes = true,
451 .separateDepthStencilLayouts = true,
452 .hostQueryReset = true,
453 .timelineSemaphore = true,
454 .bufferDeviceAddress = pdevice->has_a64_buffer_access,
455 .bufferDeviceAddressCaptureReplay = pdevice->has_a64_buffer_access,
456 .bufferDeviceAddressMultiDevice = false,
457 .vulkanMemoryModel = true,
458 .vulkanMemoryModelDeviceScope = true,
459 .vulkanMemoryModelAvailabilityVisibilityChains = true,
460 .shaderOutputViewportIndex = true,
461 .shaderOutputLayer = true,
462 .subgroupBroadcastDynamicId = true,
463
464 /* Vulkan 1.3 */
465 .robustImageAccess = true,
466 .inlineUniformBlock = true,
467 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
468 .pipelineCreationCacheControl = true,
469 .privateData = true,
470 .shaderDemoteToHelperInvocation = true,
471 .shaderTerminateInvocation = true,
472 .subgroupSizeControl = true,
473 .computeFullSubgroups = true,
474 .synchronization2 = true,
475 .textureCompressionASTC_HDR = false,
476 .shaderZeroInitializeWorkgroupMemory = true,
477 .dynamicRendering = true,
478 .shaderIntegerDotProduct = true,
479 .maintenance4 = true,
480
481 /* VK_EXT_4444_formats */
482 .formatA4R4G4B4 = true,
483 .formatA4B4G4R4 = false,
484
485 /* VK_EXT_border_color_swizzle */
486 .borderColorSwizzle = true,
487 .borderColorSwizzleFromImage = true,
488
489 /* VK_EXT_color_write_enable */
490 .colorWriteEnable = true,
491
492 /* VK_EXT_image_2d_view_of_3d */
493 .image2DViewOf3D = true,
494 .sampler2DViewOf3D = false,
495
496 /* VK_NV_compute_shader_derivatives */
497 .computeDerivativeGroupQuads = true,
498 .computeDerivativeGroupLinear = true,
499
500 /* VK_EXT_conditional_rendering */
501 .conditionalRendering = pdevice->info.verx10 >= 75,
502 .inheritedConditionalRendering = pdevice->info.verx10 >= 75,
503
504 /* VK_EXT_custom_border_color */
505 .customBorderColors = pdevice->info.ver >= 8,
506 .customBorderColorWithoutFormat = pdevice->info.ver >= 8,
507
508 /* VK_EXT_depth_clamp_zero_one */
509 .depthClampZeroOne = true,
510
511 /* VK_EXT_depth_clip_enable */
512 .depthClipEnable = true,
513
514 /* VK_KHR_global_priority */
515 .globalPriorityQuery = true,
516
517 /* VK_EXT_image_view_min_lod */
518 .minLod = true,
519
520 /* VK_EXT_index_type_uint8 */
521 .indexTypeUint8 = true,
522
523 /* VK_EXT_line_rasterization */
524 /* Rectangular lines must use the strict algorithm, which is not
525 * supported for wide lines prior to ICL. See rasterization_mode for
526 * details and how the HW states are programmed.
527 */
528 .rectangularLines = false,
529 .bresenhamLines = true,
530 /* Support for Smooth lines with MSAA was removed on gfx11. From the
531 * BSpec section "Multisample ModesState" table for "AA Line Support
532 * Requirements":
533 *
534 * GFX10:BUG:######## NUM_MULTISAMPLES == 1
535 *
536 * Fortunately, this isn't a case most people care about.
537 */
538 .smoothLines = pdevice->info.ver < 10,
539 .stippledRectangularLines = false,
540 .stippledBresenhamLines = true,
541 .stippledSmoothLines = false,
542
543 /* VK_EXT_mutable_descriptor_type */
544 .mutableDescriptorType = true,
545
546 /* VK_KHR_performance_query */
547 .performanceCounterQueryPools = true,
548 /* HW only supports a single configuration at a time. */
549 .performanceCounterMultipleQueryPools = false,
550
551 /* VK_KHR_pipeline_executable_properties */
552 .pipelineExecutableInfo = true,
553
554 /* VK_EXT_primitives_generated_query */
555 .primitivesGeneratedQuery = true,
556 .primitivesGeneratedQueryWithRasterizerDiscard = false,
557 .primitivesGeneratedQueryWithNonZeroStreams = false,
558
559 /* VK_EXT_provoking_vertex */
560 .provokingVertexLast = true,
561 .transformFeedbackPreservesProvokingVertex = true,
562
563 /* VK_EXT_robustness2 */
564 .robustBufferAccess2 = true,
565 .robustImageAccess2 = true,
566 .nullDescriptor = true,
567
568 /* VK_EXT_shader_atomic_float */
569 .shaderBufferFloat32Atomics = true,
570 .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc,
571 .shaderBufferFloat64Atomics =
572 pdevice->info.has_64bit_float && pdevice->info.has_lsc,
573 .shaderBufferFloat64AtomicAdd = false,
574 .shaderSharedFloat32Atomics = true,
575 .shaderSharedFloat32AtomicAdd = false,
576 .shaderSharedFloat64Atomics = false,
577 .shaderSharedFloat64AtomicAdd = false,
578 .shaderImageFloat32Atomics = true,
579 .shaderImageFloat32AtomicAdd = false,
580 .sparseImageFloat32Atomics = false,
581 .sparseImageFloat32AtomicAdd = false,
582
583 /* VK_KHR_shader_clock */
584 .shaderSubgroupClock = true,
585 .shaderDeviceClock = false,
586
587 /* VK_INTEL_shader_integer_functions2 */
588 .shaderIntegerFunctions2 = true,
589
590 /* VK_EXT_shader_module_identifier */
591 .shaderModuleIdentifier = true,
592
593 /* VK_KHR_shader_subgroup_uniform_control_flow */
594 .shaderSubgroupUniformControlFlow = true,
595
596 /* VK_EXT_texel_buffer_alignment */
597 .texelBufferAlignment = true,
598
599 /* VK_EXT_transform_feedback */
600 .transformFeedback = true,
601 .geometryStreams = true,
602
603 /* VK_EXT_vertex_attribute_divisor */
604 .vertexAttributeInstanceRateDivisor = true,
605 .vertexAttributeInstanceRateZeroDivisor = true,
606
607 /* VK_KHR_workgroup_memory_explicit_layout */
608 .workgroupMemoryExplicitLayout = true,
609 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
610 .workgroupMemoryExplicitLayout8BitAccess = true,
611 .workgroupMemoryExplicitLayout16BitAccess = true,
612
613 /* VK_EXT_ycbcr_image_arrays */
614 .ycbcrImageArrays = true,
615
616 /* VK_EXT_extended_dynamic_state */
617 .extendedDynamicState = true,
618
619 /* VK_EXT_extended_dynamic_state2 */
620 .extendedDynamicState2 = true,
621 .extendedDynamicState2LogicOp = true,
622 .extendedDynamicState2PatchControlPoints = false,
623
624 /* VK_EXT_multi_draw */
625 .multiDraw = true,
626
627 /* VK_EXT_non_seamless_cube_map */
628 .nonSeamlessCubeMap = true,
629
630 /* VK_EXT_primitive_topology_list_restart */
631 .primitiveTopologyListRestart = true,
632 .primitiveTopologyPatchListRestart = true,
633
634 /* VK_EXT_depth_clip_control */
635 .depthClipControl = true,
636
637 /* VK_KHR_shader_expect_assume */
638 .shaderExpectAssume = true,
639 };
640
641 /* We can't do image stores in vec4 shaders */
642 features->vertexPipelineStoresAndAtomics =
643 pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
644 pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
645
646 struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
647
648 /* The new DOOM and Wolfenstein games require depthBounds without
649 * checking for it. They seem to run fine without it so just claim it's
650 * there and accept the consequences.
651 */
652 if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
653 features->depthBounds = true;
654 }
655
656 static uint64_t
657 anv_compute_sys_heap_size(struct anv_physical_device *device,
658 uint64_t available_ram)
659 {
660 /* We want to leave some padding for things we allocate in the driver,
661 * so don't go over 3/4 of the GTT either.
662 */
663 available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
664
665 if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
666 /* When running with an overridden PCI ID, we may get a GTT size from
667  * the kernel that is greater than 2 GiB but the execbuf check for 48-bit
668 * address support can still fail. Just clamp the address space size to
669 * 2 GiB if we don't have 48-bit support.
670 */
671 mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
672           "no support for 48-bit addresses",
673 __FILE__, __LINE__);
674 available_ram = 2ull << 30;
675 }
676
677 return available_ram;
678 }
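
/* Illustrative arithmetic for the clamp above (not part of the original
 * source): with 16 GiB of system RAM and an 8 GiB GTT, the advertised heap
 * is MIN2(16 GiB, 8 GiB * 3 / 4) = 6 GiB; without 48-bit address support it
 * would be further clamped to 2 GiB.
 */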
679
680 static VkResult MUST_CHECK
681 anv_init_meminfo(struct anv_physical_device *device, int fd)
682 {
683 const struct intel_device_info *devinfo = &device->info;
684
685 device->sys.size =
686 anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
687 device->sys.available = devinfo->mem.sram.mappable.free;
688
689 return VK_SUCCESS;
690 }
691
692 static void
693 anv_update_meminfo(struct anv_physical_device *device, int fd)
694 {
695 if (!intel_device_info_update_memory_info(&device->info, fd))
696 return;
697
698 const struct intel_device_info *devinfo = &device->info;
699 device->sys.available = devinfo->mem.sram.mappable.free;
700 }
701
702 static VkResult
703 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
704 {
705 VkResult result = anv_init_meminfo(device, fd);
706 if (result != VK_SUCCESS)
707 return result;
708
709 assert(device->sys.size != 0);
710
711 if (device->info.has_llc) {
712 device->memory.heap_count = 1;
713 device->memory.heaps[0] = (struct anv_memory_heap) {
714 .size = device->sys.size,
715 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
716 };
717
718 /* Big core GPUs share LLC with the CPU, and thus one memory type can be
719  * both cached and coherent at the same time.
720  *
721  * But some game engines can't handle a single such type well:
722  * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
723  *
724  * And Intel's Windows driver uses 3 types, so it's better to add an extra one here.
725  */
726 device->memory.type_count = 2;
727 device->memory.types[0] = (struct anv_memory_type) {
728 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
729 .heapIndex = 0,
730 };
731 device->memory.types[1] = (struct anv_memory_type) {
732 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
733 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
734 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
735 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
736 .heapIndex = 0,
737 };
738 } else {
739 device->memory.heap_count = 1;
740 device->memory.heaps[0] = (struct anv_memory_heap) {
741 .size = device->sys.size,
742 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
743 };
744
745 /* The spec requires that we expose a host-visible, coherent memory
746  * type, but Atom GPUs don't share LLC. Thus we offer two memory types
747  * to give the application a choice between cached-but-not-coherent and
748  * coherent-but-uncached (write-combined) memory.
749  */
750 device->memory.type_count = 2;
751 device->memory.types[0] = (struct anv_memory_type) {
752 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
753 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
754 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
755 .heapIndex = 0,
756 };
757 device->memory.types[1] = (struct anv_memory_type) {
758 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
759 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
760 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
761 .heapIndex = 0,
762 };
763 }
764
765 device->memory.need_flush = false;
766 for (unsigned i = 0; i < device->memory.type_count; i++) {
767 VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
768 if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
769 !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
770 device->memory.need_flush = true;
771 }
772
773 return VK_SUCCESS;
774 }
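
/* Summary of the memory types set up above (descriptive only, derived from
 * the code):
 *   LLC (big core) GPUs: type 0 = DEVICE_LOCAL
 *                        type 1 = DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT | HOST_CACHED
 *   non-LLC (Atom) GPUs: type 0 = DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED   (needs flushes)
 *                        type 1 = DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT (write-combined)
 * Only the cached-but-not-coherent type causes memory.need_flush to be set.
 */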
775
776 static VkResult
777 anv_physical_device_init_uuids(struct anv_physical_device *device)
778 {
779 const struct build_id_note *note =
780 build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
781 if (!note) {
782 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
783 "Failed to find build-id");
784 }
785
786 unsigned build_id_len = build_id_length(note);
787 if (build_id_len < 20) {
788 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
789 "build-id too short. It needs to be a SHA");
790 }
791
792 memcpy(device->driver_build_sha1, build_id_data(note), 20);
793
794 struct mesa_sha1 sha1_ctx;
795 uint8_t sha1[20];
796 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
797
798 /* The pipeline cache UUID is used for determining when a pipeline cache is
799 * invalid. It needs both a driver build and the PCI ID of the device.
800 */
801 _mesa_sha1_init(&sha1_ctx);
802 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
803 _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
804 sizeof(device->info.pci_device_id));
805 _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
806 sizeof(device->always_use_bindless));
807 _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
808 sizeof(device->has_a64_buffer_access));
809 _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
810 sizeof(device->has_bindless_samplers));
811 _mesa_sha1_final(&sha1_ctx, sha1);
812 memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
813
814 intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
815 intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
816
817 return VK_SUCCESS;
818 }
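
/* Descriptive note (not from the original source): the pipeline cache UUID
 * computed above is the first VK_UUID_SIZE (16) bytes of a SHA-1 over the
 * driver build-id, the PCI device id and a few feature booleans, so a driver
 * rebuild or a different device invalidates existing on-disk pipeline caches.
 */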
819
820 static void
821 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
822 {
823 #ifdef ENABLE_SHADER_CACHE
824 char renderer[10];
825 ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
826 device->info.pci_device_id);
827 assert(len == sizeof(renderer) - 2);
828
829 char timestamp[41];
830 _mesa_sha1_format(timestamp, device->driver_build_sha1);
831
832 const uint64_t driver_flags =
833 elk_get_compiler_config_value(device->compiler);
834 device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
835 #endif
836 }
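
/* Worked example for the renderer string above (illustrative, assumed PCI
 * id): a device id of 0x1616 yields "anv_1616", so snprintf() returns 8,
 * matching the asserted sizeof(renderer) - 2.
 */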
837
838 static void
839 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
840 {
841 #ifdef ENABLE_SHADER_CACHE
842 if (device->vk.disk_cache) {
843 disk_cache_destroy(device->vk.disk_cache);
844 device->vk.disk_cache = NULL;
845 }
846 #else
847 assert(device->vk.disk_cache == NULL);
848 #endif
849 }
850
851 /* The ANV_QUEUE_OVERRIDE environment variable is a comma-separated list of
852 * queue overrides.
853 *
854  * To override the number of queues:
855 * * "gc" is for graphics queues with compute support
856 * * "g" is for graphics queues with no compute support
857 * * "c" is for compute queues with no graphics support
858 *
859 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
860 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
861 * with compute-only support.
862 *
863 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
864 * include 1 queue with compute-only support, but it will not change the
865 * number of graphics+compute queues.
866 *
867 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
868 * to include 1 queue with compute-only support, and it would override the
869 * number of graphics+compute queues to be 0.
870 */
871 static void
872 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
873 {
874 int gc_override = -1;
875 int g_override = -1;
876 int c_override = -1;
877 char *env = getenv("ANV_QUEUE_OVERRIDE");
878
879 if (env == NULL)
880 return;
881
882 env = strdup(env);
883 char *save = NULL;
884 char *next = strtok_r(env, ",", &save);
885 while (next != NULL) {
886 if (strncmp(next, "gc=", 3) == 0) {
887 gc_override = strtol(next + 3, NULL, 0);
888 } else if (strncmp(next, "g=", 2) == 0) {
889 g_override = strtol(next + 2, NULL, 0);
890 } else if (strncmp(next, "c=", 2) == 0) {
891 c_override = strtol(next + 2, NULL, 0);
892 } else {
893 mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
894 }
895 next = strtok_r(NULL, ",", &save);
896 }
897 free(env);
898 if (gc_override >= 0)
899 *gc_count = gc_override;
900 if (g_override >= 0)
901 *g_count = g_override;
902 if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
903 mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
904 "Vulkan specification");
905 if (c_override >= 0)
906 *c_count = c_override;
907 }
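
/* Illustrative example (not part of the original source): with
 * ANV_QUEUE_OVERRIDE="gc=2,c=1" the loop above sets gc_override = 2 and
 * c_override = 1 while g_override stays -1, so only *gc_count and *c_count
 * are replaced.
 */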
908
909 static void
910 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
911 {
912 uint32_t family_count = 0;
913
914 if (pdevice->engine_info) {
915 int gc_count =
916 intel_engines_count(pdevice->engine_info,
917 INTEL_ENGINE_CLASS_RENDER);
918 int g_count = 0;
919 int c_count = 0;
920
921 anv_override_engine_counts(&gc_count, &g_count, &c_count);
922
923 if (gc_count > 0) {
924 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
925 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
926 VK_QUEUE_COMPUTE_BIT |
927 VK_QUEUE_TRANSFER_BIT,
928 .queueCount = gc_count,
929 .engine_class = INTEL_ENGINE_CLASS_RENDER,
930 };
931 }
932 if (g_count > 0) {
933 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
934 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
935 VK_QUEUE_TRANSFER_BIT,
936 .queueCount = g_count,
937 .engine_class = INTEL_ENGINE_CLASS_RENDER,
938 };
939 }
940 if (c_count > 0) {
941 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
942 .queueFlags = VK_QUEUE_COMPUTE_BIT |
943 VK_QUEUE_TRANSFER_BIT,
944 .queueCount = c_count,
945 .engine_class = INTEL_ENGINE_CLASS_RENDER,
946 };
947 }
948 /* Increase count below when other families are added as a reminder to
949 * increase the ANV_MAX_QUEUE_FAMILIES value.
950 */
951 STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
952 } else {
953 /* Default to a single render queue */
954 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
955 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
956 VK_QUEUE_COMPUTE_BIT |
957 VK_QUEUE_TRANSFER_BIT,
958 .queueCount = 1,
959 .engine_class = INTEL_ENGINE_CLASS_RENDER,
960 };
961 family_count = 1;
962 }
963 assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
964 pdevice->queue.family_count = family_count;
965 }
966
967 static VkResult
968 anv_physical_device_try_create(struct vk_instance *vk_instance,
969 struct _drmDevice *drm_device,
970 struct vk_physical_device **out)
971 {
972 struct anv_instance *instance =
973 container_of(vk_instance, struct anv_instance, vk);
974
975 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
976 drm_device->bustype != DRM_BUS_PCI ||
977 drm_device->deviceinfo.pci->vendor_id != 0x8086)
978 return VK_ERROR_INCOMPATIBLE_DRIVER;
979
980 const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
981 const char *path = drm_device->nodes[DRM_NODE_RENDER];
982 VkResult result;
983 int fd;
984 int master_fd = -1;
985
986 process_intel_debug_variable();
987
988 fd = open(path, O_RDWR | O_CLOEXEC);
989 if (fd < 0) {
990 if (errno == ENOMEM) {
991 return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
992 "Unable to open device %s: out of memory", path);
993 }
994 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
995 "Unable to open device %s: %m", path);
996 }
997
998 struct intel_device_info devinfo;
999 if (!intel_get_device_info_from_fd(fd, &devinfo, 7, 8)) {
1000 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1001 goto fail_fd;
1002 }
1003
1004 bool is_alpha = true;
1005 bool warn = !debug_get_bool_option("MESA_VK_IGNORE_CONFORMANCE_WARNING", false);
1006 if (devinfo.platform == INTEL_PLATFORM_HSW) {
1007 if (warn)
1008 mesa_logw("Haswell Vulkan support is incomplete");
1009 } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
1010 if (warn)
1011 mesa_logw("Ivy Bridge Vulkan support is incomplete");
1012 } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
1013 if (warn)
1014 mesa_logw("Bay Trail Vulkan support is incomplete");
1015 } else if (devinfo.ver == 8) {
1016 /* Gfx8 fully supported */
1017 is_alpha = false;
1018 } else {
1019 /* Silently fail here, anv will either pick up this device or display an
1020 * error message.
1021 */
1022 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1023 goto fail_fd;
1024 }
1025
1026 struct anv_physical_device *device =
1027 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1028 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1029 if (device == NULL) {
1030 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1031 goto fail_fd;
1032 }
1033
1034 struct vk_physical_device_dispatch_table dispatch_table;
1035 vk_physical_device_dispatch_table_from_entrypoints(
1036 &dispatch_table, &anv_physical_device_entrypoints, true);
1037 vk_physical_device_dispatch_table_from_entrypoints(
1038 &dispatch_table, &wsi_physical_device_entrypoints, false);
1039
1040 result = vk_physical_device_init(&device->vk, &instance->vk,
1041 NULL, NULL, NULL, /* We set up extensions later */
1042 &dispatch_table);
1043 if (result != VK_SUCCESS) {
1044 vk_error(instance, result);
1045 goto fail_alloc;
1046 }
1047 device->instance = instance;
1048
1049 assert(strlen(path) < ARRAY_SIZE(device->path));
1050 snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
1051
1052 device->info = devinfo;
1053 device->is_alpha = is_alpha;
1054
1055 device->cmd_parser_version = -1;
1056 if (device->info.ver == 7) {
1057 if (!intel_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION, &device->cmd_parser_version) ||
1058 device->cmd_parser_version == -1) {
1059 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1060 "failed to get command parser version");
1061 goto fail_base;
1062 }
1063 }
1064
1065 int val;
1066 if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
1067 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1068 "kernel missing gem wait");
1069 goto fail_base;
1070 }
1071
1072 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
1073 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1074 "kernel missing execbuf2");
1075 goto fail_base;
1076 }
1077
1078 if (!device->info.has_llc &&
1079 (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
1080 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1081 "kernel missing wc mmap");
1082 goto fail_base;
1083 }
1084
1085 device->use_relocations = device->info.ver < 8 ||
1086 device->info.platform == INTEL_PLATFORM_CHV;
1087
1088 if (!device->use_relocations &&
1089 (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val)) {
1090 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1091 "kernel missing softpin");
1092       goto fail_base;
1093 }
1094
1095 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
1096 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1097 "kernel missing syncobj support");
1098 goto fail_base;
1099 }
1100
1101 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
1102 device->has_exec_async = val;
1103 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
1104 device->has_exec_capture = val;
1105
1106 /* Start with medium; sorted low to high */
1107 const int priorities[] = {
1108 INTEL_CONTEXT_MEDIUM_PRIORITY,
1109 INTEL_CONTEXT_HIGH_PRIORITY,
1110 INTEL_CONTEXT_REALTIME_PRIORITY,
1111 };
1112 device->max_context_priority = INT_MIN;
1113 for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
1114 if (!anv_gem_has_context_priority(fd, priorities[i]))
1115 break;
1116 device->max_context_priority = priorities[i];
1117 }
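
/* Example of the probing above (assumed kernel behaviour, for illustration):
 * if MEDIUM and HIGH priority contexts are accepted but REALTIME is rejected,
 * the loop stops early and max_context_priority ends up as
 * INTEL_CONTEXT_HIGH_PRIORITY.
 */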
1118
1119 device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
1120 device->info.aperture_bytes;
1121
1122 /* We only allow 48-bit addresses with softpin because knowing the actual
1123 * address is required for the vertex cache flush workaround.
1124 */
1125 device->supports_48bit_addresses = (device->info.ver >= 8) &&
1126 device->gtt_size > (4ULL << 30 /* GiB */);
1127
1128 result = anv_physical_device_init_heaps(device, fd);
1129 if (result != VK_SUCCESS)
1130 goto fail_base;
1131
1132 assert(device->supports_48bit_addresses == !device->use_relocations);
1133 device->use_softpin = !device->use_relocations;
1134
1135 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
1136 device->has_exec_timeline = val;
1137 if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
1138 device->has_exec_timeline = false;
1139
1140 unsigned st_idx = 0;
1141
1142 device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
1143 if (!device->has_exec_timeline)
1144 device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
1145 device->sync_types[st_idx++] = &device->sync_syncobj_type;
1146
1147 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
1148 device->sync_types[st_idx++] = &anv_bo_sync_type;
1149
1150 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
1151 device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
1152 device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
1153 }
1154
1155 device->sync_types[st_idx++] = NULL;
1156 assert(st_idx <= ARRAY_SIZE(device->sync_types));
1157 device->vk.supported_sync_types = device->sync_types;
1158
1159 device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
1160
1161 device->always_use_bindless =
1162 debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
1163
1164 device->use_call_secondary =
1165 device->use_softpin &&
1166 !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
1167
1168    /* We first got the A64 messages on Broadwell, and we can only use them if
1169 * we can pass addresses directly into the shader which requires softpin.
1170 */
1171 device->has_a64_buffer_access = device->info.ver >= 8 &&
1172 device->use_softpin;
1173
1174 /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
1175 * because it's just a matter of setting the sampler address in the sample
1176 * message header. However, we've not bothered to wire it up for vec4 so
1177 * we leave it disabled on gfx7.
1178 */
1179 device->has_bindless_samplers = device->info.ver >= 8;
1180
1181 /* Check if we can read the GPU timestamp register from the CPU */
1182 uint64_t u64_ignore;
1183 device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
1184 device->info.kmd_type,
1185 &u64_ignore);
1186
1187 device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
1188 driQueryOptionb(&instance->dri_options, "always_flush_cache");
1189
1190 device->compiler = elk_compiler_create(NULL, &device->info);
1191 if (device->compiler == NULL) {
1192 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1193 goto fail_base;
1194 }
1195 device->compiler->shader_debug_log = compiler_debug_log;
1196 device->compiler->shader_perf_log = compiler_perf_log;
1197 device->compiler->constant_buffer_0_is_relative =
1198 device->info.ver < 8 || !device->info.has_context_isolation;
1199 device->compiler->supports_shader_constants = true;
1200 device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
1201
1202 isl_device_init(&device->isl_dev, &device->info);
1203
1204 result = anv_physical_device_init_uuids(device);
1205 if (result != VK_SUCCESS)
1206 goto fail_compiler;
1207
1208 anv_physical_device_init_disk_cache(device);
1209
1210 if (instance->vk.enabled_extensions.KHR_display) {
1211 master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
1212 if (master_fd >= 0) {
1213 /* fail if we don't have permission to even render on this device */
1214 if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
1215 close(master_fd);
1216 master_fd = -1;
1217 }
1218 }
1219 }
1220 device->master_fd = master_fd;
1221
1222 device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
1223 anv_physical_device_init_queue_families(device);
1224
1225 device->local_fd = fd;
1226
1227 anv_physical_device_init_perf(device, fd);
1228
1229 get_device_extensions(device, &device->vk.supported_extensions);
1230 get_features(device, &device->vk.supported_features);
1231
1232 result = anv_init_wsi(device);
1233 if (result != VK_SUCCESS)
1234 goto fail_perf;
1235
1236 anv_measure_device_init(device);
1237
1238 anv_genX(&device->info, init_physical_device_state)(device);
1239
1240 *out = &device->vk;
1241
1242 struct stat st;
1243
1244 if (stat(primary_path, &st) == 0) {
1245 device->has_master = true;
1246 device->master_major = major(st.st_rdev);
1247 device->master_minor = minor(st.st_rdev);
1248 } else {
1249 device->has_master = false;
1250 device->master_major = 0;
1251 device->master_minor = 0;
1252 }
1253
1254 if (stat(path, &st) == 0) {
1255 device->has_local = true;
1256 device->local_major = major(st.st_rdev);
1257 device->local_minor = minor(st.st_rdev);
1258 } else {
1259 device->has_local = false;
1260 device->local_major = 0;
1261 device->local_minor = 0;
1262 }
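
/* Illustrative values (Linux DRM convention, not taken from this file): for
 * /dev/dri/renderD128 the stat() above typically reports major 226 and
 * minor 128; these are the numbers later exposed through
 * VK_EXT_physical_device_drm.
 */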
1263
1264 return VK_SUCCESS;
1265
1266 fail_perf:
1267 ralloc_free(device->perf);
1268 free(device->engine_info);
1269 anv_physical_device_free_disk_cache(device);
1270 fail_compiler:
1271 ralloc_free(device->compiler);
1272 fail_base:
1273 vk_physical_device_finish(&device->vk);
1274 fail_alloc:
1275 vk_free(&instance->vk.alloc, device);
1276 fail_fd:
1277 close(fd);
1278 if (master_fd != -1)
1279 close(master_fd);
1280 return result;
1281 }
1282
1283 static void
1284 anv_physical_device_destroy(struct vk_physical_device *vk_device)
1285 {
1286 struct anv_physical_device *device =
1287 container_of(vk_device, struct anv_physical_device, vk);
1288
1289 anv_finish_wsi(device);
1290 anv_measure_device_destroy(device);
1291 free(device->engine_info);
1292 anv_physical_device_free_disk_cache(device);
1293 ralloc_free(device->compiler);
1294 ralloc_free(device->perf);
1295 close(device->local_fd);
1296 if (device->master_fd >= 0)
1297 close(device->master_fd);
1298 vk_physical_device_finish(&device->vk);
1299 vk_free(&device->instance->vk.alloc, device);
1300 }
1301
1302 VkResult anv_EnumerateInstanceExtensionProperties(
1303 const char* pLayerName,
1304 uint32_t* pPropertyCount,
1305 VkExtensionProperties* pProperties)
1306 {
1307 if (pLayerName)
1308 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1309
1310 return vk_enumerate_instance_extension_properties(
1311 &instance_extensions, pPropertyCount, pProperties);
1312 }
1313
1314 static void
1315 anv_init_dri_options(struct anv_instance *instance)
1316 {
1317 driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1318 ARRAY_SIZE(anv_dri_options));
1319 driParseConfigFiles(&instance->dri_options,
1320 &instance->available_dri_options, 0, "anv", NULL, NULL,
1321 instance->vk.app_info.app_name,
1322 instance->vk.app_info.app_version,
1323 instance->vk.app_info.engine_name,
1324 instance->vk.app_info.engine_version);
1325
1326 instance->assume_full_subgroups =
1327 driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
1328 instance->limit_trig_input_range =
1329 driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1330 instance->sample_mask_out_opengl_behaviour =
1331 driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1332 instance->lower_depth_range_rate =
1333 driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1334 instance->no_16bit =
1335 driQueryOptionb(&instance->dri_options, "no_16bit");
1336 instance->report_vk_1_3 =
1337 driQueryOptionb(&instance->dri_options, "hasvk_report_vk_1_3_version");
1338 }
1339
1340 VkResult anv_CreateInstance(
1341 const VkInstanceCreateInfo* pCreateInfo,
1342 const VkAllocationCallbacks* pAllocator,
1343 VkInstance* pInstance)
1344 {
1345 struct anv_instance *instance;
1346 VkResult result;
1347
1348 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1349
1350 if (pAllocator == NULL)
1351 pAllocator = vk_default_allocator();
1352
1353 instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1354 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1355 if (!instance)
1356 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1357
1358 struct vk_instance_dispatch_table dispatch_table;
1359 vk_instance_dispatch_table_from_entrypoints(
1360 &dispatch_table, &anv_instance_entrypoints, true);
1361 vk_instance_dispatch_table_from_entrypoints(
1362 &dispatch_table, &wsi_instance_entrypoints, false);
1363
1364 result = vk_instance_init(&instance->vk, &instance_extensions,
1365 &dispatch_table, pCreateInfo, pAllocator);
1366 if (result != VK_SUCCESS) {
1367 vk_free(pAllocator, instance);
1368 return vk_error(NULL, result);
1369 }
1370
1371 instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1372 instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1373
1374 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1375
1376 anv_init_dri_options(instance);
1377
1378 intel_driver_ds_init();
1379
1380 *pInstance = anv_instance_to_handle(instance);
1381
1382 return VK_SUCCESS;
1383 }
1384
1385 void anv_DestroyInstance(
1386 VkInstance _instance,
1387 const VkAllocationCallbacks* pAllocator)
1388 {
1389 ANV_FROM_HANDLE(anv_instance, instance, _instance);
1390
1391 if (!instance)
1392 return;
1393
1394 VG(VALGRIND_DESTROY_MEMPOOL(instance));
1395
1396 driDestroyOptionCache(&instance->dri_options);
1397 driDestroyOptionInfo(&instance->available_dri_options);
1398
1399 vk_instance_finish(&instance->vk);
1400 vk_free(&instance->vk.alloc, instance);
1401 }
1402
1403 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
1404
1405 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1406 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
1407
1408 #define MAX_CUSTOM_BORDER_COLORS 4096
1409
1410 void anv_GetPhysicalDeviceProperties(
1411 VkPhysicalDevice physicalDevice,
1412 VkPhysicalDeviceProperties* pProperties)
1413 {
1414 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1415 const struct intel_device_info *devinfo = &pdevice->info;
1416
1417 const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
1418 const uint32_t max_textures = 128;
1419 const uint32_t max_samplers =
1420 pdevice->has_bindless_samplers ? UINT16_MAX :
1421 (devinfo->verx10 >= 75) ? 128 : 16;
1422 const uint32_t max_images = MAX_IMAGES;
1423
1424 /* If we can use bindless for everything, claim a high per-stage limit,
1425 * otherwise use the binding table size, minus the slots reserved for
1426 * render targets and one slot for the descriptor buffer. */
1427 const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
1428
1429 const uint32_t max_workgroup_size =
1430 MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1431
1432 VkSampleCountFlags sample_counts =
1433 isl_device_get_sample_counts(&pdevice->isl_dev);
1434
1435
1436 VkPhysicalDeviceLimits limits = {
1437 .maxImageDimension1D = (1 << 14),
1438 /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their width
1439 * is greater than 8192 pixels. */
1440 .maxImageDimension2D = devinfo->ver == 7 ? (1 << 13) : (1 << 14),
1441 .maxImageDimension3D = (1 << 11),
1442 .maxImageDimensionCube = (1 << 14),
1443 .maxImageArrayLayers = (1 << 11),
1444 .maxTexelBufferElements = 128 * 1024 * 1024,
1445 .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1446 .maxStorageBufferRange = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
1447 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1448 .maxMemoryAllocationCount = UINT32_MAX,
1449 .maxSamplerAllocationCount = 64 * 1024,
1450 .bufferImageGranularity = 1,
1451 .sparseAddressSpaceSize = 0,
1452 .maxBoundDescriptorSets = MAX_SETS,
1453 .maxPerStageDescriptorSamplers = max_samplers,
1454 .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1455 .maxPerStageDescriptorStorageBuffers = max_ssbos,
1456 .maxPerStageDescriptorSampledImages = max_textures,
1457 .maxPerStageDescriptorStorageImages = max_images,
1458 .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1459 .maxPerStageResources = max_per_stage,
1460 .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1461 .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
1462 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1463 .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
1464 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1465 .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1466 .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
1467 .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1468 .maxVertexInputAttributes = MAX_VES,
1469 .maxVertexInputBindings = MAX_VBS,
1470 /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1471 *
1472 * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1473 */
1474 .maxVertexInputAttributeOffset = 2047,
1475 /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1476 *
1477 * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
1478 *
1479 * Skylake PRMs: Volume 2d: Command Reference: Structures:
1480 *
1481 * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1482 */
1483 .maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095,
1484 .maxVertexOutputComponents = 128,
1485 .maxTessellationGenerationLevel = 64,
1486 .maxTessellationPatchSize = 32,
1487 .maxTessellationControlPerVertexInputComponents = 128,
1488 .maxTessellationControlPerVertexOutputComponents = 128,
1489 .maxTessellationControlPerPatchOutputComponents = 128,
1490 .maxTessellationControlTotalOutputComponents = 2048,
1491 .maxTessellationEvaluationInputComponents = 128,
1492 .maxTessellationEvaluationOutputComponents = 128,
1493 .maxGeometryShaderInvocations = 32,
1494 .maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64,
1495 .maxGeometryOutputComponents = 128,
1496 .maxGeometryOutputVertices = 256,
1497 .maxGeometryTotalOutputComponents = 1024,
1498 .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1499 .maxFragmentOutputAttachments = 8,
1500 .maxFragmentDualSrcAttachments = 1,
1501 .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
1502 .maxComputeSharedMemorySize = 64 * 1024,
1503 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1504 .maxComputeWorkGroupInvocations = max_workgroup_size,
1505 .maxComputeWorkGroupSize = {
1506 max_workgroup_size,
1507 max_workgroup_size,
1508 max_workgroup_size,
1509 },
1510 .subPixelPrecisionBits = 8,
1511 .subTexelPrecisionBits = 8,
1512 .mipmapPrecisionBits = 8,
1513 .maxDrawIndexedIndexValue = UINT32_MAX,
1514 .maxDrawIndirectCount = UINT32_MAX,
1515 .maxSamplerLodBias = 16,
1516 .maxSamplerAnisotropy = 16,
1517 .maxViewports = MAX_VIEWPORTS,
1518 .maxViewportDimensions = { (1 << 14), (1 << 14) },
1519 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
1520 .viewportSubPixelBits = 13, /* We take a float? */
1521 .minMemoryMapAlignment = 4096, /* A page */
1522 /* The dataport requires texel alignment so we need to assume a worst
1523 * case of R32G32B32A32 which is 16 bytes.
1524 */
1525 .minTexelBufferOffsetAlignment = 16,
1526 .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
1527 .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
1528 .minTexelOffset = -8,
1529 .maxTexelOffset = 7,
1530 .minTexelGatherOffset = -32,
1531 .maxTexelGatherOffset = 31,
1532 .minInterpolationOffset = -0.5,
1533 .maxInterpolationOffset = 0.4375,
1534 .subPixelInterpolationOffsetBits = 4,
1535 .maxFramebufferWidth = (1 << 14),
1536 .maxFramebufferHeight = (1 << 14),
1537 .maxFramebufferLayers = (1 << 11),
1538 .framebufferColorSampleCounts = sample_counts,
1539 .framebufferDepthSampleCounts = sample_counts,
1540 .framebufferStencilSampleCounts = sample_counts,
1541 .framebufferNoAttachmentsSampleCounts = sample_counts,
1542 .maxColorAttachments = MAX_RTS,
1543 .sampledImageColorSampleCounts = sample_counts,
1544 /* Multisampling with SINT formats is not supported on gfx7 */
1545 .sampledImageIntegerSampleCounts = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts,
1546 .sampledImageDepthSampleCounts = sample_counts,
1547 .sampledImageStencilSampleCounts = sample_counts,
1548 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1549 .maxSampleMaskWords = 1,
1550 .timestampComputeAndGraphics = true,
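      /* timestampPeriod is the number of nanoseconds per timestamp tick, i.e.
       * the inverse of the hardware timestamp frequency in Hz. For example, a
       * 12 MHz timestamp frequency would give 1e9 / 12e6 = ~83.3 ns per tick.
       */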
1551 .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
1552 .maxClipDistances = 8,
1553 .maxCullDistances = 8,
1554 .maxCombinedClipAndCullDistances = 8,
1555 .discreteQueuePriorities = 2,
1556 .pointSizeRange = { 0.125, 255.875 },
1557 /* While SKL and up support much wider lines than we are setting here,
1558 * in practice we run into conformance issues if we go past this limit.
1559 * Since the Windows driver does the same, it's probably fair to assume
1560 * that no one needs more than this.
1561 */
1562 .lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
1563 .pointSizeGranularity = (1.0 / 8.0),
1564 .lineWidthGranularity = (1.0 / 128.0),
1565 .strictLines = false,
1566 .standardSampleLocations = true,
1567 .optimalBufferCopyOffsetAlignment = 128,
1568 .optimalBufferCopyRowPitchAlignment = 128,
1569 .nonCoherentAtomSize = 64,
1570 };
1571
1572 *pProperties = (VkPhysicalDeviceProperties) {
1573 #ifdef ANDROID_STRICT
1574 .apiVersion = ANV_API_VERSION,
1575 #else
1576 .apiVersion = (pdevice->use_softpin || pdevice->instance->report_vk_1_3) ?
1577 ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2,
1578 #endif
1579 .driverVersion = vk_get_driver_version(),
1580 .vendorID = 0x8086,
1581 .deviceID = pdevice->info.pci_device_id,
1582 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1583 .limits = limits,
1584 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
1585 };
1586
1587 snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
1588 "%s", pdevice->info.name);
1589 memcpy(pProperties->pipelineCacheUUID,
1590 pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1591 }
1592
1593 static void
1594 anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
1595 VkPhysicalDeviceVulkan11Properties *p)
1596 {
1597 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1598
1599 memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1600 memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1601 memset(p->deviceLUID, 0, VK_LUID_SIZE);
1602 p->deviceNodeMask = 0;
1603 p->deviceLUIDValid = false;
1604
1605 p->subgroupSize = ELK_SUBGROUP_SIZE;
1606 VkShaderStageFlags scalar_stages = 0;
1607 for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
1608 if (pdevice->compiler->scalar_stage[stage])
1609 scalar_stages |= mesa_to_vk_shader_stage(stage);
1610 }
1611 p->subgroupSupportedStages = scalar_stages;
1612 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
1613 VK_SUBGROUP_FEATURE_VOTE_BIT |
1614 VK_SUBGROUP_FEATURE_BALLOT_BIT |
1615 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1616 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
1617 VK_SUBGROUP_FEATURE_QUAD_BIT;
1618 if (pdevice->info.ver >= 8) {
1619          /* TODO: There's no technical reason why these can't be made to
1620           * work on gfx7, but they don't at the moment, so it's better to
1621           * leave the feature disabled than enabled and broken.
1622 */
1623 p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
1624 VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
1625 }
1626 p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;
1627
1628 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
1629 p->maxMultiviewViewCount = 16;
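   /* Presumably chosen so that an instance index multiplied by the 16
    * maximum views above still fits in 32 bits; the same /16 restriction
    * appears again for maxVertexAttribDivisor further down.
    */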
1630 p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
1631 p->protectedNoFault = false;
1632 /* This value doesn't matter for us today as our per-stage descriptors are
1633 * the real limit.
1634 */
1635 p->maxPerSetDescriptors = 1024;
1636 p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
1637 }
1638
1639 static void
1640 anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
1641 VkPhysicalDeviceVulkan12Properties *p)
1642 {
1643 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
1644
1645 p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
1646 memset(p->driverName, 0, sizeof(p->driverName));
1647 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
1648 "Intel open-source Mesa driver");
1649 memset(p->driverInfo, 0, sizeof(p->driverInfo));
1650 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1651 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1652
1653 /* Don't advertise conformance with a particular version if the hardware's
1654 * support is incomplete/alpha.
1655 */
1656 if (pdevice->is_alpha) {
1657 p->conformanceVersion = (VkConformanceVersion) {
1658 .major = 0,
1659 .minor = 0,
1660 .subminor = 0,
1661 .patch = 0,
1662 };
1663 }
1664 else {
1665 p->conformanceVersion = (VkConformanceVersion) {
1666 .major = 1,
1667 .minor = pdevice->use_softpin ? 3 : 2,
1668 .subminor = 0,
1669 .patch = 0,
1670 };
1671 }
1672
1673 p->denormBehaviorIndependence =
1674 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1675 p->roundingModeIndependence =
1676 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
1677
1678    /* Broadwell does not support HF denorms and there are restrictions on
1679     * other gens. According to Kabylake's PRM:
1680 *
1681 * "math - Extended Math Function
1682 * [...]
1683 * Restriction : Half-float denorms are always retained."
1684 */
1685 p->shaderDenormFlushToZeroFloat16 = false;
1686 p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8;
1687 p->shaderRoundingModeRTEFloat16 = true;
1688 p->shaderRoundingModeRTZFloat16 = true;
1689 p->shaderSignedZeroInfNanPreserveFloat16 = true;
1690
1691 p->shaderDenormFlushToZeroFloat32 = true;
1692 p->shaderDenormPreserveFloat32 = pdevice->info.ver >= 8;
1693 p->shaderRoundingModeRTEFloat32 = true;
1694 p->shaderRoundingModeRTZFloat32 = true;
1695 p->shaderSignedZeroInfNanPreserveFloat32 = true;
1696
1697 p->shaderDenormFlushToZeroFloat64 = true;
1698 p->shaderDenormPreserveFloat64 = true;
1699 p->shaderRoundingModeRTEFloat64 = true;
1700 p->shaderRoundingModeRTZFloat64 = true;
1701 p->shaderSignedZeroInfNanPreserveFloat64 = true;
1702
1703 /* It's a bit hard to exactly map our implementation to the limits
1704 * described by Vulkan. The bindless surface handle in the extended
1705 * message descriptors is 20 bits and it's an index into the table of
1706 * RENDER_SURFACE_STATE structs that starts at bindless surface base
1707     * address. This means that we can have at most 1M surface states
1708 * allocated at any given time. Since most image views take two
1709 * descriptors, this means we have a limit of about 500K image views.
1710 *
1711 * However, since we allocate surface states at vkCreateImageView time,
1712 * this means our limit is actually something on the order of 500K image
1713     * views allocated at any time. The actual limit described by Vulkan, on
1714     * the other hand, is a limit of how many you can have in a descriptor set.
1715     * Assuming anyone using 1M descriptors will be using the same image view
1716     * many times over (or a bunch of null descriptors), we can safely
1717 * advertise a larger limit here.
1718 */
1719 const unsigned max_bindless_views = 1 << 20;
1720 p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
1721 p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1722 p->shaderSampledImageArrayNonUniformIndexingNative = false;
1723 p->shaderStorageBufferArrayNonUniformIndexingNative = true;
1724 p->shaderStorageImageArrayNonUniformIndexingNative = false;
1725 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1726 p->robustBufferAccessUpdateAfterBind = true;
1727 p->quadDivergentImplicitLod = false;
1728 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
1729 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
1730 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
1731 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
1732 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
1733 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
1734 p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
1735 p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
1736 p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
1737 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
1738 p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
1739 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
1740 p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
1741 p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
1742 p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
1743
1744 /* We support all of the depth resolve modes */
1745 p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
1746 VK_RESOLVE_MODE_AVERAGE_BIT |
1747 VK_RESOLVE_MODE_MIN_BIT |
1748 VK_RESOLVE_MODE_MAX_BIT;
1749 /* Average doesn't make sense for stencil so we don't support that */
1750 p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
1751 if (pdevice->info.ver >= 8) {
1752 /* The advanced stencil resolve modes currently require stencil
1753 * sampling be supported by the hardware.
1754 */
1755 p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
1756 VK_RESOLVE_MODE_MAX_BIT;
1757 }
1758 p->independentResolveNone = true;
1759 p->independentResolve = true;
1760
1761 p->filterMinmaxSingleComponentFormats = false;
1762 p->filterMinmaxImageComponentMapping = false;
1763
1764 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1765
1766 p->framebufferIntegerColorSampleCounts =
1767 pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev);
1768 }
1769
1770 static void
1771 anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
1772 VkPhysicalDeviceVulkan13Properties *p)
1773 {
1774 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
1775
1776 p->minSubgroupSize = 8;
1777 p->maxSubgroupSize = 32;
1778 p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
1779 p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
1780
1781 p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1782 p->maxPerStageDescriptorInlineUniformBlocks =
1783 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1784 p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
1785 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1786 p->maxDescriptorSetInlineUniformBlocks =
1787 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1788 p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
1789 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1790 p->maxInlineUniformTotalSize = UINT16_MAX;
1791
1792 p->integerDotProduct8BitUnsignedAccelerated = false;
1793 p->integerDotProduct8BitSignedAccelerated = false;
1794 p->integerDotProduct8BitMixedSignednessAccelerated = false;
1795 p->integerDotProduct4x8BitPackedUnsignedAccelerated = false;
1796 p->integerDotProduct4x8BitPackedSignedAccelerated = false;
1797 p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
1798 p->integerDotProduct16BitUnsignedAccelerated = false;
1799 p->integerDotProduct16BitSignedAccelerated = false;
1800 p->integerDotProduct16BitMixedSignednessAccelerated = false;
1801 p->integerDotProduct32BitUnsignedAccelerated = false;
1802 p->integerDotProduct32BitSignedAccelerated = false;
1803 p->integerDotProduct32BitMixedSignednessAccelerated = false;
1804 p->integerDotProduct64BitUnsignedAccelerated = false;
1805 p->integerDotProduct64BitSignedAccelerated = false;
1806 p->integerDotProduct64BitMixedSignednessAccelerated = false;
1807 p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
1808 p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
1809 p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
1810 p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false;
1811 p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
1812 p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
1813 p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
1814 p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
1815 p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1816 p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1817 p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1818 p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1819 p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1820 p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1821 p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1822
1823 /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
1824 * Base Address:
1825 *
1826 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
1827 * specifies the base address of the first element of the surface,
1828 * computed in software by adding the surface base address to the
1829 * byte offset of the element in the buffer. The base address must
1830 * be aligned to element size."
1831 *
1832 * The typed dataport messages require that things be texel aligned.
1833 * Otherwise, we may just load/store the wrong data or, in the worst
1834 * case, there may be hangs.
1835 */
1836 p->storageTexelBufferOffsetAlignmentBytes = 16;
1837 p->storageTexelBufferOffsetSingleTexelAlignment = true;
1838
1839 /* The sampler, however, is much more forgiving and it can handle
1840 * arbitrary byte alignment for linear and buffer surfaces. It's
1841 * hard to find a good PRM citation for this but years of empirical
1842 * experience demonstrate that this is true.
1843 */
1844 p->uniformTexelBufferOffsetAlignmentBytes = 1;
1845 p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1846
1847 p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
1848 }
1849
1850 void anv_GetPhysicalDeviceProperties2(
1851 VkPhysicalDevice physicalDevice,
1852 VkPhysicalDeviceProperties2* pProperties)
1853 {
1854 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1855
1856 anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
1857
1858 VkPhysicalDeviceVulkan11Properties core_1_1 = {
1859 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
1860 };
1861 anv_get_physical_device_properties_1_1(pdevice, &core_1_1);
1862
1863 VkPhysicalDeviceVulkan12Properties core_1_2 = {
1864 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
1865 };
1866 anv_get_physical_device_properties_1_2(pdevice, &core_1_2);
1867
1868 VkPhysicalDeviceVulkan13Properties core_1_3 = {
1869 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
1870 };
1871 anv_get_physical_device_properties_1_3(pdevice, &core_1_3);
1872
1873 vk_foreach_struct(ext, pProperties->pNext) {
1874 if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
1875 continue;
1876 if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
1877 continue;
1878 if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
1879 continue;
1880
1881 switch (ext->sType) {
1882 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
1883 VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
1884 (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
1885 properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1886 break;
1887 }
1888
1889 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
1890 VkPhysicalDeviceDrmPropertiesEXT *props =
1891 (VkPhysicalDeviceDrmPropertiesEXT *)ext;
1892
1893 props->hasPrimary = pdevice->has_master;
1894 props->primaryMajor = pdevice->master_major;
1895 props->primaryMinor = pdevice->master_minor;
1896
1897 props->hasRender = pdevice->has_local;
1898 props->renderMajor = pdevice->local_major;
1899 props->renderMinor = pdevice->local_minor;
1900
1901 break;
1902 }
1903
1904 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
1905 VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
1906 (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
1907 /* Userptr needs page aligned memory. */
1908 props->minImportedHostPointerAlignment = 4096;
1909 break;
1910 }
1911
1912 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
1913 VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
1914 (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
1915 /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
1916 * Sampling Rules - Legacy Mode", it says the following:
1917 *
1918 * "Note that the device divides a pixel into a 16x16 array of
1919 * subpixels, referenced by their upper left corners."
1920 *
1921 * This is the only known reference in the PRMs to the subpixel
1922 * precision of line rasterization and a "16x16 array of subpixels"
1923 * implies 4 subpixel precision bits. Empirical testing has shown
1924 * that 4 subpixel precision bits applies to all line rasterization
1925 * types.
1926 */
1927 props->lineSubPixelPrecisionBits = 4;
1928 break;
1929 }
1930
1931 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
1932 VkPhysicalDeviceMaintenance4Properties *properties =
1933 (VkPhysicalDeviceMaintenance4Properties *)ext;
1934 properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
1935 break;
1936 }
1937
1938 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
1939 VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
1940 (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
1941 properties->pciDomain = pdevice->info.pci_domain;
1942 properties->pciBus = pdevice->info.pci_bus;
1943 properties->pciDevice = pdevice->info.pci_dev;
1944 properties->pciFunction = pdevice->info.pci_func;
1945 break;
1946 }
1947
1948 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
1949 VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
1950 (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
1951 /* We could support this by spawning a shader to do the equation
1952 * normalization.
1953 */
1954 properties->allowCommandBufferQueryCopies = false;
1955 break;
1956 }
1957
1958 #pragma GCC diagnostic push
1959 #pragma GCC diagnostic ignored "-Wswitch"
1960 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
1961 VkPhysicalDevicePresentationPropertiesANDROID *props =
1962 (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
1963 props->sharedImage = VK_FALSE;
1964 break;
1965 }
1966 #pragma GCC diagnostic pop
1967
1968 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
1969 VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
1970 (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
1971 properties->provokingVertexModePerPipeline = true;
1972 properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1973 break;
1974 }
1975
1976 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
1977 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
1978 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
1979 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1980 break;
1981 }
1982
1983 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
1984 VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
1985 properties->robustStorageBufferAccessSizeAlignment =
1986 ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1987 properties->robustUniformBufferAccessSizeAlignment =
1988 ANV_UBO_ALIGNMENT;
1989 break;
1990 }
1991
1992 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
1993 VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
1994 (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
1995
1996 props->sampleLocationSampleCounts =
1997 isl_device_get_sample_counts(&pdevice->isl_dev);
1998
1999 /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
2000 props->maxSampleLocationGridSize.width = 1;
2001 props->maxSampleLocationGridSize.height = 1;
2002
2003 props->sampleLocationCoordinateRange[0] = 0;
2004 props->sampleLocationCoordinateRange[1] = 0.9375;
2005 props->sampleLocationSubPixelBits = 4;
2006
2007 props->variableSampleLocations = true;
2008 break;
2009 }
2010
2011 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
2012 VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
2013 (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
2014 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
2015 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2016 memcpy(props->shaderModuleIdentifierAlgorithmUUID,
2017 vk_shaderModuleIdentifierAlgorithmUUID,
2018 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2019 break;
2020 }
2021
2022 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2023 VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2024 (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2025
2026 props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2027 props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2028 props->maxTransformFeedbackBufferSize = (1ull << 32);
2029 props->maxTransformFeedbackStreamDataSize = 128 * 4;
2030 props->maxTransformFeedbackBufferDataSize = 128 * 4;
2031 props->maxTransformFeedbackBufferDataStride = 2048;
2032 props->transformFeedbackQueries = true;
2033 props->transformFeedbackStreamsLinesTriangles = false;
2034 props->transformFeedbackRasterizationStreamSelect = false;
2035 /* This requires MI_MATH */
2036 props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
2037 break;
2038 }
2039
2040 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2041 VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2042 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2043 /* We have to restrict this a bit for multiview */
2044 props->maxVertexAttribDivisor = UINT32_MAX / 16;
2045 break;
2046 }
2047
2048 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2049 VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2050 props->maxMultiDrawCount = 2048;
2051 break;
2052 }
2053
2054 default:
2055 anv_debug_ignored_stype(ext->sType);
2056 break;
2057 }
2058 }
2059 }
2060
2061 static int
2062 vk_priority_to_gen(int priority)
2063 {
2064 switch (priority) {
2065 case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
2066 return INTEL_CONTEXT_LOW_PRIORITY;
2067 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
2068 return INTEL_CONTEXT_MEDIUM_PRIORITY;
2069 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
2070 return INTEL_CONTEXT_HIGH_PRIORITY;
2071 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
2072 return INTEL_CONTEXT_REALTIME_PRIORITY;
2073 default:
2074 unreachable("Invalid priority");
2075 }
2076 }
2077
2078 static const VkQueueFamilyProperties
2079 anv_queue_family_properties_template = {
2080 .timestampValidBits = 36, /* XXX: Real value here */
2081 .minImageTransferGranularity = { 1, 1, 1 },
2082 };
2083
2084 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2085 VkPhysicalDevice physicalDevice,
2086 uint32_t* pQueueFamilyPropertyCount,
2087 VkQueueFamilyProperties2* pQueueFamilyProperties)
2088 {
2089 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2090 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2091 pQueueFamilyProperties, pQueueFamilyPropertyCount);
2092
2093 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2094 struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2095 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2096 p->queueFamilyProperties = anv_queue_family_properties_template;
2097 p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2098 p->queueFamilyProperties.queueCount = queue_family->queueCount;
2099
2100 vk_foreach_struct(ext, p->pNext) {
2101 switch (ext->sType) {
2102 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2103 VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2104 (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2105
2106 /* Deliberately sorted low to high */
2107 VkQueueGlobalPriorityKHR all_priorities[] = {
2108 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2109 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2110 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2111 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2112 };
2113
2114 uint32_t count = 0;
2115 for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2116 if (vk_priority_to_gen(all_priorities[i]) >
2117 pdevice->max_context_priority)
2118 break;
2119
2120 properties->priorities[count++] = all_priorities[i];
2121 }
2122 properties->priorityCount = count;
2123 break;
2124 }
2125
2126 default:
2127 anv_debug_ignored_stype(ext->sType);
2128 }
2129 }
2130 }
2131 }
2132 }
2133
2134 void anv_GetPhysicalDeviceMemoryProperties(
2135 VkPhysicalDevice physicalDevice,
2136 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2137 {
2138 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2139
2140 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2141 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2142 pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2143 .propertyFlags = physical_device->memory.types[i].propertyFlags,
2144 .heapIndex = physical_device->memory.types[i].heapIndex,
2145 };
2146 }
2147
2148 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2149 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2150 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2151 .size = physical_device->memory.heaps[i].size,
2152 .flags = physical_device->memory.heaps[i].flags,
2153 };
2154 }
2155 }
2156
2157 static void
2158 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2159 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2160 {
2161 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2162
2163 if (!device->vk.supported_extensions.EXT_memory_budget)
2164 return;
2165
2166 anv_update_meminfo(device, device->local_fd);
2167
2168 VkDeviceSize total_sys_heaps_size = 0;
2169 for (size_t i = 0; i < device->memory.heap_count; i++)
2170 total_sys_heaps_size += device->memory.heaps[i].size;
2171
2172 for (size_t i = 0; i < device->memory.heap_count; i++) {
2173 VkDeviceSize heap_size = device->memory.heaps[i].size;
2174 VkDeviceSize heap_used = device->memory.heaps[i].used;
2175 VkDeviceSize heap_budget, total_heaps_size;
2176 uint64_t mem_available = 0;
2177
2178 total_heaps_size = total_sys_heaps_size;
2179 mem_available = device->sys.available;
2180
2181 double heap_proportion = (double) heap_size / total_heaps_size;
2182 VkDeviceSize available_prop = mem_available * heap_proportion;
2183
2184 /*
2185 * Let's not incite the app to starve the system: report at most 90% of
2186 * the available heap memory.
2187 */
2188 uint64_t heap_available = available_prop * 9 / 10;
2189 heap_budget = MIN2(heap_size, heap_used + heap_available);
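      /* A worked example of the budget math above: with a single 8 GiB heap of
       * which 1 GiB is used and 4 GiB of system memory still available,
       * heap_available = 0.9 * 4 GiB = ~3.6 GiB, so the reported budget is
       * min(8 GiB, 1 GiB + 3.6 GiB) = ~4.6 GiB before the rounding below.
       */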
2190
2191 /*
2192 * Round down to the nearest MB
2193 */
2194 heap_budget &= ~((1ull << 20) - 1);
2195
2196 /*
2197 * The heapBudget value must be non-zero for array elements less than
2198 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2199 * value must be less than or equal to VkMemoryHeap::size for each heap.
2200 */
2201 assert(0 < heap_budget && heap_budget <= heap_size);
2202
2203 memoryBudget->heapUsage[i] = heap_used;
2204 memoryBudget->heapBudget[i] = heap_budget;
2205 }
2206
2207 /* The heapBudget and heapUsage values must be zero for array elements
2208 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2209 */
2210 for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2211 memoryBudget->heapBudget[i] = 0;
2212 memoryBudget->heapUsage[i] = 0;
2213 }
2214 }
2215
2216 void anv_GetPhysicalDeviceMemoryProperties2(
2217 VkPhysicalDevice physicalDevice,
2218 VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
2219 {
2220 anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2221 &pMemoryProperties->memoryProperties);
2222
2223 vk_foreach_struct(ext, pMemoryProperties->pNext) {
2224 switch (ext->sType) {
2225 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2226 anv_get_memory_budget(physicalDevice, (void*)ext);
2227 break;
2228 default:
2229 anv_debug_ignored_stype(ext->sType);
2230 break;
2231 }
2232 }
2233 }
2234
2235 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2236 VkInstance _instance,
2237 const char* pName)
2238 {
2239 ANV_FROM_HANDLE(anv_instance, instance, _instance);
2240 return vk_instance_get_proc_addr(&instance->vk,
2241 &anv_instance_entrypoints,
2242 pName);
2243 }
2244
2245 /* With version 1+ of the loader interface the ICD should expose
2246 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2247 */
2248 PUBLIC
2249 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2250 VkInstance instance,
2251 const char* pName)
2252 {
2253 return anv_GetInstanceProcAddr(instance, pName);
2254 }
2255 static struct anv_state
2256 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2257 {
2258 struct anv_state state;
2259
2260 state = anv_state_pool_alloc(pool, size, align);
2261 memcpy(state.map, p, size);
2262
2263 return state;
2264 }
2265
2266 static void
2267 anv_device_init_border_colors(struct anv_device *device)
2268 {
2269 if (device->info->platform == INTEL_PLATFORM_HSW) {
2270 static const struct hsw_border_color border_colors[] = {
2271 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2272 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2273 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2274 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2275 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2276 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2277 };
2278
2279 device->border_colors =
2280 anv_state_pool_emit_data(&device->dynamic_state_pool,
2281 sizeof(border_colors), 512, border_colors);
2282 } else {
2283 static const struct gfx8_border_color border_colors[] = {
2284 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2285 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2286 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2287 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2288 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2289 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2290 };
2291
2292 device->border_colors =
2293 anv_state_pool_emit_data(&device->dynamic_state_pool,
2294 sizeof(border_colors), 64, border_colors);
2295 }
2296 }
2297
2298 static VkResult
2299 anv_device_init_trivial_batch(struct anv_device *device)
2300 {
2301 VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2302 ANV_BO_ALLOC_MAPPED,
2303 0 /* explicit_address */,
2304 &device->trivial_batch_bo);
2305 if (result != VK_SUCCESS)
2306 return result;
2307
2308 struct anv_batch batch = {
2309 .start = device->trivial_batch_bo->map,
2310 .next = device->trivial_batch_bo->map,
2311 .end = device->trivial_batch_bo->map + 4096,
2312 };
2313
2314 anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2315 anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2316
2317 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2318 if (device->physical->memory.need_flush)
2319 intel_flush_range(batch.start, batch.next - batch.start);
2320 #endif
2321
2322 return VK_SUCCESS;
2323 }
2324
2325 static bool
2326 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2327 struct anv_block_pool *pool,
2328 uint64_t address)
2329 {
2330 anv_block_pool_foreach_bo(bo, pool) {
2331 uint64_t bo_address = intel_48b_address(bo->offset);
2332 if (address >= bo_address && address < (bo_address + bo->size)) {
2333 *ret = (struct intel_batch_decode_bo) {
2334 .addr = bo_address,
2335 .size = bo->size,
2336 .map = bo->map,
2337 };
2338 return true;
2339 }
2340 }
2341 return false;
2342 }
2343
2344 /* Finding a buffer for batch decoding */
2345 static struct intel_batch_decode_bo
2346 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2347 {
2348 struct anv_device *device = v_batch;
2349 struct intel_batch_decode_bo ret_bo = {};
2350
2351 assert(ppgtt);
2352
2353 if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2354 return ret_bo;
2355 if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2356 return ret_bo;
2357 if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2358 return ret_bo;
2359 if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2360 return ret_bo;
2361
2362 if (!device->cmd_buffer_being_decoded)
2363 return (struct intel_batch_decode_bo) { };
2364
2365 struct anv_batch_bo **bo;
2366
2367 u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2368 /* The decoder zeroes out the top 16 bits, so we need to as well */
2369 uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
2370
2371 if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2372 return (struct intel_batch_decode_bo) {
2373 .addr = bo_address,
2374 .size = (*bo)->bo->size,
2375 .map = (*bo)->bo->map,
2376 };
2377 }
2378 }
2379
2380 return (struct intel_batch_decode_bo) { };
2381 }
2382
2383 static VkResult anv_device_check_status(struct vk_device *vk_device);
2384
2385 static VkResult
2386 anv_device_setup_context(struct anv_device *device,
2387 const VkDeviceCreateInfo *pCreateInfo,
2388 const uint32_t num_queues)
2389 {
2390 struct anv_physical_device *physical_device = device->physical;
2391 VkResult result = VK_SUCCESS;
2392
2393 if (device->physical->engine_info) {
2394 /* The kernel API supports at most 64 engines */
2395 assert(num_queues <= 64);
2396 enum intel_engine_class engine_classes[64];
2397 int engine_count = 0;
2398 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2399 const VkDeviceQueueCreateInfo *queueCreateInfo =
2400 &pCreateInfo->pQueueCreateInfos[i];
2401
2402 assert(queueCreateInfo->queueFamilyIndex <
2403 physical_device->queue.family_count);
2404 struct anv_queue_family *queue_family =
2405 &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
2406
2407 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
2408 engine_classes[engine_count++] = queue_family->engine_class;
2409 }
2410 if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
2411 physical_device->engine_info,
2412 engine_count, engine_classes,
2413 0 /* vm_id */,
2414 (uint32_t *)&device->context_id))
2415 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2416 "kernel context creation failed");
2417 } else {
2418 assert(num_queues == 1);
2419 if (!intel_gem_create_context(device->fd, &device->context_id))
2420 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2421 }
2422
2423 if (result != VK_SUCCESS)
2424 return result;
2425
2426 /* Here we tell the kernel not to attempt to recover our context but
2427 * immediately (on the next batchbuffer submission) report that the
2428 * context is lost, and we will do the recovery ourselves. In the case
2429 * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
2430 * the client clean up the pieces.
2431 */
2432 anv_gem_set_context_param(device->fd, device->context_id,
2433 I915_CONTEXT_PARAM_RECOVERABLE, false);
2434
2435 /* Check if client specified queue priority. */
2436 const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
2437 vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2438 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
2439
2440 VkQueueGlobalPriorityKHR priority =
2441 queue_priority ? queue_priority->globalPriority :
2442 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
2443
2444 /* As per spec, the driver implementation may deny requests to acquire
2445 * a priority above the default priority (MEDIUM) if the caller does not
2446 * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
2447 * is returned.
2448 */
2449 if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
2450 int err = anv_gem_set_context_param(device->fd, device->context_id,
2451 I915_CONTEXT_PARAM_PRIORITY,
2452 vk_priority_to_gen(priority));
2453 if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
2454 result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
2455 goto fail_context;
2456 }
2457 }
2458
2459 return result;
2460
2461 fail_context:
2462 intel_gem_destroy_context(device->fd, device->context_id);
2463 return result;
2464 }
2465
2466 VkResult anv_CreateDevice(
2467 VkPhysicalDevice physicalDevice,
2468 const VkDeviceCreateInfo* pCreateInfo,
2469 const VkAllocationCallbacks* pAllocator,
2470 VkDevice* pDevice)
2471 {
2472 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2473 VkResult result;
2474 struct anv_device *device;
2475
2476 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2477
2478 /* Check requested queues and fail if we are requested to create any
2479 * queues with flags we don't support.
2480 */
2481 assert(pCreateInfo->queueCreateInfoCount > 0);
2482 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2483 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2484 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2485 }
2486
2487 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2488 sizeof(*device), 8,
2489 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2490 if (!device)
2491 return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2492
2493 struct vk_device_dispatch_table dispatch_table;
2494
2495 bool override_initial_entrypoints = true;
2496 if (physical_device->instance->vk.app_info.app_name &&
2497 !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
2498 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &doom64_device_entrypoints, true);
2499 override_initial_entrypoints = false;
2500 }
2501 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2502 anv_genX(&physical_device->info, device_entrypoints),
2503 override_initial_entrypoints);
2504 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2505 &anv_device_entrypoints, false);
2506 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2507 &wsi_device_entrypoints, false);
2508
2509 result = vk_device_init(&device->vk, &physical_device->vk,
2510 &dispatch_table, pCreateInfo, pAllocator);
2511 if (result != VK_SUCCESS)
2512 goto fail_alloc;
2513
2514 if (INTEL_DEBUG(DEBUG_BATCH)) {
2515 const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
2516
2517 intel_batch_decode_ctx_init_elk(&device->decoder_ctx,
2518 &physical_device->compiler->isa,
2519 &physical_device->info,
2520 stderr, decode_flags, NULL,
2521 decode_get_bo, NULL, device);
2522
2523 device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
2524 device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
2525 device->decoder_ctx.instruction_base =
2526 INSTRUCTION_STATE_POOL_MIN_ADDRESS;
2527 }
2528
2529 anv_device_set_physical(device, physical_device);
2530
2531 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2532 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2533 if (device->fd == -1) {
2534 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2535 goto fail_device;
2536 }
2537
2538 device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
2539 device->vk.check_status = anv_device_check_status;
2540 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
2541 vk_device_set_drm_fd(&device->vk, device->fd);
2542
2543 uint32_t num_queues = 0;
2544 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2545 num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2546
2547 result = anv_device_setup_context(device, pCreateInfo, num_queues);
2548 if (result != VK_SUCCESS)
2549 goto fail_fd;
2550
2551 device->queues =
2552 vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
2553 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2554 if (device->queues == NULL) {
2555 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2556 goto fail_context_id;
2557 }
2558
2559 device->queue_count = 0;
2560 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2561 const VkDeviceQueueCreateInfo *queueCreateInfo =
2562 &pCreateInfo->pQueueCreateInfos[i];
2563
2564 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
2565 /* When using legacy contexts, we use I915_EXEC_RENDER but, with
2566 * engine-based contexts, the bottom 6 bits of exec_flags are used
2567 * for the engine ID.
2568 */
2569 uint32_t exec_flags = device->physical->engine_info ?
2570 device->queue_count : I915_EXEC_RENDER;
2571
2572 result = anv_queue_init(device, &device->queues[device->queue_count],
2573 exec_flags, queueCreateInfo, j);
2574 if (result != VK_SUCCESS)
2575 goto fail_queues;
2576
2577 device->queue_count++;
2578 }
2579 }
2580
2581 if (!anv_use_relocations(physical_device)) {
2582 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
2583 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2584 goto fail_queues;
2585 }
2586
2587 /* keep the page with address zero out of the allocator */
2588 util_vma_heap_init(&device->vma_lo,
2589 LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
2590
2591 util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
2592 CLIENT_VISIBLE_HEAP_SIZE);
2593
2594 /* Leave the last 4GiB out of the high vma range, so that no state
2595 * base address + size can overflow 48 bits. For more information see
2596 * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
2597 */
2598 util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
2599 physical_device->gtt_size - (1ull << 32) -
2600 HIGH_HEAP_MIN_ADDRESS);
2601 }
2602
2603 list_inithead(&device->memory_objects);
2604
2605 /* On Broadwell and later, we can use batch chaining to more efficiently
2606     * implement growing command buffers. Prior to Broadwell, the kernel
2607 * command parser gets in the way and we have to fall back to growing
2608 * the batch.
2609 */
2610 device->can_chain_batches = device->info->ver >= 8;
2611
2612 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
2613 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2614 goto fail_vmas;
2615 }
2616
2617 pthread_condattr_t condattr;
2618 if (pthread_condattr_init(&condattr) != 0) {
2619 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2620 goto fail_mutex;
2621 }
2622 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2623 pthread_condattr_destroy(&condattr);
2624 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2625 goto fail_mutex;
2626 }
2627 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
2628 pthread_condattr_destroy(&condattr);
2629 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2630 goto fail_mutex;
2631 }
2632 pthread_condattr_destroy(&condattr);
2633
2634 result = anv_bo_cache_init(&device->bo_cache, device);
2635 if (result != VK_SUCCESS)
2636 goto fail_queue_cond;
2637
2638 anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
2639
2640 /* Because scratch is also relative to General State Base Address, we leave
2641 * the base address 0 and start the pool memory at an offset. This way we
2642 * get the correct offsets in the anv_states that get allocated from it.
2643 */
2644 result = anv_state_pool_init(&device->general_state_pool, device,
2645 "general pool",
2646 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
2647 if (result != VK_SUCCESS)
2648 goto fail_batch_bo_pool;
2649
2650 result = anv_state_pool_init(&device->dynamic_state_pool, device,
2651 "dynamic pool",
2652 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
2653 if (result != VK_SUCCESS)
2654 goto fail_general_state_pool;
2655
2656 if (device->info->ver >= 8) {
2657 /* The border color pointer is limited to 24 bits, so we need to make
2658 * sure that any such color used at any point in the program doesn't
2659 * exceed that limit.
2660 * We achieve that by reserving all the custom border colors we support
2661 * right off the bat, so they are close to the base address.
2662 */
2663 anv_state_reserved_pool_init(&device->custom_border_colors,
2664 &device->dynamic_state_pool,
2665 MAX_CUSTOM_BORDER_COLORS,
2666 sizeof(struct gfx8_border_color), 64);
2667 }
2668
2669 result = anv_state_pool_init(&device->instruction_state_pool, device,
2670 "instruction pool",
2671 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
2672 if (result != VK_SUCCESS)
2673 goto fail_dynamic_state_pool;
2674
2675 result = anv_state_pool_init(&device->surface_state_pool, device,
2676 "surface state pool",
2677 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
2678 if (result != VK_SUCCESS)
2679 goto fail_instruction_state_pool;
2680
2681 if (!anv_use_relocations(physical_device)) {
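      /* A sketch of the intent here: the binding table pool shares the surface
       * state pool's base address and uses a negative start offset, so states
       * allocated from it land in the dedicated binding table range while their
       * recorded offsets stay relative to that shared base.
       */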
2682 int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
2683 (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
2684 assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
2685 result = anv_state_pool_init(&device->binding_table_pool, device,
2686 "binding table pool",
2687 SURFACE_STATE_POOL_MIN_ADDRESS,
2688 bt_pool_offset,
2689 BINDING_TABLE_POOL_BLOCK_SIZE);
2690 }
2691 if (result != VK_SUCCESS)
2692 goto fail_surface_state_pool;
2693
2694 result = anv_device_alloc_bo(device, "workaround", 4096,
2695 ANV_BO_ALLOC_CAPTURE |
2696 ANV_BO_ALLOC_MAPPED,
2697 0 /* explicit_address */,
2698 &device->workaround_bo);
2699 if (result != VK_SUCCESS)
2700 goto fail_binding_table_pool;
2701
2702 device->workaround_address = (struct anv_address) {
2703 .bo = device->workaround_bo,
2704 .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
2705 device->workaround_bo->size,
2706 "hasvk"), 32),
2707 };
2708
2709 device->workarounds.doom64_images = NULL;
2710
2711 device->debug_frame_desc =
2712 intel_debug_get_identifier_block(device->workaround_bo->map,
2713 device->workaround_bo->size,
2714 INTEL_DEBUG_BLOCK_TYPE_FRAME);
2715
2716 result = anv_device_init_trivial_batch(device);
2717 if (result != VK_SUCCESS)
2718 goto fail_workaround_bo;
2719
2720 /* Allocate a null surface state at surface state offset 0. This makes
2721 * NULL descriptor handling trivial because we can just memset structures
2722 * to zero and they have a valid descriptor.
2723 */
2724 device->null_surface_state =
2725 anv_state_pool_alloc(&device->surface_state_pool,
2726 device->isl_dev.ss.size,
2727 device->isl_dev.ss.align);
2728 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
2729 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
2730 assert(device->null_surface_state.offset == 0);
2731
2732 anv_scratch_pool_init(device, &device->scratch_pool);
2733
2734 result = anv_genX(device->info, init_device_state)(device);
2735 if (result != VK_SUCCESS)
2736 goto fail_trivial_batch_bo_and_scratch_pool;
2737
2738 struct vk_pipeline_cache_create_info pcc_info = { };
2739 device->default_pipeline_cache =
2740 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2741 if (!device->default_pipeline_cache) {
2742 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2743 goto fail_trivial_batch_bo_and_scratch_pool;
2744 }
2745
2746 /* Internal shaders need their own pipeline cache because, unlike the rest
2747     * of ANV, it won't work at all without a cache: it depends on the cache
2748     * to keep its shaders resident while it runs. Therefore, we need a special
2749 * cache just for BLORP/RT that's forced to always be enabled.
2750 */
2751 pcc_info.force_enable = true;
2752 device->internal_cache =
2753 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2754 if (device->internal_cache == NULL) {
2755 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2756 goto fail_default_pipeline_cache;
2757 }
2758
2759 device->robust_buffer_access =
2760 device->vk.enabled_features.robustBufferAccess ||
2761 device->vk.enabled_features.nullDescriptor;
2762
2763 anv_device_init_blorp(device);
2764
2765 anv_device_init_border_colors(device);
2766
2767 anv_device_perf_init(device);
2768
2769 anv_device_utrace_init(device);
2770
2771 *pDevice = anv_device_to_handle(device);
2772
2773 return VK_SUCCESS;
2774
2775 fail_default_pipeline_cache:
2776 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2777 fail_trivial_batch_bo_and_scratch_pool:
2778 anv_scratch_pool_finish(device, &device->scratch_pool);
2779 anv_device_release_bo(device, device->trivial_batch_bo);
2780 fail_workaround_bo:
2781 anv_device_release_bo(device, device->workaround_bo);
2782 fail_binding_table_pool:
2783 if (!anv_use_relocations(physical_device))
2784 anv_state_pool_finish(&device->binding_table_pool);
2785 fail_surface_state_pool:
2786 anv_state_pool_finish(&device->surface_state_pool);
2787 fail_instruction_state_pool:
2788 anv_state_pool_finish(&device->instruction_state_pool);
2789 fail_dynamic_state_pool:
2790 if (device->info->ver >= 8)
2791 anv_state_reserved_pool_finish(&device->custom_border_colors);
2792 anv_state_pool_finish(&device->dynamic_state_pool);
2793 fail_general_state_pool:
2794 anv_state_pool_finish(&device->general_state_pool);
2795 fail_batch_bo_pool:
2796 anv_bo_pool_finish(&device->batch_bo_pool);
2797 anv_bo_cache_finish(&device->bo_cache);
2798 fail_queue_cond:
2799 pthread_cond_destroy(&device->queue_submit);
2800 fail_mutex:
2801 pthread_mutex_destroy(&device->mutex);
2802 fail_vmas:
2803 if (!anv_use_relocations(physical_device)) {
2804 util_vma_heap_finish(&device->vma_hi);
2805 util_vma_heap_finish(&device->vma_cva);
2806 util_vma_heap_finish(&device->vma_lo);
2807 }
2808 fail_queues:
2809 for (uint32_t i = 0; i < device->queue_count; i++)
2810 anv_queue_finish(&device->queues[i]);
2811 vk_free(&device->vk.alloc, device->queues);
2812 fail_context_id:
2813 intel_gem_destroy_context(device->fd, device->context_id);
2814 fail_fd:
2815 close(device->fd);
2816 fail_device:
2817 vk_device_finish(&device->vk);
2818 fail_alloc:
2819 vk_free(&device->vk.alloc, device);
2820
2821 return result;
2822 }
2823
2824 void anv_DestroyDevice(
2825 VkDevice _device,
2826 const VkAllocationCallbacks* pAllocator)
2827 {
2828 ANV_FROM_HANDLE(anv_device, device, _device);
2829
2830 if (!device)
2831 return;
2832
2833 anv_device_utrace_finish(device);
2834
2835 anv_device_finish_blorp(device);
2836
2837 vk_pipeline_cache_destroy(device->internal_cache, NULL);
2838 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2839
2840 #ifdef HAVE_VALGRIND
2841 /* We only need to free these to prevent valgrind errors. The backing
2842 * BO will go away in a couple of lines so we don't actually leak.
2843 */
2844 if (device->info->ver >= 8)
2845 anv_state_reserved_pool_finish(&device->custom_border_colors);
2846 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
2847 anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
2848 #endif
2849
2850 anv_scratch_pool_finish(device, &device->scratch_pool);
2851
2852 anv_device_release_bo(device, device->workaround_bo);
2853 anv_device_release_bo(device, device->trivial_batch_bo);
2854
2855 if (!anv_use_relocations(device->physical))
2856 anv_state_pool_finish(&device->binding_table_pool);
2857 anv_state_pool_finish(&device->surface_state_pool);
2858 anv_state_pool_finish(&device->instruction_state_pool);
2859 anv_state_pool_finish(&device->dynamic_state_pool);
2860 anv_state_pool_finish(&device->general_state_pool);
2861
2862 anv_bo_pool_finish(&device->batch_bo_pool);
2863
2864 anv_bo_cache_finish(&device->bo_cache);
2865
2866 if (!anv_use_relocations(device->physical)) {
2867 util_vma_heap_finish(&device->vma_hi);
2868 util_vma_heap_finish(&device->vma_cva);
2869 util_vma_heap_finish(&device->vma_lo);
2870 }
2871
2872 pthread_cond_destroy(&device->queue_submit);
2873 pthread_mutex_destroy(&device->mutex);
2874
2875 for (uint32_t i = 0; i < device->queue_count; i++)
2876 anv_queue_finish(&device->queues[i]);
2877 vk_free(&device->vk.alloc, device->queues);
2878
2879 intel_gem_destroy_context(device->fd, device->context_id);
2880
2881 if (INTEL_DEBUG(DEBUG_BATCH))
2882 intel_batch_decode_ctx_finish(&device->decoder_ctx);
2883
2884 close(device->fd);
2885
2886 vk_device_finish(&device->vk);
2887 vk_free(&device->vk.alloc, device);
2888 }
2889
2890 VkResult anv_EnumerateInstanceLayerProperties(
2891 uint32_t* pPropertyCount,
2892 VkLayerProperties* pProperties)
2893 {
2894 if (pProperties == NULL) {
2895 *pPropertyCount = 0;
2896 return VK_SUCCESS;
2897 }
2898
2899 /* None supported at this time */
2900 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2901 }
2902
2903 static VkResult
2904 anv_device_check_status(struct vk_device *vk_device)
2905 {
2906 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2907
2908 uint32_t active, pending;
2909 int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
2910 &active, &pending);
2911 if (ret == -1) {
2912 /* We don't know the real error. */
2913 return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
2914 }
2915
2916 if (active) {
2917 return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
2918 } else if (pending) {
2919 return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
2920 }
2921
2922 return VK_SUCCESS;
2923 }
2924
2925 VkResult
2926 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2927 int64_t timeout)
2928 {
2929 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
2930 if (ret == -1 && errno == ETIME) {
2931 return VK_TIMEOUT;
2932 } else if (ret == -1) {
2933 /* We don't know the real error. */
2934 return vk_device_set_lost(&device->vk, "gem wait failed: %m");
2935 } else {
2936 return VK_SUCCESS;
2937 }
2938 }
2939
2940 uint64_t
2941 anv_vma_alloc(struct anv_device *device,
2942 uint64_t size, uint64_t align,
2943 enum anv_bo_alloc_flags alloc_flags,
2944 uint64_t client_address)
2945 {
2946 pthread_mutex_lock(&device->vma_mutex);
2947
2948 uint64_t addr = 0;
2949
2950 if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
2951 if (client_address) {
2952 if (util_vma_heap_alloc_addr(&device->vma_cva,
2953 client_address, size)) {
2954 addr = client_address;
2955 }
2956 } else {
2957 addr = util_vma_heap_alloc(&device->vma_cva, size, align);
2958 }
2959 /* We don't want to fall back to other heaps */
2960 goto done;
2961 }
2962
2963 assert(client_address == 0);
2964
2965 if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
2966 addr = util_vma_heap_alloc(&device->vma_hi, size, align);
2967
2968 if (addr == 0)
2969 addr = util_vma_heap_alloc(&device->vma_lo, size, align);
2970
2971 done:
2972 pthread_mutex_unlock(&device->vma_mutex);
2973
2974 assert(addr == intel_48b_address(addr));
2975 return intel_canonical_address(addr);
2976 }
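/* A minimal sketch (not compiled into the driver, names hypothetical) of what
 * the two address helpers used above are assumed to do: a 48-bit GPU virtual
 * address is made "canonical" by sign-extending bit 47 into the upper 16 bits,
 * and intel_48b_address() is assumed to strip that extension again.
 */
#if 0
static inline uint64_t example_canonical_address(uint64_t addr)
{
   /* Sign-extend bit 47 into bits [63:48]. */
   return (uint64_t)(((int64_t)addr << 16) >> 16);
}

static inline uint64_t example_48b_address(uint64_t addr)
{
   /* Keep only the low 48 bits. */
   return addr & ((1ull << 48) - 1);
}
#endif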
2977
2978 void
2979 anv_vma_free(struct anv_device *device,
2980 uint64_t address, uint64_t size)
2981 {
2982 const uint64_t addr_48b = intel_48b_address(address);
2983
2984 pthread_mutex_lock(&device->vma_mutex);
2985
2986 if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
2987 addr_48b <= LOW_HEAP_MAX_ADDRESS) {
2988 util_vma_heap_free(&device->vma_lo, addr_48b, size);
2989 } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
2990 addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
2991 util_vma_heap_free(&device->vma_cva, addr_48b, size);
2992 } else {
2993 assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
2994 util_vma_heap_free(&device->vma_hi, addr_48b, size);
2995 }
2996
2997 pthread_mutex_unlock(&device->vma_mutex);
2998 }
2999
3000 VkResult anv_AllocateMemory(
3001 VkDevice _device,
3002 const VkMemoryAllocateInfo* pAllocateInfo,
3003 const VkAllocationCallbacks* pAllocator,
3004 VkDeviceMemory* pMem)
3005 {
3006 ANV_FROM_HANDLE(anv_device, device, _device);
3007 struct anv_physical_device *pdevice = device->physical;
3008 struct anv_device_memory *mem;
3009 VkResult result = VK_SUCCESS;
3010
3011 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3012
3013 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3014 assert(pAllocateInfo->allocationSize > 0);
3015
3016 VkDeviceSize aligned_alloc_size =
3017 align64(pAllocateInfo->allocationSize, 4096);
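/* Worked example (hypothetical value): align64(5000, 4096) rounds the
 * requested size up to the next 4 KiB multiple, i.e. 8192, so the heap-size
 * check below accounts whole pages rather than the raw request.
 */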
3018
3019 if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3020 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3021
3022 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3023 struct anv_memory_type *mem_type =
3024 &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3025 assert(mem_type->heapIndex < pdevice->memory.heap_count);
3026 struct anv_memory_heap *mem_heap =
3027 &pdevice->memory.heaps[mem_type->heapIndex];
3028
3029 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3030 if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3031 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3032
3033 mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3034 VK_OBJECT_TYPE_DEVICE_MEMORY);
3035 if (mem == NULL)
3036 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3037
3038 mem->type = mem_type;
3039 mem->map = NULL;
3040 mem->map_size = 0;
3041 mem->map_delta = 0;
3042 mem->ahw = NULL;
3043 mem->host_ptr = NULL;
3044
3045 enum anv_bo_alloc_flags alloc_flags = 0;
3046
3047 const VkExportMemoryAllocateInfo *export_info = NULL;
3048 const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3049 const VkImportMemoryFdInfoKHR *fd_info = NULL;
3050 const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3051 const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3052 VkMemoryAllocateFlags vk_flags = 0;
3053 uint64_t client_address = 0;
3054
3055 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3056 switch (ext->sType) {
3057 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3058 export_info = (void *)ext;
3059 break;
3060
3061 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3062 ahw_import_info = (void *)ext;
3063 break;
3064
3065 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3066 fd_info = (void *)ext;
3067 break;
3068
3069 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3070 host_ptr_info = (void *)ext;
3071 break;
3072
3073 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3074 const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3075 vk_flags = flags_info->flags;
3076 break;
3077 }
3078
3079 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3080 dedicated_info = (void *)ext;
3081 break;
3082
3083 case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3084 const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3085 (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3086 client_address = addr_info->opaqueCaptureAddress;
3087 break;
3088 }
3089
3090 default:
3091 if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3092 /* This isn't a real enum value, so a case label for it would
3093 * trigger a compiler warning; use a conditional check instead.
3094 */
3095 anv_debug_ignored_stype(ext->sType);
3096 break;
3097 }
3098 }
3099
3100 if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3101 alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3102
3103 if ((export_info && export_info->handleTypes) ||
3104 (fd_info && fd_info->handleType) ||
3105 (host_ptr_info && host_ptr_info->handleType)) {
3106 /* Anything imported or exported is EXTERNAL */
3107 alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3108 }
3109
3110 /* Check if we need to support Android HW buffer export. If so,
3111 * create AHardwareBuffer and import memory from it.
3112 */
3113 bool android_export = false;
3114 if (export_info && export_info->handleTypes &
3115 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3116 android_export = true;
3117
3118 if (ahw_import_info) {
3119 result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3120 if (result != VK_SUCCESS)
3121 goto fail;
3122
3123 goto success;
3124 } else if (android_export) {
3125 result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3126 if (result != VK_SUCCESS)
3127 goto fail;
3128
3129 goto success;
3130 }
3131
3132 /* The Vulkan spec permits handleType to be 0, in which case the struct is
3133 * ignored.
3134 */
3135 if (fd_info && fd_info->handleType) {
3136 /* At the moment, we support only the handle types listed below. */
3137 assert(fd_info->handleType ==
3138 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3139 fd_info->handleType ==
3140 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3141
3142 result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3143 client_address, &mem->bo);
3144 if (result != VK_SUCCESS)
3145 goto fail;
3146
3147 /* For security purposes, we reject importing the bo if it's smaller
3148 * than the requested allocation size. This prevents a malicious client
3149 * from passing a buffer to a trusted client, lying about the size, and
3150 * telling the trusted client to try and texture from an image that goes
3151 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
3152 * in the trusted client. The trusted client can protect itself against
3153 * this sort of attack but only if it can trust the buffer size.
3154 */
3155 if (mem->bo->size < aligned_alloc_size) {
3156 result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3157 "aligned allocationSize too large for "
3158 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3159 "%"PRIu64"B > %"PRIu64"B",
3160 aligned_alloc_size, mem->bo->size);
3161 anv_device_release_bo(device, mem->bo);
3162 goto fail;
3163 }
3164
3165 /* From the Vulkan spec:
3166 *
3167 * "Importing memory from a file descriptor transfers ownership of
3168 * the file descriptor from the application to the Vulkan
3169 * implementation. The application must not perform any operations on
3170 * the file descriptor after a successful import."
3171 *
3172 * If the import fails, we leave the file descriptor open.
3173 */
3174 close(fd_info->fd);
3175 goto success;
3176 }
3177
3178 if (host_ptr_info && host_ptr_info->handleType) {
3179 if (host_ptr_info->handleType ==
3180 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3181 result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3182 goto fail;
3183 }
3184
3185 assert(host_ptr_info->handleType ==
3186 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3187
3188 result = anv_device_import_bo_from_host_ptr(device,
3189 host_ptr_info->pHostPointer,
3190 pAllocateInfo->allocationSize,
3191 alloc_flags,
3192 client_address,
3193 &mem->bo);
3194 if (result != VK_SUCCESS)
3195 goto fail;
3196
3197 mem->host_ptr = host_ptr_info->pHostPointer;
3198 goto success;
3199 }
3200
3201 /* Regular allocate (not importing memory). */
3202
3203 result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3204 alloc_flags, client_address, &mem->bo);
3205 if (result != VK_SUCCESS)
3206 goto fail;
3207
3208 if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
3209 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
3210
3211 /* Some legacy (non-modifier) consumers need the tiling to be set on
3212 * the BO itself; such images use a dedicated allocation, so set it here.
3213 */
3214 if (image->vk.wsi_legacy_scanout) {
3215 const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
3216 result = anv_device_set_bo_tiling(device, mem->bo,
3217 surf->row_pitch_B,
3218 surf->tiling);
3219 if (result != VK_SUCCESS) {
3220 anv_device_release_bo(device, mem->bo);
3221 goto fail;
3222 }
3223 }
3224 }
3225
3226 success:
3227 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3228 if (mem_heap_used > mem_heap->size) {
3229 p_atomic_add(&mem_heap->used, -mem->bo->size);
3230 anv_device_release_bo(device, mem->bo);
3231 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3232 "Out of heap memory");
3233 goto fail;
3234 }
3235
3236 pthread_mutex_lock(&device->mutex);
3237 list_addtail(&mem->link, &device->memory_objects);
3238 pthread_mutex_unlock(&device->mutex);
3239
3240 *pMem = anv_device_memory_to_handle(mem);
3241
3242 return VK_SUCCESS;
3243
3244 fail:
3245 vk_object_free(&device->vk, pAllocator, mem);
3246
3247 return result;
3248 }
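/* Application-side sketch (illustrative only; example_import_dma_buf is a
 * hypothetical helper, not part of this driver) of the dma-buf import path
 * handled above: VkImportMemoryFdInfoKHR is chained into vkAllocateMemory,
 * and on success ownership of the fd moves to the implementation, so the
 * application must not use or close it afterwards.
 */
#if 0
static VkResult
example_import_dma_buf(VkDevice device, int dma_buf_fd, VkDeviceSize size,
                       uint32_t memory_type_index, VkDeviceMemory *out_mem)
{
   const VkImportMemoryFdInfoKHR import_info = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
      .fd = dma_buf_fd,
   };
   const VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &import_info,
      .allocationSize = size,
      .memoryTypeIndex = memory_type_index,
   };

   /* On success the fd belongs to the driver: do not close() it again. */
   return vkAllocateMemory(device, &alloc_info, NULL, out_mem);
}
#endif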
3249
3250 VkResult anv_GetMemoryFdKHR(
3251 VkDevice device_h,
3252 const VkMemoryGetFdInfoKHR* pGetFdInfo,
3253 int* pFd)
3254 {
3255 ANV_FROM_HANDLE(anv_device, dev, device_h);
3256 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
3257
3258 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3259
3260 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3261 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3262
3263 return anv_device_export_bo(dev, mem->bo, pFd);
3264 }
3265
3266 VkResult anv_GetMemoryFdPropertiesKHR(
3267 VkDevice _device,
3268 VkExternalMemoryHandleTypeFlagBits handleType,
3269 int fd,
3270 VkMemoryFdPropertiesKHR* pMemoryFdProperties)
3271 {
3272 ANV_FROM_HANDLE(anv_device, device, _device);
3273
3274 switch (handleType) {
3275 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
3276 /* dma-buf can be imported as any memory type */
3277 pMemoryFdProperties->memoryTypeBits =
3278 (1 << device->physical->memory.type_count) - 1;
3279 return VK_SUCCESS;
3280
3281 default:
3282 /* The valid usage section for this function says:
3283 *
3284 * "handleType must not be one of the handle types defined as
3285 * opaque."
3286 *
3287 * So opaque handle types fall into the default "unsupported" case.
3288 */
3289 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3290 }
3291 }
3292
3293 VkResult anv_GetMemoryHostPointerPropertiesEXT(
3294 VkDevice _device,
3295 VkExternalMemoryHandleTypeFlagBits handleType,
3296 const void* pHostPointer,
3297 VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
3298 {
3299 ANV_FROM_HANDLE(anv_device, device, _device);
3300
3301 assert(pMemoryHostPointerProperties->sType ==
3302 VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
3303
3304 switch (handleType) {
3305 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
3306 /* Host memory can be imported as any memory type. */
3307 pMemoryHostPointerProperties->memoryTypeBits =
3308 (1ull << device->physical->memory.type_count) - 1;
3309
3310 return VK_SUCCESS;
3311
3312 default:
3313 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
3314 }
3315 }
3316
3317 void anv_FreeMemory(
3318 VkDevice _device,
3319 VkDeviceMemory _mem,
3320 const VkAllocationCallbacks* pAllocator)
3321 {
3322 ANV_FROM_HANDLE(anv_device, device, _device);
3323 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
3324
3325 if (mem == NULL)
3326 return;
3327
3328 pthread_mutex_lock(&device->mutex);
3329 list_del(&mem->link);
3330 pthread_mutex_unlock(&device->mutex);
3331
3332 if (mem->map)
3333 anv_UnmapMemory(_device, _mem);
3334
3335 p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
3336 -mem->bo->size);
3337
3338 anv_device_release_bo(device, mem->bo);
3339
3340 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
3341 if (mem->ahw)
3342 AHardwareBuffer_release(mem->ahw);
3343 #endif
3344
3345 vk_object_free(&device->vk, pAllocator, mem);
3346 }
3347
3348 VkResult anv_MapMemory(
3349 VkDevice _device,
3350 VkDeviceMemory _memory,
3351 VkDeviceSize offset,
3352 VkDeviceSize size,
3353 VkMemoryMapFlags flags,
3354 void** ppData)
3355 {
3356 ANV_FROM_HANDLE(anv_device, device, _device);
3357 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3358
3359 if (mem == NULL) {
3360 *ppData = NULL;
3361 return VK_SUCCESS;
3362 }
3363
3364 if (mem->host_ptr) {
3365 *ppData = mem->host_ptr + offset;
3366 return VK_SUCCESS;
3367 }
3368
3369 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3370 *
3371 * * memory must have been created with a memory type that reports
3372 * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
3373 */
3374 if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
3375 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3376 "Memory object not mappable.");
3377 }
3378
3379 if (size == VK_WHOLE_SIZE)
3380 size = mem->bo->size - offset;
3381
3382 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3383 *
3384 * * If size is not equal to VK_WHOLE_SIZE, size must be greater
3385 * than 0
3386 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
3387 * equal to the size of the memory minus offset
3388 */
3389 assert(size > 0);
3390 assert(offset + size <= mem->bo->size);
3391
3392 if (size != (size_t)size) {
3393 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3394 "requested size 0x%"PRIx64" does not fit in %u bits",
3395 size, (unsigned)(sizeof(size_t) * 8));
3396 }
3397
3398 /* From the Vulkan 1.2.194 spec:
3399 *
3400 * "memory must not be currently host mapped"
3401 */
3402 if (mem->map != NULL) {
3403 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3404 "Memory object already mapped.");
3405 }
3406
3407 uint32_t gem_flags = 0;
3408
3409 if (!device->info->has_llc &&
3410 (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
3411 gem_flags |= I915_MMAP_WC;
3412
3413 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
3414 uint64_t map_offset;
3415 if (!device->physical->info.has_mmap_offset)
3416 map_offset = offset & ~4095ull;
3417 else
3418 map_offset = 0;
3419 assert(offset >= map_offset);
3420 uint64_t map_size = (offset + size) - map_offset;
3421
3422 /* Let's map whole pages */
3423 map_size = align64(map_size, 4096);
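/* Worked example (hypothetical values): with offset = 5000 and size = 100 on
 * a kernel without mmap_offset, map_offset = 4096, map_size = align64(1004,
 * 4096) = 4096, and map_delta below becomes 904, so *ppData = map + 904.
 */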
3424
3425 void *map;
3426 VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
3427 map_size, gem_flags, &map);
3428 if (result != VK_SUCCESS)
3429 return result;
3430
3431 mem->map = map;
3432 mem->map_size = map_size;
3433 mem->map_delta = (offset - map_offset);
3434 *ppData = mem->map + mem->map_delta;
3435
3436 return VK_SUCCESS;
3437 }
3438
3439 void anv_UnmapMemory(
3440 VkDevice _device,
3441 VkDeviceMemory _memory)
3442 {
3443 ANV_FROM_HANDLE(anv_device, device, _device);
3444 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3445
3446 if (mem == NULL || mem->host_ptr)
3447 return;
3448
3449 anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
3450
3451 mem->map = NULL;
3452 mem->map_size = 0;
3453 mem->map_delta = 0;
3454 }
3455
3456 VkResult anv_FlushMappedMemoryRanges(
3457 VkDevice _device,
3458 uint32_t memoryRangeCount,
3459 const VkMappedMemoryRange* pMemoryRanges)
3460 {
3461 ANV_FROM_HANDLE(anv_device, device, _device);
3462
3463 if (!device->physical->memory.need_flush)
3464 return VK_SUCCESS;
3465
3466 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3467 /* Make sure the writes we're flushing have landed. */
3468 __builtin_ia32_mfence();
3469 #endif
3470
3471 for (uint32_t i = 0; i < memoryRangeCount; i++) {
3472 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3473 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3474 continue;
3475
3476 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3477 if (map_offset >= mem->map_size)
3478 continue;
3479
3480 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3481 intel_flush_range(mem->map + map_offset,
3482 MIN2(pMemoryRanges[i].size,
3483 mem->map_size - map_offset));
3484 #endif
3485 }
3486
3487 return VK_SUCCESS;
3488 }
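/* Application-side sketch (illustrative only; example_write_and_flush is a
 * hypothetical helper): writes made through a host-visible but non-coherent
 * mapping must be flushed with vkFlushMappedMemoryRanges() before the GPU
 * reads them, which is the case the intel_flush_range() loop above services.
 */
#if 0
static void
example_write_and_flush(VkDevice device, VkDeviceMemory memory,
                        const void *data, VkDeviceSize size)
{
   void *map = NULL;
   if (vkMapMemory(device, memory, 0, size, 0, &map) != VK_SUCCESS)
      return;

   memcpy(map, data, size);

   const VkMappedMemoryRange range = {
      .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
      .memory = memory,
      .offset = 0,
      .size = VK_WHOLE_SIZE,
   };
   vkFlushMappedMemoryRanges(device, 1, &range);
   vkUnmapMemory(device, memory);
}
#endif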
3489
3490 VkResult anv_InvalidateMappedMemoryRanges(
3491 VkDevice _device,
3492 uint32_t memoryRangeCount,
3493 const VkMappedMemoryRange* pMemoryRanges)
3494 {
3495 ANV_FROM_HANDLE(anv_device, device, _device);
3496
3497 if (!device->physical->memory.need_flush)
3498 return VK_SUCCESS;
3499
3500 for (uint32_t i = 0; i < memoryRangeCount; i++) {
3501 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3502 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3503 continue;
3504
3505 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3506 if (map_offset >= mem->map_size)
3507 continue;
3508
3509 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3510 intel_invalidate_range(mem->map + map_offset,
3511 MIN2(pMemoryRanges[i].size,
3512 mem->map_size - map_offset));
3513 #endif
3514 }
3515
3516 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3517 /* Make sure no reads get moved up above the invalidate. */
3518 __builtin_ia32_mfence();
3519 #endif
3520
3521 return VK_SUCCESS;
3522 }
3523
3524 void anv_GetDeviceMemoryCommitment(
3525 VkDevice device,
3526 VkDeviceMemory memory,
3527 VkDeviceSize* pCommittedMemoryInBytes)
3528 {
3529 *pCommittedMemoryInBytes = 0;
3530 }
3531
3532 static void
3533 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
3534 {
3535 ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
3536 ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
3537
3538 assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
3539
3540 if (mem) {
3541 assert(pBindInfo->memoryOffset < mem->bo->size);
3542 assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
3543 buffer->address = (struct anv_address) {
3544 .bo = mem->bo,
3545 .offset = pBindInfo->memoryOffset,
3546 };
3547 } else {
3548 buffer->address = ANV_NULL_ADDRESS;
3549 }
3550 }
3551
3552 VkResult anv_BindBufferMemory2(
3553 VkDevice device,
3554 uint32_t bindInfoCount,
3555 const VkBindBufferMemoryInfo* pBindInfos)
3556 {
3557 for (uint32_t i = 0; i < bindInfoCount; i++)
3558 anv_bind_buffer_memory(&pBindInfos[i]);
3559
3560 return VK_SUCCESS;
3561 }
3562
3563 VkResult anv_QueueBindSparse(
3564 VkQueue _queue,
3565 uint32_t bindInfoCount,
3566 const VkBindSparseInfo* pBindInfo,
3567 VkFence fence)
3568 {
3569 ANV_FROM_HANDLE(anv_queue, queue, _queue);
3570 if (vk_device_is_lost(&queue->device->vk))
3571 return VK_ERROR_DEVICE_LOST;
3572
3573 return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
3574 }
3575
3576 // Event functions
3577
3578 VkResult anv_CreateEvent(
3579 VkDevice _device,
3580 const VkEventCreateInfo* pCreateInfo,
3581 const VkAllocationCallbacks* pAllocator,
3582 VkEvent* pEvent)
3583 {
3584 ANV_FROM_HANDLE(anv_device, device, _device);
3585 struct anv_event *event;
3586
3587 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
3588
3589 event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
3590 VK_OBJECT_TYPE_EVENT);
3591 if (event == NULL)
3592 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3593
3594 event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
3595 sizeof(uint64_t), 8);
3596 *(uint64_t *)event->state.map = VK_EVENT_RESET;
3597
3598 *pEvent = anv_event_to_handle(event);
3599
3600 return VK_SUCCESS;
3601 }
3602
3603 void anv_DestroyEvent(
3604 VkDevice _device,
3605 VkEvent _event,
3606 const VkAllocationCallbacks* pAllocator)
3607 {
3608 ANV_FROM_HANDLE(anv_device, device, _device);
3609 ANV_FROM_HANDLE(anv_event, event, _event);
3610
3611 if (!event)
3612 return;
3613
3614 anv_state_pool_free(&device->dynamic_state_pool, event->state);
3615
3616 vk_object_free(&device->vk, pAllocator, event);
3617 }
3618
3619 VkResult anv_GetEventStatus(
3620 VkDevice _device,
3621 VkEvent _event)
3622 {
3623 ANV_FROM_HANDLE(anv_device, device, _device);
3624 ANV_FROM_HANDLE(anv_event, event, _event);
3625
3626 if (vk_device_is_lost(&device->vk))
3627 return VK_ERROR_DEVICE_LOST;
3628
3629 return *(uint64_t *)event->state.map;
3630 }
3631
3632 VkResult anv_SetEvent(
3633 VkDevice _device,
3634 VkEvent _event)
3635 {
3636 ANV_FROM_HANDLE(anv_event, event, _event);
3637
3638 *(uint64_t *)event->state.map = VK_EVENT_SET;
3639
3640 return VK_SUCCESS;
3641 }
3642
3643 VkResult anv_ResetEvent(
3644 VkDevice _device,
3645 VkEvent _event)
3646 {
3647 ANV_FROM_HANDLE(anv_event, event, _event);
3648
3649 *(uint64_t *)event->state.map = VK_EVENT_RESET;
3650
3651 return VK_SUCCESS;
3652 }
3653
3654 // Buffer functions
3655
3656 static void
3657 anv_get_buffer_memory_requirements(struct anv_device *device,
3658 VkDeviceSize size,
3659 VkBufferUsageFlags usage,
3660 VkMemoryRequirements2* pMemoryRequirements)
3661 {
3662 /* The Vulkan spec (git aaed022) says:
3663 *
3664 * memoryTypeBits is a bitfield and contains one bit set for every
3665 * supported memory type for the resource. The bit `1<<i` is set if and
3666 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
3667 * structure for the physical device is supported.
3668 */
3669 uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
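/* For example (hypothetical count): with 3 memory types this evaluates to
 * (1ull << 3) - 1 = 0x7, i.e. buffers may be bound to any memory type.
 */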
3670
3671 /* Base alignment requirement of a cache line */
3672 uint32_t alignment = 16;
3673
3674 if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
3675 alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
3676
3677 pMemoryRequirements->memoryRequirements.size = size;
3678 pMemoryRequirements->memoryRequirements.alignment = alignment;
3679
3680 /* Storage and Uniform buffers should have their size aligned to
3681 * 32 bits to avoid boundary checks when the last DWord is not
3682 * complete. This ensures that no internal padding is needed for
3683 * 16-bit types.
3684 */
3685 if (device->vk.enabled_features.robustBufferAccess &&
3686 (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
3687 usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
3688 pMemoryRequirements->memoryRequirements.size = align64(size, 4);
3689
3690 pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3691
3692 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3693 switch (ext->sType) {
3694 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3695 VkMemoryDedicatedRequirements *requirements = (void *)ext;
3696 requirements->prefersDedicatedAllocation = false;
3697 requirements->requiresDedicatedAllocation = false;
3698 break;
3699 }
3700
3701 default:
3702 anv_debug_ignored_stype(ext->sType);
3703 break;
3704 }
3705 }
3706 }
3707
3708 void anv_GetBufferMemoryRequirements2(
3709 VkDevice _device,
3710 const VkBufferMemoryRequirementsInfo2* pInfo,
3711 VkMemoryRequirements2* pMemoryRequirements)
3712 {
3713 ANV_FROM_HANDLE(anv_device, device, _device);
3714 ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3715
3716 anv_get_buffer_memory_requirements(device,
3717 buffer->vk.size,
3718 buffer->vk.usage,
3719 pMemoryRequirements);
3720 }
3721
3722 void anv_GetDeviceBufferMemoryRequirements(
3723 VkDevice _device,
3724 const VkDeviceBufferMemoryRequirements* pInfo,
3725 VkMemoryRequirements2* pMemoryRequirements)
3726 {
3727 ANV_FROM_HANDLE(anv_device, device, _device);
3728
3729 anv_get_buffer_memory_requirements(device,
3730 pInfo->pCreateInfo->size,
3731 pInfo->pCreateInfo->usage,
3732 pMemoryRequirements);
3733 }
3734
3735 VkResult anv_CreateBuffer(
3736 VkDevice _device,
3737 const VkBufferCreateInfo* pCreateInfo,
3738 const VkAllocationCallbacks* pAllocator,
3739 VkBuffer* pBuffer)
3740 {
3741 ANV_FROM_HANDLE(anv_device, device, _device);
3742 struct anv_buffer *buffer;
3743
3744 /* Don't allow creating buffers bigger than our address space. The real
3745 * issue here is that we may align up the buffer size, and we don't want
3746 * that alignment to make the size roll over. In any case, no one has
3747 * any business allocating a buffer larger than our GTT size.
3748 */
3749 if (pCreateInfo->size > device->physical->gtt_size)
3750 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3751
3752 buffer = vk_buffer_create(&device->vk, pCreateInfo,
3753 pAllocator, sizeof(*buffer));
3754 if (buffer == NULL)
3755 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3756
3757 buffer->address = ANV_NULL_ADDRESS;
3758
3759 *pBuffer = anv_buffer_to_handle(buffer);
3760
3761 return VK_SUCCESS;
3762 }
3763
3764 void anv_DestroyBuffer(
3765 VkDevice _device,
3766 VkBuffer _buffer,
3767 const VkAllocationCallbacks* pAllocator)
3768 {
3769 ANV_FROM_HANDLE(anv_device, device, _device);
3770 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3771
3772 if (!buffer)
3773 return;
3774
3775 vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
3776 }
3777
3778 VkDeviceAddress anv_GetBufferDeviceAddress(
3779 VkDevice device,
3780 const VkBufferDeviceAddressInfo* pInfo)
3781 {
3782 ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3783
3784 assert(!anv_address_is_null(buffer->address));
3785 assert(anv_bo_is_pinned(buffer->address.bo));
3786
3787 return anv_address_physical(buffer->address);
3788 }
3789
3790 uint64_t anv_GetBufferOpaqueCaptureAddress(
3791 VkDevice device,
3792 const VkBufferDeviceAddressInfo* pInfo)
3793 {
3794 return 0;
3795 }
3796
3797 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
3798 VkDevice device,
3799 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3800 {
3801 ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
3802
3803 assert(anv_bo_is_pinned(memory->bo));
3804 assert(memory->bo->has_client_visible_address);
3805
3806 return intel_48b_address(memory->bo->offset);
3807 }
3808
3809 void
3810 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
3811 enum isl_format format,
3812 struct isl_swizzle swizzle,
3813 isl_surf_usage_flags_t usage,
3814 struct anv_address address,
3815 uint32_t range, uint32_t stride)
3816 {
3817 isl_buffer_fill_state(&device->isl_dev, state.map,
3818 .address = anv_address_physical(address),
3819 .mocs = isl_mocs(&device->isl_dev, usage,
3820 address.bo && address.bo->is_external),
3821 .size_B = range,
3822 .format = format,
3823 .swizzle = swizzle,
3824 .stride_B = stride);
3825 }
3826
3827 void anv_DestroySampler(
3828 VkDevice _device,
3829 VkSampler _sampler,
3830 const VkAllocationCallbacks* pAllocator)
3831 {
3832 ANV_FROM_HANDLE(anv_device, device, _device);
3833 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
3834
3835 if (!sampler)
3836 return;
3837
3838 if (sampler->bindless_state.map) {
3839 anv_state_pool_free(&device->dynamic_state_pool,
3840 sampler->bindless_state);
3841 }
3842
3843 if (sampler->custom_border_color.map) {
3844 anv_state_reserved_pool_free(&device->custom_border_colors,
3845 sampler->custom_border_color);
3846 }
3847
3848 vk_object_free(&device->vk, pAllocator, sampler);
3849 }
3850
3851 static const VkTimeDomainEXT anv_time_domains[] = {
3852 VK_TIME_DOMAIN_DEVICE_EXT,
3853 VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
3854 #ifdef CLOCK_MONOTONIC_RAW
3855 VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
3856 #endif
3857 };
3858
3859 VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
3860 VkPhysicalDevice physicalDevice,
3861 uint32_t *pTimeDomainCount,
3862 VkTimeDomainEXT *pTimeDomains)
3863 {
3864 int d;
3865 VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
3866
3867 for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
3868 vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
3869 *i = anv_time_domains[d];
3870 }
3871 }
3872
3873 return vk_outarray_status(&out);
3874 }
3875
3876 static uint64_t
3877 anv_clock_gettime(clockid_t clock_id)
3878 {
3879 struct timespec current;
3880 int ret;
3881
3882 ret = clock_gettime(clock_id, &current);
3883 #ifdef CLOCK_MONOTONIC_RAW
3884 if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
3885 ret = clock_gettime(CLOCK_MONOTONIC, &current);
3886 #endif
3887 if (ret < 0)
3888 return 0;
3889
3890 return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
3891 }
3892
3893 VkResult anv_GetCalibratedTimestampsEXT(
3894 VkDevice _device,
3895 uint32_t timestampCount,
3896 const VkCalibratedTimestampInfoEXT *pTimestampInfos,
3897 uint64_t *pTimestamps,
3898 uint64_t *pMaxDeviation)
3899 {
3900 ANV_FROM_HANDLE(anv_device, device, _device);
3901 uint64_t timestamp_frequency = device->info->timestamp_frequency;
3902 int d;
3903 uint64_t begin, end;
3904 uint64_t max_clock_period = 0;
3905
3906 #ifdef CLOCK_MONOTONIC_RAW
3907 begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
3908 #else
3909 begin = anv_clock_gettime(CLOCK_MONOTONIC);
3910 #endif
3911
3912 for (d = 0; d < timestampCount; d++) {
3913 switch (pTimestampInfos[d].timeDomain) {
3914 case VK_TIME_DOMAIN_DEVICE_EXT:
3915 if (!intel_gem_read_render_timestamp(device->fd,
3916 device->info->kmd_type,
3917 &pTimestamps[d])) {
3918 return vk_device_set_lost(&device->vk, "Failed to read the "
3919 "TIMESTAMP register: %m");
3920 }
3921 uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
3922 max_clock_period = MAX2(max_clock_period, device_period);
3923 break;
3924 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
3925 pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
3926 max_clock_period = MAX2(max_clock_period, 1);
3927 break;
3928
3929 #ifdef CLOCK_MONOTONIC_RAW
3930 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
3931 pTimestamps[d] = begin;
3932 break;
3933 #endif
3934 default:
3935 pTimestamps[d] = 0;
3936 break;
3937 }
3938 }
3939
3940 #ifdef CLOCK_MONOTONIC_RAW
3941 end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
3942 #else
3943 end = anv_clock_gettime(CLOCK_MONOTONIC);
3944 #endif
3945
3946 /*
3947 * The maximum deviation is the sum of the interval over which we
3948 * perform the sampling and the maximum period of any sampled
3949 * clock. That's because the maximum skew between any two sampled
3950 * clock edges is when the sampled clock with the largest period is
3951 * sampled at the end of that period but right at the beginning of the
3952 * sampling interval and some other clock is sampled right at the
3953 * beginning of its sampling period and right at the end of the
3954 * sampling interval. Let's assume the GPU has the longest clock
3955 * period and that the application is sampling GPU and monotonic:
3956 *
3957 * s e
3958 * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
3959 * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
3960 *
3961 * g
3962 * 0 1 2 3
3963 * GPU -----_____-----_____-----_____-----_____
3964 *
3965 * m
3966 * x y z 0 1 2 3 4 5 6 7 8 9 a b c
3967 * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
3968 *
3969 * Interval <----------------->
3970 * Deviation <-------------------------->
3971 *
3972 * s = read(raw) 2
3973 * g = read(GPU) 1
3974 * m = read(monotonic) 2
3975 * e = read(raw) b
3976 *
3977 * We round the sample interval up by one tick to cover sampling error
3978 * in the interval clock
3979 */
3980
3981 uint64_t sample_interval = end - begin + 1;
3982
3983 *pMaxDeviation = sample_interval + max_clock_period;
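/* Numeric example (illustrative frequency): a 12.5 MHz GPU timestamp clock
 * gives device_period = DIV_ROUND_UP(1000000000, 12500000) = 80 ns, so a
 * 1000 ns sampling interval reports 1001 + 80 = 1081 ns of max deviation.
 */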
3984
3985 return VK_SUCCESS;
3986 }
3987
3988 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
3989 VkPhysicalDevice physicalDevice,
3990 VkSampleCountFlagBits samples,
3991 VkMultisamplePropertiesEXT* pMultisampleProperties)
3992 {
3993 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3994
3995 assert(pMultisampleProperties->sType ==
3996 VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
3997
3998 VkExtent2D grid_size;
3999 if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
4000 grid_size.width = 1;
4001 grid_size.height = 1;
4002 } else {
4003 grid_size.width = 0;
4004 grid_size.height = 0;
4005 }
4006 pMultisampleProperties->maxSampleLocationGridSize = grid_size;
4007
4008 vk_foreach_struct(ext, pMultisampleProperties->pNext)
4009 anv_debug_ignored_stype(ext->sType);
4010 }
4011