/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/u_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/i915/intel_defines.h"
#include "common/intel_debug_identifier.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen70_pack.h"
#include "genxml/genX_bits.h"

static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_KHR_PRESENT_WAIT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
      DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
      DRI_CONF_NO_16BIT(false)
      DRI_CONF_HASVK_OVERRIDE_API_VERSION(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_QUALITY
      DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
   DRI_CONF_SECTION_END
};
/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH 4096

/* Render engine timestamp register */
#define TIMESTAMP 0x2358

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

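/* Debug-message callback handed to the backend compiler. The message is
 * formatted into a local buffer but is currently not forwarded anywhere (see
 * the commented-out vk_logd() call below).
 */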
static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID_STRICT
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#define ANV_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

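/* Report the instance-level API version advertised by the driver. */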
VkResult anv_EnumerateInstanceVersion(
    uint32_t* pApiVersion)
{
#ifdef ANDROID_STRICT
   *pApiVersion = ANV_API_VERSION;
#else
   *pApiVersion = ANV_API_VERSION_1_3;
#endif
   return VK_SUCCESS;
}

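/* Instance extensions depend only on the platforms this driver was built
 * for, so the table can be filled out statically.
 */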
static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,
   .EXT_debug_utils = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
};

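/* Fill out the table of device extensions supported by this physical device,
 * based on the hardware generation and capabilities detected at probe time.
 */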
static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = device->info.ver >= 8,
      .KHR_16bit_storage = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = device->has_a64_buffer_access,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = has_syncobj_wait,
      .KHR_external_fence_fd = has_syncobj_wait,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_performance_query =
         !anv_use_relocations(device) && device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      /* Hide these behind dri configs for now since we cannot implement it
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = device->info.ver >= 8,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_border_color_swizzle = device->info.ver >= 8,
      .EXT_buffer_device_address = device->has_a64_buffer_access,
      .EXT_calibrated_timestamps = device->has_reg_timestamp,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = device->info.verx10 >= 75,
      .EXT_custom_border_color = device->info.ver >= 8,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clamp_control = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = true,
      .EXT_global_priority = device->max_context_priority >=
                             INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_global_priority_query = device->max_context_priority >=
                                   INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_robustness = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      /* Enable the extension only if we have support on both the local &
       * system memory
       */
      .EXT_memory_budget = device->sys.available,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_primitives_generated_query = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_ycbcr_image_arrays = true,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_performance_query = device->perf &&
                                 intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2 = device->info.ver >= 8,
      .EXT_multi_draw = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

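/* Fill out the supported feature bits (core Vulkan 1.0-1.3 plus extension
 * features) for this physical device.
 */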
static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   /* Just pick one; they're all the same */
   const bool has_astc_ldr =
      isl_format_supports_sampling(&pdevice->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdevice->info.ver >= 12,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = pdevice->info.ver >= 8 ||
                                pdevice->info.platform == INTEL_PLATFORM_BYT,
      .textureCompressionASTC_LDR = has_astc_ldr,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = false,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.ver >= 8 &&
                       pdevice->info.has_64bit_float,
      .shaderInt64 = pdevice->info.ver >= 8,
      .shaderInt16 = pdevice->info.ver >= 8,
      .shaderResourceMinLod = false,
      .variableMultisampleRate = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .storagePushConstant16 = pdevice->info.ver >= 8,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = false,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = pdevice->info.ver >= 8,
      .uniformAndStorageBuffer8BitAccess = pdevice->info.ver >= 8,
      .storagePushConstant8 = pdevice->info.ver >= 8,
      .shaderBufferInt64Atomics = false,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .shaderInt8 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,

      .descriptorIndexing = false,
      .shaderInputAttachmentArrayDynamicIndexing = false,
      .shaderUniformTexelBufferArrayDynamicIndexing = false,
      .shaderStorageTexelBufferArrayDynamicIndexing = false,
      .shaderUniformBufferArrayNonUniformIndexing = false,
      .shaderSampledImageArrayNonUniformIndexing = false,
      .shaderStorageBufferArrayNonUniformIndexing = false,
      .shaderStorageImageArrayNonUniformIndexing = false,
      .shaderInputAttachmentArrayNonUniformIndexing = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing = false,
      .shaderStorageTexelBufferArrayNonUniformIndexing = false,
      .descriptorBindingUniformBufferUpdateAfterBind = false,
      .descriptorBindingSampledImageUpdateAfterBind = false,
      .descriptorBindingStorageImageUpdateAfterBind = false,
      .descriptorBindingStorageBufferUpdateAfterBind = false,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
      .descriptorBindingUpdateUnusedWhilePending = false,
      .descriptorBindingPartiallyBound = false,
      .descriptorBindingVariableDescriptorCount = false,
      .runtimeDescriptorArray = false,

      .samplerFilterMinmax = false,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressCaptureReplay = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex = true,
      .shaderOutputLayer = true,
      .subgroupBroadcastDynamicId = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = false,

      /* VK_NV_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = pdevice->info.verx10 >= 75,
      .inheritedConditionalRendering = pdevice->info.verx10 >= 75,

      /* VK_EXT_custom_border_color */
      .customBorderColors = pdevice->info.ver >= 8,
      .customBorderColorWithoutFormat = pdevice->info.ver >= 8,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_KHR_global_priority */
      .globalPriorityQuery = true,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL. See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = false,
      .bresenhamLines = true,
      /* Support for Smooth lines with MSAA was removed on gfx11. From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:########    NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics = true,
      .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd = false,
      .shaderSharedFloat32Atomics = true,
      .shaderSharedFloat32AtomicAdd = false,
      .shaderSharedFloat64Atomics = false,
      .shaderSharedFloat64AtomicAdd = false,
      .shaderImageFloat32Atomics = true,
      .shaderImageFloat32AtomicAdd = false,
      .sparseImageFloat32Atomics = false,
      .sparseImageFloat32AtomicAdd = false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_EXT_shader_replicated_composites */
      .shaderReplicatedComposites = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clamp_control */
      .depthClampControl = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,
   };

   /* We can't do image stores in vec4 shaders */
   features->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it. They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
}


#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256

#define MAX_CUSTOM_BORDER_COLORS 4096

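/* Vulkan 1.1 core properties. */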
static void
get_properties_1_1(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = ELK_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT;
   if (pdevice->info.ver >= 8) {
      /* TODO: There's no technical reason why these can't be made to
       * work on gfx7 but they don't at the moment so it's best to leave
       * the feature disabled than enabled and broken.
       */
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                        VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
   }
   p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount = 16;
   p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
   p->protectedNoFault = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors = 1024;
   p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
}

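/* Vulkan 1.2 core properties. */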
static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* Don't advertise conformance with a particular version if the hardware's
    * support is incomplete/alpha.
    */
   if (pdevice->is_alpha) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      };
   } else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = pdevice->use_softpin ? 3 : 2,
         .subminor = 0,
         .patch = 0,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16 = false;
   p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = true;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = pdevice->info.ver >= 8;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = true;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = true;
   p->shaderDenormPreserveFloat64 = true;
   p->shaderRoundingModeRTEFloat64 = true;
   p->shaderRoundingModeRTZFloat64 = true;
   p->shaderSignedZeroInfNanPreserveFloat64 = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan. The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address. This means that we can have at most 1M surface states
    * allocated at any given time. Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time. The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
   const unsigned max_bindless_views = 1 << 20;
   p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                   VK_RESOLVE_MODE_AVERAGE_BIT |
                                   VK_RESOLVE_MODE_MIN_BIT |
                                   VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   if (pdevice->info.ver >= 8) {
      /* The advanced stencil resolve modes currently require stencil
       * sampling be supported by the hardware.
       */
      p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
                                         VK_RESOLVE_MODE_MAX_BIT;
   }
   p->independentResolveNone = true;
   p->independentResolve = true;

   p->filterMinmaxSingleComponentFormats = false;
   p->filterMinmaxImageComponentMapping = false;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev);
}

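/* Vulkan 1.3 core properties. */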
static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces. It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

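/* Top-level property query: fills the Vulkan 1.0 limits, then the core 1.1,
 * 1.2 and 1.3 properties, then the per-extension property structs.
 */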
static void
get_properties(const struct anv_physical_device *pdevice,
               struct vk_properties *props)
{
   const struct intel_device_info *devinfo = &pdevice->info;

   const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
   const uint32_t max_textures = 128;
   const uint32_t max_samplers =
      pdevice->has_bindless_samplers ? UINT16_MAX :
      (devinfo->verx10 >= 75) ? 128 : 16;
   const uint32_t max_images = MAX_IMAGES;

   /* If we can use bindless for everything, claim a high per-stage limit,
    * otherwise use the binding table size, minus the slots reserved for
    * render targets and one slot for the descriptor buffer.
    */
   const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;

   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   *props = (struct vk_properties) {
#if DETECT_OS_ANDROID
      .apiVersion = ANV_API_VERSION,
#else
      .apiVersion = pdevice->use_softpin ? ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2,
#endif /* DETECT_OS_ANDROID */
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Limits: */
      .maxImageDimension1D = (1 << 14),
      /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their
       * width is greater than 8192 pixels.
       */
      .maxImageDimension2D = devinfo->ver == 7 ? (1 << 13) : (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_samplers,
      .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers = max_ssbos,
      .maxPerStageDescriptorSampledImages = max_textures,
      .maxPerStageDescriptorStorageImages = max_images,
      .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources = max_per_stage,
      .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes = MAX_VES,
      .maxVertexInputBindings = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset = 2047,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
       *
       * Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
      .maxComputeSharedMemorySize = 64 * 1024,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 13, /* We take a float? */
      .minMemoryMapAlignment = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment = 16,
      .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 11),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      /* Multisampling with SINT formats is not supported on gfx7 */
      .sampledImageIntegerSampleCounts = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 128.0),
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,

      /* Broadwell doesn't do sparse. */
      .sparseResidencyStandard2DBlockShape = false,
      .sparseResidencyStandard2DMultisampleBlockShape = false,
      .sparseResidencyStandard3DBlockShape = false,
      .sparseResidencyAlignedMipSize = false,
      .sparseResidencyNonResidentStrict = false,
   };

   snprintf(props->deviceName, sizeof(props->deviceName),
            "%s", pdevice->info.name);
   memcpy(props->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);

   get_properties_1_1(pdevice, props);
   get_properties_1_2(pdevice, props);
   get_properties_1_3(pdevice, props);

   /* VK_KHR_performance_query */
   {
      /* We could support this by spawning a shader to do the equation
       * normalization.
       */
      props->allowCommandBufferQueryCopies = false;
   }

   /* VK_KHR_push_descriptor */
   {
      props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
   }

   /* VK_KHR_vertex_attribute_divisor */
   {
      /* We have to restrict this a bit for multiview */
      props->maxVertexAttribDivisor = UINT32_MAX / 16;
   }

   /* VK_EXT_custom_border_color */
   {
      props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
   }

   /* VK_EXT_external_memory_host */
   {
      /* Userptr needs page aligned memory. */
      props->minImportedHostPointerAlignment = 4096;
   }

   /* VK_EXT_line_rasterization */
   {
      /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
       * Rules - Legacy Mode", it says the following:
       *
       *    "Note that the device divides a pixel into a 16x16 array of
       *    subpixels, referenced by their upper left corners."
       *
       * This is the only known reference in the PRMs to the subpixel
       * precision of line rasterization and a "16x16 array of subpixels"
       * implies 4 subpixel precision bits. Empirical testing has shown that 4
       * subpixel precision bits applies to all line rasterization types.
       */
      props->lineSubPixelPrecisionBits = 4;
   }

   /* VK_EXT_multi_draw */
   {
      props->maxMultiDrawCount = 2048;
   }

   /* VK_EXT_pci_bus_info */
   {
      props->pciDomain = pdevice->info.pci_domain;
      props->pciBus = pdevice->info.pci_bus;
      props->pciDevice = pdevice->info.pci_dev;
      props->pciFunction = pdevice->info.pci_func;
   }

   /* VK_EXT_physical_device_drm */
   {
      props->drmHasPrimary = pdevice->has_master;
      props->drmPrimaryMajor = pdevice->master_major;
      props->drmPrimaryMinor = pdevice->master_minor;
      props->drmHasRender = pdevice->has_local;
      props->drmRenderMajor = pdevice->local_major;
      props->drmRenderMinor = pdevice->local_minor;
   }

   /* VK_EXT_provoking_vertex */
   {
      props->provokingVertexModePerPipeline = true;
      props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
   }

   /* VK_EXT_robustness2 */
   {
      props->robustStorageBufferAccessSizeAlignment =
         ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
      props->robustUniformBufferAccessSizeAlignment =
         ANV_UBO_ALIGNMENT;
   }

   /* VK_EXT_sample_locations */
   {
      props->sampleLocationSampleCounts =
         isl_device_get_sample_counts(&pdevice->isl_dev);

      /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
      props->maxSampleLocationGridSize.width = 1;
      props->maxSampleLocationGridSize.height = 1;

      props->sampleLocationCoordinateRange[0] = 0;
      props->sampleLocationCoordinateRange[1] = 0.9375;
      props->sampleLocationSubPixelBits = 4;

      props->variableSampleLocations = true;
   }

   /* VK_EXT_shader_module_identifier */
   {
      STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                    sizeof(props->shaderModuleIdentifierAlgorithmUUID));
      memcpy(props->shaderModuleIdentifierAlgorithmUUID,
             vk_shaderModuleIdentifierAlgorithmUUID,
             sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   }

   /* VK_EXT_transform_feedback */
   {
      props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
      props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
      props->maxTransformFeedbackBufferSize = (1ull << 32);
      props->maxTransformFeedbackStreamDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataStride = 2048;
      props->transformFeedbackQueries = true;
      props->transformFeedbackStreamsLinesTriangles = false;
      props->transformFeedbackRasterizationStreamSelect = false;
      /* This requires MI_MATH */
      props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
   }

   /* VK_ANDROID_native_buffer */
#if DETECT_OS_ANDROID
   {
      props->sharedImage = VK_FALSE;
   }
#endif /* DETECT_OS_ANDROID */

}

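/* Pick the size advertised for the system memory heap, leaving headroom in
 * the GTT for allocations made internally by the driver.
 */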
static uint64_t
anv_compute_sys_heap_size(struct anv_physical_device *device,
                          uint64_t available_ram)
{
   /* We want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);

   if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail. Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
                "no support for 48-bit addresses",
                __FILE__, __LINE__);
      available_ram = 2ull << 30;
   }

   return available_ram;
}

static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->sys.size =
      anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
   device->sys.available = devinfo->mem.sram.mappable.free;

   return VK_SUCCESS;
}

static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   if (!intel_device_info_update_memory_info(&device->info, fd))
      return;

   const struct intel_device_info *devinfo = &device->info;
   device->sys.available = devinfo->mem.sram.mappable.free;
}

static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (device->info.has_llc) {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       *
       * But some game engines can't handle a single type well:
       * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
       *
       * And Intel on Windows uses 3 types, so it's better to add an extra
       * one here.
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC. Thus we offer two memory types
       * to give the application a choice between cached but not coherent,
       * and coherent but uncached (WC though).
       */
1357 device->memory.type_count = 2;
1358 device->memory.types[0] = (struct anv_memory_type) {
1359 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1360 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1361 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1362 .heapIndex = 0,
1363 };
1364 device->memory.types[1] = (struct anv_memory_type) {
1365 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1366 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1367 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1368 .heapIndex = 0,
1369 };
1370 }
1371
1372 device->memory.need_flush = false;
1373 for (unsigned i = 0; i < device->memory.type_count; i++) {
1374 VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
1375 if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
1376 !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
1377 device->memory.need_flush = true;
1378 }
1379
1380 return VK_SUCCESS;
1381 }
1382
1383 static VkResult
anv_physical_device_init_uuids(struct anv_physical_device * device)1384 anv_physical_device_init_uuids(struct anv_physical_device *device)
1385 {
1386 const struct build_id_note *note =
1387 build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
1388 if (!note) {
1389 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1390 "Failed to find build-id");
1391 }
1392
1393 unsigned build_id_len = build_id_length(note);
1394 if (build_id_len < 20) {
1395 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1396 "build-id too short. It needs to be a SHA");
1397 }
1398
1399 memcpy(device->driver_build_sha1, build_id_data(note), 20);
1400
1401 struct mesa_sha1 sha1_ctx;
1402 uint8_t sha1[20];
1403 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
1404
1405 /* The pipeline cache UUID is used for determining when a pipeline cache is
1406 * invalid. It needs both a driver build and the PCI ID of the device.
1407 */
1408 _mesa_sha1_init(&sha1_ctx);
1409 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
1410 _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
1411 sizeof(device->info.pci_device_id));
1412 _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
1413 sizeof(device->always_use_bindless));
1414 _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
1415 sizeof(device->has_a64_buffer_access));
1416 _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
1417 sizeof(device->has_bindless_samplers));
1418 _mesa_sha1_final(&sha1_ctx, sha1);
1419 memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
1420
1421 intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
1422 intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
1423
1424 return VK_SUCCESS;
1425 }
1426
1427 static void
anv_physical_device_init_disk_cache(struct anv_physical_device * device)1428 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
1429 {
1430 #ifdef ENABLE_SHADER_CACHE
1431 char renderer[10];
1432 ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
1433 device->info.pci_device_id);
1434 assert(len == sizeof(renderer) - 2);
1435
1436 char timestamp[41];
1437 _mesa_sha1_format(timestamp, device->driver_build_sha1);
1438
1439 const uint64_t driver_flags =
1440 elk_get_compiler_config_value(device->compiler);
1441 device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1442 #endif
1443 }
1444
1445 static void
1446 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
1447 {
1448 #ifdef ENABLE_SHADER_CACHE
1449 if (device->vk.disk_cache) {
1450 disk_cache_destroy(device->vk.disk_cache);
1451 device->vk.disk_cache = NULL;
1452 }
1453 #else
1454 assert(device->vk.disk_cache == NULL);
1455 #endif
1456 }
1457
1458 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
1459 * queue overrides.
1460 *
1461 * To override the number of queues:
1462 * * "gc" is for graphics queues with compute support
1463 * * "g" is for graphics queues with no compute support
1464 * * "c" is for compute queues with no graphics support
1465 *
1466 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
1467 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
1468 * with compute-only support.
1469 *
1470 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
1471 * include 1 queue with compute-only support, but it will not change the
1472 * number of graphics+compute queues.
1473 *
1474 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
1475 * to include 1 queue with compute-only support, and it would override the
1476 * number of graphics+compute queues to be 0.
1477 */
1478 static void
1479 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
1480 {
1481 int gc_override = -1;
1482 int g_override = -1;
1483 int c_override = -1;
1484 const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");
1485
1486 if (env_ == NULL)
1487 return;
1488
1489 char *env = strdup(env_);
1490 char *save = NULL;
1491 char *next = strtok_r(env, ",", &save);
1492 while (next != NULL) {
1493 if (strncmp(next, "gc=", 3) == 0) {
1494 gc_override = strtol(next + 3, NULL, 0);
1495 } else if (strncmp(next, "g=", 2) == 0) {
1496 g_override = strtol(next + 2, NULL, 0);
1497 } else if (strncmp(next, "c=", 2) == 0) {
1498 c_override = strtol(next + 2, NULL, 0);
1499 } else {
1500 mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
1501 }
1502 next = strtok_r(NULL, ",", &save);
1503 }
1504 free(env);
1505 if (gc_override >= 0)
1506 *gc_count = gc_override;
1507 if (g_override >= 0)
1508 *g_count = g_override;
1509 if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
1510 mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
1511 "Vulkan specification");
1512 if (c_override >= 0)
1513 *c_count = c_override;
1514 }
1515
1516 static void
1517 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
1518 {
1519 uint32_t family_count = 0;
1520
1521 if (pdevice->engine_info) {
1522 int gc_count =
1523 intel_engines_count(pdevice->engine_info,
1524 INTEL_ENGINE_CLASS_RENDER);
1525 int g_count = 0;
1526 int c_count = 0;
1527
1528 anv_override_engine_counts(&gc_count, &g_count, &c_count);
1529
1530 if (gc_count > 0) {
1531 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1532 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1533 VK_QUEUE_COMPUTE_BIT |
1534 VK_QUEUE_TRANSFER_BIT,
1535 .queueCount = gc_count,
1536 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1537 };
1538 }
1539 if (g_count > 0) {
1540 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1541 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1542 VK_QUEUE_TRANSFER_BIT,
1543 .queueCount = g_count,
1544 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1545 };
1546 }
1547 if (c_count > 0) {
1548 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1549 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1550 VK_QUEUE_TRANSFER_BIT,
1551 .queueCount = c_count,
1552 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1553 };
1554 }
1555 /* When other queue families are added, bump the count in the assert below
1556 * as a reminder to also increase the ANV_MAX_QUEUE_FAMILIES value.
1557 */
1558 STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
1559 } else {
1560 /* Default to a single render queue */
1561 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1562 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1563 VK_QUEUE_COMPUTE_BIT |
1564 VK_QUEUE_TRANSFER_BIT,
1565 .queueCount = 1,
1566 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1567 };
1568 family_count = 1;
1569 }
1570 assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
1571 pdevice->queue.family_count = family_count;
1572 }
1573
1574 static VkResult
1575 anv_physical_device_try_create(struct vk_instance *vk_instance,
1576 struct _drmDevice *drm_device,
1577 struct vk_physical_device **out)
1578 {
1579 struct anv_instance *instance =
1580 container_of(vk_instance, struct anv_instance, vk);
1581
1582 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1583 drm_device->bustype != DRM_BUS_PCI ||
1584 drm_device->deviceinfo.pci->vendor_id != 0x8086)
1585 return VK_ERROR_INCOMPATIBLE_DRIVER;
1586
1587 const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
1588 const char *path = drm_device->nodes[DRM_NODE_RENDER];
1589 VkResult result;
1590 int fd;
1591 int master_fd = -1;
1592
1593 process_intel_debug_variable();
1594
1595 fd = open(path, O_RDWR | O_CLOEXEC);
1596 if (fd < 0) {
1597 if (errno == ENOMEM) {
1598 return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1599 "Unable to open device %s: out of memory", path);
1600 }
1601 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1602 "Unable to open device %s: %m", path);
1603 }
1604
1605 struct intel_device_info devinfo;
1606 if (!intel_get_device_info_from_fd(fd, &devinfo, 7, 8)) {
1607 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1608 goto fail_fd;
1609 }
1610
1611 bool is_alpha = true;
1612 bool warn = !debug_get_bool_option("MESA_VK_IGNORE_CONFORMANCE_WARNING", false);
1613 if (devinfo.platform == INTEL_PLATFORM_HSW) {
1614 if (warn)
1615 mesa_logw("Haswell Vulkan support is incomplete");
1616 } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
1617 if (warn)
1618 mesa_logw("Ivy Bridge Vulkan support is incomplete");
1619 } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
1620 if (warn)
1621 mesa_logw("Bay Trail Vulkan support is incomplete");
1622 } else if (devinfo.ver == 8) {
1623 /* Gfx8 fully supported */
1624 is_alpha = false;
1625 } else {
1626 /* Silently fail here; anv will either pick up this device or display an
1627 * error message.
1628 */
1629 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1630 goto fail_fd;
1631 }
1632
1633 struct anv_physical_device *device =
1634 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1635 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1636 if (device == NULL) {
1637 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1638 goto fail_fd;
1639 }
1640
1641 struct vk_physical_device_dispatch_table dispatch_table;
1642 vk_physical_device_dispatch_table_from_entrypoints(
1643 &dispatch_table, &anv_physical_device_entrypoints, true);
1644 vk_physical_device_dispatch_table_from_entrypoints(
1645 &dispatch_table, &wsi_physical_device_entrypoints, false);
1646
1647 result = vk_physical_device_init(&device->vk, &instance->vk,
1648 NULL, NULL, NULL, /* We set up extensions later */
1649 &dispatch_table);
1650 if (result != VK_SUCCESS) {
1651 vk_error(instance, result);
1652 goto fail_alloc;
1653 }
1654 device->instance = instance;
1655
1656 assert(strlen(path) < ARRAY_SIZE(device->path));
1657 snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
1658
1659 device->info = devinfo;
1660 device->is_alpha = is_alpha;
1661
1662 device->cmd_parser_version = -1;
1663 if (device->info.ver == 7) {
1664 if (!intel_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION, &device->cmd_parser_version) ||
1665 device->cmd_parser_version == -1) {
1666 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1667 "failed to get command parser version");
1668 goto fail_base;
1669 }
1670 }
1671
1672 int val;
1673 if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
1674 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1675 "kernel missing gem wait");
1676 goto fail_base;
1677 }
1678
1679 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
1680 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1681 "kernel missing execbuf2");
1682 goto fail_base;
1683 }
1684
1685 if (!device->info.has_llc &&
1686 (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
1687 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1688 "kernel missing wc mmap");
1689 goto fail_base;
1690 }
1691
1692 device->use_relocations = device->info.ver < 8 ||
1693 device->info.platform == INTEL_PLATFORM_CHV;
1694
1695 if (!device->use_relocations &&
1696 (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val)) {
1697 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1698 "kernel missing softpin");
1699 goto fail_base;
1700 }
1701
1702 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
1703 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1704 "kernel missing syncobj support");
1705 goto fail_base;
1706 }
1707
1708 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
1709 device->has_exec_async = val;
1710 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
1711 device->has_exec_capture = val;
1712
1713 /* Start with medium; sorted low to high */
1714 const int priorities[] = {
1715 INTEL_CONTEXT_MEDIUM_PRIORITY,
1716 INTEL_CONTEXT_HIGH_PRIORITY,
1717 INTEL_CONTEXT_REALTIME_PRIORITY,
1718 };
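   /* Probe each priority in ascending order and remember the highest one
    * the kernel accepts; the loop stops at the first priority this process
    * is not allowed to use.
    */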
1719 device->max_context_priority = INT_MIN;
1720 for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
1721 if (!anv_gem_has_context_priority(fd, priorities[i]))
1722 break;
1723 device->max_context_priority = priorities[i];
1724 }
1725
1726 device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
1727 device->info.aperture_bytes;
1728
1729 /* We only allow 48-bit addresses with softpin because knowing the actual
1730 * address is required for the vertex cache flush workaround.
1731 */
1732 device->supports_48bit_addresses = (device->info.ver >= 8) &&
1733 device->gtt_size > (4ULL << 30 /* GiB */);
1734
1735 result = anv_physical_device_init_heaps(device, fd);
1736 if (result != VK_SUCCESS)
1737 goto fail_base;
1738
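   /* hasvk ties softpin and 48-bit addressing together: Gfx8+ (other than
    * Cherryview) is expected to expose a >4GiB GTT, while the
    * relocation-based platforms stay on 32-bit addresses.
    */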
1739 assert(device->supports_48bit_addresses == !device->use_relocations);
1740 device->use_softpin = !device->use_relocations;
1741
1742 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
1743 device->has_exec_timeline = val;
1744 if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
1745 device->has_exec_timeline = false;
1746
1747 unsigned st_idx = 0;
1748
1749 device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
1750 if (!device->has_exec_timeline)
1751 device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
1752 device->sync_types[st_idx++] = &device->sync_syncobj_type;
1753
1754 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
1755 device->sync_types[st_idx++] = &anv_bo_sync_type;
1756
1757 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
1758 device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
1759 device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
1760 }
1761
1762 device->sync_types[st_idx++] = NULL;
1763 assert(st_idx <= ARRAY_SIZE(device->sync_types));
1764 device->vk.supported_sync_types = device->sync_types;
1765
1766 device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
1767
1768 device->always_use_bindless =
1769 debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
1770
1771 device->use_call_secondary =
1772 device->use_softpin &&
1773 !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
1774
1775 /* We first got the A64 messages on Broadwell, and we can only use them
1776 * if we can pass addresses directly into the shader, which requires softpin.
1777 */
1778 device->has_a64_buffer_access = device->info.ver >= 8 &&
1779 device->use_softpin;
1780
1781 /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
1782 * because it's just a matter of setting the sampler address in the sample
1783 * message header. However, we've not bothered to wire it up for vec4 so
1784 * we leave it disabled on gfx7.
1785 */
1786 device->has_bindless_samplers = device->info.ver >= 8;
1787
1788 /* Check if we can read the GPU timestamp register from the CPU */
1789 uint64_t u64_ignore;
1790 device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
1791 device->info.kmd_type,
1792 &u64_ignore);
1793
1794 device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
1795 driQueryOptionb(&instance->dri_options, "always_flush_cache");
1796
1797 device->compiler = elk_compiler_create(NULL, &device->info);
1798 if (device->compiler == NULL) {
1799 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1800 goto fail_base;
1801 }
1802 device->compiler->shader_debug_log = compiler_debug_log;
1803 device->compiler->shader_perf_log = compiler_perf_log;
1804 device->compiler->constant_buffer_0_is_relative =
1805 device->info.ver < 8 || !device->info.has_context_isolation;
1806 device->compiler->supports_shader_constants = true;
1807
1808 isl_device_init(&device->isl_dev, &device->info);
1809
1810 result = anv_physical_device_init_uuids(device);
1811 if (result != VK_SUCCESS)
1812 goto fail_compiler;
1813
1814 anv_physical_device_init_disk_cache(device);
1815
1816 if (instance->vk.enabled_extensions.KHR_display) {
1817 master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
1818 if (master_fd >= 0) {
1819 /* fail if we don't have permission to even render on this device */
1820 if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
1821 close(master_fd);
1822 master_fd = -1;
1823 }
1824 }
1825 }
1826 device->master_fd = master_fd;
1827
1828 device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
1829 anv_physical_device_init_queue_families(device);
1830
1831 device->local_fd = fd;
1832
1833 anv_physical_device_init_perf(device, fd);
1834
1835 /* Gather major/minor before WSI. */
1836 struct stat st;
1837
1838 if (stat(primary_path, &st) == 0) {
1839 device->has_master = true;
1840 device->master_major = major(st.st_rdev);
1841 device->master_minor = minor(st.st_rdev);
1842 } else {
1843 device->has_master = false;
1844 device->master_major = 0;
1845 device->master_minor = 0;
1846 }
1847
1848 if (stat(path, &st) == 0) {
1849 device->has_local = true;
1850 device->local_major = major(st.st_rdev);
1851 device->local_minor = minor(st.st_rdev);
1852 } else {
1853 device->has_local = false;
1854 device->local_major = 0;
1855 device->local_minor = 0;
1856 }
1857
1858 get_device_extensions(device, &device->vk.supported_extensions);
1859 get_features(device, &device->vk.supported_features);
1860 get_properties(device, &device->vk.properties);
1861
1862 result = anv_init_wsi(device);
1863 if (result != VK_SUCCESS)
1864 goto fail_perf;
1865
1866 anv_measure_device_init(device);
1867
1868 anv_genX(&device->info, init_physical_device_state)(device);
1869
1870 *out = &device->vk;
1871
1872 return VK_SUCCESS;
1873
1874 fail_perf:
1875 intel_perf_free(device->perf);
1876 free(device->engine_info);
1877 anv_physical_device_free_disk_cache(device);
1878 fail_compiler:
1879 ralloc_free(device->compiler);
1880 fail_base:
1881 vk_physical_device_finish(&device->vk);
1882 fail_alloc:
1883 vk_free(&instance->vk.alloc, device);
1884 fail_fd:
1885 close(fd);
1886 if (master_fd != -1)
1887 close(master_fd);
1888 return result;
1889 }
1890
1891 static void
1892 anv_physical_device_destroy(struct vk_physical_device *vk_device)
1893 {
1894 struct anv_physical_device *device =
1895 container_of(vk_device, struct anv_physical_device, vk);
1896
1897 anv_finish_wsi(device);
1898 anv_measure_device_destroy(device);
1899 free(device->engine_info);
1900 anv_physical_device_free_disk_cache(device);
1901 ralloc_free(device->compiler);
1902 intel_perf_free(device->perf);
1903 close(device->local_fd);
1904 if (device->master_fd >= 0)
1905 close(device->master_fd);
1906 vk_physical_device_finish(&device->vk);
1907 vk_free(&device->instance->vk.alloc, device);
1908 }
1909
1910 VkResult anv_EnumerateInstanceExtensionProperties(
1911 const char* pLayerName,
1912 uint32_t* pPropertyCount,
1913 VkExtensionProperties* pProperties)
1914 {
1915 if (pLayerName)
1916 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1917
1918 return vk_enumerate_instance_extension_properties(
1919 &instance_extensions, pPropertyCount, pProperties);
1920 }
1921
1922 static void
1923 anv_init_dri_options(struct anv_instance *instance)
1924 {
1925 driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1926 ARRAY_SIZE(anv_dri_options));
1927 driParseConfigFiles(&instance->dri_options,
1928 &instance->available_dri_options, 0, "anv", NULL, NULL,
1929 instance->vk.app_info.app_name,
1930 instance->vk.app_info.app_version,
1931 instance->vk.app_info.engine_name,
1932 instance->vk.app_info.engine_version);
1933
1934 instance->assume_full_subgroups =
1935 driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
1936 instance->limit_trig_input_range =
1937 driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1938 instance->sample_mask_out_opengl_behaviour =
1939 driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1940 instance->lower_depth_range_rate =
1941 driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1942 instance->no_16bit =
1943 driQueryOptionb(&instance->dri_options, "no_16bit");
1944 instance->report_vk_1_3 =
1945 driQueryOptionb(&instance->dri_options, "hasvk_report_vk_1_3_version");
1946 }
1947
1948 VkResult anv_CreateInstance(
1949 const VkInstanceCreateInfo* pCreateInfo,
1950 const VkAllocationCallbacks* pAllocator,
1951 VkInstance* pInstance)
1952 {
1953 struct anv_instance *instance;
1954 VkResult result;
1955
1956 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1957
1958 if (pAllocator == NULL)
1959 pAllocator = vk_default_allocator();
1960
1961 instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1962 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1963 if (!instance)
1964 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1965
1966 struct vk_instance_dispatch_table dispatch_table;
1967 vk_instance_dispatch_table_from_entrypoints(
1968 &dispatch_table, &anv_instance_entrypoints, true);
1969 vk_instance_dispatch_table_from_entrypoints(
1970 &dispatch_table, &wsi_instance_entrypoints, false);
1971
1972 result = vk_instance_init(&instance->vk, &instance_extensions,
1973 &dispatch_table, pCreateInfo, pAllocator);
1974 if (result != VK_SUCCESS) {
1975 vk_free(pAllocator, instance);
1976 return vk_error(NULL, result);
1977 }
1978
1979 instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1980 instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1981
1982 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1983
1984 anv_init_dri_options(instance);
1985
1986 intel_driver_ds_init();
1987
1988 *pInstance = anv_instance_to_handle(instance);
1989
1990 return VK_SUCCESS;
1991 }
1992
1993 void anv_DestroyInstance(
1994 VkInstance _instance,
1995 const VkAllocationCallbacks* pAllocator)
1996 {
1997 ANV_FROM_HANDLE(anv_instance, instance, _instance);
1998
1999 if (!instance)
2000 return;
2001
2002 VG(VALGRIND_DESTROY_MEMPOOL(instance));
2003
2004 driDestroyOptionCache(&instance->dri_options);
2005 driDestroyOptionInfo(&instance->available_dri_options);
2006
2007 vk_instance_finish(&instance->vk);
2008 vk_free(&instance->vk.alloc, instance);
2009 }
2010
2011 static int
2012 vk_priority_to_gen(int priority)
2013 {
2014 switch (priority) {
2015 case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
2016 return INTEL_CONTEXT_LOW_PRIORITY;
2017 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
2018 return INTEL_CONTEXT_MEDIUM_PRIORITY;
2019 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
2020 return INTEL_CONTEXT_HIGH_PRIORITY;
2021 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
2022 return INTEL_CONTEXT_REALTIME_PRIORITY;
2023 default:
2024 unreachable("Invalid priority");
2025 }
2026 }
2027
2028 static const VkQueueFamilyProperties
2029 anv_queue_family_properties_template = {
2030 .timestampValidBits = 36, /* XXX: Real value here */
2031 .minImageTransferGranularity = { 1, 1, 1 },
2032 };
2033
2034 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2035 VkPhysicalDevice physicalDevice,
2036 uint32_t* pQueueFamilyPropertyCount,
2037 VkQueueFamilyProperties2* pQueueFamilyProperties)
2038 {
2039 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2040 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2041 pQueueFamilyProperties, pQueueFamilyPropertyCount);
2042
2043 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2044 struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2045 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2046 p->queueFamilyProperties = anv_queue_family_properties_template;
2047 p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2048 p->queueFamilyProperties.queueCount = queue_family->queueCount;
2049
2050 vk_foreach_struct(ext, p->pNext) {
2051 switch (ext->sType) {
2052 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2053 VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2054 (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2055
2056 /* Deliberately sorted low to high */
2057 VkQueueGlobalPriorityKHR all_priorities[] = {
2058 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2059 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2060 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2061 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2062 };
2063
2064 uint32_t count = 0;
2065 for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2066 if (vk_priority_to_gen(all_priorities[i]) >
2067 pdevice->max_context_priority)
2068 break;
2069
2070 properties->priorities[count++] = all_priorities[i];
2071 }
2072 properties->priorityCount = count;
2073 break;
2074 }
2075
2076 default:
2077 vk_debug_ignored_stype(ext->sType);
2078 }
2079 }
2080 }
2081 }
2082 }
2083
2084 void anv_GetPhysicalDeviceMemoryProperties(
2085 VkPhysicalDevice physicalDevice,
2086 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2087 {
2088 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2089
2090 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2091 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2092 pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2093 .propertyFlags = physical_device->memory.types[i].propertyFlags,
2094 .heapIndex = physical_device->memory.types[i].heapIndex,
2095 };
2096 }
2097
2098 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2099 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2100 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2101 .size = physical_device->memory.heaps[i].size,
2102 .flags = physical_device->memory.heaps[i].flags,
2103 };
2104 }
2105 }
2106
2107 static void
2108 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2109 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2110 {
2111 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2112
2113 if (!device->vk.supported_extensions.EXT_memory_budget)
2114 return;
2115
2116 anv_update_meminfo(device, device->local_fd);
2117
2118 VkDeviceSize total_sys_heaps_size = 0;
2119 for (size_t i = 0; i < device->memory.heap_count; i++)
2120 total_sys_heaps_size += device->memory.heaps[i].size;
2121
2122 for (size_t i = 0; i < device->memory.heap_count; i++) {
2123 VkDeviceSize heap_size = device->memory.heaps[i].size;
2124 VkDeviceSize heap_used = device->memory.heaps[i].used;
2125 VkDeviceSize heap_budget, total_heaps_size;
2126 uint64_t mem_available = 0;
2127
2128 total_heaps_size = total_sys_heaps_size;
2129 mem_available = device->sys.available;
2130
2131 double heap_proportion = (double) heap_size / total_heaps_size;
2132 VkDeviceSize available_prop = mem_available * heap_proportion;
2133
2134 /*
2135 * Let's not incite the app to starve the system: report at most 90% of
2136 * the available heap memory.
2137 */
2138 uint64_t heap_available = available_prop * 9 / 10;
2139 heap_budget = MIN2(heap_size, heap_used + heap_available);
2140
2141 /*
2142 * Round down to the nearest MB
2143 */
2144 heap_budget &= ~((1ull << 20) - 1);
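      /* Example with hypothetical numbers: a single 8 GiB heap with 2 GiB
       * already in use and 5 GiB reported available by the system gives
       * heap_budget = min(8 GiB, 2 GiB + 0.9 * 5 GiB) = 6.5 GiB, which is
       * then rounded down to a whole MiB.
       */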
2145
2146 /*
2147 * The heapBudget value must be non-zero for array elements less than
2148 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2149 * value must be less than or equal to VkMemoryHeap::size for each heap.
2150 */
2151 assert(0 < heap_budget && heap_budget <= heap_size);
2152
2153 memoryBudget->heapUsage[i] = heap_used;
2154 memoryBudget->heapBudget[i] = heap_budget;
2155 }
2156
2157 /* The heapBudget and heapUsage values must be zero for array elements
2158 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2159 */
2160 for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2161 memoryBudget->heapBudget[i] = 0;
2162 memoryBudget->heapUsage[i] = 0;
2163 }
2164 }
2165
2166 void anv_GetPhysicalDeviceMemoryProperties2(
2167 VkPhysicalDevice physicalDevice,
2168 VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
2169 {
2170 anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2171 &pMemoryProperties->memoryProperties);
2172
2173 vk_foreach_struct(ext, pMemoryProperties->pNext) {
2174 switch (ext->sType) {
2175 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2176 anv_get_memory_budget(physicalDevice, (void*)ext);
2177 break;
2178 default:
2179 vk_debug_ignored_stype(ext->sType);
2180 break;
2181 }
2182 }
2183 }
2184
2185 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2186 VkInstance _instance,
2187 const char* pName)
2188 {
2189 ANV_FROM_HANDLE(anv_instance, instance, _instance);
2190 return vk_instance_get_proc_addr(&instance->vk,
2191 &anv_instance_entrypoints,
2192 pName);
2193 }
2194
2195 /* With version 1+ of the loader interface the ICD should expose
2196 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2197 */
2198 PUBLIC
2199 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2200 VkInstance instance,
2201 const char* pName)
2202 {
2203 return anv_GetInstanceProcAddr(instance, pName);
2204 }
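
/* Allocate state from the given pool and copy the caller's data into it;
 * used below to upload the static border color tables.
 */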
2205 static struct anv_state
2206 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2207 {
2208 struct anv_state state;
2209
2210 state = anv_state_pool_alloc(pool, size, align);
2211 memcpy(state.map, p, size);
2212
2213 return state;
2214 }
2215
2216 static void
2217 anv_device_init_border_colors(struct anv_device *device)
2218 {
2219 if (device->info->platform == INTEL_PLATFORM_HSW) {
2220 static const struct hsw_border_color border_colors[] = {
2221 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2222 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2223 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2224 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2225 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2226 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2227 };
2228
2229 device->border_colors =
2230 anv_state_pool_emit_data(&device->dynamic_state_pool,
2231 sizeof(border_colors), 512, border_colors);
2232 } else {
2233 static const struct gfx8_border_color border_colors[] = {
2234 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2235 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2236 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2237 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2238 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2239 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2240 };
2241
2242 device->border_colors =
2243 anv_state_pool_emit_data(&device->dynamic_state_pool,
2244 sizeof(border_colors), 64, border_colors);
2245 }
2246 }
2247
2248 static VkResult
2249 anv_device_init_trivial_batch(struct anv_device *device)
2250 {
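   /* Build a 4096-byte batch that contains only MI_BATCH_BUFFER_END (padded
    * with a NOOP), so the device always has a valid, do-nothing batch buffer
    * available to submit.
    */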
2251 VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2252 ANV_BO_ALLOC_MAPPED,
2253 0 /* explicit_address */,
2254 &device->trivial_batch_bo);
2255 if (result != VK_SUCCESS)
2256 return result;
2257
2258 struct anv_batch batch = {
2259 .start = device->trivial_batch_bo->map,
2260 .next = device->trivial_batch_bo->map,
2261 .end = device->trivial_batch_bo->map + 4096,
2262 };
2263
2264 anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2265 anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2266
2267 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2268 if (device->physical->memory.need_flush)
2269 intel_flush_range(batch.start, batch.next - batch.start);
2270 #endif
2271
2272 return VK_SUCCESS;
2273 }
2274
2275 static bool
2276 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2277 struct anv_block_pool *pool,
2278 uint64_t address)
2279 {
2280 anv_block_pool_foreach_bo(bo, pool) {
2281 uint64_t bo_address = intel_48b_address(bo->offset);
2282 if (address >= bo_address && address < (bo_address + bo->size)) {
2283 *ret = (struct intel_batch_decode_bo) {
2284 .addr = bo_address,
2285 .size = bo->size,
2286 .map = bo->map,
2287 };
2288 return true;
2289 }
2290 }
2291 return false;
2292 }
2293
2294 /* Finding a buffer for batch decoding */
2295 static struct intel_batch_decode_bo
2296 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2297 {
2298 struct anv_device *device = v_batch;
2299 struct intel_batch_decode_bo ret_bo = {};
2300
2301 assert(ppgtt);
2302
2303 if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2304 return ret_bo;
2305 if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2306 return ret_bo;
2307 if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2308 return ret_bo;
2309 if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2310 return ret_bo;
2311
2312 if (!device->cmd_buffer_being_decoded)
2313 return (struct intel_batch_decode_bo) { };
2314
2315 struct anv_batch_bo **bo;
2316
2317 u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2318 /* The decoder zeroes out the top 16 bits, so we need to as well */
2319 uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
2320
2321 if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2322 return (struct intel_batch_decode_bo) {
2323 .addr = bo_address,
2324 .size = (*bo)->bo->size,
2325 .map = (*bo)->bo->map,
2326 };
2327 }
2328 }
2329
2330 return (struct intel_batch_decode_bo) { };
2331 }
2332
2333 static VkResult anv_device_check_status(struct vk_device *vk_device);
2334
2335 static VkResult anv_device_get_timestamp(struct vk_device *vk_device, uint64_t *timestamp)
2336 {
2337 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2338
2339 if (!intel_gem_read_render_timestamp(device->fd,
2340 device->info->kmd_type,
2341 timestamp)) {
2342 return vk_device_set_lost(&device->vk,
2343 "Failed to read the TIMESTAMP register: %m");
2344 }
2345
2346 return VK_SUCCESS;
2347 }
2348
2349 static VkResult
2350 anv_device_setup_context(struct anv_device *device,
2351 const VkDeviceCreateInfo *pCreateInfo,
2352 const uint32_t num_queues)
2353 {
2354 struct anv_physical_device *physical_device = device->physical;
2355 VkResult result = VK_SUCCESS;
2356
2357 if (device->physical->engine_info) {
2358 /* The kernel API supports at most 64 engines */
2359 assert(num_queues <= 64);
2360 enum intel_engine_class engine_classes[64];
2361 int engine_count = 0;
2362 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2363 const VkDeviceQueueCreateInfo *queueCreateInfo =
2364 &pCreateInfo->pQueueCreateInfos[i];
2365
2366 assert(queueCreateInfo->queueFamilyIndex <
2367 physical_device->queue.family_count);
2368 struct anv_queue_family *queue_family =
2369 &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
2370
2371 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
2372 engine_classes[engine_count++] = queue_family->engine_class;
2373 }
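      /* The flattened engine_classes list mirrors queue creation order, so
       * queue N (in creation order) maps to engine index N of the context;
       * see the exec_flags assignment in anv_CreateDevice below.
       */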
2374 if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
2375 physical_device->engine_info,
2376 engine_count, engine_classes,
2377 0 /* vm_id */,
2378 (uint32_t *)&device->context_id))
2379 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2380 "kernel context creation failed");
2381 } else {
2382 assert(num_queues == 1);
2383 if (!intel_gem_create_context(device->fd, &device->context_id))
2384 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2385 }
2386
2387 if (result != VK_SUCCESS)
2388 return result;
2389
2390 /* Here we tell the kernel not to attempt to recover our context but
2391 * immediately (on the next batchbuffer submission) report that the
2392 * context is lost, and we will do the recovery ourselves. In the case
2393 * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
2394 * the client clean up the pieces.
2395 */
2396 anv_gem_set_context_param(device->fd, device->context_id,
2397 I915_CONTEXT_PARAM_RECOVERABLE, false);
2398
2399 /* Check if client specified queue priority. */
2400 const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
2401 vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2402 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
2403
2404 VkQueueGlobalPriorityKHR priority =
2405 queue_priority ? queue_priority->globalPriority :
2406 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
2407
2408 /* As per spec, the driver implementation may deny requests to acquire
2409 * a priority above the default priority (MEDIUM) if the caller does not
2410 * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
2411 * is returned.
2412 */
2413 if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
2414 int err = anv_gem_set_context_param(device->fd, device->context_id,
2415 I915_CONTEXT_PARAM_PRIORITY,
2416 vk_priority_to_gen(priority));
2417 if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
2418 result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
2419 goto fail_context;
2420 }
2421 }
2422
2423 return result;
2424
2425 fail_context:
2426 intel_gem_destroy_context(device->fd, device->context_id);
2427 return result;
2428 }
2429
2430 VkResult anv_CreateDevice(
2431 VkPhysicalDevice physicalDevice,
2432 const VkDeviceCreateInfo* pCreateInfo,
2433 const VkAllocationCallbacks* pAllocator,
2434 VkDevice* pDevice)
2435 {
2436 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2437 VkResult result;
2438 struct anv_device *device;
2439
2440 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2441
2442 /* Check requested queues and fail if we are requested to create any
2443 * queues with flags we don't support.
2444 */
2445 assert(pCreateInfo->queueCreateInfoCount > 0);
2446 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2447 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2448 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2449 }
2450
2451 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2452 sizeof(*device), 8,
2453 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2454 if (!device)
2455 return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2456
2457 struct vk_device_dispatch_table dispatch_table;
2458
2459 bool override_initial_entrypoints = true;
2460 if (physical_device->instance->vk.app_info.app_name &&
2461 !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
2462 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &doom64_device_entrypoints, true);
2463 override_initial_entrypoints = false;
2464 }
2465 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2466 anv_genX(&physical_device->info, device_entrypoints),
2467 override_initial_entrypoints);
2468 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2469 &anv_device_entrypoints, false);
2470 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2471 &wsi_device_entrypoints, false);
2472
2473 result = vk_device_init(&device->vk, &physical_device->vk,
2474 &dispatch_table, pCreateInfo, pAllocator);
2475 if (result != VK_SUCCESS)
2476 goto fail_alloc;
2477
2478 if (INTEL_DEBUG(DEBUG_BATCH)) {
2479 const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
2480
2481 intel_batch_decode_ctx_init_elk(&device->decoder_ctx,
2482 &physical_device->compiler->isa,
2483 &physical_device->info,
2484 stderr, decode_flags, NULL,
2485 decode_get_bo, NULL, device);
2486
2487 device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
2488 device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
2489 device->decoder_ctx.instruction_base =
2490 INSTRUCTION_STATE_POOL_MIN_ADDRESS;
2491 }
2492
2493 anv_device_set_physical(device, physical_device);
2494
2495 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2496 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2497 if (device->fd == -1) {
2498 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2499 goto fail_device;
2500 }
2501
2502 device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
2503 device->vk.check_status = anv_device_check_status;
2504 device->vk.get_timestamp = anv_device_get_timestamp;
2505 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
2506 vk_device_set_drm_fd(&device->vk, device->fd);
2507
2508 uint32_t num_queues = 0;
2509 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2510 num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2511
2512 result = anv_device_setup_context(device, pCreateInfo, num_queues);
2513 if (result != VK_SUCCESS)
2514 goto fail_fd;
2515
2516 device->queues =
2517 vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
2518 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2519 if (device->queues == NULL) {
2520 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2521 goto fail_context_id;
2522 }
2523
2524 device->queue_count = 0;
2525 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2526 const VkDeviceQueueCreateInfo *queueCreateInfo =
2527 &pCreateInfo->pQueueCreateInfos[i];
2528
2529 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
2530 /* When using legacy contexts, we use I915_EXEC_RENDER but, with
2531 * engine-based contexts, the bottom 6 bits of exec_flags are used
2532 * for the engine ID.
2533 */
2534 uint32_t exec_flags = device->physical->engine_info ?
2535 device->queue_count : I915_EXEC_RENDER;
2536
2537 result = anv_queue_init(device, &device->queues[device->queue_count],
2538 exec_flags, queueCreateInfo, j);
2539 if (result != VK_SUCCESS)
2540 goto fail_queues;
2541
2542 device->queue_count++;
2543 }
2544 }
2545
2546 if (!anv_use_relocations(physical_device)) {
2547 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
2548 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2549 goto fail_queues;
2550 }
2551
2552 /* keep the page with address zero out of the allocator */
2553 util_vma_heap_init(&device->vma_lo,
2554 LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
2555
2556 util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
2557 CLIENT_VISIBLE_HEAP_SIZE);
2558
2559 /* Leave the last 4GiB out of the high vma range, so that no state
2560 * base address + size can overflow 48 bits. For more information see
2561 * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
2562 */
2563 util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
2564 physical_device->gtt_size - (1ull << 32) -
2565 HIGH_HEAP_MIN_ADDRESS);
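
      /* The softpin address space is thus split into three heaps: a low
       * heap (with the zero page reserved), a client-visible heap for
       * allocations that need stable, capture/replay-able addresses, and a
       * high heap covering the rest of the GTT minus the top 4GiB guard.
       */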
2566 }
2567
2568 list_inithead(&device->memory_objects);
2569
2570 /* On Broadwell and later, we can use batch chaining to more efficiently
2571 * implement growing command buffers. On older hardware, the kernel
2572 * command parser gets in the way and we have to fall back to growing
2573 * the batch.
2574 */
2575 device->can_chain_batches = device->info->ver >= 8;
2576
2577 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
2578 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2579 goto fail_vmas;
2580 }
2581
2582 pthread_condattr_t condattr;
2583 if (pthread_condattr_init(&condattr) != 0) {
2584 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2585 goto fail_mutex;
2586 }
2587 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2588 pthread_condattr_destroy(&condattr);
2589 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2590 goto fail_mutex;
2591 }
2592 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
2593 pthread_condattr_destroy(&condattr);
2594 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2595 goto fail_mutex;
2596 }
2597 pthread_condattr_destroy(&condattr);
2598
2599 result = anv_bo_cache_init(&device->bo_cache, device);
2600 if (result != VK_SUCCESS)
2601 goto fail_queue_cond;
2602
2603 anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
2604
2605 /* Because scratch is also relative to General State Base Address, we leave
2606 * the base address 0 and start the pool memory at an offset. This way we
2607 * get the correct offsets in the anv_states that get allocated from it.
2608 */
2609 result = anv_state_pool_init(&device->general_state_pool, device,
2610 "general pool",
2611 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
2612 if (result != VK_SUCCESS)
2613 goto fail_batch_bo_pool;
2614
2615 result = anv_state_pool_init(&device->dynamic_state_pool, device,
2616 "dynamic pool",
2617 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
2618 if (result != VK_SUCCESS)
2619 goto fail_general_state_pool;
2620
2621 if (device->info->ver >= 8) {
2622 /* The border color pointer is limited to 24 bits, so we need to make
2623 * sure that any such color used at any point in the program doesn't
2624 * exceed that limit.
2625 * We achieve that by reserving all the custom border colors we support
2626 * right off the bat, so they are close to the base address.
2627 */
2628 anv_state_reserved_pool_init(&device->custom_border_colors,
2629 &device->dynamic_state_pool,
2630 MAX_CUSTOM_BORDER_COLORS,
2631 sizeof(struct gfx8_border_color), 64);
2632 }
2633
2634 result = anv_state_pool_init(&device->instruction_state_pool, device,
2635 "instruction pool",
2636 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
2637 if (result != VK_SUCCESS)
2638 goto fail_dynamic_state_pool;
2639
2640 result = anv_state_pool_init(&device->surface_state_pool, device,
2641 "surface state pool",
2642 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
2643 if (result != VK_SUCCESS)
2644 goto fail_instruction_state_pool;
2645
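   /* Binding tables are addressed relative to Surface State Base Address,
    * so the binding table pool is created as an offset pool below the
    * surface state pool; the assert below checks that this (negative)
    * offset fits in a signed 32-bit value.
    */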
2646 if (!anv_use_relocations(physical_device)) {
2647 int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
2648 (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
2649 assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
2650 result = anv_state_pool_init(&device->binding_table_pool, device,
2651 "binding table pool",
2652 SURFACE_STATE_POOL_MIN_ADDRESS,
2653 bt_pool_offset,
2654 BINDING_TABLE_POOL_BLOCK_SIZE);
2655 }
2656 if (result != VK_SUCCESS)
2657 goto fail_surface_state_pool;
2658
2659 result = anv_device_alloc_bo(device, "workaround", 4096,
2660 ANV_BO_ALLOC_CAPTURE |
2661 ANV_BO_ALLOC_MAPPED,
2662 0 /* explicit_address */,
2663 &device->workaround_bo);
2664 if (result != VK_SUCCESS)
2665 goto fail_binding_table_pool;
2666
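   /* intel_debug_write_identifiers() fills the start of the workaround BO
    * with driver identifier blocks that debug tooling can recognize; the
    * usable workaround address starts just past them, aligned to 32 bytes.
    */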
2667 device->workaround_address = (struct anv_address) {
2668 .bo = device->workaround_bo,
2669 .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
2670 device->workaround_bo->size,
2671 "hasvk"), 32),
2672 };
2673
2674 device->workarounds.doom64_images = NULL;
2675
2676 device->debug_frame_desc =
2677 intel_debug_get_identifier_block(device->workaround_bo->map,
2678 device->workaround_bo->size,
2679 INTEL_DEBUG_BLOCK_TYPE_FRAME);
2680
2681 result = anv_device_init_trivial_batch(device);
2682 if (result != VK_SUCCESS)
2683 goto fail_workaround_bo;
2684
2685 /* Allocate a null surface state at surface state offset 0. This makes
2686 * NULL descriptor handling trivial because we can just memset structures
2687 * to zero and they have a valid descriptor.
2688 */
2689 device->null_surface_state =
2690 anv_state_pool_alloc(&device->surface_state_pool,
2691 device->isl_dev.ss.size,
2692 device->isl_dev.ss.align);
2693 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
2694 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
2695 assert(device->null_surface_state.offset == 0);
2696
2697 anv_scratch_pool_init(device, &device->scratch_pool);
2698
2699 result = anv_genX(device->info, init_device_state)(device);
2700 if (result != VK_SUCCESS)
2701 goto fail_trivial_batch_bo_and_scratch_pool;
2702
2703 struct vk_pipeline_cache_create_info pcc_info = { };
2704 device->default_pipeline_cache =
2705 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2706 if (!device->default_pipeline_cache) {
2707 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2708 goto fail_trivial_batch_bo_and_scratch_pool;
2709 }
2710
2711 /* Internal shaders need their own pipeline cache because, unlike the rest
2712 * of ANV, it won't work at all without the cache. It depends on it for
2713 * shaders to remain resident while it runs. Therefore, we need a special
2714 * cache just for BLORP/RT that's forced to always be enabled.
2715 */
2716 pcc_info.force_enable = true;
2717 device->internal_cache =
2718 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2719 if (device->internal_cache == NULL) {
2720 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2721 goto fail_default_pipeline_cache;
2722 }
2723
2724 device->robust_buffer_access =
2725 device->vk.enabled_features.robustBufferAccess ||
2726 device->vk.enabled_features.nullDescriptor;
2727
2728 anv_device_init_blorp(device);
2729
2730 anv_device_init_border_colors(device);
2731
2732 anv_device_perf_init(device);
2733
2734 anv_device_utrace_init(device);
2735
2736 *pDevice = anv_device_to_handle(device);
2737
2738 return VK_SUCCESS;
2739
2740 fail_default_pipeline_cache:
2741 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2742 fail_trivial_batch_bo_and_scratch_pool:
2743 anv_scratch_pool_finish(device, &device->scratch_pool);
2744 anv_device_release_bo(device, device->trivial_batch_bo);
2745 fail_workaround_bo:
2746 anv_device_release_bo(device, device->workaround_bo);
2747 fail_binding_table_pool:
2748 if (!anv_use_relocations(physical_device))
2749 anv_state_pool_finish(&device->binding_table_pool);
2750 fail_surface_state_pool:
2751 anv_state_pool_finish(&device->surface_state_pool);
2752 fail_instruction_state_pool:
2753 anv_state_pool_finish(&device->instruction_state_pool);
2754 fail_dynamic_state_pool:
2755 if (device->info->ver >= 8)
2756 anv_state_reserved_pool_finish(&device->custom_border_colors);
2757 anv_state_pool_finish(&device->dynamic_state_pool);
2758 fail_general_state_pool:
2759 anv_state_pool_finish(&device->general_state_pool);
2760 fail_batch_bo_pool:
2761 anv_bo_pool_finish(&device->batch_bo_pool);
2762 anv_bo_cache_finish(&device->bo_cache);
2763 fail_queue_cond:
2764 pthread_cond_destroy(&device->queue_submit);
2765 fail_mutex:
2766 pthread_mutex_destroy(&device->mutex);
2767 fail_vmas:
2768 if (!anv_use_relocations(physical_device)) {
2769 util_vma_heap_finish(&device->vma_hi);
2770 util_vma_heap_finish(&device->vma_cva);
2771 util_vma_heap_finish(&device->vma_lo);
2772 }
2773 fail_queues:
2774 for (uint32_t i = 0; i < device->queue_count; i++)
2775 anv_queue_finish(&device->queues[i]);
2776 vk_free(&device->vk.alloc, device->queues);
2777 fail_context_id:
2778 intel_gem_destroy_context(device->fd, device->context_id);
2779 fail_fd:
2780 close(device->fd);
2781 fail_device:
2782 vk_device_finish(&device->vk);
2783 fail_alloc:
2784 vk_free(&device->vk.alloc, device);
2785
2786 return result;
2787 }
2788
2789 void anv_DestroyDevice(
2790 VkDevice _device,
2791 const VkAllocationCallbacks* pAllocator)
2792 {
2793 ANV_FROM_HANDLE(anv_device, device, _device);
2794
2795 if (!device)
2796 return;
2797
2798 anv_device_utrace_finish(device);
2799
2800 anv_device_finish_blorp(device);
2801
2802 vk_pipeline_cache_destroy(device->internal_cache, NULL);
2803 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2804
2805 #ifdef HAVE_VALGRIND
2806 /* We only need to free these to prevent valgrind errors. The backing
2807 * BO will go away in a couple of lines so we don't actually leak.
2808 */
2809 if (device->info->ver >= 8)
2810 anv_state_reserved_pool_finish(&device->custom_border_colors);
2811 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
2812 anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
2813 #endif
2814
2815 anv_scratch_pool_finish(device, &device->scratch_pool);
2816
2817 anv_device_release_bo(device, device->workaround_bo);
2818 anv_device_release_bo(device, device->trivial_batch_bo);
2819
2820 if (!anv_use_relocations(device->physical))
2821 anv_state_pool_finish(&device->binding_table_pool);
2822 anv_state_pool_finish(&device->surface_state_pool);
2823 anv_state_pool_finish(&device->instruction_state_pool);
2824 anv_state_pool_finish(&device->dynamic_state_pool);
2825 anv_state_pool_finish(&device->general_state_pool);
2826
2827 anv_bo_pool_finish(&device->batch_bo_pool);
2828
2829 anv_bo_cache_finish(&device->bo_cache);
2830
2831 if (!anv_use_relocations(device->physical)) {
2832 util_vma_heap_finish(&device->vma_hi);
2833 util_vma_heap_finish(&device->vma_cva);
2834 util_vma_heap_finish(&device->vma_lo);
2835 }
2836
2837 pthread_cond_destroy(&device->queue_submit);
2838 pthread_mutex_destroy(&device->mutex);
2839
2840 for (uint32_t i = 0; i < device->queue_count; i++)
2841 anv_queue_finish(&device->queues[i]);
2842 vk_free(&device->vk.alloc, device->queues);
2843
2844 intel_gem_destroy_context(device->fd, device->context_id);
2845
2846 if (INTEL_DEBUG(DEBUG_BATCH))
2847 intel_batch_decode_ctx_finish(&device->decoder_ctx);
2848
2849 close(device->fd);
2850
2851 vk_device_finish(&device->vk);
2852 vk_free(&device->vk.alloc, device);
2853 }
2854
2855 VkResult anv_EnumerateInstanceLayerProperties(
2856 uint32_t* pPropertyCount,
2857 VkLayerProperties* pProperties)
2858 {
2859 if (pProperties == NULL) {
2860 *pPropertyCount = 0;
2861 return VK_SUCCESS;
2862 }
2863
2864 /* None supported at this time */
2865 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2866 }
2867
2868 static VkResult
2869 anv_device_check_status(struct vk_device *vk_device)
2870 {
2871 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2872
2873 uint32_t active, pending;
2874 int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
2875 &active, &pending);
2876 if (ret == -1) {
2877 /* We don't know the real error. */
2878 return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
2879 }
2880
2881 if (active) {
2882 return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
2883 } else if (pending) {
2884 return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
2885 }
2886
2887 return VK_SUCCESS;
2888 }
2889
2890 VkResult
2891 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2892 int64_t timeout)
2893 {
2894 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
2895 if (ret == -1 && errno == ETIME) {
2896 return VK_TIMEOUT;
2897 } else if (ret == -1) {
2898 /* We don't know the real error. */
2899 return vk_device_set_lost(&device->vk, "gem wait failed: %m");
2900 } else {
2901 return VK_SUCCESS;
2902 }
2903 }
2904
2905 uint64_t
2906 anv_vma_alloc(struct anv_device *device,
2907 uint64_t size, uint64_t align,
2908 enum anv_bo_alloc_flags alloc_flags,
2909 uint64_t client_address)
2910 {
2911 pthread_mutex_lock(&device->vma_mutex);
2912
2913 uint64_t addr = 0;
2914
2915 if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
2916 if (client_address) {
2917 if (util_vma_heap_alloc_addr(&device->vma_cva,
2918 client_address, size)) {
2919 addr = client_address;
2920 }
2921 } else {
2922 addr = util_vma_heap_alloc(&device->vma_cva, size, align);
2923 }
2924 /* We don't want to fall back to other heaps */
2925 goto done;
2926 }
2927
2928 assert(client_address == 0);
2929
2930 if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
2931 addr = util_vma_heap_alloc(&device->vma_hi, size, align);
2932
2933 if (addr == 0)
2934 addr = util_vma_heap_alloc(&device->vma_lo, size, align);
2935
2936 done:
2937 pthread_mutex_unlock(&device->vma_mutex);
2938
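   /* Heap addresses are 48 bits wide; the value handed back to callers is
    * put in canonical form, i.e. bit 47 is sign-extended upward as the
    * hardware expects for GPU virtual addresses.
    */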
2939 assert(addr == intel_48b_address(addr));
2940 return intel_canonical_address(addr);
2941 }
2942
2943 void
2944 anv_vma_free(struct anv_device *device,
2945 uint64_t address, uint64_t size)
2946 {
2947 const uint64_t addr_48b = intel_48b_address(address);
2948
2949 pthread_mutex_lock(&device->vma_mutex);
2950
2951 if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
2952 addr_48b <= LOW_HEAP_MAX_ADDRESS) {
2953 util_vma_heap_free(&device->vma_lo, addr_48b, size);
2954 } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
2955 addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
2956 util_vma_heap_free(&device->vma_cva, addr_48b, size);
2957 } else {
2958 assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
2959 util_vma_heap_free(&device->vma_hi, addr_48b, size);
2960 }
2961
2962 pthread_mutex_unlock(&device->vma_mutex);
2963 }
2964
2965 VkResult anv_AllocateMemory(
2966 VkDevice _device,
2967 const VkMemoryAllocateInfo* pAllocateInfo,
2968 const VkAllocationCallbacks* pAllocator,
2969 VkDeviceMemory* pMem)
2970 {
2971 ANV_FROM_HANDLE(anv_device, device, _device);
2972 struct anv_physical_device *pdevice = device->physical;
2973 struct anv_device_memory *mem;
2974 VkResult result = VK_SUCCESS;
2975
2976 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2977
2978 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
2979 assert(pAllocateInfo->allocationSize > 0);
2980
2981 VkDeviceSize aligned_alloc_size =
2982 align64(pAllocateInfo->allocationSize, 4096);
2983
2984 if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
2985 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2986
2987 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
2988 struct anv_memory_type *mem_type =
2989 &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
2990 assert(mem_type->heapIndex < pdevice->memory.heap_count);
2991 struct anv_memory_heap *mem_heap =
2992 &pdevice->memory.heaps[mem_type->heapIndex];
2993
2994 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2995 if (mem_heap_used + aligned_alloc_size > mem_heap->size)
2996 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2997
2998 mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
2999 VK_OBJECT_TYPE_DEVICE_MEMORY);
3000 if (mem == NULL)
3001 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3002
3003 mem->type = mem_type;
3004 mem->map = NULL;
3005 mem->map_size = 0;
3006 mem->map_delta = 0;
3007 mem->ahw = NULL;
3008 mem->host_ptr = NULL;
3009
3010 enum anv_bo_alloc_flags alloc_flags = 0;
3011
3012 const VkExportMemoryAllocateInfo *export_info = NULL;
3013 const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3014 const VkImportMemoryFdInfoKHR *fd_info = NULL;
3015 const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3016 const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3017 VkMemoryAllocateFlags vk_flags = 0;
3018 uint64_t client_address = 0;
3019
3020 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3021 switch (ext->sType) {
3022 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3023 export_info = (void *)ext;
3024 break;
3025
3026 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3027 ahw_import_info = (void *)ext;
3028 break;
3029
3030 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3031 fd_info = (void *)ext;
3032 break;
3033
3034 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3035 host_ptr_info = (void *)ext;
3036 break;
3037
3038 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3039 const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3040 vk_flags = flags_info->flags;
3041 break;
3042 }
3043
3044 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3045 dedicated_info = (void *)ext;
3046 break;
3047
3048 case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3049 const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3050 (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3051 client_address = addr_info->opaqueCaptureAddress;
3052 break;
3053 }
3054
3055 default:
3056 if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3057 /* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA is not a real enum
3058 * value, so it is checked in a conditional rather than a case label to
3059 * avoid a compiler warning. */
3060 vk_debug_ignored_stype(ext->sType);
3061 break;
3062 }
3063 }
3064
3065 if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3066 alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3067
3068 if ((export_info && export_info->handleTypes) ||
3069 (fd_info && fd_info->handleType) ||
3070 (host_ptr_info && host_ptr_info->handleType)) {
3071 /* Anything imported or exported is EXTERNAL */
3072 alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3073 }
3074
3075 /* Check if we need to support Android HW buffer export. If so,
3076 * create AHardwareBuffer and import memory from it.
3077 */
3078 bool android_export = false;
3079 if (export_info && export_info->handleTypes &
3080 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3081 android_export = true;
3082
3083 if (ahw_import_info) {
3084 result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3085 if (result != VK_SUCCESS)
3086 goto fail;
3087
3088 goto success;
3089 } else if (android_export) {
3090 result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3091 if (result != VK_SUCCESS)
3092 goto fail;
3093
3094 goto success;
3095 }
3096
3097 /* The Vulkan spec permits handleType to be 0, in which case the struct is
3098 * ignored.
3099 */
3100 if (fd_info && fd_info->handleType) {
3101 /* At the moment, we support only the below handle types. */
3102 assert(fd_info->handleType ==
3103 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3104 fd_info->handleType ==
3105 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3106
3107 result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3108 client_address, &mem->bo);
3109 if (result != VK_SUCCESS)
3110 goto fail;
3111
3112 /* For security purposes, we reject importing the bo if it's smaller
3113 * than the requested allocation size. This prevents a malicious client
3114 * from passing a buffer to a trusted client, lying about the size, and
3115 * telling the trusted client to try and texture from an image that goes
3116 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
3117 * in the trusted client. The trusted client can protect itself against
3118 * this sort of attack but only if it can trust the buffer size.
3119 */
3120 if (mem->bo->size < aligned_alloc_size) {
3121 result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3122 "aligned allocationSize too large for "
3123 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3124 "%"PRIu64"B > %"PRIu64"B",
3125 aligned_alloc_size, mem->bo->size);
3126 anv_device_release_bo(device, mem->bo);
3127 goto fail;
3128 }
3129
3130 /* From the Vulkan spec:
3131 *
3132 * "Importing memory from a file descriptor transfers ownership of
3133 * the file descriptor from the application to the Vulkan
3134 * implementation. The application must not perform any operations on
3135 * the file descriptor after a successful import."
3136 *
3137 * If the import fails, we leave the file descriptor open.
3138 */
3139 close(fd_info->fd);
3140 goto success;
3141 }
3142
3143 if (host_ptr_info && host_ptr_info->handleType) {
3144 if (host_ptr_info->handleType ==
3145 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3146 result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3147 goto fail;
3148 }
3149
3150 assert(host_ptr_info->handleType ==
3151 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3152
3153 result = anv_device_import_bo_from_host_ptr(device,
3154 host_ptr_info->pHostPointer,
3155 pAllocateInfo->allocationSize,
3156 alloc_flags,
3157 client_address,
3158 &mem->bo);
3159 if (result != VK_SUCCESS)
3160 goto fail;
3161
3162 mem->host_ptr = host_ptr_info->pHostPointer;
3163 goto success;
3164 }
3165
3166 /* Regular allocate (not importing memory). */
3167
3168 result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3169 alloc_flags, client_address, &mem->bo);
3170 if (result != VK_SUCCESS)
3171 goto fail;
3172
3173 if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
3174 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
3175
3176 /* Some legacy (non-modifiers) consumers need the tiling to be set on
3177 * the BO. In this case, we have a dedicated allocation.
3178 */
3179 if (image->vk.wsi_legacy_scanout) {
3180 const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
3181 result = anv_device_set_bo_tiling(device, mem->bo,
3182 surf->row_pitch_B,
3183 surf->tiling);
3184 if (result != VK_SUCCESS) {
3185 anv_device_release_bo(device, mem->bo);
3186 goto fail;
3187 }
3188 }
3189 }
3190
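/* Account the BO's actual size (which may exceed the requested size due
* to alignment) against the heap, and back the allocation out if that
* pushes us over the heap budget.
*/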
3191 success:
3192 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3193 if (mem_heap_used > mem_heap->size) {
3194 p_atomic_add(&mem_heap->used, -mem->bo->size);
3195 anv_device_release_bo(device, mem->bo);
3196 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3197 "Out of heap memory");
3198 goto fail;
3199 }
3200
3201 pthread_mutex_lock(&device->mutex);
3202 list_addtail(&mem->link, &device->memory_objects);
3203 pthread_mutex_unlock(&device->mutex);
3204
3205 *pMem = anv_device_memory_to_handle(mem);
3206
3207 return VK_SUCCESS;
3208
3209 fail:
3210 vk_object_free(&device->vk, pAllocator, mem);
3211
3212 return result;
3213 }
3214
3215 VkResult anv_GetMemoryFdKHR(
3216 VkDevice device_h,
3217 const VkMemoryGetFdInfoKHR* pGetFdInfo,
3218 int* pFd)
3219 {
3220 ANV_FROM_HANDLE(anv_device, dev, device_h);
3221 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
3222
3223 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3224
3225 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3226 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3227
3228 return anv_device_export_bo(dev, mem->bo, pFd);
3229 }
3230
3231 VkResult anv_GetMemoryFdPropertiesKHR(
3232 VkDevice _device,
3233 VkExternalMemoryHandleTypeFlagBits handleType,
3234 int fd,
3235 VkMemoryFdPropertiesKHR* pMemoryFdProperties)
3236 {
3237 ANV_FROM_HANDLE(anv_device, device, _device);
3238
3239 switch (handleType) {
3240 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
3241 /* dma-buf can be imported as any memory type */
3242 pMemoryFdProperties->memoryTypeBits =
3243 (1 << device->physical->memory.type_count) - 1;
3244 return VK_SUCCESS;
3245
3246 default:
3247 /* The valid usage section for this function says:
3248 *
3249 * "handleType must not be one of the handle types defined as
3250 * opaque."
3251 *
3252 * So opaque handle types fall into the default "unsupported" case.
3253 */
3254 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3255 }
3256 }
3257
3258 VkResult anv_GetMemoryHostPointerPropertiesEXT(
3259 VkDevice _device,
3260 VkExternalMemoryHandleTypeFlagBits handleType,
3261 const void* pHostPointer,
3262 VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
3263 {
3264 ANV_FROM_HANDLE(anv_device, device, _device);
3265
3266 assert(pMemoryHostPointerProperties->sType ==
3267 VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
3268
3269 switch (handleType) {
3270 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
3271 /* Host memory can be imported as any memory type. */
3272 pMemoryHostPointerProperties->memoryTypeBits =
3273 (1ull << device->physical->memory.type_count) - 1;
3274
3275 return VK_SUCCESS;
3276
3277 default:
3278 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
3279 }
3280 }
3281
3282 void anv_FreeMemory(
3283 VkDevice _device,
3284 VkDeviceMemory _mem,
3285 const VkAllocationCallbacks* pAllocator)
3286 {
3287 ANV_FROM_HANDLE(anv_device, device, _device);
3288 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
3289
3290 if (mem == NULL)
3291 return;
3292
3293 pthread_mutex_lock(&device->mutex);
3294 list_del(&mem->link);
3295 pthread_mutex_unlock(&device->mutex);
3296
3297 if (mem->map)
3298 anv_UnmapMemory(_device, _mem);
3299
3300 p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
3301 -mem->bo->size);
3302
3303 anv_device_release_bo(device, mem->bo);
3304
3305 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
3306 if (mem->ahw)
3307 AHardwareBuffer_release(mem->ahw);
3308 #endif
3309
3310 vk_object_free(&device->vk, pAllocator, mem);
3311 }
3312
3313 VkResult anv_MapMemory(
3314 VkDevice _device,
3315 VkDeviceMemory _memory,
3316 VkDeviceSize offset,
3317 VkDeviceSize size,
3318 VkMemoryMapFlags flags,
3319 void** ppData)
3320 {
3321 ANV_FROM_HANDLE(anv_device, device, _device);
3322 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3323
3324 if (mem == NULL) {
3325 *ppData = NULL;
3326 return VK_SUCCESS;
3327 }
3328
3329 if (mem->host_ptr) {
3330 *ppData = mem->host_ptr + offset;
3331 return VK_SUCCESS;
3332 }
3333
3334 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3335 *
3336 * * memory must have been created with a memory type that reports
3337 * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
3338 */
3339 if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
3340 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3341 "Memory object not mappable.");
3342 }
3343
3344 if (size == VK_WHOLE_SIZE)
3345 size = mem->bo->size - offset;
3346
3347 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3348 *
3349 * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
3351 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
3352 * equal to the size of the memory minus offset
3353 */
3354 assert(size > 0);
3355 assert(offset + size <= mem->bo->size);
3356
3357 if (size != (size_t)size) {
3358 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3359 "requested size 0x%"PRIx64" does not fit in %u bits",
3360 size, (unsigned)(sizeof(size_t) * 8));
3361 }
3362
3363 /* From the Vulkan 1.2.194 spec:
3364 *
3365 * "memory must not be currently host mapped"
3366 */
3367 if (mem->map != NULL) {
3368 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3369 "Memory object already mapped.");
3370 }
3371
3372 uint32_t gem_flags = 0;
3373
3374 if (!device->info->has_llc &&
3375 (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
3376 gem_flags |= I915_MMAP_WC;
3377
3378 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
3379 uint64_t map_offset;
3380 if (!device->physical->info.has_mmap_offset)
3381 map_offset = offset & ~4095ull;
3382 else
3383 map_offset = 0;
3384 assert(offset >= map_offset);
3385 uint64_t map_size = (offset + size) - map_offset;
3386
3387 /* Let's map whole pages */
3388 map_size = align64(map_size, 4096);
3389
3390 void *map;
3391 VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
3392 map_size, gem_flags, &map);
3393 if (result != VK_SUCCESS)
3394 return result;
3395
3396 mem->map = map;
3397 mem->map_size = map_size;
3398 mem->map_delta = (offset - map_offset);
3399 *ppData = mem->map + mem->map_delta;
3400
3401 return VK_SUCCESS;
3402 }
3403
3404 void anv_UnmapMemory(
3405 VkDevice _device,
3406 VkDeviceMemory _memory)
3407 {
3408 ANV_FROM_HANDLE(anv_device, device, _device);
3409 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3410
3411 if (mem == NULL || mem->host_ptr)
3412 return;
3413
3414 anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
3415
3416 mem->map = NULL;
3417 mem->map_size = 0;
3418 mem->map_delta = 0;
3419 }
3420
3421 VkResult anv_FlushMappedMemoryRanges(
3422 VkDevice _device,
3423 uint32_t memoryRangeCount,
3424 const VkMappedMemoryRange* pMemoryRanges)
3425 {
3426 ANV_FROM_HANDLE(anv_device, device, _device);
3427
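/* If no memory type requires manual cache maintenance (i.e. everything
* is coherent with the CPU caches), there is nothing to flush.
*/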
3428 if (!device->physical->memory.need_flush)
3429 return VK_SUCCESS;
3430
3431 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3432 /* Make sure the writes we're flushing have landed. */
3433 __builtin_ia32_mfence();
3434 #endif
3435
3436 for (uint32_t i = 0; i < memoryRangeCount; i++) {
3437 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3438 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3439 continue;
3440
3441 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3442 if (map_offset >= mem->map_size)
3443 continue;
3444
3445 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3446 intel_flush_range(mem->map + map_offset,
3447 MIN2(pMemoryRanges[i].size,
3448 mem->map_size - map_offset));
3449 #endif
3450 }
3451
3452 return VK_SUCCESS;
3453 }
3454
3455 VkResult anv_InvalidateMappedMemoryRanges(
3456 VkDevice _device,
3457 uint32_t memoryRangeCount,
3458 const VkMappedMemoryRange* pMemoryRanges)
3459 {
3460 ANV_FROM_HANDLE(anv_device, device, _device);
3461
3462 if (!device->physical->memory.need_flush)
3463 return VK_SUCCESS;
3464
3465 for (uint32_t i = 0; i < memoryRangeCount; i++) {
3466 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3467 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3468 continue;
3469
3470 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3471 if (map_offset >= mem->map_size)
3472 continue;
3473
3474 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3475 intel_invalidate_range(mem->map + map_offset,
3476 MIN2(pMemoryRanges[i].size,
3477 mem->map_size - map_offset));
3478 #endif
3479 }
3480
3481 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3482 /* Make sure no reads get moved up above the invalidate. */
3483 __builtin_ia32_mfence();
3484 #endif
3485
3486 return VK_SUCCESS;
3487 }
3488
3489 void anv_GetDeviceMemoryCommitment(
3490 VkDevice device,
3491 VkDeviceMemory memory,
3492 VkDeviceSize* pCommittedMemoryInBytes)
3493 {
3494 *pCommittedMemoryInBytes = 0;
3495 }
3496
3497 static void
3498 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
3499 {
3500 ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
3501 ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
3502
3503 assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
3504
3505 if (mem) {
3506 assert(pBindInfo->memoryOffset < mem->bo->size);
3507 assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
3508 buffer->address = (struct anv_address) {
3509 .bo = mem->bo,
3510 .offset = pBindInfo->memoryOffset,
3511 };
3512 } else {
3513 buffer->address = ANV_NULL_ADDRESS;
3514 }
3515 }
3516
3517 VkResult anv_BindBufferMemory2(
3518 VkDevice device,
3519 uint32_t bindInfoCount,
3520 const VkBindBufferMemoryInfo* pBindInfos)
3521 {
3522 for (uint32_t i = 0; i < bindInfoCount; i++)
3523 anv_bind_buffer_memory(&pBindInfos[i]);
3524
3525 return VK_SUCCESS;
3526 }
3527
3528 VkResult anv_QueueBindSparse(
3529 VkQueue _queue,
3530 uint32_t bindInfoCount,
3531 const VkBindSparseInfo* pBindInfo,
3532 VkFence fence)
3533 {
3534 ANV_FROM_HANDLE(anv_queue, queue, _queue);
3535 if (vk_device_is_lost(&queue->device->vk))
3536 return VK_ERROR_DEVICE_LOST;
3537
3538 return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
3539 }
3540
3541 // Event functions
3542
3543 VkResult anv_CreateEvent(
3544 VkDevice _device,
3545 const VkEventCreateInfo* pCreateInfo,
3546 const VkAllocationCallbacks* pAllocator,
3547 VkEvent* pEvent)
3548 {
3549 ANV_FROM_HANDLE(anv_device, device, _device);
3550 struct anv_event *event;
3551
3552 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
3553
3554 event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
3555 VK_OBJECT_TYPE_EVENT);
3556 if (event == NULL)
3557 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3558
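/* Events are backed by a 64-bit value in GPU-visible dynamic state so
* that vkCmdSetEvent()/vkCmdResetEvent() can update it from the GPU.
*/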
3559 event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
3560 sizeof(uint64_t), 8);
3561 *(uint64_t *)event->state.map = VK_EVENT_RESET;
3562
3563 *pEvent = anv_event_to_handle(event);
3564
3565 return VK_SUCCESS;
3566 }
3567
3568 void anv_DestroyEvent(
3569 VkDevice _device,
3570 VkEvent _event,
3571 const VkAllocationCallbacks* pAllocator)
3572 {
3573 ANV_FROM_HANDLE(anv_device, device, _device);
3574 ANV_FROM_HANDLE(anv_event, event, _event);
3575
3576 if (!event)
3577 return;
3578
3579 anv_state_pool_free(&device->dynamic_state_pool, event->state);
3580
3581 vk_object_free(&device->vk, pAllocator, event);
3582 }
3583
3584 VkResult anv_GetEventStatus(
3585 VkDevice _device,
3586 VkEvent _event)
3587 {
3588 ANV_FROM_HANDLE(anv_device, device, _device);
3589 ANV_FROM_HANDLE(anv_event, event, _event);
3590
3591 if (vk_device_is_lost(&device->vk))
3592 return VK_ERROR_DEVICE_LOST;
3593
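/* The stored value is either VK_EVENT_SET or VK_EVENT_RESET, both of
* which are also valid VkResult codes, so it can be returned directly.
*/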
3594 return *(uint64_t *)event->state.map;
3595 }
3596
3597 VkResult anv_SetEvent(
3598 VkDevice _device,
3599 VkEvent _event)
3600 {
3601 ANV_FROM_HANDLE(anv_event, event, _event);
3602
3603 *(uint64_t *)event->state.map = VK_EVENT_SET;
3604
3605 return VK_SUCCESS;
3606 }
3607
3608 VkResult anv_ResetEvent(
3609 VkDevice _device,
3610 VkEvent _event)
3611 {
3612 ANV_FROM_HANDLE(anv_event, event, _event);
3613
3614 *(uint64_t *)event->state.map = VK_EVENT_RESET;
3615
3616 return VK_SUCCESS;
3617 }
3618
3619 // Buffer functions
3620
3621 static void
3622 anv_get_buffer_memory_requirements(struct anv_device *device,
3623 VkDeviceSize size,
3624 VkBufferUsageFlags usage,
3625 VkMemoryRequirements2* pMemoryRequirements)
3626 {
3627 /* The Vulkan spec (git aaed022) says:
3628 *
3629 * memoryTypeBits is a bitfield and contains one bit set for every
3630 * supported memory type for the resource. The bit `1<<i` is set if and
3631 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
3632 * structure for the physical device is supported.
3633 */
3634 uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
3635
3636 /* Base alignment requirement of a cache line */
3637 uint32_t alignment = 16;
3638
3639 if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
3640 alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
3641
3642 pMemoryRequirements->memoryRequirements.size = size;
3643 pMemoryRequirements->memoryRequirements.alignment = alignment;
3644
3645 /* Storage and uniform buffers should have their size aligned to
3646 * 32 bits to avoid boundary checks when the last dword is not complete.
3647 * This ensures that no internal padding is needed for
3648 * 16-bit types.
3649 */
3650 if (device->vk.enabled_features.robustBufferAccess &&
3651 (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
3652 usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
3653 pMemoryRequirements->memoryRequirements.size = align64(size, 4);
3654
3655 pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3656
3657 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3658 switch (ext->sType) {
3659 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3660 VkMemoryDedicatedRequirements *requirements = (void *)ext;
3661 requirements->prefersDedicatedAllocation = false;
3662 requirements->requiresDedicatedAllocation = false;
3663 break;
3664 }
3665
3666 default:
3667 vk_debug_ignored_stype(ext->sType);
3668 break;
3669 }
3670 }
3671 }
3672
3673 void anv_GetBufferMemoryRequirements2(
3674 VkDevice _device,
3675 const VkBufferMemoryRequirementsInfo2* pInfo,
3676 VkMemoryRequirements2* pMemoryRequirements)
3677 {
3678 ANV_FROM_HANDLE(anv_device, device, _device);
3679 ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3680
3681 anv_get_buffer_memory_requirements(device,
3682 buffer->vk.size,
3683 buffer->vk.usage,
3684 pMemoryRequirements);
3685 }
3686
3687 void anv_GetDeviceBufferMemoryRequirements(
3688 VkDevice _device,
3689 const VkDeviceBufferMemoryRequirements* pInfo,
3690 VkMemoryRequirements2* pMemoryRequirements)
3691 {
3692 ANV_FROM_HANDLE(anv_device, device, _device);
3693
3694 anv_get_buffer_memory_requirements(device,
3695 pInfo->pCreateInfo->size,
3696 pInfo->pCreateInfo->usage,
3697 pMemoryRequirements);
3698 }
3699
3700 VkResult anv_CreateBuffer(
3701 VkDevice _device,
3702 const VkBufferCreateInfo* pCreateInfo,
3703 const VkAllocationCallbacks* pAllocator,
3704 VkBuffer* pBuffer)
3705 {
3706 ANV_FROM_HANDLE(anv_device, device, _device);
3707 struct anv_buffer *buffer;
3708
3709 /* Don't allow creating buffers bigger than our address space. The real
3710 * issue here is that we may align up the buffer size and we don't want
3711 * that rounding to overflow. However, no one has any business
3712 * allocating a buffer larger than our GTT size.
3713 */
3714 if (pCreateInfo->size > device->physical->gtt_size)
3715 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3716
3717 buffer = vk_buffer_create(&device->vk, pCreateInfo,
3718 pAllocator, sizeof(*buffer));
3719 if (buffer == NULL)
3720 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3721
3722 buffer->address = ANV_NULL_ADDRESS;
3723
3724 *pBuffer = anv_buffer_to_handle(buffer);
3725
3726 return VK_SUCCESS;
3727 }
3728
3729 void anv_DestroyBuffer(
3730 VkDevice _device,
3731 VkBuffer _buffer,
3732 const VkAllocationCallbacks* pAllocator)
3733 {
3734 ANV_FROM_HANDLE(anv_device, device, _device);
3735 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3736
3737 if (!buffer)
3738 return;
3739
3740 vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
3741 }
3742
3743 VkDeviceAddress anv_GetBufferDeviceAddress(
3744 VkDevice device,
3745 const VkBufferDeviceAddressInfo* pInfo)
3746 {
3747 ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3748
3749 assert(!anv_address_is_null(buffer->address));
3750 assert(anv_bo_is_pinned(buffer->address.bo));
3751
3752 return anv_address_physical(buffer->address);
3753 }
3754
3755 uint64_t anv_GetBufferOpaqueCaptureAddress(
3756 VkDevice device,
3757 const VkBufferDeviceAddressInfo* pInfo)
3758 {
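/* No per-buffer opaque capture value is needed: buffers take their GPU
* address from the memory they are bound to, so capture/replay is handled
* through the device memory's opaque capture address below.
*/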
3759 return 0;
3760 }
3761
3762 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
3763 VkDevice device,
3764 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3765 {
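/* The opaque capture address is simply the BO's 48-bit GPU address; on
* replay the application passes it back through
* VkMemoryOpaqueCaptureAddressAllocateInfo so the BO can be placed at the
* same address.
*/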
3766 ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
3767
3768 assert(anv_bo_is_pinned(memory->bo));
3769 assert(memory->bo->has_client_visible_address);
3770
3771 return intel_48b_address(memory->bo->offset);
3772 }
3773
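/* Fill out a surface state for a buffer (uniform, storage or texel
* buffer), choosing MOCS based on the usage and on whether the BO is
* shared externally.
*/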
3774 void
3775 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
3776 enum isl_format format,
3777 struct isl_swizzle swizzle,
3778 isl_surf_usage_flags_t usage,
3779 struct anv_address address,
3780 uint32_t range, uint32_t stride)
3781 {
3782 isl_buffer_fill_state(&device->isl_dev, state.map,
3783 .address = anv_address_physical(address),
3784 .mocs = isl_mocs(&device->isl_dev, usage,
3785 address.bo && address.bo->is_external),
3786 .size_B = range,
3787 .format = format,
3788 .swizzle = swizzle,
3789 .stride_B = stride);
3790 }
3791
3792 void anv_DestroySampler(
3793 VkDevice _device,
3794 VkSampler _sampler,
3795 const VkAllocationCallbacks* pAllocator)
3796 {
3797 ANV_FROM_HANDLE(anv_device, device, _device);
3798 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
3799
3800 if (!sampler)
3801 return;
3802
3803 if (sampler->bindless_state.map) {
3804 anv_state_pool_free(&device->dynamic_state_pool,
3805 sampler->bindless_state);
3806 }
3807
3808 if (sampler->custom_border_color.map) {
3809 anv_state_reserved_pool_free(&device->custom_border_colors,
3810 sampler->custom_border_color);
3811 }
3812
3813 vk_object_free(&device->vk, pAllocator, sampler);
3814 }
3815
3816 static uint64_t
3817 anv_clock_gettime(clockid_t clock_id)
3818 {
3819 struct timespec current;
3820 int ret;
3821
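/* Prefer the requested clock; if CLOCK_MONOTONIC_RAW is unavailable we
* fall back to CLOCK_MONOTONIC below.
*/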
3822 ret = clock_gettime(clock_id, &current);
3823 #ifdef CLOCK_MONOTONIC_RAW
3824 if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
3825 ret = clock_gettime(CLOCK_MONOTONIC, &current);
3826 #endif
3827 if (ret < 0)
3828 return 0;
3829
3830 return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
3831 }
3832
3833 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
3834 VkPhysicalDevice physicalDevice,
3835 VkSampleCountFlagBits samples,
3836 VkMultisamplePropertiesEXT* pMultisampleProperties)
3837 {
3838 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3839
3840 assert(pMultisampleProperties->sType ==
3841 VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
3842
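/* Custom sample locations are only supported on a 1x1 pixel grid, and
* only for sample counts the hardware actually supports; report a 0x0
* grid otherwise.
*/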
3843 VkExtent2D grid_size;
3844 if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
3845 grid_size.width = 1;
3846 grid_size.height = 1;
3847 } else {
3848 grid_size.width = 0;
3849 grid_size.height = 0;
3850 }
3851 pMultisampleProperties->maxSampleLocationGridSize = grid_size;
3852
3853 vk_foreach_struct(ext, pMultisampleProperties->pNext)
3854 vk_debug_ignored_stype(ext->sType);
3855 }
3856