1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <fcntl.h>
28 #include "drm-uapi/drm_fourcc.h"
29 #include "drm-uapi/drm.h"
30 #include <xf86drm.h>
31
32 #include "anv_private.h"
33 #include "anv_measure.h"
34 #include "util/u_debug.h"
35 #include "util/os_file.h"
36 #include "util/os_misc.h"
37 #include "util/u_atomic.h"
38 #if DETECT_OS_ANDROID
39 #include "util/u_gralloc/u_gralloc.h"
40 #endif
41 #include "util/u_string.h"
42 #include "vk_common_entrypoints.h"
43 #include "vk_util.h"
44 #include "vk_deferred_operation.h"
45 #include "vk_drm_syncobj.h"
46 #include "common/intel_aux_map.h"
47 #include "common/intel_common.h"
48 #include "common/intel_debug_identifier.h"
49
50 #include "i915/anv_device.h"
51 #include "xe/anv_device.h"
52
53 #include "genxml/gen70_pack.h"
54 #include "genxml/genX_bits.h"
55
56 static void
57 anv_device_init_border_colors(struct anv_device *device)
58 {
59 static const struct gfx8_border_color border_colors[] = {
60 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
61 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
62 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
63 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
64 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
65 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
66 };
67
68 device->border_colors =
69 anv_state_pool_emit_data(&device->dynamic_state_pool,
70 sizeof(border_colors), 64, border_colors);
71 }
72
73 static VkResult
74 anv_device_init_trivial_batch(struct anv_device *device)
75 {
76 VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
77 ANV_BO_ALLOC_MAPPED |
78 ANV_BO_ALLOC_HOST_COHERENT |
79 ANV_BO_ALLOC_INTERNAL |
80 ANV_BO_ALLOC_CAPTURE,
81 0 /* explicit_address */,
82 &device->trivial_batch_bo);
83 if (result != VK_SUCCESS)
84 return result;
85
86 struct anv_batch batch = {
87 .start = device->trivial_batch_bo->map,
88 .next = device->trivial_batch_bo->map,
89 .end = device->trivial_batch_bo->map + 4096,
90 };
91
92 anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
93 anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
94
95 return VK_SUCCESS;
96 }
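/* A sketch of the resulting batch layout, assuming anv_batch_emit() packs
 * each command at 'next' and then advances it (illustrative, not to scale):
 *
 *    start/map                                           map + 4096
 *      | MI_BATCH_BUFFER_END | MI_NOOP |   ...unused...   |
 *                                      ^ next             ^ end
 *
 * The batch does no work; it ends immediately, presumably for submissions
 * that need a valid batch buffer but no actual commands.
 */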
97
98 static bool
99 get_bo_from_pool(struct intel_batch_decode_bo *ret,
100 struct anv_block_pool *pool,
101 uint64_t address)
102 {
103 anv_block_pool_foreach_bo(bo, pool) {
104 uint64_t bo_address = intel_48b_address(bo->offset);
105 if (address >= bo_address && address < (bo_address + bo->size)) {
106 *ret = (struct intel_batch_decode_bo) {
107 .addr = bo_address,
108 .size = bo->size,
109 .map = bo->map,
110 };
111 return true;
112 }
113 }
114 return false;
115 }
116
117 /* Finding a buffer for batch decoding */
118 static struct intel_batch_decode_bo
119 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
120 {
121 struct anv_device *device = v_batch;
122 struct intel_batch_decode_bo ret_bo = {};
123
124 assert(ppgtt);
125
126 if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
127 return ret_bo;
128 if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
129 return ret_bo;
130 if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
131 return ret_bo;
132 if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
133 return ret_bo;
134 if (device->physical->indirect_descriptors &&
135 get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
136 return ret_bo;
137 if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
138 return ret_bo;
139 if (device->physical->indirect_descriptors &&
140 get_bo_from_pool(&ret_bo, &device->indirect_push_descriptor_pool.block_pool, address))
141 return ret_bo;
142 if (device->info->has_aux_map &&
143 get_bo_from_pool(&ret_bo, &device->aux_tt_pool.block_pool, address))
144 return ret_bo;
145
146 if (!device->cmd_buffer_being_decoded)
147 return (struct intel_batch_decode_bo) { };
148
149 struct anv_batch_bo **bbo;
150 u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
151 /* The decoder zeroes out the top 16 bits, so we need to as well */
152 uint64_t bo_address = (*bbo)->bo->offset & (~0ull >> 16);
153
154 if (address >= bo_address && address < bo_address + (*bbo)->bo->size) {
155 return (struct intel_batch_decode_bo) {
156 .addr = bo_address,
157 .size = (*bbo)->bo->size,
158 .map = (*bbo)->bo->map,
159 };
160 }
161
162 uint32_t dep_words = (*bbo)->relocs.dep_words;
163 BITSET_WORD *deps = (*bbo)->relocs.deps;
164 for (uint32_t w = 0; w < dep_words; w++) {
165 BITSET_WORD mask = deps[w];
166 while (mask) {
167 int i = u_bit_scan(&mask);
168 uint32_t gem_handle = w * BITSET_WORDBITS + i;
169 struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
170 assert(bo->refcount > 0);
171 bo_address = bo->offset & (~0ull >> 16);
172 if (address >= bo_address && address < bo_address + bo->size) {
173 return (struct intel_batch_decode_bo) {
174 .addr = bo_address,
175 .size = bo->size,
176 .map = bo->map,
177 };
178 }
179 }
180 }
181 }
182
183 return (struct intel_batch_decode_bo) { };
184 }
185
186 struct intel_aux_map_buffer {
187 struct intel_buffer base;
188 struct anv_state state;
189 };
190
191 static struct intel_buffer *
192 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
193 {
194 struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
195 if (!buf)
196 return NULL;
197
198 struct anv_device *device = (struct anv_device*)driver_ctx;
199
200 struct anv_state_pool *pool = &device->aux_tt_pool;
201 buf->state = anv_state_pool_alloc(pool, size, size);
202
203 buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
204 buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
205 buf->base.map = buf->state.map;
206 buf->base.driver_bo = &buf->state;
207 return &buf->base;
208 }
209
210 static void
211 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
212 {
213 struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
214 struct anv_device *device = (struct anv_device*)driver_ctx;
215 struct anv_state_pool *pool = &device->aux_tt_pool;
216 anv_state_pool_free(pool, buf->state);
217 free(buf);
218 }
219
220 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
221 .alloc = intel_aux_map_buffer_alloc,
222 .free = intel_aux_map_buffer_free,
223 };
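/* This vtable is handed to intel_aux_map_init() further down in
 * anv_CreateDevice() together with the anv_device as driver_ctx, which is
 * why the callbacks above can cast driver_ctx back to the device that owns
 * aux_tt_pool.
 */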
224
225 static VkResult
226 anv_device_setup_context_or_vm(struct anv_device *device,
227 const VkDeviceCreateInfo *pCreateInfo,
228 const uint32_t num_queues)
229 {
230 switch (device->info->kmd_type) {
231 case INTEL_KMD_TYPE_I915:
232 return anv_i915_device_setup_context(device, pCreateInfo, num_queues);
233 case INTEL_KMD_TYPE_XE:
234 return anv_xe_device_setup_vm(device);
235 default:
236 unreachable("Missing");
237 return VK_ERROR_UNKNOWN;
238 }
239 }
240
241 static bool
242 anv_device_destroy_context_or_vm(struct anv_device *device)
243 {
244 switch (device->info->kmd_type) {
245 case INTEL_KMD_TYPE_I915:
246 if (device->physical->has_vm_control)
247 return anv_i915_device_destroy_vm(device);
248 else
249 return intel_gem_destroy_context(device->fd, device->context_id);
250 case INTEL_KMD_TYPE_XE:
251 return anv_xe_device_destroy_vm(device);
252 default:
253 unreachable("Missing");
254 return false;
255 }
256 }
257
258 static VkResult
259 anv_device_init_trtt(struct anv_device *device)
260 {
261 if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
262 !device->vk.enabled_features.sparseBinding)
263 return VK_SUCCESS;
264
265 struct anv_trtt *trtt = &device->trtt;
266
267 VkResult result =
268 vk_sync_create(&device->vk,
269 &device->physical->sync_syncobj_type,
270 VK_SYNC_IS_TIMELINE,
271 0 /* initial_value */,
272 &trtt->timeline);
273 if (result != VK_SUCCESS)
274 return result;
275
276 simple_mtx_init(&trtt->mutex, mtx_plain);
277
278 list_inithead(&trtt->in_flight_batches);
279
280 return VK_SUCCESS;
281 }
282
283 static void
284 anv_device_finish_trtt(struct anv_device *device)
285 {
286 if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
287 !device->vk.enabled_features.sparseBinding)
288 return;
289
290 struct anv_trtt *trtt = &device->trtt;
291
292 anv_sparse_trtt_garbage_collect_batches(device, true);
293
294 vk_sync_destroy(&device->vk, trtt->timeline);
295
296 simple_mtx_destroy(&trtt->mutex);
297
298 vk_free(&device->vk.alloc, trtt->l3_mirror);
299 vk_free(&device->vk.alloc, trtt->l2_mirror);
300
301 for (int i = 0; i < trtt->num_page_table_bos; i++)
302 anv_device_release_bo(device, trtt->page_table_bos[i]);
303
304 vk_free(&device->vk.alloc, trtt->page_table_bos);
305 }
306
307 VkResult anv_CreateDevice(
308 VkPhysicalDevice physicalDevice,
309 const VkDeviceCreateInfo* pCreateInfo,
310 const VkAllocationCallbacks* pAllocator,
311 VkDevice* pDevice)
312 {
313 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
314 VkResult result;
315 struct anv_device *device;
316 bool device_has_compute_queue = false;
317
318 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
319
320 /* Check requested queues and fail if we are requested to create any
321 * queues with flags we don't support.
322 */
323 assert(pCreateInfo->queueCreateInfoCount > 0);
324 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
325 if (pCreateInfo->pQueueCreateInfos[i].flags & ~VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT)
326 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
327
328 const struct anv_queue_family *family =
329 &physical_device->queue.families[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex];
330 device_has_compute_queue |= family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
331 }
332
333 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
334 sizeof(*device), 8,
335 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
336 if (!device)
337 return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
338
339 struct vk_device_dispatch_table dispatch_table;
340
341 bool override_initial_entrypoints = true;
342 if (physical_device->instance->vk.app_info.app_name &&
343 !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
344 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
345 &anv_hitman3_device_entrypoints,
346 true);
347 override_initial_entrypoints = false;
348 }
349 if (physical_device->info.ver < 12 &&
350 physical_device->instance->vk.app_info.app_name &&
351 !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
352 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
353 &anv_doom64_device_entrypoints,
354 true);
355 override_initial_entrypoints = false;
356 }
357 #if DETECT_OS_ANDROID
358 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
359 &anv_android_device_entrypoints,
360 true);
361 override_initial_entrypoints = false;
362 #endif
363 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
364 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
365 &anv_rmv_device_entrypoints,
366 true);
367 override_initial_entrypoints = false;
368 }
369 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
370 anv_genX(&physical_device->info, device_entrypoints),
371 override_initial_entrypoints);
372 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
373 &anv_device_entrypoints, false);
374 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
375 &wsi_device_entrypoints, false);
376
377
378 result = vk_device_init(&device->vk, &physical_device->vk,
379 &dispatch_table, pCreateInfo, pAllocator);
380 if (result != VK_SUCCESS)
381 goto fail_alloc;
382
383 if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
384 for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
385 struct intel_batch_decode_ctx *decoder = &device->decoder[i];
386
387 const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
388
389 intel_batch_decode_ctx_init_brw(decoder,
390 &physical_device->compiler->isa,
391 &physical_device->info,
392 stderr, decode_flags, NULL,
393 decode_get_bo, NULL, device);
394 intel_batch_stats_reset(decoder);
395
396 decoder->engine = physical_device->queue.families[i].engine_class;
397 decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
398 decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
399 decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
400 }
401 }
402
403 anv_device_set_physical(device, physical_device);
404 device->kmd_backend = anv_kmd_backend_get(device->info->kmd_type);
405
406 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
407 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
408 if (device->fd == -1) {
409 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
410 goto fail_device;
411 }
412
413 switch (device->info->kmd_type) {
414 case INTEL_KMD_TYPE_I915:
415 device->vk.check_status = anv_i915_device_check_status;
416 break;
417 case INTEL_KMD_TYPE_XE:
418 device->vk.check_status = anv_xe_device_check_status;
419 break;
420 default:
421 unreachable("Missing");
422 }
423
424 device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
425 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
426 if (physical_device->info.kmd_type == INTEL_KMD_TYPE_I915)
427 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
428 vk_device_set_drm_fd(&device->vk, device->fd);
429
430 uint32_t num_queues = 0;
431 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
432 num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
433
434 result = anv_device_setup_context_or_vm(device, pCreateInfo, num_queues);
435 if (result != VK_SUCCESS)
436 goto fail_fd;
437
438 device->queues =
439 vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
440 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
441 if (device->queues == NULL) {
442 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
443 goto fail_context_id;
444 }
445
446 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
447 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
448 goto fail_queues_alloc;
449 }
450
451 /* keep the page with address zero out of the allocator */
452 util_vma_heap_init(&device->vma_lo,
453 device->physical->va.low_heap.addr,
454 device->physical->va.low_heap.size);
455
456 util_vma_heap_init(&device->vma_hi,
457 device->physical->va.high_heap.addr,
458 device->physical->va.high_heap.size);
459
460 if (device->physical->indirect_descriptors) {
461 util_vma_heap_init(&device->vma_desc,
462 device->physical->va.indirect_descriptor_pool.addr,
463 device->physical->va.indirect_descriptor_pool.size);
464 } else {
465 util_vma_heap_init(&device->vma_desc,
466 device->physical->va.bindless_surface_state_pool.addr,
467 device->physical->va.bindless_surface_state_pool.size);
468 }
469
470 /* Always initialized because the memory types point to this and they
471 * are on the physical device.
472 */
473 util_vma_heap_init(&device->vma_dynamic_visible,
474 device->physical->va.dynamic_visible_pool.addr,
475 device->physical->va.dynamic_visible_pool.size);
476 util_vma_heap_init(&device->vma_trtt,
477 device->physical->va.trtt.addr,
478 device->physical->va.trtt.size);
479
480 list_inithead(&device->memory_objects);
481 list_inithead(&device->image_private_objects);
482 list_inithead(&device->bvh_dumps);
483
484 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
485 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
486 goto fail_vmas;
487 }
488
489 pthread_condattr_t condattr;
490 if (pthread_condattr_init(&condattr) != 0) {
491 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
492 goto fail_mutex;
493 }
494 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
495 pthread_condattr_destroy(&condattr);
496 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
497 goto fail_mutex;
498 }
499 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
500 pthread_condattr_destroy(&condattr);
501 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
502 goto fail_mutex;
503 }
504 pthread_condattr_destroy(&condattr);
505
506 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
507 anv_memory_trace_init(device);
508
509 result = anv_bo_cache_init(&device->bo_cache, device);
510 if (result != VK_SUCCESS)
511 goto fail_queue_cond;
512
513 anv_bo_pool_init(&device->batch_bo_pool, device, "batch",
514 ANV_BO_ALLOC_MAPPED |
515 ANV_BO_ALLOC_HOST_CACHED_COHERENT |
516 ANV_BO_ALLOC_CAPTURE);
517 if (device->vk.enabled_extensions.KHR_acceleration_structure) {
518 anv_bo_pool_init(&device->bvh_bo_pool, device, "bvh build",
519 0 /* alloc_flags */);
520 }
521
522 /* Because scratch is also relative to General State Base Address, we leave
523 * the base address 0 and start the pool memory at an offset. This way we
524 * get the correct offsets in the anv_states that get allocated from it.
525 */
526 result = anv_state_pool_init(&device->general_state_pool, device,
527 &(struct anv_state_pool_params) {
528 .name = "general pool",
529 .base_address = 0,
530 .start_offset = device->physical->va.general_state_pool.addr,
531 .block_size = 16384,
532 .max_size = device->physical->va.general_state_pool.size
533 });
534 if (result != VK_SUCCESS)
535 goto fail_batch_bo_pool;
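/* Illustrative consequence of the base_address = 0 scheme above, assuming an
 * anv_state's .offset is start_offset plus its offset within the pool: the
 * .offset of any state allocated here equals its full GPU virtual address,
 * which is exactly what a scratch pointer relative to a zero General State
 * Base Address needs.
 *
 *    struct anv_state s =
 *       anv_state_pool_alloc(&device->general_state_pool, 64, 64);
 *    uint64_t gpu_addr = 0 + s.offset;   (base address 0 plus offset)
 */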
536
537 result = anv_state_pool_init(&device->dynamic_state_pool, device,
538 &(struct anv_state_pool_params) {
539 .name = "dynamic pool",
540 .base_address = device->physical->va.dynamic_state_pool.addr,
541 .block_size = 16384,
542 .max_size = device->physical->va.dynamic_state_pool.size,
543 });
544 if (result != VK_SUCCESS)
545 goto fail_general_state_pool;
546
547 /* The border color pointer is limited to 24 bits, so we need to make
548 * sure that any such color used at any point in the program doesn't
549 * exceed that limit.
550 * We achieve that by reserving all the custom border colors we support
551 * right off the bat, so they are close to the base address.
552 */
553 result = anv_state_reserved_array_pool_init(&device->custom_border_colors,
554 &device->dynamic_state_pool,
555 MAX_CUSTOM_BORDER_COLORS,
556 sizeof(struct gfx8_border_color), 64);
557 if (result != VK_SUCCESS)
558 goto fail_dynamic_state_pool;
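/* For scale: a 24-bit pointer can only reach the first 2^24 bytes (16MB)
 * above Dynamic State Base Address, so reserving MAX_CUSTOM_BORDER_COLORS
 * gfx8_border_color entries up front keeps every border color palette inside
 * that window (assuming the pointer is relative to Dynamic State Base
 * Address, which is where this pool lives).
 */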
559
560 result = anv_state_pool_init(&device->instruction_state_pool, device,
561 &(struct anv_state_pool_params) {
562 .name = "instruction pool",
563 .base_address = device->physical->va.instruction_state_pool.addr,
564 .block_size = 16384,
565 .max_size = device->physical->va.instruction_state_pool.size,
566 });
567 if (result != VK_SUCCESS)
568 goto fail_custom_border_color_pool;
569
570 if (device->info->verx10 >= 125) {
571 /* Put the scratch surface states at the beginning of the internal
572 * surface state pool.
573 */
574 result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
575 &(struct anv_state_pool_params) {
576 .name = "scratch surface state pool",
577 .base_address = device->physical->va.scratch_surface_state_pool.addr,
578 .block_size = 4096,
579 .max_size = device->physical->va.scratch_surface_state_pool.size,
580 });
581 if (result != VK_SUCCESS)
582 goto fail_instruction_state_pool;
583
584 result = anv_state_pool_init(&device->internal_surface_state_pool, device,
585 &(struct anv_state_pool_params) {
586 .name = "internal surface state pool",
587 .base_address = device->physical->va.internal_surface_state_pool.addr,
588 .start_offset = device->physical->va.scratch_surface_state_pool.size,
589 .block_size = 4096,
590 .max_size = device->physical->va.internal_surface_state_pool.size,
591 });
592 } else {
593 result = anv_state_pool_init(&device->internal_surface_state_pool, device,
594 &(struct anv_state_pool_params) {
595 .name = "internal surface state pool",
596 .base_address = device->physical->va.internal_surface_state_pool.addr,
597 .block_size = 4096,
598 .max_size = device->physical->va.internal_surface_state_pool.size,
599 });
600 }
601 if (result != VK_SUCCESS)
602 goto fail_scratch_surface_state_pool;
603
604 if (device->physical->indirect_descriptors) {
605 result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
606 &(struct anv_state_pool_params) {
607 .name = "bindless surface state pool",
608 .base_address = device->physical->va.bindless_surface_state_pool.addr,
609 .block_size = 4096,
610 .max_size = device->physical->va.bindless_surface_state_pool.size,
611 });
612 if (result != VK_SUCCESS)
613 goto fail_internal_surface_state_pool;
614 }
615
616 if (device->info->verx10 >= 125) {
617 /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
618 * table its own base address separately from surface state base.
619 */
620 result = anv_state_pool_init(&device->binding_table_pool, device,
621 &(struct anv_state_pool_params) {
622 .name = "binding table pool",
623 .base_address = device->physical->va.binding_table_pool.addr,
624 .block_size = BINDING_TABLE_POOL_BLOCK_SIZE,
625 .max_size = device->physical->va.binding_table_pool.size,
626 });
627 } else {
628 /* The binding table should be in front of the surface states in virtual
629 * address space so that all surface states can be expressed as relative
630 * offsets from the binding table location.
631 */
632 assert(device->physical->va.binding_table_pool.addr <
633 device->physical->va.internal_surface_state_pool.addr);
634 int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
635 (int64_t)device->physical->va.internal_surface_state_pool.addr;
636 assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
637 result = anv_state_pool_init(&device->binding_table_pool, device,
638 &(struct anv_state_pool_params) {
639 .name = "binding table pool",
640 .base_address = device->physical->va.internal_surface_state_pool.addr,
641 .start_offset = bt_pool_offset,
642 .block_size = BINDING_TABLE_POOL_BLOCK_SIZE,
643 .max_size = device->physical->va.internal_surface_state_pool.size,
644 });
645 }
646 if (result != VK_SUCCESS)
647 goto fail_bindless_surface_state_pool;
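/* Worked example for the pre-Gfx12.5 path above, with made-up addresses: if
 * the binding table pool sits at 0x00_2000_0000 and the internal surface
 * state pool at 0x00_4000_0000, then bt_pool_offset = -0x2000_0000, and a
 * binding table allocated at offset X within its pool gets
 * anv_state.offset = X - 0x2000_0000, i.e. a small negative offset from
 * Surface State Base Address that still resolves to the correct virtual
 * address when added to it.
 */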
648
649 if (device->physical->indirect_descriptors) {
650 result = anv_state_pool_init(&device->indirect_push_descriptor_pool, device,
651 &(struct anv_state_pool_params) {
652 .name = "indirect push descriptor pool",
653 .base_address = device->physical->va.indirect_push_descriptor_pool.addr,
654 .block_size = 4096,
655 .max_size = device->physical->va.indirect_push_descriptor_pool.size,
656 });
657 if (result != VK_SUCCESS)
658 goto fail_binding_table_pool;
659 }
660
661 if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
662 device->info->verx10 >= 125) {
663 /* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the only
664 * way we can wire push descriptors is through the bindless heap. This
665 * state pool is a 1Gb carve out of the 4Gb HW heap.
666 */
667 result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
668 &(struct anv_state_pool_params) {
669 .name = "push descriptor buffer state pool",
670 .base_address = device->physical->va.push_descriptor_buffer_pool.addr,
671 .block_size = 4096,
672 .max_size = device->physical->va.push_descriptor_buffer_pool.size,
673 });
674 if (result != VK_SUCCESS)
675 goto fail_indirect_push_descriptor_pool;
676 }
677
678 if (device->info->has_aux_map) {
679 result = anv_state_pool_init(&device->aux_tt_pool, device,
680 &(struct anv_state_pool_params) {
681 .name = "aux-tt pool",
682 .base_address = device->physical->va.aux_tt_pool.addr,
683 .block_size = 16384,
684 .max_size = device->physical->va.aux_tt_pool.size,
685 });
686 if (result != VK_SUCCESS)
687 goto fail_push_descriptor_buffer_pool;
688
689 device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
690 &physical_device->info);
691 if (!device->aux_map_ctx)
692 goto fail_aux_tt_pool;
693 }
694
695 result = anv_device_alloc_bo(device, "workaround", 8192,
696 ANV_BO_ALLOC_CAPTURE |
697 ANV_BO_ALLOC_HOST_COHERENT |
698 ANV_BO_ALLOC_MAPPED |
699 ANV_BO_ALLOC_INTERNAL,
700 0 /* explicit_address */,
701 &device->workaround_bo);
702 if (result != VK_SUCCESS)
703 goto fail_surface_aux_map_pool;
704
705 if (intel_needs_workaround(device->info, 14019708328)) {
706 result = anv_device_alloc_bo(device, "dummy_aux", 4096,
707 0 /* alloc_flags */,
708 0 /* explicit_address */,
709 &device->dummy_aux_bo);
710 if (result != VK_SUCCESS)
711 goto fail_alloc_device_bo;
712
713 device->isl_dev.dummy_aux_address = device->dummy_aux_bo->offset;
714 }
715
716 /* Programming note from MI_MEM_FENCE specification:
717 *
718 * Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
719 * programmed prior to programming this command.
720 *
721 * HAS 1607240579 then provides the size information: 4K
722 */
723 if (device->info->verx10 >= 200) {
724 result = anv_device_alloc_bo(device, "mem_fence", 4096,
725 ANV_BO_ALLOC_NO_LOCAL_MEM, 0,
726 &device->mem_fence_bo);
727 if (result != VK_SUCCESS)
728 goto fail_alloc_device_bo;
729 }
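/* The 4K "mem_fence" BO allocated here is presumably what
 * STATE_SYSTEM_MEM_FENCE_ADDRESS gets pointed at later during device state
 * initialization; this function only reserves the memory.
 */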
730
731 struct anv_address wa_addr = (struct anv_address) {
732 .bo = device->workaround_bo,
733 };
734
735 wa_addr = anv_address_add_aligned(wa_addr,
736 intel_debug_write_identifiers(
737 device->workaround_bo->map,
738 device->workaround_bo->size,
739 "Anv"), 32);
740
741 device->rt_uuid_addr = wa_addr;
742 memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
743 physical_device->rt_uuid,
744 sizeof(physical_device->rt_uuid));
745
746 /* Make sure the workaround address is the last one in the workaround BO,
747 * so that writes never overwrite other bits of data stored in the
748 * workaround BO.
749 */
750 wa_addr = anv_address_add_aligned(wa_addr,
751 sizeof(physical_device->rt_uuid), 64);
752 device->workaround_address = wa_addr;
753
754 /* Make sure we don't run past the end of the allocated BO. */
755 assert(device->workaround_address.offset < device->workaround_bo->size);
756 /* We also need 64B (the maximum GRF size) available at the workaround
757 * address (see the TBIMR workaround).
758 */
759 assert((device->workaround_bo->size -
760 device->workaround_address.offset) >= 64);
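/* Resulting layout of the workaround BO (identifier size is whatever
 * intel_debug_write_identifiers() returned; alignments as requested above):
 *
 *    [ debug identifiers | pad to 32B | rt_uuid | pad to 64B | >= 64B scratch ]
 *    ^ bo->map                        ^ rt_uuid_addr         ^ workaround_address
 */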
761
762 device->workarounds.doom64_images = NULL;
763
764
765 device->debug_frame_desc =
766 intel_debug_get_identifier_block(device->workaround_bo->map,
767 device->workaround_bo->size,
768 INTEL_DEBUG_BLOCK_TYPE_FRAME);
769
770 if (device->vk.enabled_extensions.KHR_ray_query) {
771 uint32_t ray_queries_size =
772 align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
773
774 result = anv_device_alloc_bo(device, "ray queries",
775 ray_queries_size,
776 ANV_BO_ALLOC_INTERNAL,
777 0 /* explicit_address */,
778 &device->ray_query_bo[0]);
779 if (result != VK_SUCCESS)
780 goto fail_alloc_device_bo;
781
782 /* We need a separate ray query bo for CCS engine with Wa_14022863161. */
783 if (intel_needs_workaround(device->isl_dev.info, 14022863161) &&
784 device_has_compute_queue) {
785 result = anv_device_alloc_bo(device, "ray queries",
786 ray_queries_size,
787 ANV_BO_ALLOC_INTERNAL,
788 0 /* explicit_address */,
789 &device->ray_query_bo[1]);
790 if (result != VK_SUCCESS)
791 goto fail_ray_query_bo;
792 }
793 }
794
795 result = anv_device_init_trivial_batch(device);
796 if (result != VK_SUCCESS)
797 goto fail_ray_query_bo;
798
799 /* Emit the CPS states before running the initialization batch as those
800 * structures are referenced.
801 */
802 if (device->info->ver >= 12 && device->info->ver < 30) {
803 uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
804
805 if (device->info->has_coarse_pixel_primitive_and_cb)
806 n_cps_states *= 5 * 5; /* 5 combiner options for each of the 2 combiner operators */
807
808 n_cps_states += 1; /* Disable CPS */
809
810 /* Each combination must be replicated for all viewports */
811 n_cps_states *= MAX_VIEWPORTS;
812
813 device->cps_states =
814 anv_state_pool_alloc(&device->dynamic_state_pool,
815 n_cps_states * CPS_STATE_length(device->info) * 4,
816 32);
817 if (device->cps_states.map == NULL)
818 goto fail_trivial_batch;
819
820 anv_genX(device->info, init_cps_device_state)(device);
821 }
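/* For example, on a part with has_coarse_pixel_primitive_and_cb and assuming
 * MAX_VIEWPORTS is 16, that is (3 * 3 * 5 * 5 + 1) * 16 = 3616 CPS_STATE
 * structures in dynamic state.
 */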
822
823 if (device->physical->indirect_descriptors) {
824 /* Allocate a null surface state at surface state offset 0. This makes
825 * NULL descriptor handling trivial because we can just memset
826 * structures to zero and they have a valid descriptor.
827 */
828 device->null_surface_state =
829 anv_state_pool_alloc(&device->bindless_surface_state_pool,
830 device->isl_dev.ss.size,
831 device->isl_dev.ss.align);
832 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
833 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
834 assert(device->null_surface_state.offset == 0);
835 } else {
836 /* When using direct descriptors, those can hold the null surface state
837 * directly. We still need a null surface for the binding table entries,
838 * but this one can live anywhere in the internal surface state
839 * pool.
840 */
841 device->null_surface_state =
842 anv_state_pool_alloc(&device->internal_surface_state_pool,
843 device->isl_dev.ss.size,
844 device->isl_dev.ss.align);
845 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
846 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
847 }
848
849 isl_null_fill_state(&device->isl_dev, &device->host_null_surface_state,
850 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
851
852 anv_scratch_pool_init(device, &device->scratch_pool, false);
853 anv_scratch_pool_init(device, &device->protected_scratch_pool, true);
854
855 /* TODO(RT): Do we want some sort of data structure for this? */
856 memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
857
858 if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
859 /* The docs say to always allocate 128KB per DSS */
860 const uint32_t btd_fifo_bo_size =
861 128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
862 result = anv_device_alloc_bo(device,
863 "rt-btd-fifo",
864 btd_fifo_bo_size,
865 ANV_BO_ALLOC_INTERNAL,
866 0 /* explicit_address */,
867 &device->btd_fifo_bo);
868 if (result != VK_SUCCESS)
869 goto fail_trivial_batch_bo_and_scratch_pool;
870 }
871
872 struct vk_pipeline_cache_create_info pcc_info = { .weak_ref = true, };
873 device->vk.mem_cache =
874 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
875 if (!device->vk.mem_cache) {
876 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
877 goto fail_btd_fifo_bo;
878 }
879
880 /* Internal shaders need their own pipeline cache because, unlike the rest
881 * of ANV, it won't work at all without the cache. It depends on it for
882 * shaders to remain resident while it runs. Therefore, we need a special
883 * cache just for BLORP/RT that's forced to always be enabled.
884 */
885 struct vk_pipeline_cache_create_info internal_pcc_info = {
886 .force_enable = true,
887 .weak_ref = false,
888 };
889 device->internal_cache =
890 vk_pipeline_cache_create(&device->vk, &internal_pcc_info, NULL);
891 if (device->internal_cache == NULL) {
892 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
893 goto fail_default_pipeline_cache;
894 }
895
896 /* The device (currently ICL/TGL) does not have native float64 support. */
897 if (!device->info->has_64bit_float &&
898 device->physical->instance->fp64_workaround_enabled)
899 anv_load_fp64_shader(device);
900
901 if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
902 result = anv_device_print_init(device);
903 if (result != VK_SUCCESS)
904 goto fail_internal_cache;
905 }
906
907 #if DETECT_OS_ANDROID
908 device->u_gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
909 #endif
910
911 device->robust_buffer_access =
912 device->vk.enabled_features.robustBufferAccess ||
913 device->vk.enabled_features.nullDescriptor;
914
915 device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
916 4);
917 p_atomic_set(&device->draw_call_count, 0);
918
919 /* Create a separate command pool for companion RCS command buffer. */
920 if (device->info->verx10 >= 125) {
921 VkCommandPoolCreateInfo pool_info = {
922 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
923 .queueFamilyIndex =
924 anv_get_first_render_queue_index(device->physical),
925 };
926
927 result = vk_common_CreateCommandPool(anv_device_to_handle(device),
928 &pool_info, NULL,
929 &device->companion_rcs_cmd_pool);
930 if (result != VK_SUCCESS) {
931 goto fail_print;
932 }
933 }
934
935 result = anv_device_init_trtt(device);
936 if (result != VK_SUCCESS)
937 goto fail_companion_cmd_pool;
938
939 result = anv_device_init_rt_shaders(device);
940 if (result != VK_SUCCESS) {
941 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
942 goto fail_trtt;
943 }
944
945 anv_device_init_blorp(device);
946
947 anv_device_init_border_colors(device);
948
949 anv_device_init_internal_kernels(device);
950
951 anv_device_init_astc_emu(device);
952
953 anv_device_perf_init(device);
954
955 anv_device_init_embedded_samplers(device);
956
957 BITSET_ONES(device->gfx_dirty_state);
958 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
959 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
960 if (device->info->ver < 11)
961 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
962 if (device->info->ver < 12) {
963 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
964 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
965 }
966 if (!device->vk.enabled_extensions.EXT_sample_locations)
967 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
968 if (!device->vk.enabled_extensions.KHR_fragment_shading_rate) {
969 if (device->info->ver >= 30) {
970 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_COARSE_PIXEL);
971 } else {
972 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
973 }
974 }
975 if (!device->vk.enabled_extensions.EXT_mesh_shader) {
976 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
977 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
978 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
979 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
980 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
981 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
982 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
983 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
984 }
985 if (!intel_needs_workaround(device->info, 18019816803))
986 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
987 if (!intel_needs_workaround(device->info, 14018283232))
988 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_14018283232);
989 if (device->info->ver > 9)
990 BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);
991
992 device->queue_count = 0;
993 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
994 const VkDeviceQueueCreateInfo *queueCreateInfo =
995 &pCreateInfo->pQueueCreateInfos[i];
996
997 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
998 result = anv_queue_init(device, &device->queues[device->queue_count],
999 queueCreateInfo, j);
1000 if (result != VK_SUCCESS)
1001 goto fail_queues;
1002
1003 device->queue_count++;
1004 }
1005 }
1006
1007 anv_device_utrace_init(device);
1008
1009 result = vk_meta_device_init(&device->vk, &device->meta_device);
1010 if (result != VK_SUCCESS)
1011 goto fail_utrace;
1012
1013 result = anv_genX(device->info, init_device_state)(device);
1014 if (result != VK_SUCCESS)
1015 goto fail_meta_device;
1016
1017 simple_mtx_init(&device->accel_struct_build.mutex, mtx_plain);
1018
1019 *pDevice = anv_device_to_handle(device);
1020
1021 return VK_SUCCESS;
1022
1023 fail_meta_device:
1024 vk_meta_device_finish(&device->vk, &device->meta_device);
1025 fail_utrace:
1026 anv_device_utrace_finish(device);
1027 fail_queues:
1028 for (uint32_t i = 0; i < device->queue_count; i++)
1029 anv_queue_finish(&device->queues[i]);
1030 anv_device_finish_embedded_samplers(device);
1031 anv_device_finish_blorp(device);
1032 anv_device_finish_astc_emu(device);
1033 anv_device_finish_internal_kernels(device);
1034 anv_device_finish_rt_shaders(device);
1035 fail_trtt:
1036 anv_device_finish_trtt(device);
1037 fail_companion_cmd_pool:
1038 if (device->info->verx10 >= 125) {
1039 vk_common_DestroyCommandPool(anv_device_to_handle(device),
1040 device->companion_rcs_cmd_pool, NULL);
1041 }
1042 fail_print:
1043 if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
1044 anv_device_print_fini(device);
1045 fail_internal_cache:
1046 vk_pipeline_cache_destroy(device->internal_cache, NULL);
1047 fail_default_pipeline_cache:
1048 vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
1049 fail_btd_fifo_bo:
1050 if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
1051 anv_device_release_bo(device, device->btd_fifo_bo);
1052 fail_trivial_batch_bo_and_scratch_pool:
1053 anv_scratch_pool_finish(device, &device->scratch_pool);
1054 anv_scratch_pool_finish(device, &device->protected_scratch_pool);
1055 fail_trivial_batch:
1056 anv_device_release_bo(device, device->trivial_batch_bo);
1057 fail_ray_query_bo:
1058 for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
1059 if (device->ray_query_bo[i])
1060 anv_device_release_bo(device, device->ray_query_bo[i]);
1061 }
1062 fail_alloc_device_bo:
1063 if (device->mem_fence_bo)
1064 anv_device_release_bo(device, device->mem_fence_bo);
1065 if (device->dummy_aux_bo)
1066 anv_device_release_bo(device, device->dummy_aux_bo);
1067 anv_device_release_bo(device, device->workaround_bo);
1068 fail_surface_aux_map_pool:
1069 if (device->info->has_aux_map) {
1070 intel_aux_map_finish(device->aux_map_ctx);
1071 device->aux_map_ctx = NULL;
1072 }
1073 fail_aux_tt_pool:
1074 if (device->info->has_aux_map)
1075 anv_state_pool_finish(&device->aux_tt_pool);
1076 fail_push_descriptor_buffer_pool:
1077 if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
1078 device->info->verx10 >= 125)
1079 anv_state_pool_finish(&device->push_descriptor_buffer_pool);
1080 fail_indirect_push_descriptor_pool:
1081 if (device->physical->indirect_descriptors)
1082 anv_state_pool_finish(&device->indirect_push_descriptor_pool);
1083 fail_binding_table_pool:
1084 anv_state_pool_finish(&device->binding_table_pool);
1085 fail_bindless_surface_state_pool:
1086 if (device->physical->indirect_descriptors)
1087 anv_state_pool_finish(&device->bindless_surface_state_pool);
1088 fail_internal_surface_state_pool:
1089 anv_state_pool_finish(&device->internal_surface_state_pool);
1090 fail_scratch_surface_state_pool:
1091 if (device->info->verx10 >= 125)
1092 anv_state_pool_finish(&device->scratch_surface_state_pool);
1093 fail_instruction_state_pool:
1094 anv_state_pool_finish(&device->instruction_state_pool);
1095 fail_custom_border_color_pool:
1096 anv_state_reserved_array_pool_finish(&device->custom_border_colors);
1097 fail_dynamic_state_pool:
1098 anv_state_pool_finish(&device->dynamic_state_pool);
1099 fail_general_state_pool:
1100 anv_state_pool_finish(&device->general_state_pool);
1101 fail_batch_bo_pool:
1102 if (device->vk.enabled_extensions.KHR_acceleration_structure)
1103 anv_bo_pool_finish(&device->bvh_bo_pool);
1104 anv_bo_pool_finish(&device->batch_bo_pool);
1105 anv_bo_cache_finish(&device->bo_cache);
1106 fail_queue_cond:
1107 pthread_cond_destroy(&device->queue_submit);
1108 fail_mutex:
1109 pthread_mutex_destroy(&device->mutex);
1110 fail_vmas:
1111 util_vma_heap_finish(&device->vma_trtt);
1112 util_vma_heap_finish(&device->vma_dynamic_visible);
1113 util_vma_heap_finish(&device->vma_desc);
1114 util_vma_heap_finish(&device->vma_hi);
1115 util_vma_heap_finish(&device->vma_lo);
1116 pthread_mutex_destroy(&device->vma_mutex);
1117 fail_queues_alloc:
1118 vk_free(&device->vk.alloc, device->queues);
1119 fail_context_id:
1120 anv_device_destroy_context_or_vm(device);
1121 fail_fd:
1122 close(device->fd);
1123 fail_device:
1124 vk_device_finish(&device->vk);
1125 fail_alloc:
1126 vk_free(&device->vk.alloc, device);
1127
1128 return result;
1129 }
1130
1131 void anv_DestroyDevice(
1132 VkDevice _device,
1133 const VkAllocationCallbacks* pAllocator)
1134 {
1135 ANV_FROM_HANDLE(anv_device, device, _device);
1136
1137 if (!device)
1138 return;
1139
1140 #if DETECT_OS_ANDROID
1141 u_gralloc_destroy(&device->u_gralloc);
1142 #endif
1143
1144 anv_memory_trace_finish(device);
1145
1146 struct anv_physical_device *pdevice = device->physical;
1147
1148 /* Do TRTT batch garbage collection before destroying queues. */
1149 anv_device_finish_trtt(device);
1150
1151 if (device->accel_struct_build.radix_sort) {
1152 radix_sort_vk_destroy(device->accel_struct_build.radix_sort,
1153 _device, &device->vk.alloc);
1154 }
1155 vk_meta_device_finish(&device->vk, &device->meta_device);
1156
1157 anv_device_utrace_finish(device);
1158
1159 for (uint32_t i = 0; i < device->queue_count; i++)
1160 anv_queue_finish(&device->queues[i]);
1161 vk_free(&device->vk.alloc, device->queues);
1162
1163 anv_device_finish_blorp(device);
1164
1165 anv_device_finish_rt_shaders(device);
1166
1167 anv_device_finish_astc_emu(device);
1168
1169 anv_device_finish_internal_kernels(device);
1170
1171 if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
1172 anv_device_print_fini(device);
1173
1174 vk_pipeline_cache_destroy(device->internal_cache, NULL);
1175 vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
1176
1177 anv_device_finish_embedded_samplers(device);
1178
1179 if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
1180 anv_device_release_bo(device, device->btd_fifo_bo);
1181
1182 if (device->info->verx10 >= 125) {
1183 vk_common_DestroyCommandPool(anv_device_to_handle(device),
1184 device->companion_rcs_cmd_pool, NULL);
1185 }
1186
1187 anv_state_reserved_array_pool_finish(&device->custom_border_colors);
1188 #ifdef HAVE_VALGRIND
1189 /* We only need to free these to prevent valgrind errors. The backing
1190 * BO will go away in a couple of lines so we don't actually leak.
1191 */
1192 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1193 anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
1194 anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
1195 anv_state_pool_free(&device->dynamic_state_pool, device->breakpoint);
1196 #endif
1197
1198 for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
1199 if (device->rt_scratch_bos[i] != NULL)
1200 anv_device_release_bo(device, device->rt_scratch_bos[i]);
1201 }
1202
1203 anv_scratch_pool_finish(device, &device->scratch_pool);
1204 anv_scratch_pool_finish(device, &device->protected_scratch_pool);
1205
1206 if (device->vk.enabled_extensions.KHR_ray_query) {
1207 for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
1208 for (unsigned j = 0; j < ARRAY_SIZE(device->ray_query_shadow_bos[0]); j++) {
1209 if (device->ray_query_shadow_bos[i][j] != NULL)
1210 anv_device_release_bo(device, device->ray_query_shadow_bos[i][j]);
1211 }
1212 if (device->ray_query_bo[i])
1213 anv_device_release_bo(device, device->ray_query_bo[i]);
1214 }
1215 }
1216 anv_device_release_bo(device, device->workaround_bo);
1217 if (device->dummy_aux_bo)
1218 anv_device_release_bo(device, device->dummy_aux_bo);
1219 if (device->mem_fence_bo)
1220 anv_device_release_bo(device, device->mem_fence_bo);
1221 anv_device_release_bo(device, device->trivial_batch_bo);
1222
1223 if (device->info->has_aux_map) {
1224 intel_aux_map_finish(device->aux_map_ctx);
1225 device->aux_map_ctx = NULL;
1226 anv_state_pool_finish(&device->aux_tt_pool);
1227 }
1228 if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
1229 device->info->verx10 >= 125)
1230 anv_state_pool_finish(&device->push_descriptor_buffer_pool);
1231 if (device->physical->indirect_descriptors)
1232 anv_state_pool_finish(&device->indirect_push_descriptor_pool);
1233 anv_state_pool_finish(&device->binding_table_pool);
1234 if (device->info->verx10 >= 125)
1235 anv_state_pool_finish(&device->scratch_surface_state_pool);
1236 anv_state_pool_finish(&device->internal_surface_state_pool);
1237 if (device->physical->indirect_descriptors)
1238 anv_state_pool_finish(&device->bindless_surface_state_pool);
1239 anv_state_pool_finish(&device->instruction_state_pool);
1240 anv_state_pool_finish(&device->dynamic_state_pool);
1241 anv_state_pool_finish(&device->general_state_pool);
1242
1243 if (device->vk.enabled_extensions.KHR_acceleration_structure)
1244 anv_bo_pool_finish(&device->bvh_bo_pool);
1245 anv_bo_pool_finish(&device->batch_bo_pool);
1246
1247 anv_bo_cache_finish(&device->bo_cache);
1248
1249 util_vma_heap_finish(&device->vma_trtt);
1250 util_vma_heap_finish(&device->vma_dynamic_visible);
1251 util_vma_heap_finish(&device->vma_desc);
1252 util_vma_heap_finish(&device->vma_hi);
1253 util_vma_heap_finish(&device->vma_lo);
1254 pthread_mutex_destroy(&device->vma_mutex);
1255
1256 pthread_cond_destroy(&device->queue_submit);
1257 pthread_mutex_destroy(&device->mutex);
1258
1259 simple_mtx_destroy(&device->accel_struct_build.mutex);
1260
1261 ralloc_free(device->fp64_nir);
1262
1263 anv_device_destroy_context_or_vm(device);
1264
1265 if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_BATCH_STATS)) {
1266 for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
1267 if (INTEL_DEBUG(DEBUG_BATCH_STATS))
1268 intel_batch_print_stats(&device->decoder[i]);
1269 intel_batch_decode_ctx_finish(&device->decoder[i]);
1270 }
1271 }
1272
1273 close(device->fd);
1274
1275 vk_device_finish(&device->vk);
1276 vk_free(&device->vk.alloc, device);
1277 }
1278
1279 VkResult anv_EnumerateInstanceLayerProperties(
1280 uint32_t* pPropertyCount,
1281 VkLayerProperties* pProperties)
1282 {
1283 if (pProperties == NULL) {
1284 *pPropertyCount = 0;
1285 return VK_SUCCESS;
1286 }
1287
1288 /* None supported at this time */
1289 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1290 }
1291
1292 VkResult
1293 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1294 int64_t timeout)
1295 {
1296 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
1297 if (ret == -1 && errno == ETIME) {
1298 return VK_TIMEOUT;
1299 } else if (ret == -1) {
1300 /* We don't know the real error. */
1301 return vk_device_set_lost(&device->vk, "gem wait failed: %m");
1302 } else {
1303 return VK_SUCCESS;
1304 }
1305 }
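/* Illustrative use (not from this file): waiting up to one second for a BO
 * to go idle, assuming the timeout follows the i915 GEM_WAIT convention of
 * nanoseconds:
 *
 *    int64_t timeout_ns = 1000ll * 1000 * 1000;
 *    VkResult res = anv_device_wait(device, bo, timeout_ns);
 *    (res is VK_SUCCESS, VK_TIMEOUT, or a device-loss error)
 */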
1306
1307 static struct util_vma_heap *
1308 anv_vma_heap_for_flags(struct anv_device *device,
1309 enum anv_bo_alloc_flags alloc_flags)
1310 {
1311 if (alloc_flags & ANV_BO_ALLOC_TRTT)
1312 return &device->vma_trtt;
1313
1314 if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
1315 return &device->vma_lo;
1316
1317 if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
1318 return &device->vma_desc;
1319
1320 if (alloc_flags & ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL)
1321 return &device->vma_dynamic_visible;
1322
1323 return &device->vma_hi;
1324 }
1325
1326 uint64_t
1327 anv_vma_alloc(struct anv_device *device,
1328 uint64_t size, uint64_t align,
1329 enum anv_bo_alloc_flags alloc_flags,
1330 uint64_t client_address,
1331 struct util_vma_heap **out_vma_heap)
1332 {
1333 pthread_mutex_lock(&device->vma_mutex);
1334
1335 uint64_t addr = 0;
1336 *out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
1337
1338 if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
1339 assert(*out_vma_heap == &device->vma_hi ||
1340 *out_vma_heap == &device->vma_dynamic_visible ||
1341 *out_vma_heap == &device->vma_trtt);
1342
1343 if (client_address) {
1344 if (util_vma_heap_alloc_addr(*out_vma_heap,
1345 client_address, size)) {
1346 addr = client_address;
1347 }
1348 } else {
1349 (*out_vma_heap)->alloc_high = false;
1350 addr = util_vma_heap_alloc(*out_vma_heap, size, align);
1351 (*out_vma_heap)->alloc_high = true;
1352 }
1353 /* We don't want to fall back to other heaps */
1354 goto done;
1355 }
1356
1357 assert(client_address == 0);
1358
1359 addr = util_vma_heap_alloc(*out_vma_heap, size, align);
1360
1361 done:
1362 pthread_mutex_unlock(&device->vma_mutex);
1363
1364 assert(addr == intel_48b_address(addr));
1365 return intel_canonical_address(addr);
1366 }
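/* The canonicalization above mirrors 48-bit virtual addressing: assuming
 * intel_canonical_address() sign-extends bit 47 into bits 63:48, an
 * allocation at 0x0000_8000_0010_0000 comes back as 0xffff_8000_0010_0000,
 * and intel_48b_address() in anv_vma_free() strips those upper bits again
 * before returning the range to the VMA heap.
 */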
1367
1368 void
1369 anv_vma_free(struct anv_device *device,
1370 struct util_vma_heap *vma_heap,
1371 uint64_t address, uint64_t size)
1372 {
1373 assert(vma_heap == &device->vma_lo ||
1374 vma_heap == &device->vma_hi ||
1375 vma_heap == &device->vma_desc ||
1376 vma_heap == &device->vma_dynamic_visible ||
1377 vma_heap == &device->vma_trtt);
1378
1379 const uint64_t addr_48b = intel_48b_address(address);
1380
1381 pthread_mutex_lock(&device->vma_mutex);
1382
1383 util_vma_heap_free(vma_heap, addr_48b, size);
1384
1385 pthread_mutex_unlock(&device->vma_mutex);
1386 }
1387
1388 VkResult anv_AllocateMemory(
1389 VkDevice _device,
1390 const VkMemoryAllocateInfo* pAllocateInfo,
1391 const VkAllocationCallbacks* pAllocator,
1392 VkDeviceMemory* pMem)
1393 {
1394 ANV_FROM_HANDLE(anv_device, device, _device);
1395 struct anv_physical_device *pdevice = device->physical;
1396 struct anv_device_memory *mem;
1397 VkResult result = VK_SUCCESS;
1398
1399 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1400
1401 VkDeviceSize aligned_alloc_size =
1402 align64(pAllocateInfo->allocationSize, 4096);
1403
1404 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
1405 const struct anv_memory_type *mem_type =
1406 &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
1407 assert(mem_type->heapIndex < pdevice->memory.heap_count);
1408 struct anv_memory_heap *mem_heap =
1409 &pdevice->memory.heaps[mem_type->heapIndex];
1410
1411 if (aligned_alloc_size > mem_heap->size)
1412 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1413
1414 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
1415 if (mem_heap_used + aligned_alloc_size > mem_heap->size)
1416 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1417
1418 mem = vk_device_memory_create(&device->vk, pAllocateInfo,
1419 pAllocator, sizeof(*mem));
1420 if (mem == NULL)
1421 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1422
1423 mem->type = mem_type;
1424 mem->map = NULL;
1425 mem->map_size = 0;
1426 mem->map_delta = 0;
1427
1428 enum anv_bo_alloc_flags alloc_flags = 0;
1429
1430 const VkImportMemoryFdInfoKHR *fd_info = NULL;
1431 const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
1432 const struct wsi_memory_allocate_info *wsi_info = NULL;
1433 uint64_t client_address = 0;
1434
1435 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
1436 /* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real enum
1437 * value, so use a cast to avoid a compiler warning.
1438 */
1439 switch ((uint32_t)ext->sType) {
1440 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
1441 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
1442 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
1443 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
1444 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
1445 /* handled by vk_device_memory_create */
1446 break;
1447
1448 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
1449 fd_info = (void *)ext;
1450 break;
1451
1452 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
1453 dedicated_info = (void *)ext;
1454 break;
1455
1456 case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
1457 const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
1458 (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
1459 client_address = addr_info->opaqueCaptureAddress;
1460 break;
1461 }
1462
1463 case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
1464 wsi_info = (void *)ext;
1465 break;
1466
1467 default:
1468 vk_debug_ignored_stype(ext->sType);
1469 break;
1470 }
1471 }
1472
1473 /* If i915 reported separate mappable and non-mappable VRAM regions and the
1474 * application wants CPU-mappable lmem, then we need to use the
1475 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
1476 */
1477 if (pdevice->vram_mappable.size > 0 &&
1478 pdevice->vram_non_mappable.size > 0 &&
1479 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
1480 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
1481 alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
1482
1483 if (!mem_heap->is_local_mem)
1484 alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
1485
1486 if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
1487 alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
1488
1489 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
1490 alloc_flags |= ANV_BO_ALLOC_PROTECTED;
1491
1492 /* For now, always allocate AUX-TT aligned memory, regardless of dedicated
1493 * allocations. An application can, for example, suballocate a large
1494 * VkDeviceMemory and try to bind an image created with a CCS modifier. In
1495 * that case we cannot disable CCS if the alignment doesn't meet the AUX-TT
1496 * requirements, so we need to ensure both the VkDeviceMemory and the
1497 * alignment reported through vkGetImageMemoryRequirements() meet the
1498 * AUX-TT requirement.
1499 *
1500 * Allocations with the special dynamic_visible mem type are for things like
1501 * descriptor buffers, so AUX-TT alignment is not needed here.
1502 */
1503 if (device->info->has_aux_map && !mem_type->dynamic_visible)
1504 alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED;
1505
1506 /* If the allocation is not dedicated nor a host pointer, allocate
1507 * additional CCS space.
1508 *
1509 * Allocations with the special dynamic_visible mem type are for things like
1510 * descriptor buffers, which don't need any compression.
1511 */
1512 if (device->physical->alloc_aux_tt_mem &&
1513 dedicated_info == NULL &&
1514 mem->vk.host_ptr == NULL &&
1515 !mem_type->dynamic_visible)
1516 alloc_flags |= ANV_BO_ALLOC_AUX_CCS;
1517
1518 /* TODO: Android, ChromeOS and other applications may need another way to
1519 * allocate buffers that can be scanned out to a display, but it should be
1520 * pretty easy to catch those since the Xe KMD will print warnings in dmesg
1521 * when scanning out buffers allocated without the proper flag set.
1522 */
1523 if (wsi_info)
1524 alloc_flags |= ANV_BO_ALLOC_SCANOUT;
1525
1526 /* Anything imported or exported is EXTERNAL */
1527 if (mem->vk.export_handle_types || mem->vk.import_handle_type) {
1528 alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
1529
1530 /* wsi has its own way of synchronizing with the compositor */
1531 if (pdevice->instance->external_memory_implicit_sync &&
1532 !wsi_info && dedicated_info &&
1533 dedicated_info->image != VK_NULL_HANDLE) {
1534 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
1535
1536 /* Apply implicit sync to be compatible with clients relying on
1537 * implicit fencing. This matches the behavior in iris i915_batch
1538 * submit. An example client is VA-API (iHD), so only the dedicated
1539 * image scenario has to be covered.
1540 */
1541 alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC;
1542
1543 /* For color attachment, apply IMPLICIT_WRITE so a client on the
1544 * consumer side relying on implicit fencing can have a fence to
1545 * wait for render complete.
1546 */
1547 if (image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
1548 alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE;
1549 }
1550 }
1551
1552 /* TODO: Disabling compression on external bos will cause problems once we
1553 * have a modifier that supports compression (Xe2+).
1554 */
1555 if (!(alloc_flags & ANV_BO_ALLOC_EXTERNAL) && mem_type->compressed)
1556 alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
1557
1558 if (mem_type->dynamic_visible)
1559 alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL;
1560
1561 if (mem->vk.ahardware_buffer) {
1562 result = anv_import_ahw_memory(_device, mem);
1563 if (result != VK_SUCCESS)
1564 goto fail;
1565
1566 goto success;
1567 }
1568
1569 /* The Vulkan spec permits handleType to be 0, in which case the struct is
1570 * ignored.
1571 */
1572 if (fd_info && fd_info->handleType) {
1573 /* At the moment, we only support the handle types below. */
1574 assert(fd_info->handleType ==
1575 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1576 fd_info->handleType ==
1577 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1578
1579 result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
1580 client_address, &mem->bo);
1581 if (result != VK_SUCCESS)
1582 goto fail;
1583
1584 /* For security purposes, we reject importing the bo if it's smaller
1585 * than the requested allocation size. This prevents a malicious client
1586 * from passing a buffer to a trusted client, lying about the size, and
1587 * telling the trusted client to try and texture from an image that goes
1588 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
1589 * in the trusted client. The trusted client can protect itself against
1590 * this sort of attack but only if it can trust the buffer size.
1591 */
1592 if (mem->bo->size < aligned_alloc_size) {
1593 result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
1594 "aligned allocationSize too large for "
1595 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
1596 "%"PRIu64"B > %"PRIu64"B",
1597 aligned_alloc_size, mem->bo->size);
1598 anv_device_release_bo(device, mem->bo);
1599 goto fail;
1600 }
1601
1602 /* From the Vulkan spec:
1603 *
1604 * "Importing memory from a file descriptor transfers ownership of
1605 * the file descriptor from the application to the Vulkan
1606 * implementation. The application must not perform any operations on
1607 * the file descriptor after a successful import."
1608 *
1609 * If the import fails, we leave the file descriptor open.
1610 */
1611 close(fd_info->fd);
1612 goto success;
1613 }
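/* For reference, an application-side dma-buf/opaque-fd import that ends up
 * on the path above looks roughly like the sketch below; names such as
 * dmabuf_fd, size and mem_type_index are placeholders and error handling is
 * omitted:
 *
 *    VkImportMemoryFdInfoKHR import_info = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *       .fd = dmabuf_fd,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &import_info,
 *       .allocationSize = size,
 *       .memoryTypeIndex = mem_type_index,
 *    };
 *    vkAllocateMemory(device, &alloc_info, NULL, &memory);
 *
 * On success the driver owns (and just closed) dmabuf_fd, so the application
 * must not touch that file descriptor again.
 */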
1614
1615 if (mem->vk.host_ptr) {
1616 if (mem->vk.import_handle_type ==
1617 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
1618 result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1619 goto fail;
1620 }
1621
1622 assert(mem->vk.import_handle_type ==
1623 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
1624
1625 result = anv_device_import_bo_from_host_ptr(device,
1626 mem->vk.host_ptr,
1627 mem->vk.size,
1628 alloc_flags,
1629 client_address,
1630 &mem->bo);
1631 if (result != VK_SUCCESS)
1632 goto fail;
1633
1634 goto success;
1635 }
1636
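/* Pick the host caching behavior for the BO: external and scanout buffers
 * are kept host-coherent because other agents (display, other devices) do
 * not snoop the CPU caches; host-visible memory types follow their
 * HOST_COHERENT/HOST_CACHED property flags; everything else falls back to
 * coherent just to end up with a valid PAT entry.
 */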
1637 if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT)) {
1638 alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1639 } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
1640 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1641 alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1642 if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
1643 alloc_flags |= ANV_BO_ALLOC_HOST_CACHED;
1644 } else {
1645 /* We still need to pick some host mode so that a valid PAT index gets set. */
1646 alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
1647 }
1648
1649 /* Regular allocate (not importing memory). */
1650
1651 result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
1652 alloc_flags, client_address, &mem->bo);
1653 if (result != VK_SUCCESS)
1654 goto fail;
1655
1656 if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
1657 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
1658
1659 /* Some legacy (non-modifiers) consumers need the tiling to be set on
1660 * the BO. In this case, we have a dedicated allocation.
1661 */
1662 if (image->vk.wsi_legacy_scanout) {
1663 const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
1664 result = anv_device_set_bo_tiling(device, mem->bo,
1665 surf->row_pitch_B,
1666 surf->tiling);
1667 if (result != VK_SUCCESS) {
1668 anv_device_release_bo(device, mem->bo);
1669 goto fail;
1670 }
1671 }
1672 }
1673
1674 success:
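/* Charge the BO against its memory heap. If this pushes the heap past its
 * advertised size, undo the accounting, release the BO and report
 * VK_ERROR_OUT_OF_DEVICE_MEMORY.
 */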
1675 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
1676 if (mem_heap_used > mem_heap->size) {
1677 p_atomic_add(&mem_heap->used, -mem->bo->size);
1678 anv_device_release_bo(device, mem->bo);
1679 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
1680 "Out of heap memory");
1681 goto fail;
1682 }
1683
1684 pthread_mutex_lock(&device->mutex);
1685 list_addtail(&mem->link, &device->memory_objects);
1686 pthread_mutex_unlock(&device->mutex);
1687
1688 ANV_RMV(heap_create, device, mem, false, 0);
1689
1690 *pMem = anv_device_memory_to_handle(mem);
1691
1692 return VK_SUCCESS;
1693
1694 fail:
1695 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
1696
1697 return result;
1698 }
1699
1700 VkResult anv_GetMemoryFdKHR(
1701 VkDevice device_h,
1702 const VkMemoryGetFdInfoKHR* pGetFdInfo,
1703 int* pFd)
1704 {
1705 ANV_FROM_HANDLE(anv_device, dev, device_h);
1706 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
1707
1708 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1709
1710 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1711 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1712
1713 return anv_device_export_bo(dev, mem->bo, pFd);
1714 }
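/* A typical application-side export through this entry point, assuming the
 * memory was allocated with a VkExportMemoryAllocateInfo requesting dma-buf
 * handles (sketch only, error handling omitted):
 *
 *    VkMemoryGetFdInfoKHR get_fd_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd = -1;
 *    vkGetMemoryFdKHR(device, &get_fd_info, &fd);
 *
 * Each call returns a new file descriptor whose ownership transfers to the
 * application, which is responsible for closing it.
 */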
1715
1716 VkResult anv_GetMemoryFdPropertiesKHR(
1717 VkDevice _device,
1718 VkExternalMemoryHandleTypeFlagBits handleType,
1719 int fd,
1720 VkMemoryFdPropertiesKHR* pMemoryFdProperties)
1721 {
1722 ANV_FROM_HANDLE(anv_device, device, _device);
1723
1724 switch (handleType) {
1725 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
1726 /* dma-buf can be imported as any memory type */
1727 pMemoryFdProperties->memoryTypeBits =
1728 (1 << device->physical->memory.type_count) - 1;
1729 return VK_SUCCESS;
1730
1731 default:
1732 /* The valid usage section for this function says:
1733 *
1734 * "handleType must not be one of the handle types defined as
1735 * opaque."
1736 *
1737 * So opaque handle types fall into the default "unsupported" case.
1738 */
1739 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1740 }
1741 }
1742
1743 VkResult anv_GetMemoryHostPointerPropertiesEXT(
1744 VkDevice _device,
1745 VkExternalMemoryHandleTypeFlagBits handleType,
1746 const void* pHostPointer,
1747 VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
1748 {
1749 ANV_FROM_HANDLE(anv_device, device, _device);
1750
1751 assert(pMemoryHostPointerProperties->sType ==
1752 VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
1753
1754 switch (handleType) {
1755 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1756 /* Host memory can be imported as any memory type. */
1757 pMemoryHostPointerProperties->memoryTypeBits =
1758 (1ull << device->physical->memory.type_count) - 1;
1759
1760 return VK_SUCCESS;
1761
1762 default:
1763 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1764 }
1765 }
1766
1767 void anv_FreeMemory(
1768 VkDevice _device,
1769 VkDeviceMemory _mem,
1770 const VkAllocationCallbacks* pAllocator)
1771 {
1772 ANV_FROM_HANDLE(anv_device, device, _device);
1773 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1774
1775 if (mem == NULL)
1776 return;
1777
1778 pthread_mutex_lock(&device->mutex);
1779 list_del(&mem->link);
1780 pthread_mutex_unlock(&device->mutex);
1781
1782 if (mem->map) {
1783 const VkMemoryUnmapInfoKHR unmap = {
1784 .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR,
1785 .memory = _mem,
1786 };
1787 anv_UnmapMemory2KHR(_device, &unmap);
1788 }
1789
1790 p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
1791 -mem->bo->size);
1792
1793 anv_device_release_bo(device, mem->bo);
1794
1795 ANV_RMV(resource_destroy, device, mem);
1796
1797 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
1798 }
1799
1800 VkResult anv_MapMemory2KHR(
1801 VkDevice _device,
1802 const VkMemoryMapInfoKHR* pMemoryMapInfo,
1803 void** ppData)
1804 {
1805 ANV_FROM_HANDLE(anv_device, device, _device);
1806 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryMapInfo->memory);
1807
1808 if (mem == NULL) {
1809 *ppData = NULL;
1810 return VK_SUCCESS;
1811 }
1812
1813 if (mem->vk.host_ptr) {
1814 *ppData = mem->vk.host_ptr + pMemoryMapInfo->offset;
1815 return VK_SUCCESS;
1816 }
1817
1818 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1819 *
1820 * * memory must have been created with a memory type that reports
1821 * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
1822 */
1823 if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
1824 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1825 "Memory object not mappable.");
1826 }
1827
1828 assert(pMemoryMapInfo->size > 0);
1829 const VkDeviceSize offset = pMemoryMapInfo->offset;
1830 const VkDeviceSize size =
1831 vk_device_memory_range(&mem->vk, pMemoryMapInfo->offset,
1832 pMemoryMapInfo->size);
1833
1834 if (size != (size_t)size) {
1835 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1836 "requested size 0x%"PRIx64" does not fit in %u bits",
1837 size, (unsigned)(sizeof(size_t) * 8));
1838 }
1839
1840 /* From the Vulkan 1.2.194 spec:
1841 *
1842 * "memory must not be currently host mapped"
1843 */
1844 if (mem->map != NULL) {
1845 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
1846 "Memory object already mapped.");
1847 }
1848
1849 void *placed_addr = NULL;
1850 if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
1851 const VkMemoryMapPlacedInfoEXT *placed_info =
1852 vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
1853 assert(placed_info != NULL);
1854 placed_addr = placed_info->pPlacedAddress;
1855 }
1856
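/* BO mappings can only be made at properly aligned offsets/sizes, so
 * anv_sanitize_map_params() widens the requested [offset, size) range
 * accordingly; map_delta records where the requested offset falls inside the
 * actual mapping so the pointer handed back to the application is exact.
 */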
1857 uint64_t map_offset, map_size;
1858 anv_sanitize_map_params(device, offset, size, &map_offset, &map_size);
1859
1860 void *map;
1861 VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
1862 map_size, placed_addr, &map);
1863 if (result != VK_SUCCESS)
1864 return result;
1865
1866 mem->map = map;
1867 mem->map_size = map_size;
1868 mem->map_delta = (offset - map_offset);
1869 *ppData = mem->map + mem->map_delta;
1870
1871 return VK_SUCCESS;
1872 }
1873
1874 VkResult anv_UnmapMemory2KHR(
1875 VkDevice _device,
1876 const VkMemoryUnmapInfoKHR* pMemoryUnmapInfo)
1877 {
1878 ANV_FROM_HANDLE(anv_device, device, _device);
1879 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryUnmapInfo->memory);
1880
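/* Host-pointer imports are returned directly from vkMapMemory (no BO mapping
 * is created for them), so there is nothing to unmap here.
 */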
1881 if (mem == NULL || mem->vk.host_ptr)
1882 return VK_SUCCESS;
1883
1884 VkResult result =
1885 anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size,
1886 pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
1887 if (result != VK_SUCCESS)
1888 return result;
1889
1890 mem->map = NULL;
1891 mem->map_size = 0;
1892 mem->map_delta = 0;
1893
1894 return VK_SUCCESS;
1895 }
1896
1897 VkResult anv_FlushMappedMemoryRanges(
1898 VkDevice _device,
1899 uint32_t memoryRangeCount,
1900 const VkMappedMemoryRange* pMemoryRanges)
1901 {
1902 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1903 ANV_FROM_HANDLE(anv_device, device, _device);
1904
1905 if (!device->physical->memory.need_flush)
1906 return VK_SUCCESS;
1907
1908 /* Make sure the writes we're flushing have landed. */
1909 __builtin_ia32_mfence();
1910
1911 for (uint32_t i = 0; i < memoryRangeCount; i++) {
1912 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
1913 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1914 continue;
1915
1916 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
1917 if (map_offset >= mem->map_size)
1918 continue;
1919
1920 intel_flush_range(mem->map + map_offset,
1921 MIN2(pMemoryRanges[i].size,
1922 mem->map_size - map_offset));
1923 }
1924 #endif
1925 return VK_SUCCESS;
1926 }
1927
1928 VkResult anv_InvalidateMappedMemoryRanges(
1929 VkDevice _device,
1930 uint32_t memoryRangeCount,
1931 const VkMappedMemoryRange* pMemoryRanges)
1932 {
1933 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1934 ANV_FROM_HANDLE(anv_device, device, _device);
1935
1936 if (!device->physical->memory.need_flush)
1937 return VK_SUCCESS;
1938
1939 for (uint32_t i = 0; i < memoryRangeCount; i++) {
1940 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
1941 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
1942 continue;
1943
1944 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
1945 if (map_offset >= mem->map_size)
1946 continue;
1947
1948 intel_invalidate_range(mem->map + map_offset,
1949 MIN2(pMemoryRanges[i].size,
1950 mem->map_size - map_offset));
1951 }
1952
1953 /* Make sure no reads get moved up above the invalidate. */
1954 __builtin_ia32_mfence();
1955 #endif
1956 return VK_SUCCESS;
1957 }
1958
1959 void anv_GetDeviceMemoryCommitment(
1960 VkDevice device,
1961 VkDeviceMemory memory,
1962 VkDeviceSize* pCommittedMemoryInBytes)
1963 {
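/* anv does not advertise VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, so memory
 * objects are never partially committed and there is no lazy commitment to
 * report.
 */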
1964 *pCommittedMemoryInBytes = 0;
1965 }
1966
1967 static inline clockid_t
1968 anv_get_default_cpu_clock_id(void)
1969 {
1970 #ifdef CLOCK_MONOTONIC_RAW
1971 return CLOCK_MONOTONIC_RAW;
1972 #else
1973 return CLOCK_MONOTONIC;
1974 #endif
1975 }
1976
1977 static inline clockid_t
1978 vk_time_domain_to_clockid(VkTimeDomainKHR domain)
1979 {
1980 switch (domain) {
1981 #ifdef CLOCK_MONOTONIC_RAW
1982 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
1983 return CLOCK_MONOTONIC_RAW;
1984 #endif
1985 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
1986 return CLOCK_MONOTONIC;
1987 default:
1988 unreachable("Missing");
1989 return CLOCK_MONOTONIC;
1990 }
1991 }
1992
1993 static inline bool
1994 is_cpu_time_domain(VkTimeDomainKHR domain)
1995 {
1996 return domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR ||
1997 domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
1998 }
1999
2000 static inline bool
2001 is_gpu_time_domain(VkTimeDomainKHR domain)
2002 {
2003 return domain == VK_TIME_DOMAIN_DEVICE_KHR;
2004 }
2005
2006 VkResult anv_GetCalibratedTimestampsKHR(
2007 VkDevice _device,
2008 uint32_t timestampCount,
2009 const VkCalibratedTimestampInfoKHR *pTimestampInfos,
2010 uint64_t *pTimestamps,
2011 uint64_t *pMaxDeviation)
2012 {
2013 ANV_FROM_HANDLE(anv_device, device, _device);
2014 const uint64_t timestamp_frequency = device->info->timestamp_frequency;
2015 const uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
2016 uint32_t d, increment;
2017 uint64_t begin, end;
2018 uint64_t max_clock_period = 0;
2019 const enum intel_kmd_type kmd_type = device->physical->info.kmd_type;
2020 const bool has_correlate_timestamp = kmd_type == INTEL_KMD_TYPE_XE;
2021 clockid_t cpu_clock_id = -1;
2022
2023 begin = end = vk_clock_gettime(anv_get_default_cpu_clock_id());
2024
2025 for (d = 0, increment = 1; d < timestampCount; d += increment) {
2026 const VkTimeDomainKHR current = pTimestampInfos[d].timeDomain;
2027 /* If we have a request pattern like this:
2028 * - domain0 = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR or VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR
2029 * - domain1 = VK_TIME_DOMAIN_DEVICE_KHR
2030 * - domain2 = domain0 (optional)
2031 *
2032 * We can combine all of those into a single ioctl for maximum accuracy.
2033 */
2034 if (has_correlate_timestamp && (d + 1) < timestampCount) {
2035 const VkTimeDomainKHR next = pTimestampInfos[d + 1].timeDomain;
2036
2037 if ((is_cpu_time_domain(current) && is_gpu_time_domain(next)) ||
2038 (is_gpu_time_domain(current) && is_cpu_time_domain(next))) {
2039 /* We'll consume at least 2 elements. */
2040 increment = 2;
2041
2042 if (is_cpu_time_domain(current))
2043 cpu_clock_id = vk_time_domain_to_clockid(current);
2044 else
2045 cpu_clock_id = vk_time_domain_to_clockid(next);
2046
2047 uint64_t cpu_timestamp, gpu_timestamp, cpu_delta_timestamp, cpu_end_timestamp;
2048 if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
2049 kmd_type,
2050 INTEL_ENGINE_CLASS_RENDER,
2051 0 /* engine_instance */,
2052 cpu_clock_id,
2053 &cpu_timestamp,
2054 &gpu_timestamp,
2055 &cpu_delta_timestamp))
2056 return vk_device_set_lost(&device->vk, "Failed to read correlate timestamp %m");
2057
2058 cpu_end_timestamp = cpu_timestamp + cpu_delta_timestamp;
2059 if (is_cpu_time_domain(current)) {
2060 pTimestamps[d] = cpu_timestamp;
2061 pTimestamps[d + 1] = gpu_timestamp;
2062 } else {
2063 pTimestamps[d] = gpu_timestamp;
2064 pTimestamps[d + 1] = cpu_end_timestamp;
2065 }
2066 max_clock_period = MAX2(max_clock_period, device_period);
2067
2068 /* If we can consume a third element */
2069 if ((d + 2) < timestampCount &&
2070 is_cpu_time_domain(current) &&
2071 current == pTimestampInfos[d + 2].timeDomain) {
2072 pTimestamps[d + 2] = cpu_end_timestamp;
2073 increment++;
2074 }
2075
2076 /* If we're the first element, we can replace begin */
2077 if (d == 0 && cpu_clock_id == anv_get_default_cpu_clock_id())
2078 begin = cpu_timestamp;
2079
2080 /* If we're in the same clock domain as begin/end, we can set the end. */
2081 if (cpu_clock_id == anv_get_default_cpu_clock_id())
2082 end = cpu_end_timestamp;
2083
2084 continue;
2085 }
2086 }
2087
2088 /* fallback to regular method */
2089 increment = 1;
2090 switch (current) {
2091 case VK_TIME_DOMAIN_DEVICE_KHR:
2092 if (!intel_gem_read_render_timestamp(device->fd,
2093 device->info->kmd_type,
2094 &pTimestamps[d])) {
2095 return vk_device_set_lost(&device->vk, "Failed to read the "
2096 "TIMESTAMP register: %m");
2097 }
2098 max_clock_period = MAX2(max_clock_period, device_period);
2099 break;
2100 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
2101 pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
2102 max_clock_period = MAX2(max_clock_period, 1);
2103 break;
2104
2105 #ifdef CLOCK_MONOTONIC_RAW
2106 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
2107 pTimestamps[d] = begin;
2108 break;
2109 #endif
2110 default:
2111 pTimestamps[d] = 0;
2112 break;
2113 }
2114 }
2115
2116 /* If the last timestamp was not obtained with the correlated-timestamp method,
2117 * or if it was but its CPU clock is not the default one, get the time again.
2118 */
2119 if (increment == 1 || cpu_clock_id != anv_get_default_cpu_clock_id())
2120 end = vk_clock_gettime(anv_get_default_cpu_clock_id());
2121
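/* The reported deviation must cover the CPU time that elapsed between the
 * first (begin) and last (end) samples plus the period of the coarsest
 * clock sampled, which is what vk_time_max_deviation() computes below.
 */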
2122 *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
2123
2124 return VK_SUCCESS;
2125 }
2126
2127 const struct intel_device_info_pat_entry *
2128 anv_device_get_pat_entry(struct anv_device *device,
2129 enum anv_bo_alloc_flags alloc_flags)
2130 {
2131 if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
2132 return &device->info->pat.cached_coherent;
2133
2134 if (alloc_flags & ANV_BO_ALLOC_COMPRESSED)
2135 return &device->info->pat.compressed;
2136
2137 if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT))
2138 return &device->info->pat.scanout;
2139
2140 /* PAT indexes have no actual effect on DG2 and DG1: smem will always be
2141 * snooped by the GPU and lmem will always be WC.
2142 * This might change on future discrete platforms.
2143 */
2144 if (anv_physical_device_has_vram(device->physical)) {
2145 if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
2146 return &device->info->pat.cached_coherent;
2147 return &device->info->pat.writecombining;
2148 }
2149
2150 /* Handling for integrated platforms only */
2151 if ((alloc_flags & (ANV_BO_ALLOC_HOST_CACHED_COHERENT)) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
2152 return &device->info->pat.cached_coherent;
2153 else if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
2154 return &device->info->pat.writeback_incoherent;
2155 else
2156 return &device->info->pat.writecombining;
2157 }
2158