1 /*
2 * Copyright © 2022 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <anv_private.h>
25
/* Sparse binding handling.
 *
 * There is one main structure passed around all over this file:
 *
 * - struct anv_sparse_binding_data: every resource (VkBuffer or VkImage) has
 *   a pointer to an instance of this structure. It contains the virtual
 *   memory address (VMA) used by the binding operations (which is different
 *   from the VMA used by the anv_bo it's bound to) and the VMA range size. We
 *   do not keep a record of our list of bindings (which ranges were bound
 *   to which buffers).
 */
37
38 __attribute__((format(printf, 1, 2)))
39 static void
sparse_debug(const char * format,...)40 sparse_debug(const char *format, ...)
41 {
42 if (!INTEL_DEBUG(DEBUG_SPARSE))
43 return;
44
45 va_list args;
46 va_start(args, format);
47 vfprintf(stderr, format, args);
48 va_end(args);
49 }
50
51 static void
dump_anv_vm_bind(struct anv_device * device,const struct anv_vm_bind * bind)52 dump_anv_vm_bind(struct anv_device *device,
53 const struct anv_vm_bind *bind)
54 {
55 sparse_debug("[%s] ", bind->op == ANV_VM_BIND ? " bind " : "unbind");
56
57 if (bind->bo)
58 sparse_debug("bo:%04u ", bind->bo->gem_handle);
59 else
60 sparse_debug("bo:---- ");
61 sparse_debug("address:%016"PRIx64" size:%08"PRIx64" "
62 "mem_offset:%08"PRIx64"\n",
63 bind->address, bind->size, bind->bo_offset);
64 }
65
66 static void
dump_vk_sparse_memory_bind(const VkSparseMemoryBind * bind)67 dump_vk_sparse_memory_bind(const VkSparseMemoryBind *bind)
68 {
69 if (!INTEL_DEBUG(DEBUG_SPARSE))
70 return;
71
72 if (bind->memory != VK_NULL_HANDLE) {
73 struct anv_bo *bo = anv_device_memory_from_handle(bind->memory)->bo;
74 sparse_debug("bo:%04u ", bo->gem_handle);
75 } else {
76 sparse_debug("bo:---- ");
77 }
78
79 sparse_debug("res_offset:%08"PRIx64" size:%08"PRIx64" "
80 "mem_offset:%08"PRIx64" flags:0x%08x\n",
81 bind->resourceOffset, bind->size, bind->memoryOffset,
82 bind->flags);
83 }
84
85 static void
dump_anv_image(struct anv_image * i)86 dump_anv_image(struct anv_image *i)
87 {
88 if (!INTEL_DEBUG(DEBUG_SPARSE))
89 return;
90
91 sparse_debug("anv_image:\n");
92 sparse_debug("- format: %d\n", i->vk.format);
93 sparse_debug("- extent: [%d, %d, %d]\n",
94 i->vk.extent.width, i->vk.extent.height, i->vk.extent.depth);
95 sparse_debug("- mip_levels: %d array_layers: %d samples: %d\n",
96 i->vk.mip_levels, i->vk.array_layers, i->vk.samples);
97 sparse_debug("- n_planes: %d\n", i->n_planes);
98 sparse_debug("- disjoint: %d\n", i->disjoint);
99 }
100
101 static void
dump_isl_surf(struct isl_surf * s)102 dump_isl_surf(struct isl_surf *s)
103 {
104 if (!INTEL_DEBUG(DEBUG_SPARSE))
105 return;
106
107 sparse_debug("isl_surf:\n");
108
109 const char *dim_s = s->dim == ISL_SURF_DIM_1D ? "1D" :
110 s->dim == ISL_SURF_DIM_2D ? "2D" :
111 s->dim == ISL_SURF_DIM_3D ? "3D" :
112 "(ERROR)";
113 sparse_debug("- dim: %s\n", dim_s);
114 sparse_debug("- tiling: %d (%s)\n", s->tiling,
115 isl_tiling_to_name(s->tiling));
116 sparse_debug("- format: %s\n", isl_format_get_short_name(s->format));
117 sparse_debug("- image_alignment_el: [%d, %d, %d]\n",
118 s->image_alignment_el.w, s->image_alignment_el.h,
119 s->image_alignment_el.d);
120 sparse_debug("- logical_level0_px: [%d, %d, %d, %d]\n",
121 s->logical_level0_px.w,
122 s->logical_level0_px.h,
123 s->logical_level0_px.d,
124 s->logical_level0_px.a);
125 sparse_debug("- phys_level0_sa: [%d, %d, %d, %d]\n",
126 s->phys_level0_sa.w,
127 s->phys_level0_sa.h,
128 s->phys_level0_sa.d,
129 s->phys_level0_sa.a);
130 sparse_debug("- levels: %d samples: %d\n", s->levels, s->samples);
131 sparse_debug("- size_B: %"PRIu64" alignment_B: %u\n",
132 s->size_B, s->alignment_B);
133 sparse_debug("- row_pitch_B: %u\n", s->row_pitch_B);
134 sparse_debug("- array_pitch_el_rows: %u\n", s->array_pitch_el_rows);
135
136 const struct isl_format_layout *layout = isl_format_get_layout(s->format);
137 sparse_debug("- format layout:\n");
138 sparse_debug(" - format:%d bpb:%d bw:%d bh:%d bd:%d\n",
139 layout->format, layout->bpb, layout->bw, layout->bh,
140 layout->bd);
141
142 struct isl_tile_info tile_info;
143 isl_surf_get_tile_info(s, &tile_info);
144
145 sparse_debug("- tile info:\n");
146 sparse_debug(" - format_bpb: %d\n", tile_info.format_bpb);
147 sparse_debug(" - logical_extent_el: [%d, %d, %d, %d]\n",
148 tile_info.logical_extent_el.w,
149 tile_info.logical_extent_el.h,
150 tile_info.logical_extent_el.d,
151 tile_info.logical_extent_el.a);
152 sparse_debug(" - phys_extent_B: [%d, %d]\n",
153 tile_info.phys_extent_B.w,
154 tile_info.phys_extent_B.h);
155 }
156
157 static VkOffset3D
vk_offset3d_px_to_el(const VkOffset3D offset_px,const struct isl_format_layout * layout)158 vk_offset3d_px_to_el(const VkOffset3D offset_px,
159 const struct isl_format_layout *layout)
160 {
161 return (VkOffset3D) {
162 .x = offset_px.x / layout->bw,
163 .y = offset_px.y / layout->bh,
164 .z = offset_px.z / layout->bd,
165 };
166 }
167
168 static VkOffset3D
vk_offset3d_el_to_px(const VkOffset3D offset_el,const struct isl_format_layout * layout)169 vk_offset3d_el_to_px(const VkOffset3D offset_el,
170 const struct isl_format_layout *layout)
171 {
172 return (VkOffset3D) {
173 .x = offset_el.x * layout->bw,
174 .y = offset_el.y * layout->bh,
175 .z = offset_el.z * layout->bd,
176 };
177 }
178
179 static VkExtent3D
vk_extent3d_px_to_el(const VkExtent3D extent_px,const struct isl_format_layout * layout)180 vk_extent3d_px_to_el(const VkExtent3D extent_px,
181 const struct isl_format_layout *layout)
182 {
183 return (VkExtent3D) {
184 .width = extent_px.width / layout->bw,
185 .height = extent_px.height / layout->bh,
186 .depth = extent_px.depth / layout->bd,
187 };
188 }
189
190 static VkExtent3D
vk_extent3d_el_to_px(const VkExtent3D extent_el,const struct isl_format_layout * layout)191 vk_extent3d_el_to_px(const VkExtent3D extent_el,
192 const struct isl_format_layout *layout)
193 {
194 return (VkExtent3D) {
195 .width = extent_el.width * layout->bw,
196 .height = extent_el.height * layout->bh,
197 .depth = extent_el.depth * layout->bd,
198 };
199 }
200
201 static bool
isl_tiling_supports_standard_block_shapes(enum isl_tiling tiling)202 isl_tiling_supports_standard_block_shapes(enum isl_tiling tiling)
203 {
204 return isl_tiling_is_64(tiling) ||
205 tiling == ISL_TILING_ICL_Ys ||
206 tiling == ISL_TILING_SKL_Ys;
207 }
208
209 static VkExtent3D
anv_sparse_get_standard_image_block_shape(enum isl_format format,VkImageType image_type,uint16_t texel_size)210 anv_sparse_get_standard_image_block_shape(enum isl_format format,
211 VkImageType image_type,
212 uint16_t texel_size)
213 {
214 const struct isl_format_layout *layout = isl_format_get_layout(format);
215 VkExtent3D block_shape = { .width = 0, .height = 0, .depth = 0 };
216
217 switch (image_type) {
218 case VK_IMAGE_TYPE_1D:
219 /* 1D images don't have a standard block format. */
220 assert(false);
221 break;
222 case VK_IMAGE_TYPE_2D:
223 switch (texel_size) {
224 case 8:
225 block_shape = (VkExtent3D) { .width = 256, .height = 256, .depth = 1 };
226 break;
227 case 16:
228 block_shape = (VkExtent3D) { .width = 256, .height = 128, .depth = 1 };
229 break;
230 case 32:
231 block_shape = (VkExtent3D) { .width = 128, .height = 128, .depth = 1 };
232 break;
233 case 64:
234 block_shape = (VkExtent3D) { .width = 128, .height = 64, .depth = 1 };
235 break;
236 case 128:
237 block_shape = (VkExtent3D) { .width = 64, .height = 64, .depth = 1 };
238 break;
239 default:
240 fprintf(stderr, "unexpected texel_size %d\n", texel_size);
241 assert(false);
242 }
243 break;
244 case VK_IMAGE_TYPE_3D:
245 switch (texel_size) {
246 case 8:
247 block_shape = (VkExtent3D) { .width = 64, .height = 32, .depth = 32 };
248 break;
249 case 16:
250 block_shape = (VkExtent3D) { .width = 32, .height = 32, .depth = 32 };
251 break;
252 case 32:
253 block_shape = (VkExtent3D) { .width = 32, .height = 32, .depth = 16 };
254 break;
255 case 64:
256 block_shape = (VkExtent3D) { .width = 32, .height = 16, .depth = 16 };
257 break;
258 case 128:
259 block_shape = (VkExtent3D) { .width = 16, .height = 16, .depth = 16 };
260 break;
261 default:
262 fprintf(stderr, "unexpected texel_size %d\n", texel_size);
263 assert(false);
264 }
265 break;
266 default:
267 fprintf(stderr, "unexpected image_type %d\n", image_type);
268 assert(false);
269 }
270
271 return vk_extent3d_el_to_px(block_shape, layout);
272 }
273
274 /* Adds "bind_op" to the list in "submit", while also trying to check if we
275 * can just extend the last operation instead.
276 */
277 static VkResult
anv_sparse_submission_add(struct anv_device * device,struct anv_sparse_submission * submit,struct anv_vm_bind * bind_op)278 anv_sparse_submission_add(struct anv_device *device,
279 struct anv_sparse_submission *submit,
280 struct anv_vm_bind *bind_op)
281 {
282 struct anv_vm_bind *prev_bind = submit->binds_len == 0 ? NULL :
283 &submit->binds[submit->binds_len - 1];
284
285 if (prev_bind &&
286 bind_op->op == prev_bind->op &&
287 bind_op->bo == prev_bind->bo &&
288 bind_op->address == prev_bind->address + prev_bind->size &&
289 (bind_op->bo_offset == prev_bind->bo_offset + prev_bind->size ||
290 prev_bind->bo == NULL)) {
291 prev_bind->size += bind_op->size;
292 return VK_SUCCESS;
293 }
294
295 if (submit->binds_len < submit->binds_capacity) {
296 submit->binds[submit->binds_len++] = *bind_op;
297 return VK_SUCCESS;
298 }
299
300 int new_capacity = MAX2(32, submit->binds_capacity * 2);
301 struct anv_vm_bind *new_binds =
302 vk_realloc(&device->vk.alloc, submit->binds,
303 new_capacity * sizeof(*new_binds), 8,
304 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
305 if (!new_binds)
306 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
307
308 new_binds[submit->binds_len] = *bind_op;
309
310 submit->binds = new_binds;
311 submit->binds_len++;
312 submit->binds_capacity = new_capacity;
313
314 return VK_SUCCESS;
315 }
316
317 /* We really want to try to have all the page tables on as few BOs as possible
318 * to benefit from cache locality and to keep the i915.ko relocation lists
319 * small. On the other hand, we don't want to waste memory on unused space.
320 */
321 #define ANV_TRTT_PAGE_TABLE_BO_SIZE (2 * 1024 * 1024)
322
323 static VkResult
trtt_make_page_table_bo(struct anv_device * device,struct anv_bo ** bo)324 trtt_make_page_table_bo(struct anv_device *device, struct anv_bo **bo)
325 {
326 VkResult result;
327 struct anv_trtt *trtt = &device->trtt;
328
329 result = anv_device_alloc_bo(device, "trtt-page-table",
330 ANV_TRTT_PAGE_TABLE_BO_SIZE, 0, 0, bo);
331 if (result != VK_SUCCESS)
332 return result;
333
334 if (trtt->num_page_table_bos < trtt->page_table_bos_capacity) {
335 trtt->page_table_bos[trtt->num_page_table_bos++] = *bo;
336 } else {
337
338 int new_capacity = MAX2(8, trtt->page_table_bos_capacity * 2);
339 struct anv_bo **new_page_table_bos =
340 vk_realloc(&device->vk.alloc, trtt->page_table_bos,
341 new_capacity * sizeof(*trtt->page_table_bos), 8,
342 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
343 if (!new_page_table_bos) {
344 anv_device_release_bo(device, *bo);
345 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
346 }
347
348 new_page_table_bos[trtt->num_page_table_bos] = *bo;
349
350 trtt->page_table_bos = new_page_table_bos;
351 trtt->page_table_bos_capacity = new_capacity;
352 trtt->num_page_table_bos++;
353 }
354
355 trtt->cur_page_table_bo = *bo;
356 trtt->next_page_table_bo_offset = 0;
357
358 sparse_debug("new number of page table BOs: %d\n",
359 trtt->num_page_table_bos);
360
361 return VK_SUCCESS;
362 }
363
364 static VkResult
trtt_get_page_table_bo(struct anv_device * device,struct anv_bo ** bo,uint64_t * bo_addr)365 trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo,
366 uint64_t *bo_addr)
367 {
368 struct anv_trtt *trtt = &device->trtt;
369 VkResult result;
370
371 if (!trtt->cur_page_table_bo) {
372 result = trtt_make_page_table_bo(device, bo);
373 if (result != VK_SUCCESS)
374 return result;
375 }
376
377 *bo = trtt->cur_page_table_bo;
378 *bo_addr = trtt->cur_page_table_bo->offset +
379 trtt->next_page_table_bo_offset;
380
381 trtt->next_page_table_bo_offset += 4096;
382 if (trtt->next_page_table_bo_offset >= ANV_TRTT_PAGE_TABLE_BO_SIZE)
383 trtt->cur_page_table_bo = NULL;
384
385 return VK_SUCCESS;
386 }
387
388 static VkResult
anv_trtt_init_context_state(struct anv_queue * queue)389 anv_trtt_init_context_state(struct anv_queue *queue)
390 {
391 struct anv_device *device = queue->device;
392 struct anv_trtt *trtt = &device->trtt;
393
394 struct drm_syncobj_create create = {
395 .handle = 0,
396 .flags = 0,
397 };
398 if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
399 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
400 assert(create.handle != 0);
401 trtt->timeline_handle = create.handle;
402
403 struct anv_bo *l3_bo;
404 VkResult result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr);
405 if (result != VK_SUCCESS)
406 return result;
407
408 trtt->l3_mirror = vk_zalloc(&device->vk.alloc, 4096, 8,
409 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
410 if (!trtt->l3_mirror) {
411 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
412 return result;
413 }
414
415 /* L3 has 512 entries, so we can have up to 512 L2 tables. */
416 trtt->l2_mirror = vk_zalloc(&device->vk.alloc, 512 * 4096, 8,
417 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
418 if (!trtt->l2_mirror) {
419 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
420 goto fail_free_l3;
421 }
422
423 result = anv_genX(device->info, init_trtt_context_state)(queue);
424
425 return result;
426
427 fail_free_l3:
428 vk_free(&device->vk.alloc, trtt->l3_mirror);
429 return result;
430 }
431
432 static void
anv_trtt_bind_list_add_entry(struct anv_trtt_bind * binds,int * binds_len,uint64_t pte_addr,uint64_t entry_addr)433 anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, int *binds_len,
434 uint64_t pte_addr, uint64_t entry_addr)
435 {
436 binds[*binds_len] = (struct anv_trtt_bind) {
437 .pte_addr = pte_addr,
438 .entry_addr = entry_addr,
439 };
440 (*binds_len)++;
441 }
442
443 /* For L3 and L2 pages, null and invalid entries are indicated by bits 1 and 0
444 * respectively. For L1 entries, the hardware compares the addresses against
445 * what we program to the GFX_TRTT_NULL and GFX_TRTT_INVAL registers.
446 */
447 #define ANV_TRTT_L3L2_NULL_ENTRY (1 << 1)
448 #define ANV_TRTT_L3L2_INVALID_ENTRY (1 << 0)
449
450 /* Adds elements to the anv_trtt_bind structs passed. This doesn't write the
451 * entries to the HW yet.
452 */
453 static VkResult
anv_trtt_bind_add(struct anv_device * device,uint64_t trtt_addr,uint64_t dest_addr,struct anv_trtt_submission * s)454 anv_trtt_bind_add(struct anv_device *device,
455 uint64_t trtt_addr, uint64_t dest_addr,
456 struct anv_trtt_submission *s)
457 {
458 VkResult result = VK_SUCCESS;
459 struct anv_trtt *trtt = &device->trtt;
460 bool is_null_bind = dest_addr == ANV_TRTT_L1_NULL_TILE_VAL;
461
462 int l3_index = (trtt_addr >> 35) & 0x1FF;
463 int l2_index = (trtt_addr >> 26) & 0x1FF;
464 int l1_index = (trtt_addr >> 16) & 0x3FF;
465
466 uint64_t l2_addr = trtt->l3_mirror[l3_index];
467 if (l2_addr == ANV_TRTT_L3L2_NULL_ENTRY && is_null_bind) {
468 return VK_SUCCESS;
469 } else if (l2_addr == 0 || l2_addr == ANV_TRTT_L3L2_NULL_ENTRY) {
470 if (is_null_bind) {
471 trtt->l3_mirror[l3_index] = ANV_TRTT_L3L2_NULL_ENTRY;
472
473 anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
474 trtt->l3_addr + l3_index * sizeof(uint64_t),
475 ANV_TRTT_L3L2_NULL_ENTRY);
476
477 return VK_SUCCESS;
478 }
479
480 struct anv_bo *l2_bo;
481 result = trtt_get_page_table_bo(device, &l2_bo, &l2_addr);
482 if (result != VK_SUCCESS)
483 return result;
484
485 trtt->l3_mirror[l3_index] = l2_addr;
486
487 anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
488 trtt->l3_addr + l3_index * sizeof(uint64_t), l2_addr);
489 }
490 assert(l2_addr != 0 && l2_addr != ANV_TRTT_L3L2_NULL_ENTRY);
491
492 /* The first page in the l2_mirror corresponds to l3_index=0 and so on. */
493 uint64_t l1_addr = trtt->l2_mirror[l3_index * 512 + l2_index];
494 if (l1_addr == ANV_TRTT_L3L2_NULL_ENTRY && is_null_bind) {
495 return VK_SUCCESS;
496 } else if (l1_addr == 0 || l1_addr == ANV_TRTT_L3L2_NULL_ENTRY) {
497 if (is_null_bind) {
498 trtt->l2_mirror[l3_index * 512 + l2_index] =
499 ANV_TRTT_L3L2_NULL_ENTRY;
500
501 anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
502 l2_addr + l2_index * sizeof(uint64_t),
503 ANV_TRTT_L3L2_NULL_ENTRY);
504
505 return VK_SUCCESS;
506 }
507
508 struct anv_bo *l1_bo;
509 result = trtt_get_page_table_bo(device, &l1_bo, &l1_addr);
510 if (result != VK_SUCCESS)
511 return result;
512
513 trtt->l2_mirror[l3_index * 512 + l2_index] = l1_addr;
514
515 anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len,
516 l2_addr + l2_index * sizeof(uint64_t), l1_addr);
517 }
518 assert(l1_addr != 0 && l1_addr != ANV_TRTT_L3L2_NULL_ENTRY);
519
520 anv_trtt_bind_list_add_entry(s->l1_binds, &s->l1_binds_len,
521 l1_addr + l1_index * sizeof(uint32_t), dest_addr);
522
523 return VK_SUCCESS;
524 }
525
526 static VkResult
anv_sparse_bind_trtt(struct anv_device * device,struct anv_sparse_submission * sparse_submit)527 anv_sparse_bind_trtt(struct anv_device *device,
528 struct anv_sparse_submission *sparse_submit)
529 {
530 struct anv_trtt *trtt = &device->trtt;
531 VkResult result;
532
533 /* TR-TT submission needs a queue even when the API entry point doesn't
534 * give one, such as resource creation. */
535 if (!sparse_submit->queue)
536 sparse_submit->queue = trtt->queue;
537
538 /* These capacities are conservative estimations. For L1 binds the
539 * number will match exactly unless we skip NULL binds due to L2 already
540 * being NULL. For L3/L2 things are harder to estimate, but the resulting
541 * numbers are so small that a little overestimation won't hurt.
542 *
543 * We have assertions below to catch estimation errors.
544 */
545 int l3l2_binds_capacity = 1;
546 int l1_binds_capacity = 0;
547 for (int b = 0; b < sparse_submit->binds_len; b++) {
548 assert(sparse_submit->binds[b].size % (64 * 1024) == 0);
549 int pages = sparse_submit->binds[b].size / (64 * 1024);
550 l1_binds_capacity += pages;
551 l3l2_binds_capacity += (pages / 1024 + 1) * 2;
552 }
553
554 STACK_ARRAY(struct anv_trtt_bind, l3l2_binds, l3l2_binds_capacity);
555 STACK_ARRAY(struct anv_trtt_bind, l1_binds, l1_binds_capacity);
556 struct anv_trtt_submission trtt_submit = {
557 .sparse = sparse_submit,
558 .l3l2_binds = l3l2_binds,
559 .l1_binds = l1_binds,
560 .l3l2_binds_len = 0,
561 .l1_binds_len = 0,
562 };
563
564 pthread_mutex_lock(&trtt->mutex);
565
566 if (!trtt->l3_addr)
567 anv_trtt_init_context_state(sparse_submit->queue);
568
569 assert(trtt->l3_addr);
570
571 for (int b = 0; b < sparse_submit->binds_len; b++) {
572 struct anv_vm_bind *vm_bind = &sparse_submit->binds[b];
573 for (size_t i = 0; i < vm_bind->size; i += 64 * 1024) {
574 uint64_t trtt_addr = vm_bind->address + i;
575 uint64_t dest_addr =
576 (vm_bind->op == ANV_VM_BIND && vm_bind->bo) ?
577 vm_bind->bo->offset + vm_bind->bo_offset + i :
578 ANV_TRTT_L1_NULL_TILE_VAL;
579
580 result = anv_trtt_bind_add(device, trtt_addr, dest_addr,
581 &trtt_submit);
582 if (result != VK_SUCCESS)
583 goto out;
584 }
585 }
586
587 assert(trtt_submit.l3l2_binds_len <= l3l2_binds_capacity);
588 assert(trtt_submit.l1_binds_len <= l1_binds_capacity);
589
590 sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n",
591 sparse_submit->binds_len, trtt_submit.l3l2_binds_len,
592 trtt_submit.l1_binds_len);
593
594 if (trtt_submit.l3l2_binds_len || trtt_submit.l1_binds_len)
595 result = anv_genX(device->info, write_trtt_entries)(&trtt_submit);
596
597 if (result == VK_SUCCESS)
598 ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
599
600 out:
601 pthread_mutex_unlock(&trtt->mutex);
602 STACK_ARRAY_FINISH(l1_binds);
603 STACK_ARRAY_FINISH(l3l2_binds);
604 return result;
605 }
606
607 static VkResult
anv_sparse_bind_vm_bind(struct anv_device * device,struct anv_sparse_submission * submit)608 anv_sparse_bind_vm_bind(struct anv_device *device,
609 struct anv_sparse_submission *submit)
610 {
611 struct anv_queue *queue = submit->queue;
612 VkResult result;
613
614 if (!queue)
615 assert(submit->wait_count == 0 && submit->signal_count == 0);
616
617 /* TODO: make both the syncs and signals be passed as part of the vm_bind
618 * ioctl so they can be waited asynchronously. For now this doesn't matter
619 * as we're doing synchronous vm_bind, but later when we make it async this
620 * will make a difference.
621 */
622 result = vk_sync_wait_many(&device->vk, submit->wait_count, submit->waits,
623 VK_SYNC_WAIT_COMPLETE, INT64_MAX);
624 if (result != VK_SUCCESS)
625 return vk_queue_set_lost(&queue->vk, "vk_sync_wait failed");
626
627 /* FIXME: here we were supposed to issue a single vm_bind ioctl by calling
628 * vm_bind(device, num_binds, binds), but for an unknown reason some
629 * shader-related tests fail when we do that, so work around it for now.
630 * See: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/746
631 */
632 for (int b = 0; b < submit->binds_len; b++) {
633 struct anv_sparse_submission s = {
634 .queue = submit->queue,
635 .binds = &submit->binds[b],
636 .binds_len = 1,
637 .binds_capacity = 1,
638 .wait_count = 0,
639 .signal_count = 0,
640 };
641 int rc = device->kmd_backend->vm_bind(device, &s);
642 if (rc)
643 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
644 }
645
646 for (uint32_t i = 0; i < submit->signal_count; i++) {
647 struct vk_sync_signal *s = &submit->signals[i];
648 result = vk_sync_signal(&device->vk, s->sync, s->signal_value);
649 if (result != VK_SUCCESS)
650 return vk_queue_set_lost(&queue->vk, "vk_sync_signal failed");
651 }
652
653 return VK_SUCCESS;
654 }
655
656 VkResult
anv_sparse_bind(struct anv_device * device,struct anv_sparse_submission * submit)657 anv_sparse_bind(struct anv_device *device,
658 struct anv_sparse_submission *submit)
659 {
660 if (INTEL_DEBUG(DEBUG_SPARSE)) {
661 for (int b = 0; b < submit->binds_len; b++)
662 dump_anv_vm_bind(device, &submit->binds[b]);
663 }
664
665 return device->physical->sparse_uses_trtt ?
666 anv_sparse_bind_trtt(device, submit) :
667 anv_sparse_bind_vm_bind(device, submit);
668 }
669
670 VkResult
anv_init_sparse_bindings(struct anv_device * device,uint64_t size_,struct anv_sparse_binding_data * sparse,enum anv_bo_alloc_flags alloc_flags,uint64_t client_address,struct anv_address * out_address)671 anv_init_sparse_bindings(struct anv_device *device,
672 uint64_t size_,
673 struct anv_sparse_binding_data *sparse,
674 enum anv_bo_alloc_flags alloc_flags,
675 uint64_t client_address,
676 struct anv_address *out_address)
677 {
678 uint64_t size = align64(size_, ANV_SPARSE_BLOCK_SIZE);
679
680 if (device->physical->sparse_uses_trtt)
681 alloc_flags |= ANV_BO_ALLOC_TRTT;
682
683 sparse->address = anv_vma_alloc(device, size, ANV_SPARSE_BLOCK_SIZE,
684 alloc_flags,
685 intel_48b_address(client_address),
686 &sparse->vma_heap);
687 sparse->size = size;
688
689 out_address->bo = NULL;
690 out_address->offset = sparse->address;
691
692 struct anv_vm_bind bind = {
693 .bo = NULL, /* That's a NULL binding. */
694 .address = sparse->address,
695 .bo_offset = 0,
696 .size = size,
697 .op = ANV_VM_BIND,
698 };
699 struct anv_sparse_submission submit = {
700 .queue = NULL,
701 .binds = &bind,
702 .binds_len = 1,
703 .binds_capacity = 1,
704 .wait_count = 0,
705 .signal_count = 0,
706 };
707 VkResult res = anv_sparse_bind(device, &submit);
708 if (res != VK_SUCCESS) {
709 anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
710 return res;
711 }
712
713 return VK_SUCCESS;
714 }
715
716 VkResult
anv_free_sparse_bindings(struct anv_device * device,struct anv_sparse_binding_data * sparse)717 anv_free_sparse_bindings(struct anv_device *device,
718 struct anv_sparse_binding_data *sparse)
719 {
720 if (!sparse->address)
721 return VK_SUCCESS;
722
723 sparse_debug("%s: address:0x%016"PRIx64" size:0x%08"PRIx64"\n",
724 __func__, sparse->address, sparse->size);
725
726 struct anv_vm_bind unbind = {
727 .bo = 0,
728 .address = sparse->address,
729 .bo_offset = 0,
730 .size = sparse->size,
731 .op = ANV_VM_UNBIND,
732 };
733 struct anv_sparse_submission submit = {
734 .queue = NULL,
735 .binds = &unbind,
736 .binds_len = 1,
737 .binds_capacity = 1,
738 .wait_count = 0,
739 .signal_count = 0,
740 };
741 VkResult res = anv_sparse_bind(device, &submit);
742 if (res != VK_SUCCESS)
743 return res;
744
745 anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
746
747 return VK_SUCCESS;
748 }
749
750 static VkExtent3D
anv_sparse_calc_block_shape(struct anv_physical_device * pdevice,struct isl_surf * surf)751 anv_sparse_calc_block_shape(struct anv_physical_device *pdevice,
752 struct isl_surf *surf)
753 {
754 const struct isl_format_layout *layout =
755 isl_format_get_layout(surf->format);
756 const int Bpb = layout->bpb / 8;
757
758 struct isl_tile_info tile_info;
759 isl_surf_get_tile_info(surf, &tile_info);
760
761 VkExtent3D block_shape_el = {
762 .width = tile_info.logical_extent_el.width,
763 .height = tile_info.logical_extent_el.height,
764 .depth = tile_info.logical_extent_el.depth,
765 };
766 VkExtent3D block_shape_px = vk_extent3d_el_to_px(block_shape_el, layout);
767
768 if (surf->tiling == ISL_TILING_LINEAR) {
769 uint32_t elements_per_row = surf->row_pitch_B /
770 (block_shape_el.width * Bpb);
771 uint32_t rows_per_tile = ANV_SPARSE_BLOCK_SIZE /
772 (elements_per_row * Bpb);
773 assert(rows_per_tile * elements_per_row * Bpb == ANV_SPARSE_BLOCK_SIZE);
774
775 block_shape_px = (VkExtent3D) {
776 .width = elements_per_row * layout->bw,
777 .height = rows_per_tile * layout->bh,
778 .depth = layout->bd,
779 };
780 }
781
782 return block_shape_px;
783 }
784
785 VkSparseImageFormatProperties
anv_sparse_calc_image_format_properties(struct anv_physical_device * pdevice,VkImageAspectFlags aspect,VkImageType vk_image_type,struct isl_surf * surf)786 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
787 VkImageAspectFlags aspect,
788 VkImageType vk_image_type,
789 struct isl_surf *surf)
790 {
791 const struct isl_format_layout *isl_layout =
792 isl_format_get_layout(surf->format);
793 const int bpb = isl_layout->bpb;
794 assert(bpb == 8 || bpb == 16 || bpb == 32 || bpb == 64 ||bpb == 128);
795 const int Bpb = bpb / 8;
796
797 VkExtent3D granularity = anv_sparse_calc_block_shape(pdevice, surf);
798 bool is_standard = false;
799 bool is_known_nonstandard_format = false;
800
801 if (vk_image_type != VK_IMAGE_TYPE_1D) {
802 VkExtent3D std_shape =
803 anv_sparse_get_standard_image_block_shape(surf->format, vk_image_type,
804 bpb);
805 /* YUV formats don't work with Tile64, which is required if we want to
806 * claim standard block shapes. The spec requires us to support all
807 * non-compressed color formats that non-sparse supports, so we can't
808 * just say YUV formats are not supported by Sparse. So we end
809 * supporting this format and anv_sparse_calc_miptail_properties() will
810 * say that everything is part of the miptail.
811 *
812 * For more details on the hardware restriction, please check
813 * isl_gfx125_filter_tiling().
814 */
815 if (pdevice->info.verx10 >= 125 && isl_format_is_yuv(surf->format))
816 is_known_nonstandard_format = true;
817
818 /* The standard block shapes (and by extension, the tiling formats they
819 * require) are simply incompatible with getting a 2D view of a 3D
820 * image.
821 */
822 if (surf->usage & ISL_SURF_USAGE_2D_3D_COMPATIBLE_BIT)
823 is_known_nonstandard_format = true;
824
825 is_standard = granularity.width == std_shape.width &&
826 granularity.height == std_shape.height &&
827 granularity.depth == std_shape.depth;
828
829 /* TODO: dEQP seems to care about the block shapes being standard even
830 * for the cases where is_known_nonstandard_format is true. Luckily as
831 * of today all of those cases are NotSupported but sooner or later we
832 * may end up getting a failure.
833 * Notice that in practice we report these cases as having the mip tail
834 * starting on mip level 0, so the reported block shapes are irrelevant
835 * since non-opaque binds are not supported. Still, dEQP seems to care.
836 */
837 assert(is_standard || is_known_nonstandard_format);
838 }
839
840 uint32_t block_size = granularity.width * granularity.height *
841 granularity.depth * Bpb;
842 bool wrong_block_size = block_size != ANV_SPARSE_BLOCK_SIZE;
843
844 return (VkSparseImageFormatProperties) {
845 .aspectMask = aspect,
846 .imageGranularity = granularity,
847 .flags = ((is_standard || is_known_nonstandard_format) ? 0 :
848 VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT) |
849 (wrong_block_size ? VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT :
850 0),
851 };
852 }
853
/* The miptail is supposed to be this region where the tiniest mip levels
 * are squished together in one single page, which should save us some memory.
 * It's a hardware feature which our hardware supports on certain tiling
 * formats - the ones we always want to use for sparse resources.
 *
 * For sparse, the main feature of the miptail is that it only supports opaque
 * binds, so you either bind the whole miptail or you bind nothing at all,
 * there are no subresources inside it to separately bind. While the idea is
 * that the miptail as reported by sparse should match what our hardware does,
 * in practice we can say in our sparse functions that certain mip levels are
 * part of the miptail while from the point of view of our hardware they
 * aren't.
 *
 * If we detect we're using the sparse-friendly tiling formats and ISL
 * supports miptails for them, we can just trust the miptail level set by ISL
 * and things can proceed as The Spec intended.
 *
 * However, if that's not the case, we have to go on a best-effort policy. We
 * could simply declare that every mip level is part of the miptail and be
 * done, but since that kinda defeats the purpose of Sparse we try to find
 * what level we really should be reporting as the first miptail level based
 * on the alignments of the surface subresources.
 */
877 void
anv_sparse_calc_miptail_properties(struct anv_device * device,struct anv_image * image,VkImageAspectFlags vk_aspect,uint32_t * imageMipTailFirstLod,VkDeviceSize * imageMipTailSize,VkDeviceSize * imageMipTailOffset,VkDeviceSize * imageMipTailStride)878 anv_sparse_calc_miptail_properties(struct anv_device *device,
879 struct anv_image *image,
880 VkImageAspectFlags vk_aspect,
881 uint32_t *imageMipTailFirstLod,
882 VkDeviceSize *imageMipTailSize,
883 VkDeviceSize *imageMipTailOffset,
884 VkDeviceSize *imageMipTailStride)
885 {
886 assert(__builtin_popcount(vk_aspect) == 1);
887 const uint32_t plane = anv_image_aspect_to_plane(image, vk_aspect);
888 struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
889 uint64_t binding_plane_offset =
890 image->planes[plane].primary_surface.memory_range.offset;
891 const struct isl_format_layout *isl_layout =
892 isl_format_get_layout(surf->format);
893 const int Bpb = isl_layout->bpb / 8;
894 struct isl_tile_info tile_info;
895 isl_surf_get_tile_info(surf, &tile_info);
896 uint32_t tile_size = tile_info.logical_extent_el.width * Bpb *
897 tile_info.logical_extent_el.height *
898 tile_info.logical_extent_el.depth;
899
900 uint64_t layer1_offset;
901 uint32_t x_off, y_off;
902
903 /* Treat the whole thing as a single miptail. We should have already
904 * reported this image as VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT.
905 *
906 * In theory we could try to make ISL massage the alignments so that we
907 * could at least claim mip level 0 to be not part of the miptail, but
908 * that could end up wasting a lot of memory, so it's better to do
909 * nothing and focus our efforts into making things use the appropriate
910 * tiling formats that give us the standard block shapes.
911 */
912 if (tile_size != ANV_SPARSE_BLOCK_SIZE)
913 goto out_everything_is_miptail;
914
915 assert(surf->tiling != ISL_TILING_LINEAR);
916
917 if (image->vk.array_layers == 1) {
918 layer1_offset = surf->size_B;
919 } else {
920 isl_surf_get_image_offset_B_tile_sa(surf, 0, 1, 0, &layer1_offset,
921 &x_off, &y_off);
922 if (x_off || y_off)
923 goto out_everything_is_miptail;
924 }
925 assert(layer1_offset % tile_size == 0);
926
927 /* We could try to do better here, but there's not really any point since
928 * we should be supporting the appropriate tiling formats everywhere.
929 */
930 if (!isl_tiling_supports_standard_block_shapes(surf->tiling))
931 goto out_everything_is_miptail;
932
933 int miptail_first_level = surf->miptail_start_level;
934 if (miptail_first_level >= image->vk.mip_levels)
935 goto out_no_miptail;
936
937 uint64_t miptail_offset = 0;
938 isl_surf_get_image_offset_B_tile_sa(surf, miptail_first_level, 0, 0,
939 &miptail_offset,
940 &x_off, &y_off);
941 assert(x_off == 0 && y_off == 0);
942 assert(miptail_offset % tile_size == 0);
943
944 *imageMipTailFirstLod = miptail_first_level;
945 *imageMipTailSize = tile_size;
946 *imageMipTailOffset = binding_plane_offset + miptail_offset;
947 *imageMipTailStride = layer1_offset;
948 goto out_debug;
949
950 out_no_miptail:
951 *imageMipTailFirstLod = image->vk.mip_levels;
952 *imageMipTailSize = 0;
953 *imageMipTailOffset = 0;
954 *imageMipTailStride = 0;
955 goto out_debug;
956
957 out_everything_is_miptail:
958 *imageMipTailFirstLod = 0;
959 *imageMipTailSize = surf->size_B;
960 *imageMipTailOffset = binding_plane_offset;
961 *imageMipTailStride = 0;
962
963 out_debug:
964 sparse_debug("miptail first_lod:%d size:%"PRIu64" offset:%"PRIu64" "
965 "stride:%"PRIu64"\n",
966 *imageMipTailFirstLod, *imageMipTailSize,
967 *imageMipTailOffset, *imageMipTailStride);
968 }
969
970 static struct anv_vm_bind
vk_bind_to_anv_vm_bind(struct anv_sparse_binding_data * sparse,const struct VkSparseMemoryBind * vk_bind)971 vk_bind_to_anv_vm_bind(struct anv_sparse_binding_data *sparse,
972 const struct VkSparseMemoryBind *vk_bind)
973 {
974 struct anv_vm_bind anv_bind = {
975 .bo = NULL,
976 .address = sparse->address + vk_bind->resourceOffset,
977 .bo_offset = 0,
978 .size = vk_bind->size,
979 .op = ANV_VM_BIND,
980 };
981
982 assert(vk_bind->size);
983 assert(vk_bind->resourceOffset + vk_bind->size <= sparse->size);
984
985 if (vk_bind->memory != VK_NULL_HANDLE) {
986 anv_bind.bo = anv_device_memory_from_handle(vk_bind->memory)->bo;
987 anv_bind.bo_offset = vk_bind->memoryOffset,
988 assert(vk_bind->memoryOffset + vk_bind->size <= anv_bind.bo->size);
989 }
990
991 return anv_bind;
992 }
993
994 static VkResult
anv_sparse_bind_resource_memory(struct anv_device * device,struct anv_sparse_binding_data * sparse,uint64_t resource_size,const VkSparseMemoryBind * vk_bind,struct anv_sparse_submission * submit)995 anv_sparse_bind_resource_memory(struct anv_device *device,
996 struct anv_sparse_binding_data *sparse,
997 uint64_t resource_size,
998 const VkSparseMemoryBind *vk_bind,
999 struct anv_sparse_submission *submit)
1000 {
1001 struct anv_vm_bind bind = vk_bind_to_anv_vm_bind(sparse, vk_bind);
1002 uint64_t rem = vk_bind->size % ANV_SPARSE_BLOCK_SIZE;
1003
1004 if (rem != 0) {
1005 if (vk_bind->resourceOffset + vk_bind->size == resource_size)
1006 bind.size += ANV_SPARSE_BLOCK_SIZE - rem;
1007 else
1008 return vk_error(device, VK_ERROR_VALIDATION_FAILED_EXT);
1009 }
1010
1011 return anv_sparse_submission_add(device, submit, &bind);
1012 }
1013
1014 VkResult
anv_sparse_bind_buffer(struct anv_device * device,struct anv_buffer * buffer,const VkSparseMemoryBind * vk_bind,struct anv_sparse_submission * submit)1015 anv_sparse_bind_buffer(struct anv_device *device,
1016 struct anv_buffer *buffer,
1017 const VkSparseMemoryBind *vk_bind,
1018 struct anv_sparse_submission *submit)
1019 {
1020 return anv_sparse_bind_resource_memory(device, &buffer->sparse_data,
1021 buffer->vk.size,
1022 vk_bind, submit);
1023 }
1024
1025 VkResult
anv_sparse_bind_image_opaque(struct anv_device * device,struct anv_image * image,const VkSparseMemoryBind * vk_bind,struct anv_sparse_submission * submit)1026 anv_sparse_bind_image_opaque(struct anv_device *device,
1027 struct anv_image *image,
1028 const VkSparseMemoryBind *vk_bind,
1029 struct anv_sparse_submission *submit)
1030 {
1031 struct anv_image_binding *b =
1032 &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN];
1033 assert(!image->disjoint);
1034
1035 return anv_sparse_bind_resource_memory(device, &b->sparse_data,
1036 b->memory_range.size,
1037 vk_bind, submit);
1038 }
1039
1040 VkResult
anv_sparse_bind_image_memory(struct anv_queue * queue,struct anv_image * image,const VkSparseImageMemoryBind * bind,struct anv_sparse_submission * submit)1041 anv_sparse_bind_image_memory(struct anv_queue *queue,
1042 struct anv_image *image,
1043 const VkSparseImageMemoryBind *bind,
1044 struct anv_sparse_submission *submit)
1045 {
1046 struct anv_device *device = queue->device;
1047 VkImageAspectFlags aspect = bind->subresource.aspectMask;
1048 uint32_t mip_level = bind->subresource.mipLevel;
1049 uint32_t array_layer = bind->subresource.arrayLayer;
1050
1051 assert(__builtin_popcount(aspect) == 1);
1052 assert(!(bind->flags & VK_SPARSE_MEMORY_BIND_METADATA_BIT));
1053
1054 struct anv_image_binding *img_binding = image->disjoint ?
1055 anv_image_aspect_to_binding(image, aspect) :
1056 &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN];
1057 struct anv_sparse_binding_data *sparse_data = &img_binding->sparse_data;
1058
1059 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1060 struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
1061 uint64_t binding_plane_offset =
1062 image->planes[plane].primary_surface.memory_range.offset;
1063 const struct isl_format_layout *layout =
1064 isl_format_get_layout(surf->format);
1065 struct isl_tile_info tile_info;
1066 isl_surf_get_tile_info(surf, &tile_info);
1067
1068 sparse_debug("\n=== [%s:%d] [%s] BEGIN\n", __FILE__, __LINE__, __func__);
1069 sparse_debug("--> mip_level:%d array_layer:%d\n",
1070 mip_level, array_layer);
1071 sparse_debug("aspect:0x%x plane:%d\n", aspect, plane);
1072 sparse_debug("binding offset: [%d, %d, %d] extent: [%d, %d, %d]\n",
1073 bind->offset.x, bind->offset.y, bind->offset.z,
1074 bind->extent.width, bind->extent.height, bind->extent.depth);
1075 dump_anv_image(image);
1076 dump_isl_surf(surf);
1077
1078 VkExtent3D block_shape_px =
1079 anv_sparse_calc_block_shape(device->physical, surf);
1080 VkExtent3D block_shape_el = vk_extent3d_px_to_el(block_shape_px, layout);
1081
1082 /* Both bind->offset and bind->extent are in pixel units. */
1083 VkOffset3D bind_offset_el = vk_offset3d_px_to_el(bind->offset, layout);
1084
1085 /* The spec says we only really need to align if for a given coordinate
1086 * offset + extent equals the corresponding dimensions of the image
1087 * subresource, but all the other non-aligned usage is invalid, so just
1088 * align everything.
1089 */
1090 VkExtent3D bind_extent_px = {
1091 .width = ALIGN_NPOT(bind->extent.width, block_shape_px.width),
1092 .height = ALIGN_NPOT(bind->extent.height, block_shape_px.height),
1093 .depth = ALIGN_NPOT(bind->extent.depth, block_shape_px.depth),
1094 };
1095 VkExtent3D bind_extent_el = vk_extent3d_px_to_el(bind_extent_px, layout);
1096
1097 /* A sparse block should correspond to our tile size, so this has to be
1098 * either 4k or 64k depending on the tiling format. */
1099 const uint64_t block_size_B = block_shape_el.width * (layout->bpb / 8) *
1100 block_shape_el.height *
1101 block_shape_el.depth;
1102 /* How many blocks are necessary to form a whole line on this image? */
1103 const uint32_t blocks_per_line = surf->row_pitch_B / (layout->bpb / 8) /
1104 block_shape_el.width;
1105 /* The loop below will try to bind a whole line of blocks at a time as
1106 * they're guaranteed to be contiguous, so we calculate how many blocks
1107 * that is and how big is each block to figure the bind size of a whole
1108 * line.
1109 */
1110 uint64_t line_bind_size_in_blocks = bind_extent_el.width /
1111 block_shape_el.width;
1112 uint64_t line_bind_size = line_bind_size_in_blocks * block_size_B;
1113 assert(line_bind_size_in_blocks != 0);
1114 assert(line_bind_size != 0);
1115
1116 uint64_t memory_offset = bind->memoryOffset;
1117 for (uint32_t z = bind_offset_el.z;
1118 z < bind_offset_el.z + bind_extent_el.depth;
1119 z += block_shape_el.depth) {
1120 uint64_t subresource_offset_B;
1121 uint32_t subresource_x_offset, subresource_y_offset;
1122 isl_surf_get_image_offset_B_tile_sa(surf, mip_level, array_layer, z,
1123 &subresource_offset_B,
1124 &subresource_x_offset,
1125 &subresource_y_offset);
1126 assert(subresource_x_offset == 0 && subresource_y_offset == 0);
1127 assert(subresource_offset_B % block_size_B == 0);
1128
1129 for (uint32_t y = bind_offset_el.y;
1130 y < bind_offset_el.y + bind_extent_el.height;
1131 y+= block_shape_el.height) {
1132 uint32_t line_block_offset = y / block_shape_el.height *
1133 blocks_per_line;
1134 uint64_t line_start_B = subresource_offset_B +
1135 line_block_offset * block_size_B;
1136 uint64_t bind_offset_B = line_start_B +
1137 (bind_offset_el.x / block_shape_el.width) *
1138 block_size_B;
1139
1140 VkSparseMemoryBind opaque_bind = {
1141 .resourceOffset = binding_plane_offset + bind_offset_B,
1142 .size = line_bind_size,
1143 .memory = bind->memory,
1144 .memoryOffset = memory_offset,
1145 .flags = bind->flags,
1146 };
1147
1148 memory_offset += line_bind_size;
1149
1150 assert(line_start_B % block_size_B == 0);
1151 assert(opaque_bind.resourceOffset % block_size_B == 0);
1152 assert(opaque_bind.size % block_size_B == 0);
1153
1154 struct anv_vm_bind anv_bind = vk_bind_to_anv_vm_bind(sparse_data,
1155 &opaque_bind);
1156 VkResult result = anv_sparse_submission_add(device, submit,
1157 &anv_bind);
1158 if (result != VK_SUCCESS)
1159 return result;
1160 }
1161 }
1162
1163 sparse_debug("\n=== [%s:%d] [%s] END\n", __FILE__, __LINE__, __func__);
1164 return VK_SUCCESS;
1165 }
1166
1167 VkResult
anv_sparse_image_check_support(struct anv_physical_device * pdevice,VkImageCreateFlags flags,VkImageTiling tiling,VkSampleCountFlagBits samples,VkImageType type,VkFormat vk_format)1168 anv_sparse_image_check_support(struct anv_physical_device *pdevice,
1169 VkImageCreateFlags flags,
1170 VkImageTiling tiling,
1171 VkSampleCountFlagBits samples,
1172 VkImageType type,
1173 VkFormat vk_format)
1174 {
1175 assert(flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT);
1176
1177 /* The spec says:
1178 * "A sparse image created using VK_IMAGE_CREATE_SPARSE_BINDING_BIT (but
1179 * not VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) supports all formats that
1180 * non-sparse usage supports, and supports both VK_IMAGE_TILING_OPTIMAL
1181 * and VK_IMAGE_TILING_LINEAR tiling."
1182 */
1183 if (!(flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
1184 return VK_SUCCESS;
1185
1186 /* From here on, these are the rules:
1187 * "A sparse image created using VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT
1188 * supports all non-compressed color formats with power-of-two element
1189 * size that non-sparse usage supports. Additional formats may also be
1190 * supported and can be queried via
1191 * vkGetPhysicalDeviceSparseImageFormatProperties.
1192 * VK_IMAGE_TILING_LINEAR tiling is not supported."
1193 */
1194
1195 /* We choose not to support sparse residency on emulated compressed
1196 * formats due to the additional image plane. It would make the
1197 * implementation extremely complicated.
1198 */
1199 if (anv_is_format_emulated(pdevice, vk_format))
1200 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1201
1202 /* While the spec itself says linear is not supported (see above), deqp-vk
1203 * tries anyway to create linear sparse images, so we have to check for it.
1204 * This is also said in VUID-VkImageCreateInfo-tiling-04121:
1205 * "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain
1206 * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
1207 */
1208 if (tiling == VK_IMAGE_TILING_LINEAR)
1209 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1210
1211 /* TODO: not supported yet. */
1212 if (samples != VK_SAMPLE_COUNT_1_BIT)
1213 return VK_ERROR_FEATURE_NOT_PRESENT;
1214
1215 /* While the Vulkan spec allows us to support depth/stencil sparse images
1216 * everywhere, sometimes we're not able to have them with the tiling
1217 * formats that give us the standard block shapes. Having standard block
1218 * shapes is higher priority than supporting depth/stencil sparse images.
1219 *
1220 * Please see ISL's filter_tiling() functions for accurate explanations on
1221 * why depth/stencil images are not always supported with the tiling
1222 * formats we want. But in short: depth/stencil support in our HW is
1223 * limited to 2D and we can't build a 2D view of a 3D image with these
1224 * tiling formats due to the address swizzling being different.
1225 */
1226 VkImageAspectFlags aspects = vk_format_aspects(vk_format);
1227 if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1228 /* For 125+, isl_gfx125_filter_tiling() claims 3D is not supported.
1229 * For the previous platforms, isl_gfx6_filter_tiling() says only 2D is
1230 * supported.
1231 */
1232 if (pdevice->info.verx10 >= 125) {
1233 if (type == VK_IMAGE_TYPE_3D)
1234 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1235 } else {
1236 if (type != VK_IMAGE_TYPE_2D)
1237 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1238 }
1239 }
1240
1241 const struct anv_format *anv_format = anv_get_format(vk_format);
1242 if (!anv_format)
1243 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1244
1245 for (int p = 0; p < anv_format->n_planes; p++) {
1246 enum isl_format isl_format = anv_format->planes[p].isl_format;
1247
1248 if (isl_format == ISL_FORMAT_UNSUPPORTED)
1249 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1250
1251 const struct isl_format_layout *isl_layout =
1252 isl_format_get_layout(isl_format);
1253
1254 /* As quoted above, we only need to support the power-of-two formats.
1255 * The problem with the non-power-of-two formats is that we need an
1256 * integer number of pixels to fit into a sparse block, so we'd need the
1257 * sparse block sizes to be, for example, 192k for 24bpp.
1258 *
1259 * TODO: add support for these formats.
1260 */
1261 if (isl_layout->bpb != 8 && isl_layout->bpb != 16 &&
1262 isl_layout->bpb != 32 && isl_layout->bpb != 64 &&
1263 isl_layout->bpb != 128)
1264 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1265 }
1266
1267 /* These YUV formats are considered by Vulkan to be compressed 2x1 blocks.
1268 * We don't need to support them since they're compressed. On Gfx12 we
1269 * can't even have Tile64 for them. Once we do support these formats we'll
1270 * have to report the correct block shapes because dEQP cares about them,
1271 * and we'll have to adjust for the fact that ISL treats these as 16bpp 1x1
1272 * blocks instead of 32bpp 2x1 compressed blocks (as block shapes are
1273 * reported in units of compressed blocks).
1274 */
1275 if (vk_format == VK_FORMAT_G8B8G8R8_422_UNORM ||
1276 vk_format == VK_FORMAT_B8G8R8G8_422_UNORM)
1277 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1278
1279 return VK_SUCCESS;
1280 }
1281
1282 static VkResult
anv_trtt_garbage_collect_batches(struct anv_device * device)1283 anv_trtt_garbage_collect_batches(struct anv_device *device)
1284 {
1285 struct anv_trtt *trtt = &device->trtt;
1286
1287 if (trtt->timeline_val % 8 != 7)
1288 return VK_SUCCESS;
1289
1290 uint64_t cur_timeline_val = 0;
1291 struct drm_syncobj_timeline_array array = {
1292 .handles = (uintptr_t)&trtt->timeline_handle,
1293 .points = (uintptr_t)&cur_timeline_val,
1294 .count_handles = 1,
1295 .flags = 0,
1296 };
1297 if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_QUERY, &array))
1298 return vk_error(device, VK_ERROR_UNKNOWN);
1299
1300 list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo,
1301 &trtt->in_flight_batches, link) {
1302 if (trtt_bbo->timeline_val > cur_timeline_val)
1303 return VK_SUCCESS;
1304
1305 anv_trtt_batch_bo_free(device, trtt_bbo);
1306 }
1307
1308 return VK_SUCCESS;
1309 }
1310
1311 VkResult
anv_trtt_batch_bo_new(struct anv_device * device,uint32_t batch_size,struct anv_trtt_batch_bo ** out_trtt_bbo)1312 anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
1313 struct anv_trtt_batch_bo **out_trtt_bbo)
1314 {
1315 struct anv_trtt *trtt = &device->trtt;
1316 VkResult result;
1317
1318 anv_trtt_garbage_collect_batches(device);
1319
1320 struct anv_trtt_batch_bo *trtt_bbo =
1321 vk_alloc(&device->vk.alloc, sizeof(*trtt_bbo), 8,
1322 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1323 if (!trtt_bbo)
1324 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1325
1326 result = anv_bo_pool_alloc(&device->batch_bo_pool, batch_size,
1327 &trtt_bbo->bo);
1328 if (result != VK_SUCCESS)
1329 goto out;
1330
1331 trtt_bbo->size = batch_size;
1332 trtt_bbo->timeline_val = ++trtt->timeline_val;
1333
1334 list_addtail(&trtt_bbo->link, &trtt->in_flight_batches);
1335
1336 *out_trtt_bbo = trtt_bbo;
1337
1338 return VK_SUCCESS;
1339 out:
1340 vk_free(&device->vk.alloc, trtt_bbo);
1341 return result;
1342 }
1343