1 /*
2 * Copyright © 2022 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <anv_private.h>
25
/* Sparse binding handling.
 *
 * There is one main structure passed around all over this file:
 *
 * - struct anv_sparse_binding_data: every resource (VkBuffer or VkImage) has
 *   a pointer to an instance of this structure. It contains the virtual
 *   memory address (VMA) used by the binding operations (which is different
 *   from the VMA used by the anv_bo it's bound to) and the VMA range size. We
 *   do not keep a record of our list of bindings (which ranges were bound
 *   to which buffers).
 */
37
38 __attribute__((format(printf, 1, 2)))
39 static void
sparse_debug(const char * format,...)40 sparse_debug(const char *format, ...)
41 {
42 if (!INTEL_DEBUG(DEBUG_SPARSE))
43 return;
44
45 va_list args;
46 va_start(args, format);
47 vfprintf(stderr, format, args);
48 va_end(args);
49 }
50
51 static void
dump_anv_vm_bind(struct anv_device * device,const struct anv_vm_bind * bind)52 dump_anv_vm_bind(struct anv_device *device,
53 const struct anv_vm_bind *bind)
54 {
55 sparse_debug("[%s] ", bind->op == ANV_VM_BIND ? " bind " : "unbind");
56
57 if (bind->bo)
58 sparse_debug("bo:%04u ", bind->bo->gem_handle);
59 else
60 sparse_debug("bo:---- ");
61 sparse_debug("address:%016"PRIx64" size:%08"PRIx64" "
62 "mem_offset:%08"PRIx64"\n",
63 bind->address, bind->size, bind->bo_offset);
64 }
65
66 static void
dump_anv_image(struct anv_image * i)67 dump_anv_image(struct anv_image *i)
68 {
69 if (!INTEL_DEBUG(DEBUG_SPARSE))
70 return;
71
72 sparse_debug("anv_image:\n");
73 sparse_debug("- format: %d\n", i->vk.format);
74 sparse_debug("- extent: [%d, %d, %d]\n",
75 i->vk.extent.width, i->vk.extent.height, i->vk.extent.depth);
76 sparse_debug("- mip_levels: %d array_layers: %d samples: %d\n",
77 i->vk.mip_levels, i->vk.array_layers, i->vk.samples);
78 sparse_debug("- n_planes: %d\n", i->n_planes);
79 sparse_debug("- disjoint: %d\n", i->disjoint);
80 }
81
82 static void
dump_isl_surf(struct isl_surf * s)83 dump_isl_surf(struct isl_surf *s)
84 {
85 if (!INTEL_DEBUG(DEBUG_SPARSE))
86 return;
87
88 sparse_debug("isl_surf:\n");
89
90 const char *dim_s = s->dim == ISL_SURF_DIM_1D ? "1D" :
91 s->dim == ISL_SURF_DIM_2D ? "2D" :
92 s->dim == ISL_SURF_DIM_3D ? "3D" :
93 "(ERROR)";
94 sparse_debug("- dim: %s\n", dim_s);
95 sparse_debug("- tiling: %d (%s)\n", s->tiling,
96 isl_tiling_to_name(s->tiling));
97 sparse_debug("- format: %s\n", isl_format_get_short_name(s->format));
98 sparse_debug("- image_alignment_el: [%d, %d, %d]\n",
99 s->image_alignment_el.w, s->image_alignment_el.h,
100 s->image_alignment_el.d);
101 sparse_debug("- logical_level0_px: [%d, %d, %d, %d]\n",
102 s->logical_level0_px.w,
103 s->logical_level0_px.h,
104 s->logical_level0_px.d,
105 s->logical_level0_px.a);
106 sparse_debug("- phys_level0_sa: [%d, %d, %d, %d]\n",
107 s->phys_level0_sa.w,
108 s->phys_level0_sa.h,
109 s->phys_level0_sa.d,
110 s->phys_level0_sa.a);
111 sparse_debug("- levels: %d samples: %d\n", s->levels, s->samples);
112 sparse_debug("- size_B: %"PRIu64" alignment_B: %u\n",
113 s->size_B, s->alignment_B);
114 sparse_debug("- row_pitch_B: %u\n", s->row_pitch_B);
115 sparse_debug("- array_pitch_el_rows: %u\n", s->array_pitch_el_rows);
116
117 const struct isl_format_layout *layout = isl_format_get_layout(s->format);
118 sparse_debug("- format layout:\n");
119 sparse_debug(" - format:%d bpb:%d bw:%d bh:%d bd:%d\n",
120 layout->format, layout->bpb, layout->bw, layout->bh,
121 layout->bd);
122
123 struct isl_tile_info tile_info;
124 isl_surf_get_tile_info(s, &tile_info);
125
126 sparse_debug("- tile info:\n");
127 sparse_debug(" - format_bpb: %d\n", tile_info.format_bpb);
128 sparse_debug(" - logical_extent_el: [%d, %d, %d, %d]\n",
129 tile_info.logical_extent_el.w,
130 tile_info.logical_extent_el.h,
131 tile_info.logical_extent_el.d,
132 tile_info.logical_extent_el.a);
133 sparse_debug(" - phys_extent_B: [%d, %d]\n",
134 tile_info.phys_extent_B.w,
135 tile_info.phys_extent_B.h);
136 }
137
138 static VkOffset3D
vk_offset3d_px_to_el(const VkOffset3D offset_px,const struct isl_format_layout * layout)139 vk_offset3d_px_to_el(const VkOffset3D offset_px,
140 const struct isl_format_layout *layout)
141 {
142 return (VkOffset3D) {
143 .x = offset_px.x / layout->bw,
144 .y = offset_px.y / layout->bh,
145 .z = offset_px.z / layout->bd,
146 };
147 }
148
149 static VkOffset3D
vk_offset3d_el_to_px(const VkOffset3D offset_el,const struct isl_format_layout * layout)150 vk_offset3d_el_to_px(const VkOffset3D offset_el,
151 const struct isl_format_layout *layout)
152 {
153 return (VkOffset3D) {
154 .x = offset_el.x * layout->bw,
155 .y = offset_el.y * layout->bh,
156 .z = offset_el.z * layout->bd,
157 };
158 }
159
160 static VkExtent3D
vk_extent3d_px_to_el(const VkExtent3D extent_px,const struct isl_format_layout * layout)161 vk_extent3d_px_to_el(const VkExtent3D extent_px,
162 const struct isl_format_layout *layout)
163 {
164 return (VkExtent3D) {
165 .width = extent_px.width / layout->bw,
166 .height = extent_px.height / layout->bh,
167 .depth = extent_px.depth / layout->bd,
168 };
169 }
170
171 static VkExtent3D
vk_extent3d_el_to_px(const VkExtent3D extent_el,const struct isl_format_layout * layout)172 vk_extent3d_el_to_px(const VkExtent3D extent_el,
173 const struct isl_format_layout *layout)
174 {
175 return (VkExtent3D) {
176 .width = extent_el.width * layout->bw,
177 .height = extent_el.height * layout->bh,
178 .depth = extent_el.depth * layout->bd,
179 };
180 }
181
182 static bool
isl_tiling_supports_standard_block_shapes(enum isl_tiling tiling)183 isl_tiling_supports_standard_block_shapes(enum isl_tiling tiling)
184 {
185 return isl_tiling_is_64(tiling) ||
186 tiling == ISL_TILING_ICL_Ys ||
187 tiling == ISL_TILING_SKL_Ys;
188 }
189
190 static uint32_t
isl_calc_tile_size(struct isl_tile_info * tile_info)191 isl_calc_tile_size(struct isl_tile_info *tile_info)
192 {
193 uint32_t tile_size = tile_info->phys_extent_B.w *
194 tile_info->phys_extent_B.h;
195 assert(tile_size == 64 * 1024 || tile_size == 4096 || tile_size == 1);
196 return tile_size;
197 }
198
199 static const VkExtent3D block_shapes_2d_1sample[] = {
200 /* 8 bits: */ { .width = 256, .height = 256, .depth = 1 },
201 /* 16 bits: */ { .width = 256, .height = 128, .depth = 1 },
202 /* 32 bits: */ { .width = 128, .height = 128, .depth = 1 },
203 /* 64 bits: */ { .width = 128, .height = 64, .depth = 1 },
204 /* 128 bits: */ { .width = 64, .height = 64, .depth = 1 },
205 };
206 static const VkExtent3D block_shapes_3d_1sample[] = {
207 /* 8 bits: */ { .width = 64, .height = 32, .depth = 32 },
208 /* 16 bits: */ { .width = 32, .height = 32, .depth = 32 },
209 /* 32 bits: */ { .width = 32, .height = 32, .depth = 16 },
210 /* 64 bits: */ { .width = 32, .height = 16, .depth = 16 },
211 /* 128 bits: */ { .width = 16, .height = 16, .depth = 16 },
212 };
213 static const VkExtent3D block_shapes_2d_2samples[] = {
214 /* 8 bits: */ { .width = 128, .height = 256, .depth = 1 },
215 /* 16 bits: */ { .width = 128, .height = 128, .depth = 1 },
216 /* 32 bits: */ { .width = 64, .height = 128, .depth = 1 },
217 /* 64 bits: */ { .width = 64, .height = 64, .depth = 1 },
218 /* 128 bits: */ { .width = 32, .height = 64, .depth = 1 },
219 };
220 static const VkExtent3D block_shapes_2d_4samples[] = {
221 /* 8 bits: */ { .width = 128, .height = 128, .depth = 1 },
222 /* 16 bits: */ { .width = 128, .height = 64, .depth = 1 },
223 /* 32 bits: */ { .width = 64, .height = 64, .depth = 1 },
224 /* 64 bits: */ { .width = 64, .height = 32, .depth = 1 },
225 /* 128 bits: */ { .width = 32, .height = 32, .depth = 1 },
226 };
227 static const VkExtent3D block_shapes_2d_8samples[] = {
228 /* 8 bits: */ { .width = 64, .height = 128, .depth = 1 },
229 /* 16 bits: */ { .width = 64, .height = 64, .depth = 1 },
230 /* 32 bits: */ { .width = 32, .height = 64, .depth = 1 },
231 /* 64 bits: */ { .width = 32, .height = 32, .depth = 1 },
232 /* 128 bits: */ { .width = 16, .height = 32, .depth = 1 },
233 };
234 static const VkExtent3D block_shapes_2d_16samples[] = {
235 /* 8 bits: */ { .width = 64, .height = 64, .depth = 1 },
236 /* 16 bits: */ { .width = 64, .height = 32, .depth = 1 },
237 /* 32 bits: */ { .width = 32, .height = 32, .depth = 1 },
238 /* 64 bits: */ { .width = 32, .height = 16, .depth = 1 },
239 /* 128 bits: */ { .width = 16, .height = 16, .depth = 1 },
240 };
241
242 static VkExtent3D
anv_sparse_get_standard_image_block_shape(enum isl_format format,VkImageType image_type,VkSampleCountFlagBits samples,uint16_t texel_size)243 anv_sparse_get_standard_image_block_shape(enum isl_format format,
244 VkImageType image_type,
245 VkSampleCountFlagBits samples,
246 uint16_t texel_size)
247 {
248 const struct isl_format_layout *layout = isl_format_get_layout(format);
249 VkExtent3D block_shape = { .width = 0, .height = 0, .depth = 0 };
250
251 int table_idx = ffs(texel_size) - 4;
252
253 switch (samples) {
254 case VK_SAMPLE_COUNT_1_BIT:
255 switch (image_type) {
256 case VK_IMAGE_TYPE_1D:
257 /* 1D images don't have a standard block format. */
258 assert(false);
259 break;
260 case VK_IMAGE_TYPE_2D:
261 block_shape = block_shapes_2d_1sample[table_idx];
262 break;
263 case VK_IMAGE_TYPE_3D:
264 block_shape = block_shapes_3d_1sample[table_idx];
265 break;
266 default:
267 fprintf(stderr, "unexpected image_type %d\n", image_type);
268 assert(false);
269 }
270 break;
271 case VK_SAMPLE_COUNT_2_BIT:
272 block_shape = block_shapes_2d_2samples[table_idx];
273 break;
274 case VK_SAMPLE_COUNT_4_BIT:
275 block_shape = block_shapes_2d_4samples[table_idx];
276 break;
277 case VK_SAMPLE_COUNT_8_BIT:
278 block_shape = block_shapes_2d_8samples[table_idx];
279 break;
280 case VK_SAMPLE_COUNT_16_BIT:
281 block_shape = block_shapes_2d_16samples[table_idx];
282 break;
283 default:
284 fprintf(stderr, "unexpected sample count: %d\n", samples);
285 assert(false);
286 }
287
288 return vk_extent3d_el_to_px(block_shape, layout);
289 }
290
291 /* Adds "bind_op" to the list in "submit", while also trying to check if we
292 * can just extend the last operation instead.
293 */
294 static VkResult
anv_sparse_submission_add(struct anv_device * device,struct anv_sparse_submission * submit,struct anv_vm_bind * bind_op)295 anv_sparse_submission_add(struct anv_device *device,
296 struct anv_sparse_submission *submit,
297 struct anv_vm_bind *bind_op)
298 {
299 struct anv_vm_bind *prev_bind = submit->binds_len == 0 ? NULL :
300 &submit->binds[submit->binds_len - 1];
301
302 if (prev_bind &&
303 bind_op->op == prev_bind->op &&
304 bind_op->bo == prev_bind->bo &&
305 bind_op->address == prev_bind->address + prev_bind->size &&
306 (bind_op->bo_offset == prev_bind->bo_offset + prev_bind->size ||
307 prev_bind->bo == NULL)) {
308 prev_bind->size += bind_op->size;
309 return VK_SUCCESS;
310 }
311
312 if (submit->binds_len < submit->binds_capacity) {
313 submit->binds[submit->binds_len++] = *bind_op;
314 return VK_SUCCESS;
315 }
316
317 int new_capacity = MAX2(32, submit->binds_capacity * 2);
318 struct anv_vm_bind *new_binds =
319 vk_realloc(&device->vk.alloc, submit->binds,
320 new_capacity * sizeof(*new_binds), 8,
321 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
322 if (!new_binds)
323 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
324
325 new_binds[submit->binds_len] = *bind_op;
326
327 submit->binds = new_binds;
328 submit->binds_len++;
329 submit->binds_capacity = new_capacity;
330
331 return VK_SUCCESS;
332 }
333
334 /* We really want to try to have all the page tables on as few BOs as possible
335 * to benefit from cache locality and to keep the i915.ko relocation lists
336 * small. On the other hand, we don't want to waste memory on unused space.
337 */
338 #define ANV_TRTT_PAGE_TABLE_BO_SIZE (2 * 1024 * 1024)
339
340 static VkResult
trtt_make_page_table_bo(struct anv_device * device,struct anv_bo ** bo)341 trtt_make_page_table_bo(struct anv_device *device, struct anv_bo **bo)
342 {
343 VkResult result;
344 struct anv_trtt *trtt = &device->trtt;
345
346 result = anv_device_alloc_bo(device, "trtt-page-table",
347 ANV_TRTT_PAGE_TABLE_BO_SIZE,
348 ANV_BO_ALLOC_INTERNAL,
349 0 /* explicit_address */, bo);
350 if (result != VK_SUCCESS)
351 return result;
352
353 if (trtt->num_page_table_bos < trtt->page_table_bos_capacity) {
354 trtt->page_table_bos[trtt->num_page_table_bos++] = *bo;
355 } else {
356
357 int new_capacity = MAX2(8, trtt->page_table_bos_capacity * 2);
358 struct anv_bo **new_page_table_bos =
359 vk_realloc(&device->vk.alloc, trtt->page_table_bos,
360 new_capacity * sizeof(*trtt->page_table_bos), 8,
361 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
362 if (!new_page_table_bos) {
363 anv_device_release_bo(device, *bo);
364 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
365 }
366
367 new_page_table_bos[trtt->num_page_table_bos] = *bo;
368
369 trtt->page_table_bos = new_page_table_bos;
370 trtt->page_table_bos_capacity = new_capacity;
371 trtt->num_page_table_bos++;
372 }
373
374 trtt->cur_page_table_bo = *bo;
375 trtt->next_page_table_bo_offset = 0;
376
377 sparse_debug("new number of page table BOs: %d\n",
378 trtt->num_page_table_bos);
379
380 return VK_SUCCESS;
381 }
382
383 static VkResult
trtt_get_page_table_bo(struct anv_device * device,struct anv_bo ** bo,uint64_t * bo_addr)384 trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo,
385 uint64_t *bo_addr)
386 {
387 struct anv_trtt *trtt = &device->trtt;
388 VkResult result;
389
390 if (!trtt->cur_page_table_bo) {
391 result = trtt_make_page_table_bo(device, bo);
392 if (result != VK_SUCCESS)
393 return result;
394 }
395
396 *bo = trtt->cur_page_table_bo;
397 *bo_addr = trtt->cur_page_table_bo->offset +
398 trtt->next_page_table_bo_offset;
399
400 trtt->next_page_table_bo_offset += 4096;
401 if (trtt->next_page_table_bo_offset >= ANV_TRTT_PAGE_TABLE_BO_SIZE)
402 trtt->cur_page_table_bo = NULL;
403
404 return VK_SUCCESS;
405 }
406
407 /* For L3 and L2 pages, null and invalid entries are indicated by bits 1 and 0
408 * respectively. For L1 entries, the hardware compares the addresses against
409 * what we program to the GFX_TRTT_NULL and GFX_TRTT_INVAL registers.
410 */
411 #define ANV_TRTT_L3L2_NULL_ENTRY (1 << 1)
412 #define ANV_TRTT_L3L2_INVALID_ENTRY (1 << 0)
413
414 static void
anv_trtt_bind_list_add_entry(struct util_dynarray * binds,uint64_t pte_addr,uint64_t entry_addr)415 anv_trtt_bind_list_add_entry(struct util_dynarray *binds, uint64_t pte_addr,
416 uint64_t entry_addr)
417 {
418 struct anv_trtt_bind b = {
419 .pte_addr = pte_addr,
420 .entry_addr = entry_addr,
421 };
422 util_dynarray_append(binds, struct anv_trtt_bind, b);
423 }
424
425 /* Adds elements to the anv_trtt_bind structs passed. This doesn't write the
426 * entries to the HW yet.
427 */
428 static VkResult
anv_trtt_bind_add(struct anv_device * device,uint64_t trtt_addr,uint64_t dest_addr,struct util_dynarray * l3l2_binds,struct util_dynarray * l1_binds)429 anv_trtt_bind_add(struct anv_device *device,
430 uint64_t trtt_addr, uint64_t dest_addr,
431 struct util_dynarray *l3l2_binds,
432 struct util_dynarray *l1_binds)
433 {
434 VkResult result = VK_SUCCESS;
435 struct anv_trtt *trtt = &device->trtt;
436 bool is_null_bind = dest_addr == ANV_TRTT_L1_NULL_TILE_VAL;
437
438 int l3_index = (trtt_addr >> 35) & 0x1FF;
439 int l2_index = (trtt_addr >> 26) & 0x1FF;
440 int l1_index = (trtt_addr >> 16) & 0x3FF;
441
442 uint64_t l2_addr = trtt->l3_mirror[l3_index];
443 if (l2_addr == ANV_TRTT_L3L2_NULL_ENTRY && is_null_bind) {
444 return VK_SUCCESS;
445 } else if (l2_addr == 0 || l2_addr == ANV_TRTT_L3L2_NULL_ENTRY) {
446 if (is_null_bind) {
447 trtt->l3_mirror[l3_index] = ANV_TRTT_L3L2_NULL_ENTRY;
448
449 anv_trtt_bind_list_add_entry(l3l2_binds, trtt->l3_addr +
450 l3_index * sizeof(uint64_t),
451 ANV_TRTT_L3L2_NULL_ENTRY);
452
453 return VK_SUCCESS;
454 }
455
456 struct anv_bo *l2_bo;
457 result = trtt_get_page_table_bo(device, &l2_bo, &l2_addr);
458 if (result != VK_SUCCESS)
459 return result;
460
461 trtt->l3_mirror[l3_index] = l2_addr;
462
463 anv_trtt_bind_list_add_entry(l3l2_binds, trtt->l3_addr +
464 l3_index * sizeof(uint64_t), l2_addr);
465
466 /* We have just created a new L2 table. Other resources may already have
467 * been pointing to this L2 table relying on the fact that it was marked
468 * as NULL, so now we need to mark every one of its entries as NULL in
469 * order to preserve behavior for those entries.
470 */
471 if (!util_dynarray_ensure_cap(l3l2_binds,
472 l3l2_binds->capacity + 512 * sizeof(struct anv_trtt_bind)))
473 return VK_ERROR_OUT_OF_HOST_MEMORY;
474
475 for (int i = 0; i < 512; i++) {
476 if (i != l2_index) {
477 trtt->l2_mirror[l3_index * 512 + i] = ANV_TRTT_L3L2_NULL_ENTRY;
478 anv_trtt_bind_list_add_entry(l3l2_binds,
479 l2_addr + i * sizeof(uint64_t),
480 ANV_TRTT_L3L2_NULL_ENTRY);
481 }
482 }
483 }
484 assert(l2_addr != 0 && l2_addr != ANV_TRTT_L3L2_NULL_ENTRY);
485
486 /* The first page in the l2_mirror corresponds to l3_index=0 and so on. */
487 uint64_t l1_addr = trtt->l2_mirror[l3_index * 512 + l2_index];
488 if (l1_addr == ANV_TRTT_L3L2_NULL_ENTRY && is_null_bind) {
489 return VK_SUCCESS;
490 } else if (l1_addr == 0 || l1_addr == ANV_TRTT_L3L2_NULL_ENTRY) {
491 if (is_null_bind) {
492 trtt->l2_mirror[l3_index * 512 + l2_index] =
493 ANV_TRTT_L3L2_NULL_ENTRY;
494
495 anv_trtt_bind_list_add_entry(l3l2_binds,
496 l2_addr + l2_index * sizeof(uint64_t),
497 ANV_TRTT_L3L2_NULL_ENTRY);
498
499 return VK_SUCCESS;
500 }
501
502 struct anv_bo *l1_bo;
503 result = trtt_get_page_table_bo(device, &l1_bo, &l1_addr);
504 if (result != VK_SUCCESS)
505 return result;
506
507 trtt->l2_mirror[l3_index * 512 + l2_index] = l1_addr;
508
509 anv_trtt_bind_list_add_entry(l3l2_binds,
510 l2_addr + l2_index * sizeof(uint64_t),
511 l1_addr);
512 }
513 assert(l1_addr != 0 && l1_addr != ANV_TRTT_L3L2_NULL_ENTRY);
514
515 anv_trtt_bind_list_add_entry(l1_binds,
516 l1_addr + l1_index * sizeof(uint32_t),
517 dest_addr);
518
519 return VK_SUCCESS;
520 }
521
522 VkResult
anv_sparse_trtt_garbage_collect_batches(struct anv_device * device,bool wait_completion)523 anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
524 bool wait_completion)
525 {
526 struct anv_trtt *trtt = &device->trtt;
527
528 uint64_t last_value;
529 if (!wait_completion) {
530 VkResult result =
531 vk_sync_get_value(&device->vk, trtt->timeline, &last_value);
532 if (result != VK_SUCCESS)
533 return result;
534
535 /* Valgrind doesn't know that drmSyncobjQuery writes to 'last_value' on
536 * success.
537 */
538 VG(VALGRIND_MAKE_MEM_DEFINED(&last_value, sizeof(last_value)));
539 } else {
540 last_value = trtt->timeline_val;
541 }
542
543 list_for_each_entry_safe(struct anv_trtt_submission, submit,
544 &trtt->in_flight_batches, link) {
545 if (submit->base.signal.signal_value <= last_value) {
546 list_del(&submit->link);
547 anv_async_submit_fini(&submit->base);
548 vk_free(&device->vk.alloc, submit);
549 continue;
550 }
551
552 if (!wait_completion)
553 break;
554
555 VkResult result = vk_sync_wait(
556 &device->vk,
557 submit->base.signal.sync,
558 submit->base.signal.signal_value,
559 VK_SYNC_WAIT_COMPLETE,
560 os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
561 if (result == VK_SUCCESS) {
562 list_del(&submit->link);
563 anv_async_submit_fini(&submit->base);
564 vk_free(&device->vk.alloc, submit);
565 continue;
566 }
567
568 /* If the wait failed but the caller wanted completion, return the
569 * error.
570 */
571 return result;
572 }
573
574 return VK_SUCCESS;
575 }
576
577 /* On success, this function initializes 'submit' and submits it, but doesn't
578 * wait or free it. This allows the caller to submit multiple queues at the
579 * same time before starting to wait for anything to complete.
580 * If the function fails, the caller doesn't need to wait or fini anything,
581 * just whatever other submissions may have succeeded in the past.
582 */
583 static VkResult
anv_trtt_first_bind_init_queue(struct anv_queue * queue,struct anv_async_submit * submit,bool init_l3_table,struct anv_bo * l3_bo)584 anv_trtt_first_bind_init_queue(struct anv_queue *queue,
585 struct anv_async_submit *submit,
586 bool init_l3_table, struct anv_bo *l3_bo)
587 {
588 struct anv_device *device = queue->device;
589 struct anv_trtt *trtt = &device->trtt;
590 VkResult result;
591
592 result = anv_async_submit_init(submit, queue, &device->batch_bo_pool,
593 false, true);
594 if (result != VK_SUCCESS)
595 return result;
596
597 result = anv_genX(device->info, init_trtt_context_state)(submit);
598 if (result != VK_SUCCESS)
599 goto out_submit_fini;
600
601 /* We only need to do this once, so pick the first queue. */
602 if (init_l3_table) {
603 struct anv_trtt_bind l3l2_binds_data[512];
604 struct util_dynarray l3l2_binds;
605 util_dynarray_init_from_stack(&l3l2_binds, l3l2_binds_data,
606 sizeof(l3l2_binds_data));
607
608 for (int entry = 0; entry < 512; entry++) {
609 trtt->l3_mirror[entry] = ANV_TRTT_L3L2_NULL_ENTRY;
610 anv_trtt_bind_list_add_entry(&l3l2_binds,
611 trtt->l3_addr +
612 entry * sizeof(uint64_t),
613 ANV_TRTT_L3L2_NULL_ENTRY);
614 }
615
616 anv_genX(device->info, write_trtt_entries)(
617 submit, l3l2_binds.data,
618 util_dynarray_num_elements(&l3l2_binds, struct anv_trtt_bind),
619 NULL, 0);
620
621 result = anv_reloc_list_add_bo(&submit->relocs, l3_bo);
622 if (result != VK_SUCCESS)
623 goto out_submit_fini;
624 }
625
626 anv_genX(device->info, async_submit_end)(submit);
627
628 result = device->kmd_backend->queue_exec_async(submit, 0, NULL, 1,
629 &submit->signal);
630 if (result != VK_SUCCESS)
631 goto out_submit_fini;
632
633 /* If we succeed, it's our caller that's going to call
634 * anv_async_submit_fini(). We do this so we can start waiting for the
635 * submissions only after all the submissions are submitted.
636 */
637 return VK_SUCCESS;
638
639 out_submit_fini:
640 /* If we fail, undo everything this function has done so the caller has
641 * nothing to free.
642 */
643 anv_async_submit_fini(submit);
644 return result;
645 }
646
647 /* There are lots of applications that request for sparse binding to be
648 * enabled but never use it, so we choose to delay the initialization of TR-TT
649 * until the moment we know we're going to need it.
650 */
651 static VkResult
anv_trtt_first_bind_init(struct anv_device * device)652 anv_trtt_first_bind_init(struct anv_device *device)
653 {
654 struct anv_trtt *trtt = &device->trtt;
655 VkResult result = VK_SUCCESS;
656
657 /* TR-TT submission needs a queue even when the API entry point doesn't
658 * provide one, such as resource creation. We pick this queue from the user
659 * created queues at init_device_state() under anv_CreateDevice.
660 *
661 * It is technically possible for the user to create sparse resources even
662 * when they don't have a sparse queue: they won't be able to bind the
663 * resource but they should still be able to use the resource and rely on
664 * its unbound behavior. We haven't spotted any real world application or
665 * even test suite that exercises this behavior.
666 *
667 * For now let's just print an error message and return, which means that
668 * resource creation will succeed but the behavior will be undefined if the
669 * resource is used, which goes against our claim that we support the
670 * sparseResidencyNonResidentStrict property.
671 *
672 * TODO: be fully spec-compliant here. Maybe have a device-internal queue
673 * independent of the application's queues for the TR-TT operations.
674 */
675 if (unlikely(!trtt->queue)) {
676 static bool warned = false;
677 if (unlikely(!warned)) {
678 fprintf(stderr, "FIXME: application has created a sparse resource "
679 "but no queues capable of binding sparse resources were "
680 "created. Using these resources will result in undefined "
681 "behavior.\n");
682 warned = true;
683 }
684 return VK_SUCCESS;
685 }
686
687 simple_mtx_lock(&trtt->mutex);
688
689 /* This means we have already initialized the first bind. */
690 if (likely(trtt->l3_addr)) {
691 simple_mtx_unlock(&trtt->mutex);
692 return VK_SUCCESS;
693 }
694
695 struct anv_async_submit submits[device->queue_count];
696
697 struct anv_bo *l3_bo;
698 result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr);
699 if (result != VK_SUCCESS)
700 goto out;
701
702 trtt->l3_mirror = vk_zalloc(&device->vk.alloc, 4096, 8,
703 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
704 if (!trtt->l3_mirror) {
705 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
706 goto out;
707 }
708
709 /* L3 has 512 entries, so we can have up to 512 L2 tables. */
710 trtt->l2_mirror = vk_zalloc(&device->vk.alloc, 512 * 4096, 8,
711 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
712 if (!trtt->l2_mirror) {
713 vk_free(&device->vk.alloc, trtt->l3_mirror);
714 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
715 goto out;
716 }
717
718 int n_submits;
719 for (n_submits = 0; n_submits < device->queue_count; n_submits++) {
720 result = anv_trtt_first_bind_init_queue(&device->queues[n_submits],
721 &submits[n_submits],
722 n_submits == 0, l3_bo);
723 if (result != VK_SUCCESS)
724 break;
725 }
726
727 for (uint32_t i = 0; i < n_submits; i++) {
728 anv_async_submit_wait(&submits[i]);
729 anv_async_submit_fini(&submits[i]);
730 }
731
732 out:
733 if (result != VK_SUCCESS)
734 trtt->l3_addr = 0;
735
736 simple_mtx_unlock(&trtt->mutex);
737 return result;
738 }
739
740 static VkResult
anv_sparse_bind_trtt(struct anv_device * device,struct anv_sparse_submission * sparse_submit)741 anv_sparse_bind_trtt(struct anv_device *device,
742 struct anv_sparse_submission *sparse_submit)
743 {
744 struct anv_trtt *trtt = &device->trtt;
745 VkResult result;
746
747 /* See the same check at anv_trtt_first_bind_init(). */
748 if (unlikely(!trtt->queue))
749 return VK_SUCCESS;
750
751 if (!sparse_submit->queue)
752 sparse_submit->queue = trtt->queue;
753
754 struct anv_trtt_submission *submit =
755 vk_zalloc(&device->vk.alloc, sizeof(*submit), 8,
756 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
757 if (submit == NULL)
758 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
759
760 result = anv_async_submit_init(&submit->base, sparse_submit->queue,
761 &device->batch_bo_pool,
762 false, false);
763 if (result != VK_SUCCESS)
764 goto out_async;
765
766 simple_mtx_lock(&trtt->mutex);
767
768 /* Do this so we can avoid reallocs later. */
769 int l1_binds_capacity = 0;
770 for (int b = 0; b < sparse_submit->binds_len; b++) {
771 assert(sparse_submit->binds[b].size % (64 * 1024) == 0);
772 int pages = sparse_submit->binds[b].size / (64 * 1024);
773 l1_binds_capacity += pages;
774 }
775
776 /* Turn a series of virtual address maps, into a list of L3/L2/L1 TRTT page
777 * table updates.
778 */
779
780 /* These are arrays of struct anv_trtt_bind. */
781 struct util_dynarray l3l2_binds = {};
782 struct util_dynarray l1_binds;
783
784 if (l1_binds_capacity <= 32) {
785 size_t alloc_size = l1_binds_capacity * sizeof(struct anv_trtt_bind);
786 struct anv_trtt_bind *ptr = alloca(alloc_size);
787 util_dynarray_init_from_stack(&l1_binds, ptr, alloc_size);
788 } else {
789 util_dynarray_init(&l1_binds, NULL);
790 if (!util_dynarray_ensure_cap(&l1_binds,
791 l1_binds_capacity * sizeof(struct anv_trtt_bind)))
792 goto out_dynarrays;
793 }
794
795 for (int b = 0; b < sparse_submit->binds_len; b++) {
796 struct anv_vm_bind *vm_bind = &sparse_submit->binds[b];
797 for (uint64_t i = 0; i < vm_bind->size; i += 64 * 1024) {
798 uint64_t trtt_addr = vm_bind->address + i;
799 uint64_t dest_addr =
800 (vm_bind->op == ANV_VM_BIND && vm_bind->bo) ?
801 vm_bind->bo->offset + vm_bind->bo_offset + i :
802 ANV_TRTT_L1_NULL_TILE_VAL;
803
804 result = anv_trtt_bind_add(device, trtt_addr, dest_addr,
805 &l3l2_binds, &l1_binds);
806 if (result != VK_SUCCESS)
807 goto out_dynarrays;
808 }
809 }
810
811 /* Convert the L3/L2/L1 TRTT page table updates in anv_trtt_bind elements
812 * into MI commands.
813 */
814 uint32_t n_l3l2_binds =
815 util_dynarray_num_elements(&l3l2_binds, struct anv_trtt_bind);
816 uint32_t n_l1_binds =
817 util_dynarray_num_elements(&l1_binds, struct anv_trtt_bind);
818 sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n",
819 sparse_submit->binds_len, n_l3l2_binds, n_l1_binds);
820
821 /* This is not an error, the application is simply trying to reset state
822 * that was already there. */
823 if (n_l3l2_binds == 0 && n_l1_binds == 0)
824 goto out_dynarrays;
825
826 anv_genX(device->info, write_trtt_entries)(&submit->base,
827 l3l2_binds.data, n_l3l2_binds,
828 l1_binds.data, n_l1_binds);
829
830 util_dynarray_fini(&l1_binds);
831 util_dynarray_fini(&l3l2_binds);
832
833 anv_genX(device->info, async_submit_end)(&submit->base);
834
835 if (submit->base.batch.status != VK_SUCCESS) {
836 result = submit->base.batch.status;
837 goto out_add_bind;
838 }
839
840 /* Add all the BOs backing TRTT page tables to the reloc list. */
841 if (device->physical->uses_relocs) {
842 for (int i = 0; i < trtt->num_page_table_bos; i++) {
843 result = anv_reloc_list_add_bo(&submit->base.relocs,
844 trtt->page_table_bos[i]);
845 if (result != VK_SUCCESS)
846 goto out_add_bind;
847 }
848 }
849
850 anv_sparse_trtt_garbage_collect_batches(device, false);
851
852 submit->base.signal = (struct vk_sync_signal) {
853 .sync = trtt->timeline,
854 .signal_value = ++trtt->timeline_val,
855 };
856
857 result =
858 device->kmd_backend->queue_exec_async(&submit->base,
859 sparse_submit->wait_count,
860 sparse_submit->waits,
861 sparse_submit->signal_count,
862 sparse_submit->signals);
863 if (result != VK_SUCCESS) {
864 trtt->timeline_val--;
865 goto out_add_bind;
866 }
867
868 list_addtail(&submit->link, &trtt->in_flight_batches);
869
870 simple_mtx_unlock(&trtt->mutex);
871
872 ANV_RMV(vm_binds, device, sparse_submit->binds, sparse_submit->binds_len);
873
874 return VK_SUCCESS;
875
876 out_dynarrays:
877 util_dynarray_fini(&l1_binds);
878 util_dynarray_fini(&l3l2_binds);
879 out_add_bind:
880 simple_mtx_unlock(&trtt->mutex);
881 anv_async_submit_fini(&submit->base);
882 out_async:
883 vk_free(&device->vk.alloc, submit);
884 return result;
885 }
886
887 static VkResult
anv_sparse_bind_vm_bind(struct anv_device * device,struct anv_sparse_submission * submit)888 anv_sparse_bind_vm_bind(struct anv_device *device,
889 struct anv_sparse_submission *submit)
890 {
891 struct anv_queue *queue = submit->queue;
892
893 VkResult result = device->kmd_backend->vm_bind(device, submit,
894 ANV_VM_BIND_FLAG_NONE);
895 if (!queue) {
896 assert(submit->wait_count == 0 && submit->signal_count == 0 &&
897 submit->binds_len == 1);
898 return result;
899 }
900
901 if (result == VK_ERROR_OUT_OF_HOST_MEMORY) {
902 /* If we get this, the system is under memory pressure. First we
903 * manually wait for all our dependency syncobjs hoping that some memory
904 * will be released while we wait, then we try to issue each bind
905 * operation in a single ioctl as it requires less Kernel memory and so
906 * we may be able to move things forward, although slowly, while also
907 * waiting for each operation to complete before issuing the next.
908 * Performance isn't a concern at this point: we're just trying to move
909 * progress forward without crashing until whatever is eating too much
910 * memory goes away.
911 */
912
913 result = vk_sync_wait_many(&device->vk, submit->wait_count,
914 submit->waits, VK_SYNC_WAIT_COMPLETE,
915 INT64_MAX);
916 if (result != VK_SUCCESS)
917 return vk_queue_set_lost(&queue->vk, "vk_sync_wait_many failed");
918
919 struct vk_sync *sync;
920 result = vk_sync_create(&device->vk,
921 &device->physical->sync_syncobj_type,
922 VK_SYNC_IS_TIMELINE, 0 /* initial_value */,
923 &sync);
924 if (result != VK_SUCCESS)
925 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
926
927 for (int b = 0; b < submit->binds_len; b++) {
928 struct vk_sync_signal sync_signal = {
929 .sync = sync,
930 .signal_value = b + 1,
931 };
932 struct anv_sparse_submission s = {
933 .queue = submit->queue,
934 .binds = &submit->binds[b],
935 .binds_len = 1,
936 .binds_capacity = 1,
937 .wait_count = 0,
938 .signal_count = 1,
939 .waits = NULL,
940 .signals = &sync_signal,
941 };
942 result = device->kmd_backend->vm_bind(device, &s,
943 ANV_VM_BIND_FLAG_NONE);
944 if (result != VK_SUCCESS) {
945 vk_sync_destroy(&device->vk, sync);
946 return vk_error(device, result); /* Well, at least we tried... */
947 }
948
949 result = vk_sync_wait(&device->vk, sync, sync_signal.signal_value,
950 VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
951 if (result != VK_SUCCESS) {
952 vk_sync_destroy(&device->vk, sync);
953 return vk_queue_set_lost(&queue->vk, "vk_sync_wait failed");
954 }
955 }
956
957 vk_sync_destroy(&device->vk, sync);
958
959 for (uint32_t i = 0; i < submit->signal_count; i++) {
960 struct vk_sync_signal *s = &submit->signals[i];
961 result = vk_sync_signal(&device->vk, s->sync, s->signal_value);
962 if (result != VK_SUCCESS)
963 return vk_queue_set_lost(&queue->vk, "vk_sync_signal failed");
964 }
965 }
966
967 return VK_SUCCESS;
968 }
969
970 VkResult
anv_sparse_bind(struct anv_device * device,struct anv_sparse_submission * submit)971 anv_sparse_bind(struct anv_device *device,
972 struct anv_sparse_submission *submit)
973 {
974 if (INTEL_DEBUG(DEBUG_SPARSE)) {
975 for (int b = 0; b < submit->binds_len; b++)
976 dump_anv_vm_bind(device, &submit->binds[b]);
977 }
978
979 return device->physical->sparse_type == ANV_SPARSE_TYPE_TRTT ?
980 anv_sparse_bind_trtt(device, submit) :
981 anv_sparse_bind_vm_bind(device, submit);
982 }
983
984 VkResult
anv_init_sparse_bindings(struct anv_device * device,uint64_t size_,struct anv_sparse_binding_data * sparse,enum anv_bo_alloc_flags alloc_flags,uint64_t client_address,struct anv_address * out_address)985 anv_init_sparse_bindings(struct anv_device *device,
986 uint64_t size_,
987 struct anv_sparse_binding_data *sparse,
988 enum anv_bo_alloc_flags alloc_flags,
989 uint64_t client_address,
990 struct anv_address *out_address)
991 {
992 VkResult result;
993 uint64_t size = align64(size_, ANV_SPARSE_BLOCK_SIZE);
994
995 if (device->physical->sparse_type == ANV_SPARSE_TYPE_TRTT)
996 alloc_flags |= ANV_BO_ALLOC_TRTT;
997
998 sparse->address = anv_vma_alloc(device, size, ANV_SPARSE_BLOCK_SIZE,
999 alloc_flags,
1000 intel_48b_address(client_address),
1001 &sparse->vma_heap);
1002 sparse->size = size;
1003
1004 out_address->bo = NULL;
1005 out_address->offset = sparse->address;
1006
1007 if (device->physical->sparse_type == ANV_SPARSE_TYPE_TRTT) {
1008 result = anv_trtt_first_bind_init(device);
1009 if (result != VK_SUCCESS)
1010 goto out_vma_free;
1011 } else {
1012 struct anv_vm_bind bind = {
1013 .bo = NULL, /* That's a NULL binding. */
1014 .address = sparse->address,
1015 .bo_offset = 0,
1016 .size = size,
1017 .op = ANV_VM_BIND,
1018 };
1019 struct anv_sparse_submission submit = {
1020 .queue = NULL,
1021 .binds = &bind,
1022 .binds_len = 1,
1023 .binds_capacity = 1,
1024 .wait_count = 0,
1025 .signal_count = 0,
1026 };
1027 result = anv_sparse_bind(device, &submit);
1028 if (result != VK_SUCCESS)
1029 goto out_vma_free;
1030 }
1031
1032 p_atomic_inc(&device->num_sparse_resources);
1033 return VK_SUCCESS;
1034
1035 out_vma_free:
1036 anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
1037 return result;
1038
1039 }
1040
1041 void
anv_free_sparse_bindings(struct anv_device * device,struct anv_sparse_binding_data * sparse)1042 anv_free_sparse_bindings(struct anv_device *device,
1043 struct anv_sparse_binding_data *sparse)
1044 {
1045 if (!sparse->address)
1046 return;
1047
1048 sparse_debug("%s: address:0x%016"PRIx64" size:0x%08"PRIx64"\n",
1049 __func__, sparse->address, sparse->size);
1050
1051 p_atomic_dec(&device->num_sparse_resources);
1052
1053 struct anv_vm_bind unbind = {
1054 .bo = 0,
1055 .address = sparse->address,
1056 .bo_offset = 0,
1057 .size = sparse->size,
1058 .op = ANV_VM_UNBIND,
1059 };
1060 struct anv_sparse_submission submit = {
1061 .queue = NULL,
1062 .binds = &unbind,
1063 .binds_len = 1,
1064 .binds_capacity = 1,
1065 .wait_count = 0,
1066 .signal_count = 0,
1067 };
1068 VkResult res = anv_sparse_bind(device, &submit);
1069
1070 /* Our callers don't have a way to signal failure to the upper layers, so
1071 * just keep the vma if we fail to unbind it. Still, let's have an
1072 * assertion because this really shouldn't be happening.
1073 */
1074 assert(res == VK_SUCCESS);
1075 if (res != VK_SUCCESS)
1076 return;
1077
1078 anv_vma_free(device, sparse->vma_heap, sparse->address, sparse->size);
1079 }
1080
1081 static VkExtent3D
anv_sparse_calc_block_shape(struct anv_physical_device * pdevice,struct isl_surf * surf,const struct isl_tile_info * tile_info)1082 anv_sparse_calc_block_shape(struct anv_physical_device *pdevice,
1083 struct isl_surf *surf,
1084 const struct isl_tile_info *tile_info)
1085 {
1086 const struct isl_format_layout *layout =
1087 isl_format_get_layout(surf->format);
1088
1089 VkExtent3D block_shape_el = {
1090 .width = tile_info->logical_extent_el.width,
1091 .height = tile_info->logical_extent_el.height,
1092 .depth = tile_info->logical_extent_el.depth,
1093 };
1094 VkExtent3D block_shape_px = vk_extent3d_el_to_px(block_shape_el, layout);
1095
1096 assert(surf->tiling != ISL_TILING_LINEAR);
1097
1098 return block_shape_px;
1099 }
1100
1101 VkSparseImageFormatProperties
anv_sparse_calc_image_format_properties(struct anv_physical_device * pdevice,VkImageAspectFlags aspect,VkImageType vk_image_type,VkSampleCountFlagBits vk_samples,struct isl_surf * surf)1102 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
1103 VkImageAspectFlags aspect,
1104 VkImageType vk_image_type,
1105 VkSampleCountFlagBits vk_samples,
1106 struct isl_surf *surf)
1107 {
1108 const struct isl_format_layout *isl_layout =
1109 isl_format_get_layout(surf->format);
1110 struct isl_tile_info tile_info;
1111 isl_surf_get_tile_info(surf, &tile_info);
1112 const int bpb = isl_layout->bpb;
1113 assert(bpb == 8 || bpb == 16 || bpb == 32 || bpb == 64 ||bpb == 128);
1114
1115 VkExtent3D granularity = anv_sparse_calc_block_shape(pdevice, surf,
1116 &tile_info);
1117 bool is_standard = false;
1118 bool is_known_nonstandard_format = false;
1119
1120 /* We shouldn't be able to reach this function with a 1D image. */
1121 assert(vk_image_type != VK_IMAGE_TYPE_1D);
1122
1123 VkExtent3D std_shape =
1124 anv_sparse_get_standard_image_block_shape(surf->format,
1125 vk_image_type, vk_samples,
1126 bpb);
1127 /* YUV formats don't work with Tile64, which is required if we want to
1128 * claim standard block shapes. The spec requires us to support all
1129 * non-compressed color formats that non-sparse supports, so we can't just
1130 * say YUV formats are not supported by Sparse. So we end supporting this
1131 * format and anv_sparse_calc_miptail_properties() will say that everything
1132 * is part of the miptail.
1133 *
1134 * For more details on the hardware restriction, please check
1135 * isl_gfx125_filter_tiling().
1136 */
1137 if (pdevice->info.verx10 >= 125 && isl_format_is_yuv(surf->format))
1138 is_known_nonstandard_format = true;
1139
1140 /* The standard block shapes (and by extension, the tiling formats they
1141 * require) are simply incompatible with getting a 2D view of a 3D image.
1142 */
1143 if (surf->usage & ISL_SURF_USAGE_2D_3D_COMPATIBLE_BIT)
1144 is_known_nonstandard_format = true;
1145
1146 is_standard = granularity.width == std_shape.width &&
1147 granularity.height == std_shape.height &&
1148 granularity.depth == std_shape.depth;
1149
1150 /* TODO: dEQP seems to care about the block shapes being standard even for
1151 * the cases where is_known_nonstandard_format is true. Luckily as of today
1152 * all of those cases are NotSupported but sooner or later we may end up
1153 * getting a failure.
1154 * Notice that in practice we report these cases as having the mip tail
1155 * starting on mip level 0, so the reported block shapes are irrelevant
1156 * since non-opaque binds are not supported. Still, dEQP seems to care.
1157 */
1158 assert(is_standard || is_known_nonstandard_format);
1159 assert(!(is_standard && is_known_nonstandard_format));
1160
1161 bool wrong_block_size = isl_calc_tile_size(&tile_info) !=
1162 ANV_SPARSE_BLOCK_SIZE;
1163
1164 return (VkSparseImageFormatProperties) {
1165 .aspectMask = aspect,
1166 .imageGranularity = granularity,
1167 .flags = ((is_standard || is_known_nonstandard_format) ? 0 :
1168 VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT) |
1169 (wrong_block_size ? VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT :
1170 0),
1171 };
1172 }
1173
1174 /* The miptail is supposed to be this region where the tiniest mip levels
1175 * are squished together in one single page, which should save us some memory.
1176 * It's a hardware feature which our hardware supports on certain tiling
1177 * formats - the ones we always want to use for sparse resources.
1178 *
1179 * For sparse, the main feature of the miptail is that it only supports opaque
1180 * binds, so you either bind the whole miptail or you bind nothing at all,
1181 * there are no subresources inside it to separately bind. While the idea is
1182 * that the miptail as reported by sparse should match what our hardware does,
1183 * in practice we can say in our sparse functions that certain mip levels are
1184 * part of the miptail while from the point of view of our hardwared they
1185 * aren't.
1186 *
1187 * If we detect we're using the sparse-friendly tiling formats and ISL
1188 * supports miptails for them, we can just trust the miptail level set by ISL
1189 * and things can proceed as The Spec intended.
1190 *
1191 * However, if that's not the case, we have to go on a best-effort policy. We
1192 * could simply declare that every mip level is part of the miptail and be
1193 * done, but since that kinda defeats the purpose of Sparse we try to find
1194 * what level we really should be reporting as the first miptail level based
1195 * on the alignments of the surface subresources.
1196 */
1197 void
anv_sparse_calc_miptail_properties(struct anv_device * device,struct anv_image * image,VkImageAspectFlags vk_aspect,uint32_t * imageMipTailFirstLod,VkDeviceSize * imageMipTailSize,VkDeviceSize * imageMipTailOffset,VkDeviceSize * imageMipTailStride)1198 anv_sparse_calc_miptail_properties(struct anv_device *device,
1199 struct anv_image *image,
1200 VkImageAspectFlags vk_aspect,
1201 uint32_t *imageMipTailFirstLod,
1202 VkDeviceSize *imageMipTailSize,
1203 VkDeviceSize *imageMipTailOffset,
1204 VkDeviceSize *imageMipTailStride)
1205 {
1206 const uint32_t plane = anv_image_aspect_to_plane(image, vk_aspect);
1207 struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
1208 uint64_t binding_plane_offset =
1209 image->planes[plane].primary_surface.memory_range.offset;
1210 struct isl_tile_info tile_info;
1211 isl_surf_get_tile_info(surf, &tile_info);
1212 uint64_t layer1_offset;
1213 uint32_t x_off, y_off;
1214
1215 /* Treat the whole thing as a single miptail. We should have already
1216 * reported this image as VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT.
1217 *
1218 * In theory we could try to make ISL massage the alignments so that we
1219 * could at least claim mip level 0 to be not part of the miptail, but
1220 * that could end up wasting a lot of memory, so it's better to do
1221 * nothing and focus our efforts into making things use the appropriate
1222 * tiling formats that give us the standard block shapes.
1223 */
1224 if (isl_calc_tile_size(&tile_info) != ANV_SPARSE_BLOCK_SIZE)
1225 goto out_everything_is_miptail;
1226
1227 assert(surf->tiling != ISL_TILING_LINEAR);
1228
1229 if (image->vk.array_layers == 1) {
1230 layer1_offset = surf->size_B;
1231 } else {
1232 isl_surf_get_image_offset_B_tile_sa(surf, 0, 1, 0, &layer1_offset,
1233 &x_off, &y_off);
1234 if (x_off || y_off)
1235 goto out_everything_is_miptail;
1236 }
1237 assert(layer1_offset % ANV_SPARSE_BLOCK_SIZE == 0);
1238
1239 /* We could try to do better here, but there's not really any point since
1240 * we should be supporting the appropriate tiling formats everywhere.
1241 */
1242 if (!isl_tiling_supports_standard_block_shapes(surf->tiling))
1243 goto out_everything_is_miptail;
1244
1245 int miptail_first_level = surf->miptail_start_level;
1246 if (miptail_first_level >= image->vk.mip_levels)
1247 goto out_no_miptail;
1248
1249 uint64_t miptail_offset = 0;
1250 isl_surf_get_image_offset_B_tile_sa(surf, miptail_first_level, 0, 0,
1251 &miptail_offset,
1252 &x_off, &y_off);
1253 assert(x_off == 0 && y_off == 0);
1254 assert(miptail_offset % ANV_SPARSE_BLOCK_SIZE == 0);
1255
1256 *imageMipTailFirstLod = miptail_first_level;
1257 *imageMipTailSize = ANV_SPARSE_BLOCK_SIZE;
1258 *imageMipTailOffset = binding_plane_offset + miptail_offset;
1259 *imageMipTailStride = layer1_offset;
1260 goto out_debug;
1261
1262 out_no_miptail:
1263 *imageMipTailFirstLod = image->vk.mip_levels;
1264 *imageMipTailSize = 0;
1265 *imageMipTailOffset = 0;
1266 *imageMipTailStride = 0;
1267 goto out_debug;
1268
1269 out_everything_is_miptail:
1270 *imageMipTailFirstLod = 0;
1271 *imageMipTailSize = surf->size_B;
1272 *imageMipTailOffset = binding_plane_offset;
1273 *imageMipTailStride = 0;
1274
1275 out_debug:
1276 sparse_debug("miptail first_lod:%d size:%"PRIu64" offset:%"PRIu64" "
1277 "stride:%"PRIu64"\n",
1278 *imageMipTailFirstLod, *imageMipTailSize,
1279 *imageMipTailOffset, *imageMipTailStride);
1280 }
1281
1282 static struct anv_vm_bind
vk_bind_to_anv_vm_bind(struct anv_sparse_binding_data * sparse,const struct VkSparseMemoryBind * vk_bind)1283 vk_bind_to_anv_vm_bind(struct anv_sparse_binding_data *sparse,
1284 const struct VkSparseMemoryBind *vk_bind)
1285 {
1286 struct anv_vm_bind anv_bind = {
1287 .bo = NULL,
1288 .address = sparse->address + vk_bind->resourceOffset,
1289 .bo_offset = 0,
1290 .size = vk_bind->size,
1291 .op = ANV_VM_BIND,
1292 };
1293
1294 assert(vk_bind->size);
1295 assert(vk_bind->resourceOffset + vk_bind->size <= sparse->size);
1296
1297 if (vk_bind->memory != VK_NULL_HANDLE) {
1298 anv_bind.bo = anv_device_memory_from_handle(vk_bind->memory)->bo;
1299 anv_bind.bo_offset = vk_bind->memoryOffset,
1300 assert(vk_bind->memoryOffset + vk_bind->size <= anv_bind.bo->size);
1301 }
1302
1303 return anv_bind;
1304 }
1305
1306 static VkResult
anv_sparse_bind_resource_memory(struct anv_device * device,struct anv_sparse_binding_data * sparse,uint64_t resource_size,const VkSparseMemoryBind * vk_bind,struct anv_sparse_submission * submit)1307 anv_sparse_bind_resource_memory(struct anv_device *device,
1308 struct anv_sparse_binding_data *sparse,
1309 uint64_t resource_size,
1310 const VkSparseMemoryBind *vk_bind,
1311 struct anv_sparse_submission *submit)
1312 {
1313 struct anv_vm_bind bind = vk_bind_to_anv_vm_bind(sparse, vk_bind);
1314 uint64_t rem = vk_bind->size % ANV_SPARSE_BLOCK_SIZE;
1315
1316 if (rem != 0) {
1317 if (vk_bind->resourceOffset + vk_bind->size == resource_size)
1318 bind.size += ANV_SPARSE_BLOCK_SIZE - rem;
1319 else
1320 return vk_error(device, VK_ERROR_VALIDATION_FAILED_EXT);
1321 }
1322
1323 return anv_sparse_submission_add(device, submit, &bind);
1324 }
1325
1326 VkResult
anv_sparse_bind_buffer(struct anv_device * device,struct anv_buffer * buffer,const VkSparseMemoryBind * vk_bind,struct anv_sparse_submission * submit)1327 anv_sparse_bind_buffer(struct anv_device *device,
1328 struct anv_buffer *buffer,
1329 const VkSparseMemoryBind *vk_bind,
1330 struct anv_sparse_submission *submit)
1331 {
1332 return anv_sparse_bind_resource_memory(device, &buffer->sparse_data,
1333 buffer->vk.size,
1334 vk_bind, submit);
1335 }
1336
1337 VkResult
anv_sparse_bind_image_opaque(struct anv_device * device,struct anv_image * image,const VkSparseMemoryBind * vk_bind,struct anv_sparse_submission * submit)1338 anv_sparse_bind_image_opaque(struct anv_device *device,
1339 struct anv_image *image,
1340 const VkSparseMemoryBind *vk_bind,
1341 struct anv_sparse_submission *submit)
1342 {
1343 struct anv_image_binding *b =
1344 &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN];
1345 assert(!image->disjoint);
1346
1347 if (INTEL_DEBUG(DEBUG_SPARSE)) {
1348 sparse_debug("%s:\n", __func__);
1349 dump_anv_image(image);
1350 u_foreach_bit(b, image->vk.aspects) {
1351 VkImageAspectFlagBits aspect = 1 << b;
1352 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1353 struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
1354 sparse_debug("aspect 0x%x (plane %d):\n", aspect, plane);
1355 dump_isl_surf(surf);
1356 }
1357 sparse_debug("\n");
1358 }
1359
1360 return anv_sparse_bind_resource_memory(device, &b->sparse_data,
1361 b->memory_range.size,
1362 vk_bind, submit);
1363 }
1364
1365 VkResult
anv_sparse_bind_image_memory(struct anv_queue * queue,struct anv_image * image,const VkSparseImageMemoryBind * bind,struct anv_sparse_submission * submit)1366 anv_sparse_bind_image_memory(struct anv_queue *queue,
1367 struct anv_image *image,
1368 const VkSparseImageMemoryBind *bind,
1369 struct anv_sparse_submission *submit)
1370 {
1371 struct anv_device *device = queue->device;
1372 VkImageAspectFlags aspect = bind->subresource.aspectMask;
1373 uint32_t mip_level = bind->subresource.mipLevel;
1374 uint32_t array_layer = bind->subresource.arrayLayer;
1375
1376 assert(!(bind->flags & VK_SPARSE_MEMORY_BIND_METADATA_BIT));
1377
1378 struct anv_image_binding *img_binding = image->disjoint ?
1379 &image->bindings[anv_image_aspect_to_binding(image, aspect)] :
1380 &image->bindings[ANV_IMAGE_MEMORY_BINDING_MAIN];
1381 struct anv_sparse_binding_data *sparse_data = &img_binding->sparse_data;
1382
1383 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1384 struct isl_surf *surf = &image->planes[plane].primary_surface.isl;
1385 uint64_t binding_plane_offset =
1386 image->planes[plane].primary_surface.memory_range.offset;
1387 const struct isl_format_layout *layout =
1388 isl_format_get_layout(surf->format);
1389 struct isl_tile_info tile_info;
1390 isl_surf_get_tile_info(surf, &tile_info);
1391
1392 if (INTEL_DEBUG(DEBUG_SPARSE)) {
1393 sparse_debug("%s:\n", __func__);
1394 sparse_debug("mip_level:%d array_layer:%d\n", mip_level, array_layer);
1395 sparse_debug("aspect:0x%x plane:%d\n", aspect, plane);
1396 sparse_debug("binding offset: [%d, %d, %d] extent: [%d, %d, %d]\n",
1397 bind->offset.x, bind->offset.y, bind->offset.z,
1398 bind->extent.width, bind->extent.height,
1399 bind->extent.depth);
1400 dump_anv_image(image);
1401 dump_isl_surf(surf);
1402 sparse_debug("\n");
1403 }
1404
1405 VkExtent3D block_shape_px =
1406 anv_sparse_calc_block_shape(device->physical, surf, &tile_info);
1407 VkExtent3D block_shape_el = vk_extent3d_px_to_el(block_shape_px, layout);
1408
1409 /* Both bind->offset and bind->extent are in pixel units. */
1410 VkOffset3D bind_offset_el = vk_offset3d_px_to_el(bind->offset, layout);
1411
1412 /* The spec says we only really need to align if for a given coordinate
1413 * offset + extent equals the corresponding dimensions of the image
1414 * subresource, but all the other non-aligned usage is invalid, so just
1415 * align everything.
1416 */
1417 VkExtent3D bind_extent_px = {
1418 .width = ALIGN_NPOT(bind->extent.width, block_shape_px.width),
1419 .height = ALIGN_NPOT(bind->extent.height, block_shape_px.height),
1420 .depth = ALIGN_NPOT(bind->extent.depth, block_shape_px.depth),
1421 };
1422 VkExtent3D bind_extent_el = vk_extent3d_px_to_el(bind_extent_px, layout);
1423
1424 /* Nothing that has a tile_size different than ANV_SPARSE_BLOCK_SIZE should
1425 * be reaching here, as these cases should be treated as "everything is
1426 * part of the miptail" (see anv_sparse_calc_miptail_properties()).
1427 */
1428 assert(isl_calc_tile_size(&tile_info) == ANV_SPARSE_BLOCK_SIZE);
1429
1430 /* How many blocks are necessary to form a whole line on this image? */
1431 const uint32_t blocks_per_line = surf->row_pitch_B / (layout->bpb / 8) /
1432 block_shape_el.width;
1433 /* The loop below will try to bind a whole line of blocks at a time as
1434 * they're guaranteed to be contiguous, so we calculate how many blocks
1435 * that is and how big is each block to figure the bind size of a whole
1436 * line.
1437 */
1438 uint64_t line_bind_size_in_blocks = bind_extent_el.width /
1439 block_shape_el.width;
1440 uint64_t line_bind_size = line_bind_size_in_blocks * ANV_SPARSE_BLOCK_SIZE;
1441 assert(line_bind_size_in_blocks != 0);
1442 assert(line_bind_size != 0);
1443
1444 uint64_t memory_offset = bind->memoryOffset;
1445 for (uint32_t z = bind_offset_el.z;
1446 z < bind_offset_el.z + bind_extent_el.depth;
1447 z += block_shape_el.depth) {
1448 uint64_t subresource_offset_B;
1449 uint32_t subresource_x_offset, subresource_y_offset;
1450 isl_surf_get_image_offset_B_tile_sa(surf, mip_level, array_layer, z,
1451 &subresource_offset_B,
1452 &subresource_x_offset,
1453 &subresource_y_offset);
1454 assert(subresource_x_offset == 0 && subresource_y_offset == 0);
1455 assert(subresource_offset_B % ANV_SPARSE_BLOCK_SIZE == 0);
1456
1457 for (uint32_t y = bind_offset_el.y;
1458 y < bind_offset_el.y + bind_extent_el.height;
1459 y+= block_shape_el.height) {
1460 uint32_t line_block_offset = y / block_shape_el.height *
1461 blocks_per_line;
1462 uint64_t line_start_B = subresource_offset_B +
1463 line_block_offset * ANV_SPARSE_BLOCK_SIZE;
1464 uint64_t bind_offset_B = line_start_B +
1465 (bind_offset_el.x / block_shape_el.width) *
1466 ANV_SPARSE_BLOCK_SIZE;
1467
1468 VkSparseMemoryBind opaque_bind = {
1469 .resourceOffset = binding_plane_offset + bind_offset_B,
1470 .size = line_bind_size,
1471 .memory = bind->memory,
1472 .memoryOffset = memory_offset,
1473 .flags = bind->flags,
1474 };
1475
1476 memory_offset += line_bind_size;
1477
1478 assert(line_start_B % ANV_SPARSE_BLOCK_SIZE == 0);
1479 assert(opaque_bind.resourceOffset % ANV_SPARSE_BLOCK_SIZE == 0);
1480 assert(opaque_bind.size % ANV_SPARSE_BLOCK_SIZE == 0);
1481
1482 struct anv_vm_bind anv_bind = vk_bind_to_anv_vm_bind(sparse_data,
1483 &opaque_bind);
1484 VkResult result = anv_sparse_submission_add(device, submit,
1485 &anv_bind);
1486 if (result != VK_SUCCESS)
1487 return result;
1488 }
1489 }
1490
1491 return VK_SUCCESS;
1492 }
1493
1494 VkResult
anv_sparse_image_check_support(struct anv_physical_device * pdevice,VkImageCreateFlags flags,VkImageTiling tiling,VkSampleCountFlagBits samples,VkImageType type,VkFormat vk_format)1495 anv_sparse_image_check_support(struct anv_physical_device *pdevice,
1496 VkImageCreateFlags flags,
1497 VkImageTiling tiling,
1498 VkSampleCountFlagBits samples,
1499 VkImageType type,
1500 VkFormat vk_format)
1501 {
1502 assert(flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT);
1503
1504 /* The spec says:
1505 * "A sparse image created using VK_IMAGE_CREATE_SPARSE_BINDING_BIT (but
1506 * not VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) supports all formats that
1507 * non-sparse usage supports, and supports both VK_IMAGE_TILING_OPTIMAL
1508 * and VK_IMAGE_TILING_LINEAR tiling."
1509 */
1510 if (!(flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
1511 return VK_SUCCESS;
1512
1513 if (type == VK_IMAGE_TYPE_1D)
1514 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1515
1516 /* From here on, these are the rules:
1517 * "A sparse image created using VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT
1518 * supports all non-compressed color formats with power-of-two element
1519 * size that non-sparse usage supports. Additional formats may also be
1520 * supported and can be queried via
1521 * vkGetPhysicalDeviceSparseImageFormatProperties.
1522 * VK_IMAGE_TILING_LINEAR tiling is not supported."
1523 */
1524
1525 /* We choose not to support sparse residency on emulated compressed
1526 * formats due to the additional image plane. It would make the
1527 * implementation extremely complicated.
1528 */
1529 if (anv_is_format_emulated(pdevice, vk_format))
1530 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1531
1532 /* While the spec itself says linear is not supported (see above), deqp-vk
1533 * tries anyway to create linear sparse images, so we have to check for it.
1534 * This is also said in VUID-VkImageCreateInfo-tiling-04121:
1535 * "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain
1536 * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
1537 */
1538 if (tiling == VK_IMAGE_TILING_LINEAR)
1539 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1540
1541 if ((samples & VK_SAMPLE_COUNT_2_BIT &&
1542 !pdevice->vk.supported_features.sparseResidency2Samples) ||
1543 (samples & VK_SAMPLE_COUNT_4_BIT &&
1544 !pdevice->vk.supported_features.sparseResidency4Samples) ||
1545 (samples & VK_SAMPLE_COUNT_8_BIT &&
1546 !pdevice->vk.supported_features.sparseResidency8Samples) ||
1547 (samples & VK_SAMPLE_COUNT_16_BIT &&
1548 !pdevice->vk.supported_features.sparseResidency16Samples) ||
1549 samples & VK_SAMPLE_COUNT_32_BIT ||
1550 samples & VK_SAMPLE_COUNT_64_BIT)
1551 return VK_ERROR_FEATURE_NOT_PRESENT;
1552
1553 /* While the Vulkan spec allows us to support depth/stencil sparse images
1554 * everywhere, sometimes we're not able to have them with the tiling
1555 * formats that give us the standard block shapes. Having standard block
1556 * shapes is higher priority than supporting depth/stencil sparse images.
1557 *
1558 * Please see ISL's filter_tiling() functions for accurate explanations on
1559 * why depth/stencil images are not always supported with the tiling
1560 * formats we want. But in short: depth/stencil support in our HW is
1561 * limited to 2D and we can't build a 2D view of a 3D image with these
1562 * tiling formats due to the address swizzling being different.
1563 */
1564 VkImageAspectFlags aspects = vk_format_aspects(vk_format);
1565 if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1566 /* For multi-sampled images, the image layouts for color and
1567 * depth/stencil are different, and only the color layout is compatible
1568 * with the standard block shapes.
1569 */
1570 if (samples != VK_SAMPLE_COUNT_1_BIT)
1571 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1572
1573 /* For 125+, isl_gfx125_filter_tiling() claims 3D is not supported.
1574 * For the previous platforms, isl_gfx6_filter_tiling() says only 2D is
1575 * supported.
1576 */
1577 if (pdevice->info.verx10 >= 125) {
1578 if (type == VK_IMAGE_TYPE_3D)
1579 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1580 } else {
1581 if (type != VK_IMAGE_TYPE_2D)
1582 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1583 }
1584 }
1585
1586 const struct anv_format *anv_format = anv_get_format(vk_format);
1587 if (!anv_format)
1588 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1589
1590 for (int p = 0; p < anv_format->n_planes; p++) {
1591 enum isl_format isl_format = anv_format->planes[p].isl_format;
1592
1593 if (isl_format == ISL_FORMAT_UNSUPPORTED)
1594 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1595
1596 const struct isl_format_layout *isl_layout =
1597 isl_format_get_layout(isl_format);
1598
1599 /* As quoted above, we only need to support the power-of-two formats.
1600 * The problem with the non-power-of-two formats is that we need an
1601 * integer number of pixels to fit into a sparse block, so we'd need the
1602 * sparse block sizes to be, for example, 192k for 24bpp.
1603 *
1604 * TODO: add support for these formats.
1605 */
1606 if (isl_layout->bpb != 8 && isl_layout->bpb != 16 &&
1607 isl_layout->bpb != 32 && isl_layout->bpb != 64 &&
1608 isl_layout->bpb != 128)
1609 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1610
1611 /* ISL_TILING_64_XE2_BIT's block shapes are not always Vulkan's standard
1612 * block shapes, so exclude what's non-standard.
1613 */
1614 if (pdevice->info.ver == 20) {
1615 switch (samples) {
1616 case VK_SAMPLE_COUNT_2_BIT:
1617 if (isl_layout->bpb == 128)
1618 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1619 break;
1620 case VK_SAMPLE_COUNT_8_BIT:
1621 if (isl_layout->bpb == 8 || isl_layout->bpb == 32)
1622 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1623 break;
1624 case VK_SAMPLE_COUNT_16_BIT:
1625 if (isl_layout->bpb == 64)
1626 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1627 break;
1628 default:
1629 break;
1630 }
1631 }
1632 }
1633
1634 /* These YUV formats are considered by Vulkan to be compressed 2x1 blocks.
1635 * We don't need to support them since they're compressed. On Gfx12 we
1636 * can't even have Tile64 for them. Once we do support these formats we'll
1637 * have to report the correct block shapes because dEQP cares about them,
1638 * and we'll have to adjust for the fact that ISL treats these as 16bpp 1x1
1639 * blocks instead of 32bpp 2x1 compressed blocks (as block shapes are
1640 * reported in units of compressed blocks).
1641 */
1642 if (vk_format == VK_FORMAT_G8B8G8R8_422_UNORM ||
1643 vk_format == VK_FORMAT_B8G8R8G8_422_UNORM)
1644 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1645
1646 return VK_SUCCESS;
1647 }
1648