/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

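/* Implementation of blorp's lookup_shader hook: search the device's
 * internal shader cache for an already-compiled blorp kernel.
 */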
static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

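/* Implementation of blorp's upload_shader hook: hand a freshly compiled
 * blorp kernel to the internal shader cache.  Blorp kernels bind no
 * descriptors, hence the empty bind map.
 */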
static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, stage,
                               key, key_size, kernel, kernel_size,
                               prog_data, prog_data_size,
                               NULL, 0, NULL, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

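/* One-time device setup: wire blorp to our compiler and shader cache
 * hooks and pick the batch-emission entrypoint for this hardware
 * generation.
 */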
void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {
      .use_mesh_shading = device->physical->vk.supported_extensions.NV_mesh_shader,
   };

   blorp_init(&device->blorp, device, &device->isl_dev, &config);
   device->blorp.compiler = device->physical->compiler;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info.verx10) {
   case 70:
      device->blorp.exec = gfx7_blorp_exec;
      break;
   case 75:
      device->blorp.exec = gfx75_blorp_exec;
      break;
   case 80:
      device->blorp.exec = gfx8_blorp_exec;
      break;
   case 90:
      device->blorp.exec = gfx9_blorp_exec;
      break;
   case 110:
      device->blorp.exec = gfx11_blorp_exec;
      break;
   case 120:
      device->blorp.exec = gfx12_blorp_exec;
      break;
   case 125:
      device->blorp.exec = gfx125_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
}

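/* Every blorp operation in this file goes through this wrapper so that
 * command buffers on compute-only queues automatically take blorp's
 * compute path.
 */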
static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
   if (!(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
      assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
      flags |= BLORP_BATCH_USE_COMPUTE;
   }

   blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

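/* Wrap a range of a VkBuffer in a linear 2D isl_surf so blorp can treat
 * it as an image.  The isl_surf lives in caller-provided storage because
 * blorp_surf only keeps a pointer to it.
 */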
static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   bool ok UNUSED;

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->address.bo,
         .offset = buffer->address.offset + offset,
         .mocs = anv_mocs(device, buffer->address.bo,
                          is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                  : ISL_SURF_USAGE_TEXTURE_BIT),
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch_B = row_pitch,
                      .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                       : ISL_SURF_USAGE_TEXTURE_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

/* Pick something high enough that it won't be used in core and low enough it
 * will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

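/* Build a blorp_surf for one plane of an anv_image.  Unless the caller
 * passes ANV_IMAGE_LAYOUT_EXPLICIT_AUX together with an explicit
 * aux_usage, the aux usage is derived from the Vulkan image layout.
 */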
static void
get_blorp_surf_for_anv_image(const struct anv_device *device,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(&device->info, image,
                                          aspect, usage, layout);
   }

   isl_surf_usage_flags_t mocs_usage =
      (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
      ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;

   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, mocs_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, 0),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color.  If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp; otherwise it will
       * only stomp the CCS to a particular value and won't care about the
       * format or clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = (union isl_color_value) {
            .f32 = { ANV_HZ_FC_VAL },
         };
      }
   }
}

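/* Like get_blorp_surf_for_anv_image() but for the plane's shadow
 * surface, a driver-internal copy that has to be kept in sync with the
 * main surface.  Returns false if the plane has no shadow surface.
 */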
static bool
get_blorp_surf_for_anv_shadow_image(const struct anv_device *device,
                                    const struct anv_image *image,
                                    VkImageAspectFlags aspect,
                                    struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   if (!anv_surface_is_valid(&image->planes[plane].shadow_surface))
      return false;

   const struct anv_surface *surface = &image->planes[plane].shadow_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT),
      },
   };

   return true;
}

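/* Copy a single VkImageCopy2 region.  Combined depth/stencil copies are
 * split into one blorp_copy per aspect, and every write to the
 * destination is mirrored into its shadow surface, if it has one.
 */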
static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
                      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }

         struct blorp_surf dst_shadow_surf;
         if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                                 dst_image,
                                                 1UL << aspect_bit,
                                                 &dst_shadow_surf)) {
            for (unsigned i = 0; i < layer_count; i++) {
               blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                          &dst_shadow_surf, dst_level, dst_base_layer + i,
                          srcOffset.x, srcOffset.y,
                          dstOffset.x, dstOffset.y,
                          extent.width, extent.height);
            }
         }
      }
   } else {
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }

      struct blorp_surf dst_shadow_surf;
      if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                              dst_image, dst_mask,
                                              &dst_shadow_surf)) {
         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_shadow_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1: return ISL_FORMAT_R8_UINT;
   case 2: return ISL_FORMAT_R16_UINT;
   case 3: return ISL_FORMAT_R8G8B8_UINT;
   case 4: return ISL_FORMAT_R32_UINT;
   case 6: return ISL_FORMAT_R16G16B16_UINT;
   case 8: return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

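/* Shared implementation of vkCmdCopyBufferToImage2 and
 * vkCmdCopyImageToBuffer2; buffer_to_image selects the direction.  The
 * buffer side is bound as a linear surface whose UINT format only has to
 * match the image format's bits per block, which sidesteps surface
 * restrictions of exotic (e.g. YCbCr) formats.
 */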
static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2* region,
                     bool buffer_to_image)
{
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us.  For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT.  Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   bool dst_has_shadow = false;
   struct blorp_surf dst_shadow_surf;
   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);

      dst_has_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                             anv_image, aspect,
                                             &dst_shadow_surf);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      if (dst_has_shadow) {
         blorp_copy(batch, &src->surf, src->level, src->offset.z,
                    &dst_shadow_surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y,
                    dst->offset.x, dst->offset.y,
                    extent.width, extent.height);
      }

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

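/* Sort each coordinate pair into ascending order and report whether that
 * reversed exactly one of them, i.e. whether the blit has to mirror
 * along this axis.
 */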
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

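/* Blit a single VkImageBlit2 region with scaling and optional mirroring.
 * For 3D sources the per-layer loop steps the source Z coordinate
 * fractionally so each destination slice samples at the center of the
 * corresponding source depth range.
 */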
static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      struct anv_format_plane src_format =
         anv_get_format_aspect(&cmd_buffer->device->info, src_image->vk.format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(&cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here.
       */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
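 * For example, gcd_pow2_u64(24, 4) == 4 and gcd_pow2_u64(24, 0) == 8.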
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in
    * which case the MIN2() will take the other one.  If both are 0 then
    * we will hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

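/* Copy a single VkBufferCopy2 region.  blorp_buffer_copy() deals with
 * arbitrary sizes and alignments internally, so no surface setup is
 * needed here.
 */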
static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       ISL_SURF_USAGE_TEXTURE_BIT),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       ISL_SURF_USAGE_RENDER_TARGET_BIT),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

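/* vkCmdUpdateBuffer is implemented as a GPU-side copy: the data is
 * memcpy'd into a temporary allocation in the dynamic state stream and
 * then copied into the destination buffer, one block at a time.
 */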
void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                          ISL_SURF_USAGE_TEXTURE_BIT, false)
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          ISL_SURF_USAGE_RENDER_TARGET_BIT),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

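/* vkCmdFillBuffer works in up to three passes of blorp_clear(): full
 * MAX_SURFACE_DIM x MAX_SURFACE_DIM rectangles, then one rectangle of
 * whole rows, then a final partial row, using the widest UINT format the
 * offset and size allow.
 */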
void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer.  If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, 0 /* color_write_disable */);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, 0 /* color_write_disable */);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

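/* vkCmdClearColorImage: clear each subresource range one level at a
 * time; for 3D images the layer count at each level is the minified
 * depth.
 */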
void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      struct anv_format_plane src_format =
         anv_get_format_aspect(&cmd_buffer->device->info, image->vk.format,
                               VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->vk.extent.width, level);
         const unsigned level_height = anv_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->vk.extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), 0 /* color_write_disable */);
      }
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth, stencil, stencil_shadow;
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   bool has_stencil_shadow = false;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);

      has_stencil_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                             VK_IMAGE_ASPECT_STENCIL_BIT,
                                             &stencil_shadow);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->vk.extent.width, level);
         const unsigned level_height = anv_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->vk.extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);

         if (clear_stencil && has_stencil_shadow) {
            union isl_color_value stencil_color = {
               .u32 = { pDepthStencil->stencil, },
            };
            blorp_clear(&batch, &stencil_shadow,
                        ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                        level, base_layer, layer_count,
                        0, 0, level_width, level_height,
                        stencil_color, 0 /* color_write_disable */);
         }
      }
   }

   anv_blorp_batch_finish(&batch);
}

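/* Allocate a binding table for blorp, grabbing a new binding table block
 * (and re-emitting STATE_BASE_ADDRESS) if the current one is exhausted.
 */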
VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

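/* vkCmdClearAttachments path for color attachments.  Clears are drawn
 * inside the current render pass via blorp_clear_attachments(), using a
 * one-entry binding table that points at the attachment's surface state.
 */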
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const uint32_t att_idx = attachment->colorAttachment;
   assert(att_idx < gfx->color_att_count);
   const struct anv_attachment *att = &gfx->color_att[att_idx];

   if (att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const struct anv_attachment *d_att = &gfx->depth_att;
   const struct anv_attachment *s_att = &gfx->stencil_att;
   if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
       s_att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        d_att->vk_format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      gfx->null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
   if (cmd_buffer->state.conditional_render_enabled) {
      anv_cmd_emit_conditional_render_predicate(cmd_buffer);
      flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }
   anv_blorp_batch_init(cmd_buffer, &batch, flags);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   anv_blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};

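/* Resolve layer_count layers of a multisampled image into a
 * single-sampled one.  With BLORP_FILTER_NONE the filter is implied by
 * the source: sample 0 for depth, stencil, and integer formats,
 * averaging otherwise.
 */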
void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(src_image->vk.samples > 1);
   assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(dst_image->vk.samples == 1);

   struct blorp_surf src_surf, dst_surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                src_aux_usage, &src_surf);
   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
      src_surf.clear_color_addr = anv_to_blorp_address(
         anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
                                        VK_IMAGE_ASPECT_COLOR_BIT));
   }
   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                dst_aux_usage, &dst_surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                     aspect, dst_aux_usage,
                                     dst_level, dst_base_layer, layer_count);

   if (filter == BLORP_FILTER_NONE) {
      /* If no explicit filter is provided, then it's implied by the type of
       * the source image.
       */
      if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
          (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
          isl_format_has_int_channel(src_surf.surf->format)) {
         filter = BLORP_FILTER_SAMPLE_0;
      } else {
         filter = BLORP_FILTER_AVERAGE;
      }
   }

   for (uint32_t l = 0; l < layer_count; l++) {
      blorp_blit(&batch,
                 &src_surf, src_level, src_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 filter, false, false);
   }

   anv_blorp_batch_finish(&batch);
}

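/* Resolve a single VkImageResolve2 region, deriving the aux usage of
 * each aspect from the corresponding image layout.
 */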
static void
resolve_image(struct anv_cmd_buffer *cmd_buffer,
              struct anv_image *src_image,
              VkImageLayout src_image_layout,
              struct anv_image *dst_image,
              VkImageLayout dst_image_layout,
              const VkImageResolve2 *region)
{
   assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
   assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
          vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));

   const uint32_t layer_count =
      vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);

   anv_foreach_image_aspect_bit(aspect_bit, src_image,
                                region->srcSubresource.aspectMask) {
      enum isl_aux_usage src_aux_usage =
         anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                 src_image_layout);
      enum isl_aux_usage dst_aux_usage =
         anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                 dst_image_layout);

      anv_image_msaa_resolve(cmd_buffer,
                             src_image, src_aux_usage,
                             region->srcSubresource.mipLevel,
                             region->srcSubresource.baseArrayLayer,
                             dst_image, dst_aux_usage,
                             region->dstSubresource.mipLevel,
                             region->dstSubresource.baseArrayLayer,
                             (1 << aspect_bit),
                             region->srcOffset.x,
                             region->srcOffset.y,
                             region->dstOffset.x,
                             region->dstOffset.y,
                             region->extent.width,
                             region->extent.height,
                             layer_count, BLORP_FILTER_NONE);
   }
}

void anv_CmdResolveImage2(
    VkCommandBuffer                             commandBuffer,
    const VkResolveImageInfo2*                  pResolveImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);

   for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
      resolve_image(cmd_buffer,
                    src_image, pResolveImageInfo->srcImageLayout,
                    dst_image, pResolveImageInfo->dstImageLayout,
                    &pResolveImageInfo->pRegions[r]);
   }
}

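/* Bring a shadow surface up to date by copying the given levels and
 * layers from the main surface, bracketed by the cache flushes needed
 * because the main surface's last writer is unknown.
 */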
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We don't know who touched the main surface last so flush a bunch of
    * caches to ensure we get good data.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before copy_to_shadow");

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                VK_IMAGE_LAYOUT_GENERAL,
                                ISL_AUX_USAGE_NONE, &surf);
   assert(surf.aux_usage == ISL_AUX_USAGE_NONE);

   struct blorp_surf shadow_surf;
   get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                       image, aspect, &shadow_surf);

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = vk_image_mip_level_extent(&image->vk, level);

      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   /* We just wrote to the buffer with the render cache.  Flush it. */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
                             "after copy_to_shadow");

   anv_blorp_batch_finish(&batch);
}

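/* Color clear with an explicitly chosen aux usage, for callers that have
 * already decided how the destination is compressed.
 */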
void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   /* We don't support planar images with multisampling yet */
   assert(image->n_planes == 1);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                aux_usage, &surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
                                     level, base_layer, layer_count);

   blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
               level, base_layer, layer_count,
               area.offset.x, area.offset.y,
               area.offset.x + area.extent.width,
               area.offset.y + area.extent.height,
               clear_color, 0 /* color_write_disable */);

   anv_blorp_batch_finish(&batch);
}

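/* Clear the depth and/or stencil aspects of an image with a regular
 * (non-HiZ) BLORP clear. Since BLORP may internally clear stencil through
 * the render pipe as RGBA32_UINT, the clear is bracketed by depth- and
 * render-cache flushes, and any stencil shadow surface is cleared to the
 * same value afterwards.
 */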
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   depth_aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance. If it does this, we need to flush it out of the depth
    * cache before rendering to it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before clear DS");

   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             depth_value,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             stencil_value);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance. If it does this, we need to flush it out of the render
    * cache before someone starts trying to do stencil on it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after clear DS");

   struct blorp_surf stencil_shadow;
   if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
       get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                           &stencil_shadow)) {
      union isl_color_value stencil_color = {
         .u32 = { stencil_value },
      };
      blorp_clear(&batch, &stencil_shadow,
                  ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                  level, base_layer, layer_count,
                  area.offset.x, area.offset.y,
                  area.offset.x + area.extent.width,
                  area.offset.y + area.extent.height,
                  stencil_color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);
}

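/* Run a HiZ operation (e.g. a resolve or ambiguate) on a range of array
 * layers of one level of a depth image.
 */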
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(plane == 0);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage, &surf);

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   anv_blorp_batch_finish(&batch);
}

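/* Fast depth/stencil clear through the HiZ hardware path (WM_HZ_OP),
 * bracketed by the PIPE_CONTROL workarounds the PRM requires around depth
 * buffer clears.
 */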
void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
      assert(base_layer + layer_count <=
             anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
    *
    *    "The following is required when performing a depth buffer clear with
    *    using the WM_STATE or 3DSTATE_WM:
    *
    *       * If other rendering operations have preceded this clear, a
    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *         enabled must be issued before the rectangle primitive used for
    *         the depth buffer clear operation.
    *       * [...]"
    *
    * Even though the PRM only says that this is required if using 3DSTATE_WM
    * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
    * hangs when doing a clear with WM_HZ_OP.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "before clear hiz");

   if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
      /* From Bspec 47010 (Depth Buffer Clear):
       *
       *    Since the fast clear cycles to CCS are not cached in TileCache,
       *    any previous depth buffer writes to overlapping pixels must be
       *    flushed out of TileCache before a succeeding Depth Buffer Clear.
       *    This restriction only applies to Depth Buffer with write-thru
       *    enabled, since fast clears to CCS only occur for write-thru mode.
       *
       * There may have been a write to this depth buffer. Flush it from the
       * tile cache just in case.
       */
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT,
                                "before clear hiz_ccs_wt");
   }

   blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                 level, base_layer, layer_count,
                                 area.offset.x, area.offset.y,
                                 area.offset.x + area.extent.width,
                                 area.offset.y + area.extent.height,
                                 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                                 ANV_HZ_FC_VAL,
                                 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 stencil_value);

   anv_blorp_batch_finish(&batch);

   /* From the SKL PRM, Depth Buffer Clear:
    *
    *    "Depth Buffer Clear Workaround
    *
    *    Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
    *    starting to render. DepthStall and DepthFlush are not needed between
    *    consecutive depth clear passes nor is it required if the depth-clear
    *    pass was done with “full_surf_clear” bit set in the
    *    3DSTATE_WM_HZ_OP."
    *
    * Even though the PRM provides a bunch of conditions under which this is
    * supposedly unnecessary, we choose to perform the flush unconditionally
    * just to be safe.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "after clear hiz");
}

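/* Perform an MCS operation (fast clear or partial resolve) on a
 * multisampled, single-plane color image. Because fast clear operations
 * are not properly synchronized with other drawing by the hardware, the
 * operation is surrounded by end-of-pipe syncs.
 */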
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(image->vk.samples > 1);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

   /* Multisampling with multi-planar formats is not supported */
   assert(image->n_planes == 1);

   const struct intel_device_info *devinfo = &cmd_buffer->device->info;
   struct blorp_batch batch;
   /* Multiplying a flag by a bool sets the flag only when the condition
    * holds.
    */
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate +
                        BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                ISL_AUX_USAGE_MCS, &surf);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed. It appears that fast clear ops are not
    * properly synchronized with other drawing. This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations. We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                              ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             (devinfo->verx10 == 125 ?
                              ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                              ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before fast clear mcs");

   switch (mcs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format, swizzle,
                       0, base_layer, layer_count,
                       0, 0, image->vk.extent.width, image->vk.extent.height);
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_mcs_partial_resolve(&batch, &surf, format,
                                base_layer, layer_count);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_AMBIGUATE:
   default:
      unreachable("Unsupported MCS operation");
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                              ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                              ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after fast clear mcs");

   anv_blorp_batch_finish(&batch);
}

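/* Perform a CCS operation (fast clear, full or partial resolve, or
 * ambiguate) on one level of a single-sampled color image, with the same
 * end-of-pipe synchronization around the operation as the MCS path above.
 */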
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
   assert(image->vk.samples == 1);
   assert(level < anv_image_aux_levels(image, aspect));
   /* Multi-LOD YCbCr is not allowed */
   assert(image->n_planes == 1 || level == 0);
   assert(base_layer + layer_count <=
          anv_image_aux_layers(image, aspect, level));

   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   const struct intel_device_info *devinfo = &cmd_buffer->device->info;

   struct blorp_batch batch;
   /* As in anv_image_mcs_op(), multiplying a flag by a bool sets the flag
    * only when the condition holds.
    */
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate +
                        BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage,
                                &surf);

   uint32_t level_width = anv_minify(surf.surf->logical_level0_px.w, level);
   uint32_t level_height = anv_minify(surf.surf->logical_level0_px.h, level);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed. It appears that fast clear ops are not
    * properly synchronized with other drawing. This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations. We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                              ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             (devinfo->verx10 == 125 ?
                              ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                              ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before fast clear ccs");

   switch (ccs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format, swizzle,
                       level, base_layer, layer_count,
                       0, 0, level_width, level_height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE: {
      /* Wa_1508744258: Enable RHWO optimization for resolves */
      const bool enable_rhwo_opt = cmd_buffer->device->info.verx10 == 120;

      if (enable_rhwo_opt)
         cmd_buffer->state.pending_rhwo_optimization_enabled = true;

      blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
                        format, ccs_op);

      if (enable_rhwo_opt)
         cmd_buffer->state.pending_rhwo_optimization_enabled = false;
      break;
   }
   case ISL_AUX_OP_AMBIGUATE:
      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;
         blorp_ccs_ambiguate(&batch, &surf, level, layer);
      }
      break;
   default:
      unreachable("Unsupported CCS operation");
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                              ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                              ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after fast clear ccs");

   anv_blorp_batch_finish(&batch);
}
