/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"
#include "genxml/gen8_pack.h"

static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const void *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map empty_bind_map = {};
   struct anv_push_descriptor_info empty_push_desc_info = {};
   struct anv_shader_upload_params upload_params = {
      .stage = stage,
      .key_data = key,
      .key_size = key_size,
      .kernel_data = kernel,
      .kernel_size = kernel_size,
      .prog_data = prog_data,
      .prog_data_size = prog_data_size,
      .bind_map = &empty_bind_map,
      .push_desc_info = &empty_push_desc_info,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, &upload_params);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {
      .use_mesh_shading = device->vk.enabled_extensions.EXT_mesh_shader,
      .use_unrestricted_depth_range =
         device->vk.enabled_extensions.EXT_depth_range_unrestricted,
   };

   blorp_init_brw(&device->blorp, device, &device->isl_dev,
                  device->physical->compiler, &config);
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   device->blorp.enable_tbimr = device->physical->instance->enable_tbimr;
   device->blorp.exec = anv_genX(device->info, blorp_exec);
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
}

static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
   VkQueueFlags queue_flags = cmd_buffer->queue_family->queueFlags;

   if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
      /* blorp runs on the render engine by default */
   } else if (queue_flags & VK_QUEUE_COMPUTE_BIT) {
      flags |= BLORP_BATCH_USE_COMPUTE;
   } else if (queue_flags & VK_QUEUE_TRANSFER_BIT) {
      flags |= BLORP_BATCH_USE_BLITTER;
   } else {
      unreachable("unknown queue family");
   }

   blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

static void
get_blorp_surf_for_anv_address(struct anv_device *device,
                               struct anv_address address,
                               uint32_t width, uint32_t height,
                               uint32_t row_pitch, enum isl_format format,
                               bool is_dest,
                               struct blorp_surf *blorp_surf,
                               struct isl_surf *isl_surf)
{
   bool ok UNUSED;

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo,
                          is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                  : ISL_SURF_USAGE_TEXTURE_BIT),
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch_B = row_pitch,
                      .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                       : ISL_SURF_USAGE_TEXTURE_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   get_blorp_surf_for_anv_address(device,
                                  anv_address_add(buffer->address, offset),
                                  width, height, row_pitch, format,
                                  is_dest, blorp_surf, isl_surf);
}

/* Pick something high enough that it won't be used in core and low enough
 * that it will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000
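/* Core VkImageLayout values are small enum constants while extension-added
 * layouts start at 1000000000, so a value of 10000000 can never collide
 * with either range.
 */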

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

static void
get_blorp_surf_for_anv_image(const struct anv_cmd_buffer *cmd_buffer,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const struct anv_device *device = cmd_buffer->device;
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(device->info, image,
                                          aspect, usage, layout,
                                          cmd_buffer->queue_family->queueFlags);
   }

   isl_surf_usage_flags_t mocs_usage =
      (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
      ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;

   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, mocs_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, 0),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color. If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp; otherwise it will
       * only stomp the CCS to a particular value and won't care about the
       * format or clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = (union isl_color_value) {
            .f32 = { ANV_HZ_FC_VAL },
         };
      }
   }
}

static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   } else {
      /* This case handles YCbCr images, where the aspect masks are
       * compatible but don't need to be the same.
       */
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }
   }
}

static struct anv_state
record_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer)
{
   const struct intel_device_info *info = cmd_buffer->device->info;

   const VkResult result = anv_cmd_buffer_ensure_rcs_companion(cmd_buffer);
   if (result != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, result);
      return ANV_STATE_NULL;
   }

   assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);

   /* Re-emit the aux table register in every command buffer. This way we're
    * ensured that we have the table even if this command buffer doesn't
    * initialize any images.
    */
   if (cmd_buffer->device->info->has_aux_map) {
      anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
                                ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
                                "new cmd buffer with aux-tt");
   }

   return anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(cmd_buffer);
}

static void
end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_state syncpoint)
{
   const struct intel_device_info *info = cmd_buffer->device->info;
   anv_genX(info, cmd_buffer_end_companion_rcs_syncpoint)(cmd_buffer,
                                                          syncpoint);
}

static bool
anv_blorp_blitter_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                                       struct anv_image *image,
                                       const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo,
                                       const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo)
{
   if (!anv_cmd_buffer_is_blitter_queue(cmd_buffer))
      return false;

   assert((pCopyBufferToImageInfo && !pCopyImageToBufferInfo) ||
          (pCopyImageToBufferInfo && !pCopyBufferToImageInfo));

   bool blorp_execute_on_companion = false;
   VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_NONE;
   const uint32_t region_count = pCopyBufferToImageInfo ?
                                 pCopyBufferToImageInfo->regionCount :
                                 pCopyImageToBufferInfo->regionCount;

   for (unsigned r = 0; r < region_count &&
        !blorp_execute_on_companion; r++) {
      if (pCopyBufferToImageInfo) {
         aspect_mask =
            pCopyBufferToImageInfo->pRegions[r].imageSubresource.aspectMask;
      } else {
         aspect_mask =
            pCopyImageToBufferInfo->pRegions[r].imageSubresource.aspectMask;
      }

      enum isl_format linear_format =
         anv_get_isl_format(cmd_buffer->device->info, image->vk.format,
                            aspect_mask, VK_IMAGE_TILING_LINEAR);
      const struct isl_format_layout *linear_fmtl =
         isl_format_get_layout(linear_format);

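      /* Three-component (24/48/96 bpp) formats are what trip the blitter
       * here: e.g. VK_FORMAT_R8G8B8_UNORM is 24 bpp and 24 % 3 == 0.
       */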
      switch (linear_fmtl->bpb) {
      case 96:
         /* We can only support linear mode for 96bpp on the blitter engine. */
         blorp_execute_on_companion |=
            image->vk.tiling != VK_IMAGE_TILING_LINEAR;
         break;
      default:
         blorp_execute_on_companion |= linear_fmtl->bpb % 3 == 0;
         break;
      }
   }

   return blorp_execute_on_companion;
}

static bool
anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_image *dst_image)
{
   /* MSAA images have to be dealt with on the companion RCS command buffer
    * for both the CCS and BCS engines.
    */
   if ((anv_cmd_buffer_is_blitter_queue(cmd_buffer) ||
        anv_cmd_buffer_is_compute_queue(cmd_buffer)) &&
       dst_image->vk.samples > 1)
      return true;

   /* Emulation of formats is done through a compute shader, so we need
    * the companion command buffer for the BCS engine.
    */
   if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) &&
       dst_image->emu_plane_format != VK_FORMAT_UNDEFINED)
      return true;

   return false;
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
      const enum anv_pipe_bits pipe_bits =
         anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
         ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
                                "Copy flush before astc emu");

      for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
         const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
         const VkOffset3D block_offset = vk_image_offset_to_elements(
            &dst_image->vk, region->dstOffset);
         const VkExtent3D block_extent = vk_image_extent_to_elements(
            &src_image->vk, region->extent);
         anv_astc_emu_process(cmd_buffer, dst_image,
                              pCopyImageInfo->dstImageLayout,
                              &region->dstSubresource,
                              block_offset, block_extent);
      }
   }

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R16_UINT;
   case 3:  return ISL_FORMAT_R8G8B8_UINT;
   case 4:  return ISL_FORMAT_R32_UINT;
   case 6:  return ISL_FORMAT_R16G16B16_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2* region,
                     bool buffer_to_image)
{
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us. For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT. Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
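   /* For example, copying BC1 data (4x4 blocks, 64 bits per block) binds
    * the buffer as an R32G32_UINT surface whose extent below is measured
    * in blocks, and blorp copies one block per "pixel". (Illustrative
    * only; the actual format is chosen by isl_format_for_size() below.)
    */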
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   bool blorp_execute_on_companion =
      anv_blorp_execute_on_companion(cmd_buffer, dst_image);

   /* Check whether any of the aspects is incompatible with the blitter
    * engine. If so, use the companion RCS command buffer for the blit
    * operation, since 3-component formats (other than linear 96bpb) are
    * not supported natively on the blitter.
    */
   blorp_execute_on_companion |=
      anv_blorp_blitter_execute_on_companion(cmd_buffer, dst_image,
                                             pCopyBufferToImageInfo, NULL);

   if (blorp_execute_on_companion) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);

   if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
      const enum anv_pipe_bits pipe_bits =
         anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
         ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
         ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
      anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
                                "Copy flush before astc emu");

      for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
         const VkBufferImageCopy2 *region =
            &pCopyBufferToImageInfo->pRegions[r];
         const VkOffset3D block_offset = vk_image_offset_to_elements(
            &dst_image->vk, region->imageOffset);
         const VkExtent3D block_extent = vk_image_extent_to_elements(
            &dst_image->vk, region->imageExtent);
         anv_astc_emu_process(cmd_buffer, dst_image,
                              pCopyBufferToImageInfo->dstImageLayout,
                              &region->imageSubresource,
                              block_offset, block_extent);
      }
   }

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

static void
anv_add_buffer_write_pending_bits(struct anv_cmd_buffer *cmd_buffer,
                                  const char *reason)
{
   const struct intel_device_info *devinfo = cmd_buffer->device->info;

   cmd_buffer->state.queries.buffer_write_bits |=
      (cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0 ?
      ANV_QUERY_COMPUTE_WRITES_PENDING_BITS :
      ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   UNUSED struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   bool blorp_execute_on_companion =
      anv_blorp_execute_on_companion(cmd_buffer, src_image);

   /* Check whether any of the aspects is incompatible with the blitter
    * engine. If so, use the companion RCS command buffer for the blit
    * operation, since 3-component formats (other than linear 96bpb) are
    * not supported natively on the blitter.
    */
   blorp_execute_on_companion |=
      anv_blorp_blitter_execute_on_companion(cmd_buffer, src_image, NULL,
                                             pCopyImageToBufferInfo);

   if (blorp_execute_on_companion) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "after copy image to buffer");

   anv_blorp_batch_finish(&batch);

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

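/* Normalize a source/destination coordinate pair so that each range runs
 * low-to-high, returning true when exactly one of the two had to be
 * swapped, i.e. when the blit must mirror along this axis.
 */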
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      VkFormat src_vk_format = src_image->vk.format;

      if (src_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
         /* redirect src to the hidden plane */
         const uint32_t plane = src_image->n_planes;
         const struct anv_surface *surface =
            &src_image->planes[plane].primary_surface;
         const struct anv_address address =
            anv_image_address(src_image, &surface->memory_range);
         src.surf = &surface->isl,
         src.addr.offset = address.offset;

         src_vk_format = src_image->emu_plane_format;
      }

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, src_vk_format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here. */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);
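      /* Note that depth_center_offset is just src_z_step / 2: each
       * destination layer samples the center of its corresponding source
       * slice range.
       */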

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);
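   /* e.g. a = 12 (0b1100) and b = 8 (0b1000) have lowest set bits of 4 and
    * 8 respectively, so the largest power-of-two divisor of both is 4.
    */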

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX, in
    * which case the MIN2() will take the other one. If both are 0 then we
    * will hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       ISL_SURF_USAGE_TEXTURE_BIT),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       ISL_SURF_USAGE_RENDER_TARGET_BIT),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "after copy buffer");

   anv_blorp_batch_finish(&batch);
}


void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU. Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                          ISL_SURF_USAGE_TEXTURE_BIT, false)
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          ISL_SURF_USAGE_RENDER_TARGET_BIT),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_add_buffer_write_pending_bits(cmd_buffer, "update buffer");

   anv_blorp_batch_finish(&batch);
}

void
anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
                         struct anv_address address,
                         VkDeviceSize size,
                         uint32_t data)
{
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   uint64_t offset = address.offset;
   bs = gcd_pow2_u64(bs, offset);
   bs = gcd_pow2_u64(bs, size);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

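   /* The fill is split into up to three passes: whole MAX_SURFACE_DIM x
    * MAX_SURFACE_DIM rectangles first, then one rectangle of full-width
    * rows covering most of the remainder, then a final partial row.
    */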
   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (size >= max_fill_size) {
      get_blorp_surf_for_anv_address(cmd_buffer->device,
                                     (struct anv_address) {
                                        .bo = address.bo, .offset = offset,
                                     },
                                     MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                     MAX_SURFACE_DIM * bs, isl_format, true,
                                     &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, 0 /* color_write_disable */);
      size -= max_fill_size;
      offset += max_fill_size;
   }

   uint64_t height = size / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_address(cmd_buffer->device,
                                     (struct anv_address) {
                                        .bo = address.bo, .offset = offset,
                                     },
                                     MAX_SURFACE_DIM, height,
                                     MAX_SURFACE_DIM * bs, isl_format, true,
                                     &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, 0 /* color_write_disable */);
      size -= rect_fill_size;
      offset += rect_fill_size;
   }

   if (size != 0) {
      const uint32_t width = size / bs;
      get_blorp_surf_for_anv_address(cmd_buffer->device,
                                     (struct anv_address) {
                                        .bo = address.bo, .offset = offset,
                                     },
                                     width, 1,
                                     width * bs, isl_format, true,
                                     &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
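   /* e.g. with VK_WHOLE_SIZE and 10 bytes remaining, only 8 are filled. */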
   fillSize &= ~3ull;

   anv_cmd_buffer_fill_area(cmd_buffer,
                            anv_address_add(dst_buffer->address, dstOffset),
                            fillSize, data);

   anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
   UNUSED struct anv_state rcs_done = ANV_STATE_NULL;

   if (anv_blorp_execute_on_companion(cmd_buffer, image)) {
      rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
      cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, image->vk.format,
                               VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = u_minify(image->vk.extent.width, level);
         const unsigned level_height = u_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = u_minify(image->vk.extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), 0 /* color_write_disable */);
      }
   }

   anv_blorp_batch_finish(&batch);

   if (rcs_done.alloc_size)
      end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth, stencil;
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = u_minify(image->vk.extent.width, level);
         const unsigned level_height = u_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D)
            layer_count = u_minify(image->vk.extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);
      }
   }

   anv_blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space. Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

static bool
can_fast_clear_color_att(struct anv_cmd_buffer *cmd_buffer,
                         struct blorp_batch *batch,
                         const struct anv_attachment *att,
                         const VkClearAttachment *attachment,
                         uint32_t rectCount, const VkClearRect *pRects)
{
   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
      return false;

   /* We don't support fast clearing with conditional rendering at the
    * moment. All the tracking done around fast clears (clear color updates
    * and fast-clear type updates) happens unconditionally.
    */
   if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
      return false;

   if (rectCount > 1) {
      anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
                    "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
      return false;
   }

   /* We only support fast-clears on the first layer */
   if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
      return false;

   bool is_multiview = cmd_buffer->state.gfx.view_mask != 0;
   if (is_multiview && (cmd_buffer->state.gfx.view_mask != 1))
      return false;

   return anv_can_fast_clear_color_view(cmd_buffer->device,
                                        (struct anv_image_view *)att->iview,
                                        att->layout,
                                        clear_color,
                                        pRects->layerCount,
                                        pRects->rect,
                                        cmd_buffer->queue_family->queueFlags);
}
1421
1422 static void
exec_ccs_op(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,const struct anv_image * image,enum isl_format format,struct isl_swizzle swizzle,VkImageAspectFlagBits aspect,uint32_t level,uint32_t base_layer,uint32_t layer_count,enum isl_aux_op ccs_op,union isl_color_value * clear_value)1423 exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
1424 struct blorp_batch *batch,
1425 const struct anv_image *image,
1426 enum isl_format format, struct isl_swizzle swizzle,
1427 VkImageAspectFlagBits aspect, uint32_t level,
1428 uint32_t base_layer, uint32_t layer_count,
1429 enum isl_aux_op ccs_op, union isl_color_value *clear_value)
1430 {
1431 assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1432 assert(image->vk.samples == 1);
1433 assert(level < anv_image_aux_levels(image, aspect));
1434 /* Multi-LOD YcBcR is not allowed */
1435 assert(image->n_planes == 1 || level == 0);
1436 assert(base_layer + layer_count <=
1437 anv_image_aux_layers(image, aspect, level));
1438
1439 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1440 const struct intel_device_info *devinfo = cmd_buffer->device->info;
1441
1442 struct blorp_surf surf;
1443 get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
1444 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1445 image->planes[plane].aux_usage,
1446 &surf);
1447
1448 uint32_t level_width = u_minify(surf.surf->logical_level0_px.w, level);
1449 uint32_t level_height = u_minify(surf.surf->logical_level0_px.h, level);
1450
1451 /* Blorp will store the clear color for us if we provide the clear color
1452 * address and we are doing a fast clear. So we save the clear value into
1453 * the blorp surface.
1454 */
1455 if (clear_value)
1456 surf.clear_color = *clear_value;
1457
1458 char flush_reason[64];
1459 int ret =
1460 snprintf(flush_reason, sizeof(flush_reason),
1461 "ccs op start: %s", isl_aux_op_to_name(ccs_op));
1462 assert(ret < sizeof(flush_reason));
1463
1464 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1465 *
1466 * "After Render target fast clear, pipe-control with color cache
1467 * write-flush must be issued before sending any DRAW commands on
1468 * that render target."
1469 *
1470 * This comment is a bit cryptic and doesn't really tell you what's going
1471 * or what's really needed. It appears that fast clear ops are not
1472 * properly synchronized with other drawing. This means that we cannot
1473 * have a fast clear operation in the pipe at the same time as other
1474 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1475 * that the contents of the previous draw hit the render target before we
1476 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1477 * that it is completed before any additional drawing occurs.
1478 */
1479 anv_add_pending_pipe_bits(cmd_buffer,
1480 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1481 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1482 (devinfo->verx10 == 120 ?
1483 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1484 (devinfo->verx10 == 125 ?
1485 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
1486 ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
1487 ANV_PIPE_PSS_STALL_SYNC_BIT |
1488 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1489 flush_reason);
1490
1491 switch (ccs_op) {
1492 case ISL_AUX_OP_FAST_CLEAR:
1493 /* From the ICL PRMs, Volume 9: Render Engine, State Caching :
1494 *
1495 * "Any values referenced by pointers within the RENDER_SURFACE_STATE
1496 * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
1497 * Indirect State Pointer) are considered to be part of that state
1498 * and any changes to these referenced values requires an
1499 * invalidation of the L1 state cache to ensure the new values are
1500 * being used as part of the state. In the case of surface data
1501 * pointed to by the Surface Base Address in RENDER SURFACE STATE,
1502 * the Texture Cache must be invalidated if the surface data
1503 * changes."
1504 *
1505 * and From the Render Target Fast Clear section,
1506 *
1507 * "HwManaged FastClear allows SW to store FastClearValue in separate
1508 * graphics allocation, instead of keeping them in
1509 * RENDER_SURFACE_STATE. This behavior can be enabled by setting
1510 * ClearValueAddressEnable in RENDER_SURFACE_STATE.
1511 *
1512 * Proper sequence of commands is as follows:
1513 *
1514 * 1. Storing clear color to allocation
1515 * 2. Ensuring that step 1. is finished and visible for TextureCache
1516 * 3. Performing FastClear
1517 *
1518 * Step 2. is required on products with ClearColorConversion feature.
1519 * This feature is enabled by setting ClearColorConversionEnable.
1520 * This causes HW to read stored color from ClearColorAllocation and
1521 * write back with the native format or RenderTarget - and clear
1522 * color needs to be present and visible. Reading is done from
1523 * TextureCache, writing is done to RenderCache."
1524 *
1525 * We're going to change the clear color. Invalidate the texture cache
1526 * now to ensure the clear color conversion feature works properly.
1527 * Although the docs seem to require invalidating the texture cache
1528 * after updating the clear color allocation, we can do this beforehand
1529 * so long as we ensure:
1530 *
1531 * 1. Step 1 is complete before the texture cache is accessed in step 3
1532 * 2. We don't access the texture cache between invalidation and step 3
1533 *
1534 * The second requirement is satisfied because we'll be performing step
1535 * 1 and 3 right after invalidating. The first is satisfied because
1536 * BLORP updates the clear color before performing the fast clear and it
1537 * performs the synchronizations suggested by the Render Target Fast
1538 * Clear section (not quoted here) to ensure its completion.
1539 *
1540 * While we're here, also invalidate the state cache as suggested.
1541 */
1542 if (devinfo->ver >= 11) {
1543 anv_add_pending_pipe_bits(cmd_buffer,
1544 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
1545 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
1546 "before blorp clear color update");
1547 }
1548
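/* The clear rectangle covers the full extent of the miplevel computed
 * above; partial rectangles are not used on this path.
 */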
1549 blorp_fast_clear(batch, &surf, format, swizzle,
1550 level, base_layer, layer_count,
1551 0, 0, level_width, level_height);
1552 break;
1553 case ISL_AUX_OP_FULL_RESOLVE:
1554 case ISL_AUX_OP_PARTIAL_RESOLVE: {
1555 /* Wa_1508744258: Enable RHWO optimization for resolves */
1556 const bool enable_rhwo_opt =
1557 intel_needs_workaround(cmd_buffer->device->info, 1508744258);
1558
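/* Bracket the resolve with this flag so the PIPE_CONTROL emission code
 * can toggle the RHWO optimization around it (set here, cleared below).
 */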
1559 if (enable_rhwo_opt)
1560 cmd_buffer->state.pending_rhwo_optimization_enabled = true;
1561
1562 blorp_ccs_resolve(batch, &surf, level, base_layer, layer_count,
1563 format, ccs_op);
1564
1565 if (enable_rhwo_opt)
1566 cmd_buffer->state.pending_rhwo_optimization_enabled = false;
1567 break;
1568 }
1569 case ISL_AUX_OP_AMBIGUATE:
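/* blorp_ccs_ambiguate() operates on a single layer at a time, so walk
 * the requested layer range.
 */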
1570 for (uint32_t a = 0; a < layer_count; a++) {
1571 const uint32_t layer = base_layer + a;
1572 blorp_ccs_ambiguate(batch, &surf, level, layer);
1573 }
1574 break;
1575 default:
1576 unreachable("Unsupported CCS operation");
1577 }
1578
1579 anv_add_pending_pipe_bits(cmd_buffer,
1580 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1581 (devinfo->verx10 == 120 ?
1582 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1583 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1584 ANV_PIPE_PSS_STALL_SYNC_BIT |
1585 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1586 "ccs op finish");
1587 }
1588
1589 static void
1590 exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
1591 struct blorp_batch *batch,
1592 const struct anv_image *image,
1593 enum isl_format format, struct isl_swizzle swizzle,
1594 VkImageAspectFlagBits aspect,
1595 uint32_t base_layer, uint32_t layer_count,
1596 enum isl_aux_op mcs_op, union isl_color_value *clear_value)
1597 {
1598 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1599 assert(image->vk.samples > 1);
1600 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
1601
1602 /* Multisampling with multi-planar formats is not supported */
1603 assert(image->n_planes == 1);
1604
1605 const struct intel_device_info *devinfo = cmd_buffer->device->info;
1606 struct blorp_surf surf;
1607 get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
1608 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1609 ISL_AUX_USAGE_MCS, &surf);
1610
1611 /* Blorp will store the clear color for us if we provide the clear color
1612 * address and we are doing a fast clear. So we save the clear value into
1613 * the blorp surface.
1614 */
1615 if (clear_value)
1616 surf.clear_color = *clear_value;
1617
1618 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1619 *
1620 * "After Render target fast clear, pipe-control with color cache
1621 * write-flush must be issued before sending any DRAW commands on
1622 * that render target."
1623 *
1624 * This comment is a bit cryptic and doesn't really tell you what's going
1625 * on or what's really needed. It appears that fast clear ops are not
1626 * properly synchronized with other drawing. This means that we cannot
1627 * have a fast clear operation in the pipe at the same time as other
1628 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1629 * that the contents of the previous draw hit the render target before we
1630 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1631 * that it is completed before any additional drawing occurs.
1632 */
1633 anv_add_pending_pipe_bits(cmd_buffer,
1634 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1635 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1636 (devinfo->verx10 == 120 ?
1637 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1638 (devinfo->verx10 == 125 ?
1639 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
1640 ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
1641 ANV_PIPE_PSS_STALL_SYNC_BIT |
1642 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1643 "before fast clear mcs");
1644
1645 switch (mcs_op) {
1646 case ISL_AUX_OP_FAST_CLEAR:
1647 /* From the ICL PRMs, Volume 9: Render Engine, State Caching:
1648 *
1649 * "Any values referenced by pointers within the RENDER_SURFACE_STATE
1650 * or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
1651 * Indirect State Pointer) are considered to be part of that state
1652 * and any changes to these referenced values requires an
1653 * invalidation of the L1 state cache to ensure the new values are
1654 * being used as part of the state. In the case of surface data
1655 * pointed to by the Surface Base Address in RENDER SURFACE STATE,
1656 * the Texture Cache must be invalidated if the surface data
1657 * changes."
1658 *
1659 * and from the Render Target Fast Clear section,
1660 *
1661 * "HwManaged FastClear allows SW to store FastClearValue in separate
1662 * graphics allocation, instead of keeping them in
1663 * RENDER_SURFACE_STATE. This behavior can be enabled by setting
1664 * ClearValueAddressEnable in RENDER_SURFACE_STATE.
1665 *
1666 * Proper sequence of commands is as follows:
1667 *
1668 * 1. Storing clear color to allocation
1669 * 2. Ensuring that step 1. is finished and visible for TextureCache
1670 * 3. Performing FastClear
1671 *
1672 * Step 2. is required on products with ClearColorConversion feature.
1673 * This feature is enabled by setting ClearColorConversionEnable.
1674 * This causes HW to read stored color from ClearColorAllocation and
1675 * write back with the native format or RenderTarget - and clear
1676 * color needs to be present and visible. Reading is done from
1677 * TextureCache, writing is done to RenderCache."
1678 *
1679 * We're going to change the clear color. Invalidate the texture cache
1680 * now to ensure the clear color conversion feature works properly.
1681 * Although the docs seem to require invalidating the texture cache
1682 * after updating the clear color allocation, we can do this beforehand
1683 * so long as we ensure:
1684 *
1685 * 1. Step 1 is complete before the texture cache is accessed in step 3
1686 * 2. We don't access the texture cache between invalidation and step 3
1687 *
1688 * The second requirement is satisfied because we'll be performing step
1689 * 1 and 3 right after invalidating. The first is satisfied because
1690 * BLORP updates the clear color before performing the fast clear and it
1691 * performs the synchronizations suggested by the Render Target Fast
1692 * Clear section (not quoted here) to ensure its completion.
1693 *
1694 * While we're here, also invalidate the state cache as suggested.
1695 */
1696 if (devinfo->ver >= 11) {
1697 anv_add_pending_pipe_bits(cmd_buffer,
1698 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
1699 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
1700 "before blorp clear color update");
1701 }
1702
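/* Multisampled images have a single miplevel, so the clear always
 * targets level 0 and covers the full image extent.
 */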
1703 blorp_fast_clear(batch, &surf, format, swizzle,
1704 0, base_layer, layer_count,
1705 0, 0, image->vk.extent.width, image->vk.extent.height);
1706 break;
1707 case ISL_AUX_OP_PARTIAL_RESOLVE:
1708 blorp_mcs_partial_resolve(batch, &surf, format,
1709 base_layer, layer_count);
1710 break;
1711 case ISL_AUX_OP_AMBIGUATE:
1712 blorp_mcs_ambiguate(batch, &surf, base_layer, layer_count);
1713 break;
1714 case ISL_AUX_OP_FULL_RESOLVE:
1715 default:
1716 unreachable("Unsupported MCS operation");
1717 }
1718
1719 anv_add_pending_pipe_bits(cmd_buffer,
1720 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1721 (devinfo->verx10 == 120 ?
1722 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1723 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1724 ANV_PIPE_PSS_STALL_SYNC_BIT |
1725 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1726 "after fast clear mcs");
1727 }
1728
1729 static void
1730 clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
1731 struct blorp_batch *batch,
1732 const VkClearAttachment *attachment,
1733 uint32_t rectCount, const VkClearRect *pRects)
1734 {
1735 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1736 const uint32_t att_idx = attachment->colorAttachment;
1737 assert(att_idx < gfx->color_att_count);
1738 const struct anv_attachment *att = &gfx->color_att[att_idx];
1739
1740 if (att->vk_format == VK_FORMAT_UNDEFINED)
1741 return;
1742
1743 union isl_color_value clear_color =
1744 vk_to_isl_color(attachment->clearValue.color);
1745
1746 const struct anv_image_view *iview = att->iview;
1747 if (iview &&
1748 can_fast_clear_color_att(cmd_buffer, batch, att,
1749 attachment, rectCount, pRects)) {
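/* Single-sampled color fast clears go through CCS; multisampled ones
 * go through MCS.
 */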
1750 if (iview->image->vk.samples == 1) {
1751 exec_ccs_op(cmd_buffer, batch, iview->image,
1752 iview->planes[0].isl.format,
1753 iview->planes[0].isl.swizzle,
1754 VK_IMAGE_ASPECT_COLOR_BIT,
1755 0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
1756 &clear_color);
1757 } else {
1758 exec_mcs_op(cmd_buffer, batch, iview->image,
1759 iview->planes[0].isl.format,
1760 iview->planes[0].isl.swizzle,
1761 VK_IMAGE_ASPECT_COLOR_BIT,
1762 0, 1, ISL_AUX_OP_FAST_CLEAR,
1763 &clear_color);
1764 }
1765
1766 anv_cmd_buffer_mark_image_fast_cleared(cmd_buffer, iview->image,
1767 iview->planes[0].isl.format,
1768 clear_color);
1769 anv_cmd_buffer_load_clear_color_from_image(cmd_buffer,
1770 att->surface_state.state,
1771 iview->image);
1772 return;
1773 }
1774
1775 uint32_t binding_table;
1776 VkResult result =
1777 binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
1778 &binding_table);
1779 if (result != VK_SUCCESS)
1780 return;
1781
1782 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1783 if (gfx->view_mask) {
1784 u_foreach_bit(view_idx, gfx->view_mask) {
1785 for (uint32_t r = 0; r < rectCount; ++r) {
1786 const VkOffset2D offset = pRects[r].rect.offset;
1787 const VkExtent2D extent = pRects[r].rect.extent;
1788 blorp_clear_attachments(batch, binding_table,
1789 ISL_FORMAT_UNSUPPORTED,
1790 gfx->samples,
1791 view_idx, 1,
1792 offset.x, offset.y,
1793 offset.x + extent.width,
1794 offset.y + extent.height,
1795 true, clear_color, false, 0.0f, 0, 0);
1796 }
1797 }
1798 return;
1799 }
1800
1801 for (uint32_t r = 0; r < rectCount; ++r) {
1802 const VkOffset2D offset = pRects[r].rect.offset;
1803 const VkExtent2D extent = pRects[r].rect.extent;
1804 assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
1805 blorp_clear_attachments(batch, binding_table,
1806 ISL_FORMAT_UNSUPPORTED,
1807 gfx->samples,
1808 pRects[r].baseArrayLayer,
1809 pRects[r].layerCount,
1810 offset.x, offset.y,
1811 offset.x + extent.width, offset.y + extent.height,
1812 true, clear_color, false, 0.0f, 0, 0);
1813 }
1814 }
1815
1816 static void
1817 anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
1818 struct blorp_batch *batch,
1819 const struct anv_image *image,
1820 VkImageAspectFlags aspects,
1821 uint32_t level,
1822 uint32_t base_layer, uint32_t layer_count,
1823 VkRect2D area, uint8_t stencil_value)
1824 {
1825 assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1826 VK_IMAGE_ASPECT_STENCIL_BIT));
1827
1828 struct blorp_surf depth = {};
1829 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1830 const uint32_t plane =
1831 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
1832 assert(base_layer + layer_count <=
1833 anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
1834 get_blorp_surf_for_anv_image(cmd_buffer,
1835 image, VK_IMAGE_ASPECT_DEPTH_BIT,
1836 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1837 image->planes[plane].aux_usage, &depth);
1838 }
1839
1840 struct blorp_surf stencil = {};
1841 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1842 const uint32_t plane =
1843 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
1844 get_blorp_surf_for_anv_image(cmd_buffer,
1845 image, VK_IMAGE_ASPECT_STENCIL_BIT,
1846 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1847 image->planes[plane].aux_usage, &stencil);
1848 }
1849
1850 /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
1851 *
1852 * "The following is required when performing a depth buffer clear with
1853 * using the WM_STATE or 3DSTATE_WM:
1854 *
1855 * * If other rendering operations have preceded this clear, a
1856 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1857 * enabled must be issued before the rectangle primitive used for
1858 * the depth buffer clear operation.
1859 * * [...]"
1860 *
1861 * Even though the PRM only says that this is required if using 3DSTATE_WM
1862 * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
1863 * hangs when doing a clear with WM_HZ_OP.
1864 */
1865 anv_add_pending_pipe_bits(cmd_buffer,
1866 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1867 ANV_PIPE_DEPTH_STALL_BIT,
1868 "before clear hiz");
1869
1870 if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1871 depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
1872 /* From Bspec 47010 (Depth Buffer Clear):
1873 *
1874 * Since the fast clear cycles to CCS are not cached in TileCache,
1875 * any previous depth buffer writes to overlapping pixels must be
1876 * flushed out of TileCache before a succeeding Depth Buffer Clear.
1877 * This restriction only applies to Depth Buffer with write-thru
1878 * enabled, since fast clears to CCS only occur for write-thru mode.
1879 *
1880 * There may have been a write to this depth buffer. Flush it from the
1881 * tile cache just in case.
1882 *
1883 * Set CS stall bit to guarantee that the fast clear starts the execution
1884 * after the tile cache flush completed.
1885 *
1886 * There is no Bspec requirement to flush the data cache, but
1887 * experiments show that flushing the data cache helps to resolve the
1888 * corruption.
1889 */
1890 unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ?
1891 ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
1892 anv_add_pending_pipe_bits(cmd_buffer,
1893 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1894 ANV_PIPE_CS_STALL_BIT |
1895 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1896 wa_flush,
1897 "before clear hiz_ccs_wt");
1898 }
1899
1900 blorp_hiz_clear_depth_stencil(batch, &depth, &stencil,
1901 level, base_layer, layer_count,
1902 area.offset.x, area.offset.y,
1903 area.offset.x + area.extent.width,
1904 area.offset.y + area.extent.height,
1905 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
1906 ANV_HZ_FC_VAL,
1907 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
1908 stencil_value);
1909
1910 /* From the SKL PRM, Depth Buffer Clear:
1911 *
1912 * "Depth Buffer Clear Workaround
1913 *
1914 * Depth buffer clear pass using any of the methods (WM_STATE,
1915 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
1916 * command with DEPTH_STALL bit and Depth FLUSH bits “set” before
1917 * starting to render. DepthStall and DepthFlush are not needed between
1918 * consecutive depth clear passes nor is it required if the depth-clear
1919 * pass was done with “full_surf_clear” bit set in the
1920 * 3DSTATE_WM_HZ_OP."
1921 *
1922 * Even though the PRM provides a bunch of conditions under which this is
1923 * supposedly unnecessary, we choose to perform the flush unconditionally
1924 * just to be safe.
1925 *
1926 * From Bspec 46959, a programming note applicable to Gfx12+:
1927 *
1928 * "Since HZ_OP has to be sent twice (first time set the clear/resolve state
1929 * and 2nd time to clear the state), and HW internally flushes the depth
1930 * cache on HZ_OP, there is no need to explicitly send a Depth Cache flush
1931 * after Clear or Resolve."
1932 */
1933 if (cmd_buffer->device->info->verx10 < 120) {
1934 anv_add_pending_pipe_bits(cmd_buffer,
1935 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1936 ANV_PIPE_DEPTH_STALL_BIT,
1937 "after clear hiz");
1938 }
1939 }
1940
1941 static bool
1942 can_hiz_clear_att(struct anv_cmd_buffer *cmd_buffer,
1943 struct blorp_batch *batch,
1944 const struct anv_attachment *ds_att,
1945 const VkClearAttachment *attachment,
1946 uint32_t rectCount, const VkClearRect *pRects)
1947 {
1948 if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
1949 return false;
1950
1951 /* From Bspec's section MI_PREDICATE:
1952 *
1953 * "The MI_PREDICATE command is used to control the Predicate state bit,
1954 * which in turn can be used to enable/disable the processing of
1955 * 3DPRIMITIVE commands."
1956 *
1957 * Also from BDW/CHV Bspec's 3DSTATE_WM_HZ_OP programming notes:
1958 *
1959 * "This command does NOT support predication from the use of the
1960 * MI_PREDICATE register. To predicate depth clears and resolves on you
1961 * must fall back to using the 3D_PRIMITIVE or GPGPU_WALKER commands."
1962 *
1963 * Since BLORP's predication is currently dependent on MI_PREDICATE, fall
1964 * back to the slow depth clear path when the BLORP_BATCH_PREDICATE_ENABLE
1965 * flag is set.
1966 */
1967 if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
1968 return false;
1969
1970 if (rectCount > 1) {
1971 anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
1972 "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
1973 return false;
1974 }
1975
1976 /* When the BLORP_BATCH_NO_EMIT_DEPTH_STENCIL flag is set, BLORP can only
1977 * clear the first slice of the currently configured depth/stencil view.
1978 */
1979 assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1980 if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
1981 return false;
1982
1983 return anv_can_hiz_clear_ds_view(cmd_buffer->device, ds_att->iview,
1984 ds_att->layout,
1985 attachment->aspectMask,
1986 attachment->clearValue.depthStencil.depth,
1987 pRects->rect,
1988 cmd_buffer->queue_family->queueFlags);
1989 }
1990
1991 static void
1992 clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
1993 struct blorp_batch *batch,
1994 const VkClearAttachment *attachment,
1995 uint32_t rectCount, const VkClearRect *pRects)
1996 {
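/* blorp_clear_attachments() always takes a color value, so pass zeros
 * when only depth/stencil is being cleared.
 */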
1997 static const union isl_color_value color_value = { .u32 = { 0, } };
1998 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1999 const struct anv_attachment *d_att = &gfx->depth_att;
2000 const struct anv_attachment *s_att = &gfx->stencil_att;
2001 if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
2002 s_att->vk_format == VK_FORMAT_UNDEFINED)
2003 return;
2004
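/* When both depth and stencil attachments are bound, Vulkan requires
 * them to reference the same image view, so either one identifies the
 * depth/stencil image.
 */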
2005 const struct anv_attachment *ds_att = d_att->iview ? d_att : s_att;
2006 if (ds_att->iview &&
2007 can_hiz_clear_att(cmd_buffer, batch, ds_att, attachment, rectCount, pRects)) {
2008 anv_fast_clear_depth_stencil(cmd_buffer, batch, ds_att->iview->image,
2009 attachment->aspectMask,
2010 ds_att->iview->planes[0].isl.base_level,
2011 ds_att->iview->planes[0].isl.base_array_layer,
2012 pRects[0].layerCount, pRects->rect,
2013 attachment->clearValue.depthStencil.stencil);
2014 return;
2015 }
2016
2017 bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
2018 bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
2019
2020 enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
2021 if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
2022 depth_format = anv_get_isl_format(cmd_buffer->device->info,
2023 d_att->vk_format,
2024 VK_IMAGE_ASPECT_DEPTH_BIT,
2025 VK_IMAGE_TILING_OPTIMAL);
2026 }
2027
2028 uint32_t binding_table;
2029 VkResult result =
2030 binding_table_for_surface_state(cmd_buffer,
2031 gfx->null_surface_state,
2032 &binding_table);
2033 if (result != VK_SUCCESS)
2034 return;
2035
2036 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
2037 if (gfx->view_mask) {
2038 u_foreach_bit(view_idx, gfx->view_mask) {
2039 for (uint32_t r = 0; r < rectCount; ++r) {
2040 const VkOffset2D offset = pRects[r].rect.offset;
2041 const VkExtent2D extent = pRects[r].rect.extent;
2042 VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
2043 blorp_clear_attachments(batch, binding_table,
2044 depth_format,
2045 gfx->samples,
2046 view_idx, 1,
2047 offset.x, offset.y,
2048 offset.x + extent.width,
2049 offset.y + extent.height,
2050 false, color_value,
2051 clear_depth, value.depth,
2052 clear_stencil ? 0xff : 0, value.stencil);
2053 }
2054 }
2055 return;
2056 }
2057
2058 for (uint32_t r = 0; r < rectCount; ++r) {
2059 const VkOffset2D offset = pRects[r].rect.offset;
2060 const VkExtent2D extent = pRects[r].rect.extent;
2061 VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
2062 assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
2063 blorp_clear_attachments(batch, binding_table,
2064 depth_format,
2065 gfx->samples,
2066 pRects[r].baseArrayLayer,
2067 pRects[r].layerCount,
2068 offset.x, offset.y,
2069 offset.x + extent.width, offset.y + extent.height,
2070 false, color_value,
2071 clear_depth, value.depth,
2072 clear_stencil ? 0xff : 0, value.stencil);
2073 }
2074 }
2075
2076 void anv_CmdClearAttachments(
2077 VkCommandBuffer commandBuffer,
2078 uint32_t attachmentCount,
2079 const VkClearAttachment* pAttachments,
2080 uint32_t rectCount,
2081 const VkClearRect* pRects)
2082 {
2083 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2084
2085 /* Because this gets called within a render pass, we tell blorp not to
2086 * trash our depth and stencil buffers.
2087 */
2088 struct blorp_batch batch;
2089 enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
2090 if (cmd_buffer->state.conditional_render_enabled) {
2091 anv_cmd_emit_conditional_render_predicate(cmd_buffer);
2092 flags |= BLORP_BATCH_PREDICATE_ENABLE;
2093 }
2094 anv_blorp_batch_init(cmd_buffer, &batch, flags);
2095
2096 for (uint32_t a = 0; a < attachmentCount; ++a) {
2097 if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
2098 assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
2099 clear_color_attachment(cmd_buffer, &batch,
2100 &pAttachments[a],
2101 rectCount, pRects);
2102 } else {
2103 clear_depth_stencil_attachment(cmd_buffer, &batch,
2104 &pAttachments[a],
2105 rectCount, pRects);
2106 }
2107 }
2108
2109 anv_blorp_batch_finish(&batch);
2110 }
2111
2112 void
2113 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
2114 const struct anv_image *src_image,
2115 enum isl_aux_usage src_aux_usage,
2116 uint32_t src_level, uint32_t src_base_layer,
2117 const struct anv_image *dst_image,
2118 enum isl_aux_usage dst_aux_usage,
2119 uint32_t dst_level, uint32_t dst_base_layer,
2120 VkImageAspectFlagBits aspect,
2121 uint32_t src_x, uint32_t src_y,
2122 uint32_t dst_x, uint32_t dst_y,
2123 uint32_t width, uint32_t height,
2124 uint32_t layer_count,
2125 enum blorp_filter filter)
2126 {
2127 struct blorp_batch batch;
2128 anv_blorp_batch_init(cmd_buffer, &batch, 0);
2129 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2130
2131 assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
2132 assert(src_image->vk.samples > 1);
2133 assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
2134 assert(dst_image->vk.samples == 1);
2135
2136 struct blorp_surf src_surf, dst_surf;
2137 get_blorp_surf_for_anv_image(cmd_buffer, src_image, aspect,
2138 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2139 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2140 src_aux_usage, &src_surf);
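/* An MCS source may still contain fast-cleared blocks, so give BLORP
 * the clear color address to let it return the real clear color when
 * sampling them.
 */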
2141 if (src_aux_usage == ISL_AUX_USAGE_MCS) {
2142 src_surf.clear_color_addr = anv_to_blorp_address(
2143 anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
2144 VK_IMAGE_ASPECT_COLOR_BIT));
2145 }
2146 get_blorp_surf_for_anv_image(cmd_buffer, dst_image, aspect,
2147 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2148 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2149 dst_aux_usage, &dst_surf);
2150 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
2151 aspect, dst_aux_usage,
2152 dst_level, dst_base_layer, layer_count);
2153
2154 if (filter == BLORP_FILTER_NONE) {
2155 /* If no explicit filter is provided, then it's implied by the type of
2156 * the source image.
2157 */
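/* Depth, stencil, and integer formats cannot be meaningfully averaged,
 * so resolve those by taking sample 0.
 */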
2158 if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
2159 (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
2160 isl_format_has_int_channel(src_surf.surf->format)) {
2161 filter = BLORP_FILTER_SAMPLE_0;
2162 } else {
2163 filter = BLORP_FILTER_AVERAGE;
2164 }
2165 }
2166
2167 for (uint32_t l = 0; l < layer_count; l++) {
2168 blorp_blit(&batch,
2169 &src_surf, src_level, src_base_layer + l,
2170 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
2171 &dst_surf, dst_level, dst_base_layer + l,
2172 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
2173 src_x, src_y, src_x + width, src_y + height,
2174 dst_x, dst_y, dst_x + width, dst_y + height,
2175 filter, false, false);
2176 }
2177
2178 anv_blorp_batch_finish(&batch);
2179 }
2180
2181 static void
2182 resolve_image(struct anv_cmd_buffer *cmd_buffer,
2183 struct anv_image *src_image,
2184 VkImageLayout src_image_layout,
2185 struct anv_image *dst_image,
2186 VkImageLayout dst_image_layout,
2187 const VkImageResolve2 *region)
2188 {
2189 assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
2190 assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
2191 vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));
2192
2193 const uint32_t layer_count =
2194 vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);
2195
2196 anv_foreach_image_aspect_bit(aspect_bit, src_image,
2197 region->srcSubresource.aspectMask) {
2198 enum isl_aux_usage src_aux_usage =
2199 anv_layout_to_aux_usage(cmd_buffer->device->info, src_image,
2200 (1 << aspect_bit),
2201 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2202 src_image_layout,
2203 cmd_buffer->queue_family->queueFlags);
2204 enum isl_aux_usage dst_aux_usage =
2205 anv_layout_to_aux_usage(cmd_buffer->device->info, dst_image,
2206 (1 << aspect_bit),
2207 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2208 dst_image_layout,
2209 cmd_buffer->queue_family->queueFlags);
2210
2211 anv_image_msaa_resolve(cmd_buffer,
2212 src_image, src_aux_usage,
2213 region->srcSubresource.mipLevel,
2214 region->srcSubresource.baseArrayLayer,
2215 dst_image, dst_aux_usage,
2216 region->dstSubresource.mipLevel,
2217 region->dstSubresource.baseArrayLayer,
2218 (1 << aspect_bit),
2219 region->srcOffset.x,
2220 region->srcOffset.y,
2221 region->dstOffset.x,
2222 region->dstOffset.y,
2223 region->extent.width,
2224 region->extent.height,
2225 layer_count, BLORP_FILTER_NONE);
2226 }
2227 }
2228
2229 void anv_CmdResolveImage2(
2230 VkCommandBuffer commandBuffer,
2231 const VkResolveImageInfo2* pResolveImageInfo)
2232 {
2233 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2234 ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
2235 ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);
2236
2237 for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
2238 resolve_image(cmd_buffer,
2239 src_image, pResolveImageInfo->srcImageLayout,
2240 dst_image, pResolveImageInfo->dstImageLayout,
2241 &pResolveImageInfo->pRegions[r]);
2242 }
2243 }
2244
2245 void
2246 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
2247 const struct anv_image *image,
2248 VkImageAspectFlagBits aspect,
2249 enum isl_aux_usage aux_usage,
2250 enum isl_format format, struct isl_swizzle swizzle,
2251 uint32_t level, uint32_t base_layer, uint32_t layer_count,
2252 VkRect2D area, union isl_color_value clear_color)
2253 {
2254 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
2255
2256 /* We don't support planar images with multisampling yet */
2257 assert(image->n_planes == 1);
2258
2259 struct blorp_batch batch;
2260 anv_blorp_batch_init(cmd_buffer, &batch, 0);
2261
2262 struct blorp_surf surf;
2263 get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
2264 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2265 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2266 aux_usage, &surf);
2267 anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
2268 level, base_layer, layer_count);
2269
2270 blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
2271 level, base_layer, layer_count,
2272 area.offset.x, area.offset.y,
2273 area.offset.x + area.extent.width,
2274 area.offset.y + area.extent.height,
2275 clear_color, 0 /* color_write_disable */);
2276
2277 anv_blorp_batch_finish(&batch);
2278 }
2279
2280 void
2281 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
2282 const struct anv_image *image,
2283 VkImageAspectFlags aspects,
2284 enum isl_aux_usage depth_aux_usage,
2285 uint32_t level,
2286 uint32_t base_layer, uint32_t layer_count,
2287 VkRect2D area,
2288 float depth_value, uint8_t stencil_value)
2289 {
2290 assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
2291 VK_IMAGE_ASPECT_STENCIL_BIT));
2292
2293 struct blorp_batch batch;
2294 anv_blorp_batch_init(cmd_buffer, &batch, 0);
2295 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2296
2297 struct blorp_surf depth = {};
2298 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
2299 get_blorp_surf_for_anv_image(cmd_buffer,
2300 image, VK_IMAGE_ASPECT_DEPTH_BIT,
2301 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2302 depth_aux_usage, &depth);
2303 }
2304
2305 struct blorp_surf stencil = {};
2306 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2307 const uint32_t plane =
2308 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
2309 get_blorp_surf_for_anv_image(cmd_buffer,
2310 image, VK_IMAGE_ASPECT_STENCIL_BIT,
2311 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2312 image->planes[plane].aux_usage, &stencil);
2313 }
2314
2315 /* Blorp may choose to clear stencil using RGBA32_UINT for better
2316 * performance. If it does this, we need to flush it out of the depth
2317 * cache before rendering to it.
2318 */
2319 anv_add_pending_pipe_bits(cmd_buffer,
2320 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2321 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
2322 "before clear DS");
2323
2324 blorp_clear_depth_stencil(&batch, &depth, &stencil,
2325 level, base_layer, layer_count,
2326 area.offset.x, area.offset.y,
2327 area.offset.x + area.extent.width,
2328 area.offset.y + area.extent.height,
2329 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
2330 depth_value,
2331 (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
2332 stencil_value);
2333
2334 /* Blorp may choose to clear stencil using RGBA32_UINT for better
2335 * performance. If it does this, we need to flush it out of the render
2336 * cache before someone starts trying to do stencil on it.
2337 */
2338 anv_add_pending_pipe_bits(cmd_buffer,
2339 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
2340 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
2341 "after clear DS");
2342
2343 anv_blorp_batch_finish(&batch);
2344 }
2345
2346 void
2347 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
2348 const struct anv_image *image,
2349 VkImageAspectFlagBits aspect, uint32_t level,
2350 uint32_t base_layer, uint32_t layer_count,
2351 enum isl_aux_op hiz_op)
2352 {
2353 assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
2354 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
2355 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
2356 assert(plane == 0);
2357
2358 struct blorp_batch batch;
2359 anv_blorp_batch_init(cmd_buffer, &batch, 0);
2360 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2361
2362 struct blorp_surf surf;
2363 get_blorp_surf_for_anv_image(cmd_buffer,
2364 image, VK_IMAGE_ASPECT_DEPTH_BIT,
2365 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2366 image->planes[plane].aux_usage, &surf);
2367
2368 blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);
2369
2370 anv_blorp_batch_finish(&batch);
2371 }
2372
2373 void
2374 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
2375 const struct anv_image *image,
2376 VkImageAspectFlags aspects,
2377 uint32_t level,
2378 uint32_t base_layer, uint32_t layer_count,
2379 VkRect2D area, uint8_t stencil_value)
2380 {
2381 struct blorp_batch batch;
2382 anv_blorp_batch_init(cmd_buffer, &batch, 0);
2383 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2384
2385 anv_fast_clear_depth_stencil(cmd_buffer, &batch, image, aspects, level,
2386 base_layer, layer_count, area, stencil_value);
2387
2388 anv_blorp_batch_finish(&batch);
2389 }
2390
2391 void
2392 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
2393 const struct anv_image *image,
2394 enum isl_format format, struct isl_swizzle swizzle,
2395 VkImageAspectFlagBits aspect,
2396 uint32_t base_layer, uint32_t layer_count,
2397 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
2398 bool predicate)
2399 {
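/* The BLORP_BATCH_* flags are single bits; multiplying a flag by a 0/1
 * condition selects it without a branch.
 */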
2400 struct blorp_batch batch;
2401 anv_blorp_batch_init(cmd_buffer, &batch,
2402 BLORP_BATCH_PREDICATE_ENABLE * predicate +
2403 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
2404 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2405
2406 exec_mcs_op(cmd_buffer, &batch, image, format, swizzle, aspect,
2407 base_layer, layer_count, mcs_op, clear_value);
2408
2409 anv_blorp_batch_finish(&batch);
2410 }
2411
2412 void
2413 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
2414 const struct anv_image *image,
2415 enum isl_format format, struct isl_swizzle swizzle,
2416 VkImageAspectFlagBits aspect, uint32_t level,
2417 uint32_t base_layer, uint32_t layer_count,
2418 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
2419 bool predicate)
2420 {
2421 struct blorp_batch batch;
2422 anv_blorp_batch_init(cmd_buffer, &batch,
2423 BLORP_BATCH_PREDICATE_ENABLE * predicate +
2424 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
2425 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2426
2427 exec_ccs_op(cmd_buffer, &batch, image, format, swizzle, aspect, level,
2428 base_layer, layer_count, ccs_op, clear_value);
2429
2430 anv_blorp_batch_finish(&batch);
2431 }
2432