1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_private.h"
25 #include "genxml/gen8_pack.h"
26 
27 static bool
28 lookup_blorp_shader(struct blorp_batch *batch,
29                     const void *key, uint32_t key_size,
30                     uint32_t *kernel_out, void *prog_data_out)
31 {
32    struct blorp_context *blorp = batch->blorp;
33    struct anv_device *device = blorp->driver_ctx;
34 
35    struct anv_shader_bin *bin =
36       anv_device_search_for_kernel(device, device->internal_cache,
37                                    key, key_size, NULL);
38    if (!bin)
39       return false;
40 
41    /* The cache already has a reference and it's not going anywhere so there
42     * is no need to hold a second reference.
43     */
44    anv_shader_bin_unref(device, bin);
45 
46    *kernel_out = bin->kernel.offset;
47    *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
48 
49    return true;
50 }
51 
52 static bool
53 upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
54                     const void *key, uint32_t key_size,
55                     const void *kernel, uint32_t kernel_size,
56                     const void *prog_data,
57                     uint32_t prog_data_size,
58                     uint32_t *kernel_out, void *prog_data_out)
59 {
60    struct blorp_context *blorp = batch->blorp;
61    struct anv_device *device = blorp->driver_ctx;
62 
63    struct anv_pipeline_bind_map empty_bind_map = {};
64    struct anv_push_descriptor_info empty_push_desc_info = {};
65    struct anv_shader_upload_params upload_params = {
66       .stage               = stage,
67       .key_data            = key,
68       .key_size            = key_size,
69       .kernel_data         = kernel,
70       .kernel_size         = kernel_size,
71       .prog_data           = prog_data,
72       .prog_data_size      = prog_data_size,
73       .bind_map            = &empty_bind_map,
74       .push_desc_info      = &empty_push_desc_info,
75    };
76 
77    struct anv_shader_bin *bin =
78       anv_device_upload_kernel(device, device->internal_cache, &upload_params);
79 
80    if (!bin)
81       return false;
82 
83    /* The cache already has a reference and it's not going anywhere so there
84     * is no need to hold a second reference.
85     */
86    anv_shader_bin_unref(device, bin);
87 
88    *kernel_out = bin->kernel.offset;
89    *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
90 
91    return true;
92 }
93 
94 void
95 anv_device_init_blorp(struct anv_device *device)
96 {
97    const struct blorp_config config = {
98       .use_mesh_shading = device->vk.enabled_extensions.EXT_mesh_shader,
99       .use_unrestricted_depth_range =
100          device->vk.enabled_extensions.EXT_depth_range_unrestricted,
101    };
102 
103    blorp_init_brw(&device->blorp, device, &device->isl_dev,
104                   device->physical->compiler, &config);
105    device->blorp.lookup_shader = lookup_blorp_shader;
106    device->blorp.upload_shader = upload_blorp_shader;
107    device->blorp.enable_tbimr = device->physical->instance->enable_tbimr;
108    device->blorp.exec = anv_genX(device->info, blorp_exec);
109 }
110 
111 void
112 anv_device_finish_blorp(struct anv_device *device)
113 {
114    blorp_finish(&device->blorp);
115 }
116 
117 static void
118 anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
119                      struct blorp_batch *batch, enum blorp_batch_flags flags)
120 {
121    VkQueueFlags queue_flags = cmd_buffer->queue_family->queueFlags;
122 
123    if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
124       /* blorp runs on render engine by default */
125    } else if (queue_flags & VK_QUEUE_COMPUTE_BIT) {
126       flags |= BLORP_BATCH_USE_COMPUTE;
127    } else if (queue_flags & VK_QUEUE_TRANSFER_BIT) {
128       flags |= BLORP_BATCH_USE_BLITTER;
129    } else {
130       unreachable("unknown queue family");
131    }
132 
133    blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
134 }
135 
136 static void
137 anv_blorp_batch_finish(struct blorp_batch *batch)
138 {
139    blorp_batch_finish(batch);
140 }
141 
142 static void
143 get_blorp_surf_for_anv_address(struct anv_device *device,
144                                struct anv_address address,
145                                uint32_t width, uint32_t height,
146                                uint32_t row_pitch, enum isl_format format,
147                                bool is_dest,
148                                struct blorp_surf *blorp_surf,
149                                struct isl_surf *isl_surf)
150 {
151    bool ok UNUSED;
152 
153    *blorp_surf = (struct blorp_surf) {
154       .surf = isl_surf,
155       .addr = {
156          .buffer = address.bo,
157          .offset = address.offset,
158          .mocs = anv_mocs(device, address.bo,
159                           is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
160                                   : ISL_SURF_USAGE_TEXTURE_BIT),
161       },
162    };
163 
164    ok = isl_surf_init(&device->isl_dev, isl_surf,
165                      .dim = ISL_SURF_DIM_2D,
166                      .format = format,
167                      .width = width,
168                      .height = height,
169                      .depth = 1,
170                      .levels = 1,
171                      .array_len = 1,
172                      .samples = 1,
173                      .row_pitch_B = row_pitch,
174                      .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
175                                       : ISL_SURF_USAGE_TEXTURE_BIT,
176                      .tiling_flags = ISL_TILING_LINEAR_BIT);
177    assert(ok);
178 }
179 
180 static void
181 get_blorp_surf_for_anv_buffer(struct anv_device *device,
182                               struct anv_buffer *buffer, uint64_t offset,
183                               uint32_t width, uint32_t height,
184                               uint32_t row_pitch, enum isl_format format,
185                               bool is_dest,
186                               struct blorp_surf *blorp_surf,
187                               struct isl_surf *isl_surf)
188 {
189    get_blorp_surf_for_anv_address(device,
190                                   anv_address_add(buffer->address, offset),
191                                   width, height, row_pitch, format,
192                                   is_dest, blorp_surf, isl_surf);
193 }
194 
195 /* Pick something high enough that it won't be used in core and low enough it
196  * will never map to an extension.
197  */
198 #define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000
199 
200 static struct blorp_address
201 anv_to_blorp_address(struct anv_address addr)
202 {
203    return (struct blorp_address) {
204       .buffer = addr.bo,
205       .offset = addr.offset,
206    };
207 }
208 
209 static void
210 get_blorp_surf_for_anv_image(const struct anv_cmd_buffer *cmd_buffer,
211                              const struct anv_image *image,
212                              VkImageAspectFlags aspect,
213                              VkImageUsageFlags usage,
214                              VkImageLayout layout,
215                              enum isl_aux_usage aux_usage,
216                              struct blorp_surf *blorp_surf)
217 {
218    const struct anv_device *device = cmd_buffer->device;
219    const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
220 
221    if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
222       assert(usage != 0);
223       aux_usage = anv_layout_to_aux_usage(device->info, image,
224                                           aspect, usage, layout,
225                                           cmd_buffer->queue_family->queueFlags);
226    }
227 
228    isl_surf_usage_flags_t mocs_usage =
229       (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
230       ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;
231 
232    const struct anv_surface *surface = &image->planes[plane].primary_surface;
233    const struct anv_address address =
234       anv_image_address(image, &surface->memory_range);
235 
236    *blorp_surf = (struct blorp_surf) {
237       .surf = &surface->isl,
238       .addr = {
239          .buffer = address.bo,
240          .offset = address.offset,
241          .mocs = anv_mocs(device, address.bo, mocs_usage),
242       },
243    };
244 
245    if (aux_usage != ISL_AUX_USAGE_NONE) {
246       const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
247       const struct anv_address aux_address =
248          anv_image_address(image, &aux_surface->memory_range);
249 
250       blorp_surf->aux_usage = aux_usage;
251       blorp_surf->aux_surf = &aux_surface->isl;
252 
253       if (!anv_address_is_null(aux_address)) {
254          blorp_surf->aux_addr = (struct blorp_address) {
255             .buffer = aux_address.bo,
256             .offset = aux_address.offset,
257             .mocs = anv_mocs(device, aux_address.bo, 0),
258          };
259       }
260 
261       /* If we're doing a partial resolve, then we need the indirect clear
262        * color.  If we are doing a fast clear and want to store/update the
263        * clear color, we also pass the address to blorp, otherwise it will only
264        * stomp the CCS to a particular value and won't care about format or
265        * clear value.
266        */
267       if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
268          const struct anv_address clear_color_addr =
269             anv_image_get_clear_color_addr(device, image, aspect);
270          blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
271       } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
272          const struct anv_address clear_color_addr =
273             anv_image_get_clear_color_addr(device, image, aspect);
274          blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
275          blorp_surf->clear_color = (union isl_color_value) {
276             .f32 = { ANV_HZ_FC_VAL },
277          };
278       }
279    }
280 }
281 
282 static void
283 copy_image(struct anv_cmd_buffer *cmd_buffer,
284            struct blorp_batch *batch,
285            struct anv_image *src_image,
286            VkImageLayout src_image_layout,
287            struct anv_image *dst_image,
288            VkImageLayout dst_image_layout,
289            const VkImageCopy2 *region)
290 {
291    VkOffset3D srcOffset =
292       vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
293    VkOffset3D dstOffset =
294       vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
295    VkExtent3D extent =
296       vk_image_sanitize_extent(&src_image->vk, region->extent);
297 
298    const uint32_t dst_level = region->dstSubresource.mipLevel;
299    unsigned dst_base_layer, layer_count;
300    if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
301       dst_base_layer = region->dstOffset.z;
302       layer_count = region->extent.depth;
303    } else {
304       dst_base_layer = region->dstSubresource.baseArrayLayer;
305       layer_count = vk_image_subresource_layer_count(&dst_image->vk,
306                                                      &region->dstSubresource);
307    }
308 
309    const uint32_t src_level = region->srcSubresource.mipLevel;
310    unsigned src_base_layer;
311    if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
312       src_base_layer = region->srcOffset.z;
313    } else {
314       src_base_layer = region->srcSubresource.baseArrayLayer;
315       assert(layer_count ==
316              vk_image_subresource_layer_count(&src_image->vk,
317                                               &region->srcSubresource));
318    }
319 
320    VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
321       dst_mask = region->dstSubresource.aspectMask;
322 
323    assert(anv_image_aspects_compatible(src_mask, dst_mask));
324 
325    if (util_bitcount(src_mask) > 1) {
326       anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
327          struct blorp_surf src_surf, dst_surf;
328          get_blorp_surf_for_anv_image(cmd_buffer,
329                                       src_image, 1UL << aspect_bit,
330                                       VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
331                                       src_image_layout, ISL_AUX_USAGE_NONE,
332                                       &src_surf);
333          get_blorp_surf_for_anv_image(cmd_buffer,
334                                       dst_image, 1UL << aspect_bit,
335                                       VK_IMAGE_USAGE_TRANSFER_DST_BIT,
336                                       dst_image_layout, ISL_AUX_USAGE_NONE,
337                                       &dst_surf);
338          anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
339                                            1UL << aspect_bit,
340                                            dst_surf.aux_usage, dst_level,
341                                            dst_base_layer, layer_count);
342 
343          for (unsigned i = 0; i < layer_count; i++) {
344             blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
345                        &dst_surf, dst_level, dst_base_layer + i,
346                        srcOffset.x, srcOffset.y,
347                        dstOffset.x, dstOffset.y,
348                        extent.width, extent.height);
349          }
350       }
351    } else {
352    /* This case handles YCbCr images: the aspect masks are compatible but
353     * don't need to be the same.
354        */
355       struct blorp_surf src_surf, dst_surf;
356       get_blorp_surf_for_anv_image(cmd_buffer, src_image, src_mask,
357                                    VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
358                                    src_image_layout, ISL_AUX_USAGE_NONE,
359                                    &src_surf);
360       get_blorp_surf_for_anv_image(cmd_buffer, dst_image, dst_mask,
361                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
362                                    dst_image_layout, ISL_AUX_USAGE_NONE,
363                                    &dst_surf);
364       anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
365                                         dst_surf.aux_usage, dst_level,
366                                         dst_base_layer, layer_count);
367 
368       for (unsigned i = 0; i < layer_count; i++) {
369          blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
370                     &dst_surf, dst_level, dst_base_layer + i,
371                     srcOffset.x, srcOffset.y,
372                     dstOffset.x, dstOffset.y,
373                     extent.width, extent.height);
374       }
375    }
376 }
377 
378 static struct anv_state
379 record_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer)
380 {
381    const struct intel_device_info *info = cmd_buffer->device->info;
382 
383    const VkResult result = anv_cmd_buffer_ensure_rcs_companion(cmd_buffer);
384    if (result != VK_SUCCESS) {
385       anv_batch_set_error(&cmd_buffer->batch, result);
386       return ANV_STATE_NULL;
387    }
388 
389    assert(cmd_buffer->companion_rcs_cmd_buffer != NULL);
390 
391    /* Re-emit the aux table register in every command buffer.  This way we
392     * are guaranteed to have the table even if this command buffer doesn't
393     * initialize any images.
394     */
395    if (cmd_buffer->device->info->has_aux_map) {
396       anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
397                                  ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
398                                  "new cmd buffer with aux-tt");
399    }
400 
401    return anv_genX(info, cmd_buffer_begin_companion_rcs_syncpoint)(cmd_buffer);
402 }
403 
404 static void
405 end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer,
406                              struct anv_state syncpoint)
407 {
408    const struct intel_device_info *info = cmd_buffer->device->info;
409    anv_genX(info, cmd_buffer_end_companion_rcs_syncpoint)(cmd_buffer,
410                                                           syncpoint);
411 }
412 
413 static bool
414 anv_blorp_blitter_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
415                                        struct anv_image *image,
416                                        const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo,
417                                        const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo)
418 {
419    if (!anv_cmd_buffer_is_blitter_queue(cmd_buffer))
420       return false;
421 
422    assert((pCopyBufferToImageInfo && !pCopyImageToBufferInfo) ||
423           (pCopyImageToBufferInfo && !pCopyBufferToImageInfo));
424 
425    bool blorp_execute_on_companion = false;
426    VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_NONE;
427    const uint32_t region_count = pCopyBufferToImageInfo ?
428                                  pCopyBufferToImageInfo->regionCount :
429                                  pCopyImageToBufferInfo->regionCount;
430 
431    for (unsigned r = 0; r < region_count &&
432                             !blorp_execute_on_companion; r++) {
433       if (pCopyBufferToImageInfo) {
434          aspect_mask =
435             pCopyBufferToImageInfo->pRegions[r].imageSubresource.aspectMask;
436       } else {
437          aspect_mask =
438             pCopyImageToBufferInfo->pRegions[r].imageSubresource.aspectMask;
439       }
440 
441       enum isl_format linear_format =
442          anv_get_isl_format(cmd_buffer->device->info, image->vk.format,
443                             aspect_mask, VK_IMAGE_TILING_LINEAR);
444       const struct isl_format_layout *linear_fmtl =
445          isl_format_get_layout(linear_format);
446 
447       switch (linear_fmtl->bpb) {
448       case 96:
449          /* We can only support linear mode for 96 bpp on the blitter engine. */
450          blorp_execute_on_companion |=
451             image->vk.tiling != VK_IMAGE_TILING_LINEAR;
452          break;
453       default:
454          blorp_execute_on_companion |= linear_fmtl->bpb % 3 == 0;
455          break;
456       }
457    }
458 
459    return blorp_execute_on_companion;
460 }
461 
462 static bool
463 anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer,
464                                struct anv_image *dst_image)
465 {
466    /* MSAA images have to be dealt with on the companion RCS command buffer
467     * for both CCS && BCS engines.
468     */
469    if ((anv_cmd_buffer_is_blitter_queue(cmd_buffer) ||
470         anv_cmd_buffer_is_compute_queue(cmd_buffer)) &&
471        dst_image->vk.samples > 1)
472       return true;
473 
474    /* Emulation of formats is done through a compute shader, so we need
475     * the companion command buffer for the BCS engine.
476     */
477    if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) &&
478        dst_image->emu_plane_format != VK_FORMAT_UNDEFINED)
479       return true;
480 
481    return false;
482 }
483 
483 
484 void anv_CmdCopyImage2(
485     VkCommandBuffer                             commandBuffer,
486     const VkCopyImageInfo2*                     pCopyImageInfo)
487 {
488    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
489    ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
490    ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);
491 
492    struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
493    UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
494 
495    if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) {
496       rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
497       cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
498    }
499 
500    struct blorp_batch batch;
501    anv_blorp_batch_init(cmd_buffer, &batch, 0);
502 
503    for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
504       copy_image(cmd_buffer, &batch,
505                  src_image, pCopyImageInfo->srcImageLayout,
506                  dst_image, pCopyImageInfo->dstImageLayout,
507                  &pCopyImageInfo->pRegions[r]);
508    }
509 
510    anv_blorp_batch_finish(&batch);
511 
512    if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
513       assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
514       const enum anv_pipe_bits pipe_bits =
515          anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
516          ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
517          ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
518       anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
519                                 "Copy flush before astc emu");
520 
521       for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
522          const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
523          const VkOffset3D block_offset = vk_image_offset_to_elements(
524                &dst_image->vk, region->dstOffset);
525          const VkExtent3D block_extent = vk_image_extent_to_elements(
526                &src_image->vk, region->extent);
527          anv_astc_emu_process(cmd_buffer, dst_image,
528                               pCopyImageInfo->dstImageLayout,
529                               &region->dstSubresource,
530                               block_offset, block_extent);
531       }
532    }
533 
534    if (rcs_done.alloc_size)
535       end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
536 }
537 
538 static enum isl_format
539 isl_format_for_size(unsigned size_B)
540 {
541    /* Prefer 32-bit per component formats for CmdFillBuffer */
542    switch (size_B) {
543    case 1:  return ISL_FORMAT_R8_UINT;
544    case 2:  return ISL_FORMAT_R16_UINT;
545    case 3:  return ISL_FORMAT_R8G8B8_UINT;
546    case 4:  return ISL_FORMAT_R32_UINT;
547    case 6:  return ISL_FORMAT_R16G16B16_UINT;
548    case 8:  return ISL_FORMAT_R32G32_UINT;
549    case 12: return ISL_FORMAT_R32G32B32_UINT;
550    case 16: return ISL_FORMAT_R32G32B32A32_UINT;
551    default:
552       unreachable("Unknown format size");
553    }
554 }
555 
556 static void
557 copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
558                      struct blorp_batch *batch,
559                      struct anv_buffer *anv_buffer,
560                      struct anv_image *anv_image,
561                      VkImageLayout image_layout,
562                      const VkBufferImageCopy2* region,
563                      bool buffer_to_image)
564 {
565    struct {
566       struct blorp_surf surf;
567       uint32_t level;
568       VkOffset3D offset;
569    } image, buffer, *src, *dst;
570 
571    buffer.level = 0;
572    buffer.offset = (VkOffset3D) { 0, 0, 0 };
573 
574    if (buffer_to_image) {
575       src = &buffer;
576       dst = &image;
577    } else {
578       src = &image;
579       dst = &buffer;
580    }
581 
582    const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
583 
584    get_blorp_surf_for_anv_image(cmd_buffer, anv_image, aspect,
585                                 buffer_to_image ?
586                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT :
587                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
588                                 image_layout, ISL_AUX_USAGE_NONE,
589                                 &image.surf);
590    image.offset =
591       vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
592    image.level = region->imageSubresource.mipLevel;
593 
594    VkExtent3D extent =
595       vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
596    if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
597       image.offset.z = region->imageSubresource.baseArrayLayer;
598       extent.depth =
599          vk_image_subresource_layer_count(&anv_image->vk,
600                                           &region->imageSubresource);
601    }
602 
603    const enum isl_format linear_format =
604       anv_get_isl_format(cmd_buffer->device->info, anv_image->vk.format,
605                          aspect, VK_IMAGE_TILING_LINEAR);
606    const struct isl_format_layout *linear_fmtl =
607       isl_format_get_layout(linear_format);
608 
609    const struct vk_image_buffer_layout buffer_layout =
610       vk_image_buffer_copy_layout(&anv_image->vk, region);
611 
612    /* Some formats have additional restrictions which may cause ISL to
613     * fail to create a surface for us.  For example, YCbCr formats
614     * have to have 2-pixel aligned strides.
615     *
616     * To avoid these issues, we always bind the buffer as if it's a
617     * "normal" format like RGBA32_UINT.  Since we're using blorp_copy,
618     * the format doesn't matter as long as it has the right bpb.
619     */
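   /* Illustrative example (not part of the original source): for a BC1
    * source image (4x4 blocks, 64 bits per block), the buffer would be bound
    * as a DIV_ROUND_UP(w, 4) x DIV_ROUND_UP(h, 4) surface of R32G32_UINT
    * texels, which has the same 8-byte bpb as the compressed block.
    */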
620    const VkExtent2D buffer_extent = {
621       .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
622       .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
623    };
624    const enum isl_format buffer_format =
625       isl_format_for_size(linear_fmtl->bpb / 8);
626 
627    struct isl_surf buffer_isl_surf;
628    get_blorp_surf_for_anv_buffer(cmd_buffer->device,
629                                  anv_buffer, region->bufferOffset,
630                                  buffer_extent.width, buffer_extent.height,
631                                  buffer_layout.row_stride_B, buffer_format,
632                                  false, &buffer.surf, &buffer_isl_surf);
633 
634    if (&image == dst) {
635       /* In this case, the source is the buffer and, since blorp takes its
636        * copy dimensions in terms of the source format, we have to use the
637        * scaled down version for compressed textures because the source
638        * format is an RGB format.
639        */
640       extent.width = buffer_extent.width;
641       extent.height = buffer_extent.height;
642 
643       anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
644                                         aspect, dst->surf.aux_usage,
645                                         dst->level,
646                                         dst->offset.z, extent.depth);
647    }
648 
649    for (unsigned z = 0; z < extent.depth; z++) {
650       blorp_copy(batch, &src->surf, src->level, src->offset.z,
651                  &dst->surf, dst->level, dst->offset.z,
652                  src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
653                  extent.width, extent.height);
654 
655       image.offset.z++;
656       buffer.surf.addr.offset += buffer_layout.image_stride_B;
657    }
658 }
659 
660 void anv_CmdCopyBufferToImage2(
661     VkCommandBuffer                             commandBuffer,
662     const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
663 {
664    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
665    ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
666    ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);
667 
668    struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
669    UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
670 
671    bool blorp_execute_on_companion =
672       anv_blorp_execute_on_companion(cmd_buffer, dst_image);
673 
674    /* Check if any one of the aspects is incompatible with the blitter engine.
675     * If so, use the companion RCS command buffer for the blit operation, since
676     * 3-component formats are not supported natively on the blitter except at 96 bpb.
677     */
678    blorp_execute_on_companion |=
679       anv_blorp_blitter_execute_on_companion(cmd_buffer, dst_image,
680                                              pCopyBufferToImageInfo, NULL);
681 
682    if (blorp_execute_on_companion) {
683       rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
684       cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
685    }
686 
687    struct blorp_batch batch;
688    anv_blorp_batch_init(cmd_buffer, &batch, 0);
689 
690    for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
691       copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
692                            pCopyBufferToImageInfo->dstImageLayout,
693                            &pCopyBufferToImageInfo->pRegions[r], true);
694    }
695 
696    anv_blorp_batch_finish(&batch);
697 
698    if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
699       assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer));
700       const enum anv_pipe_bits pipe_bits =
701          anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
702          ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
703          ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
704       anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
705                                 "Copy flush before astc emu");
706 
707       for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
708          const VkBufferImageCopy2 *region =
709             &pCopyBufferToImageInfo->pRegions[r];
710          const VkOffset3D block_offset = vk_image_offset_to_elements(
711                &dst_image->vk, region->imageOffset);
712          const VkExtent3D block_extent = vk_image_extent_to_elements(
713                &dst_image->vk, region->imageExtent);
714          anv_astc_emu_process(cmd_buffer, dst_image,
715                               pCopyBufferToImageInfo->dstImageLayout,
716                               &region->imageSubresource,
717                               block_offset, block_extent);
718       }
719    }
720 
721    if (rcs_done.alloc_size)
722       end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
723 }
724 
725 static void
726 anv_add_buffer_write_pending_bits(struct anv_cmd_buffer *cmd_buffer,
727                                   const char *reason)
728 {
729    const struct intel_device_info *devinfo = cmd_buffer->device->info;
730 
731    cmd_buffer->state.queries.buffer_write_bits |=
732       (cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0 ?
733       ANV_QUERY_COMPUTE_WRITES_PENDING_BITS :
734       ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo);
735 }
736 
737 void anv_CmdCopyImageToBuffer2(
738     VkCommandBuffer                             commandBuffer,
739     const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
740 {
741    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
742    ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
743    ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
744 
745    UNUSED struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
746    UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
747 
748    bool blorp_execute_on_companion =
749       anv_blorp_execute_on_companion(cmd_buffer, src_image);
750 
751    /* Check if any one of the aspects is incompatible with the blitter engine.
752     * If so, use the companion RCS command buffer for the blit operation, since
753     * 3-component formats are not supported natively on the blitter except at 96 bpb.
754     */
755    blorp_execute_on_companion |=
756       anv_blorp_blitter_execute_on_companion(cmd_buffer, src_image, NULL,
757                                              pCopyImageToBufferInfo);
758 
759    if (blorp_execute_on_companion) {
760       rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
761       cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
762    }
763 
764    struct blorp_batch batch;
765    anv_blorp_batch_init(cmd_buffer, &batch, 0);
766 
767    for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
768       copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
769                            pCopyImageToBufferInfo->srcImageLayout,
770                            &pCopyImageToBufferInfo->pRegions[r], false);
771    }
772 
773    anv_add_buffer_write_pending_bits(cmd_buffer, "after copy image to buffer");
774 
775    anv_blorp_batch_finish(&batch);
776 
777    if (rcs_done.alloc_size)
778       end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
779 }
780 
781 static bool
782 flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
783 {
784    bool flip = false;
785    if (*src0 > *src1) {
786       unsigned tmp = *src0;
787       *src0 = *src1;
788       *src1 = tmp;
789       flip = !flip;
790    }
791 
792    if (*dst0 > *dst1) {
793       unsigned tmp = *dst0;
794       *dst0 = *dst1;
795       *dst1 = tmp;
796       flip = !flip;
797    }
798 
799    return flip;
800 }
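/* Illustrative example (not part of the original source): calling
 * flip_coords() with src0=8, src1=0, dst0=0, dst1=8 swaps the source pair to
 * (0, 8), leaves the destination pair untouched, and returns true so the
 * caller knows to mirror the blit along that axis.
 */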
801 
802 static void
803 blit_image(struct anv_cmd_buffer *cmd_buffer,
804            struct blorp_batch *batch,
805            struct anv_image *src_image,
806            VkImageLayout src_image_layout,
807            struct anv_image *dst_image,
808            VkImageLayout dst_image_layout,
809            const VkImageBlit2 *region,
810            VkFilter filter)
811 {
812    const VkImageSubresourceLayers *src_res = &region->srcSubresource;
813    const VkImageSubresourceLayers *dst_res = &region->dstSubresource;
814 
815    struct blorp_surf src, dst;
816 
817    enum blorp_filter blorp_filter;
818    switch (filter) {
819    case VK_FILTER_NEAREST:
820       blorp_filter = BLORP_FILTER_NEAREST;
821       break;
822    case VK_FILTER_LINEAR:
823       blorp_filter = BLORP_FILTER_BILINEAR;
824       break;
825    default:
826       unreachable("Invalid filter");
827    }
828 
829    assert(anv_image_aspects_compatible(src_res->aspectMask,
830                                        dst_res->aspectMask));
831 
832    anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
833       get_blorp_surf_for_anv_image(cmd_buffer,
834                                    src_image, 1U << aspect_bit,
835                                    VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
836                                    src_image_layout, ISL_AUX_USAGE_NONE, &src);
837       get_blorp_surf_for_anv_image(cmd_buffer,
838                                    dst_image, 1U << aspect_bit,
839                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
840                                    dst_image_layout, ISL_AUX_USAGE_NONE, &dst);
841 
842       VkFormat src_vk_format = src_image->vk.format;
843 
844       if (src_image->emu_plane_format != VK_FORMAT_UNDEFINED) {
845          /* redirect src to the hidden plane */
846          const uint32_t plane = src_image->n_planes;
847          const struct anv_surface *surface =
848             &src_image->planes[plane].primary_surface;
849          const struct anv_address address =
850             anv_image_address(src_image, &surface->memory_range);
851          src.surf = &surface->isl,
852          src.addr.offset = address.offset;
853 
854          src_vk_format = src_image->emu_plane_format;
855       }
856 
857       struct anv_format_plane src_format =
858          anv_get_format_aspect(cmd_buffer->device->info, src_vk_format,
859                                1U << aspect_bit, src_image->vk.tiling);
860       struct anv_format_plane dst_format =
861          anv_get_format_aspect(cmd_buffer->device->info, dst_image->vk.format,
862                                1U << aspect_bit, dst_image->vk.tiling);
863 
864       unsigned dst_start, dst_end;
865       if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
866          assert(dst_res->baseArrayLayer == 0);
867          dst_start = region->dstOffsets[0].z;
868          dst_end = region->dstOffsets[1].z;
869       } else {
870          dst_start = dst_res->baseArrayLayer;
871          dst_end = dst_start +
872             vk_image_subresource_layer_count(&dst_image->vk, dst_res);
873       }
874 
875       unsigned src_start, src_end;
876       if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
877          assert(src_res->baseArrayLayer == 0);
878          src_start = region->srcOffsets[0].z;
879          src_end = region->srcOffsets[1].z;
880       } else {
881          src_start = src_res->baseArrayLayer;
882          src_end = src_start +
883             vk_image_subresource_layer_count(&src_image->vk, src_res);
884       }
885 
886       bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
887       const unsigned num_layers = dst_end - dst_start;
888       float src_z_step = (float)(src_end - src_start) / (float)num_layers;
889 
890       /* There is no interpolation to the pixel center during rendering, so
891        * add the 0.5 offset ourselves here. */
892       float depth_center_offset = 0;
893       if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
894          depth_center_offset = 0.5 / num_layers * (src_end - src_start);
895 
896       if (flip_z) {
897          src_start = src_end;
898          src_z_step *= -1;
899          depth_center_offset *= -1;
900       }
901 
902       unsigned src_x0 = region->srcOffsets[0].x;
903       unsigned src_x1 = region->srcOffsets[1].x;
904       unsigned dst_x0 = region->dstOffsets[0].x;
905       unsigned dst_x1 = region->dstOffsets[1].x;
906       bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);
907 
908       unsigned src_y0 = region->srcOffsets[0].y;
909       unsigned src_y1 = region->srcOffsets[1].y;
910       unsigned dst_y0 = region->dstOffsets[0].y;
911       unsigned dst_y1 = region->dstOffsets[1].y;
912       bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);
913 
914       anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
915                                         1U << aspect_bit,
916                                         dst.aux_usage,
917                                         dst_res->mipLevel,
918                                         dst_start, num_layers);
919 
920       for (unsigned i = 0; i < num_layers; i++) {
921          unsigned dst_z = dst_start + i;
922          float src_z = src_start + i * src_z_step + depth_center_offset;
923 
924          blorp_blit(batch, &src, src_res->mipLevel, src_z,
925                     src_format.isl_format, src_format.swizzle,
926                     &dst, dst_res->mipLevel, dst_z,
927                     dst_format.isl_format, dst_format.swizzle,
928                     src_x0, src_y0, src_x1, src_y1,
929                     dst_x0, dst_y0, dst_x1, dst_y1,
930                     blorp_filter, flip_x, flip_y);
931       }
932    }
933 }
934 
935 void anv_CmdBlitImage2(
936     VkCommandBuffer                             commandBuffer,
937     const VkBlitImageInfo2*                     pBlitImageInfo)
938 {
939    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
940    ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
941    ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);
942 
943    struct blorp_batch batch;
944    anv_blorp_batch_init(cmd_buffer, &batch, 0);
945 
946    for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
947       blit_image(cmd_buffer, &batch,
948                  src_image, pBlitImageInfo->srcImageLayout,
949                  dst_image, pBlitImageInfo->dstImageLayout,
950                  &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
951    }
952 
953    anv_blorp_batch_finish(&batch);
954 }
955 
956 /**
957  * Returns the greatest common divisor of a and b that is a power of two.
958  */
959 static uint64_t
960 gcd_pow2_u64(uint64_t a, uint64_t b)
961 {
962    assert(a > 0 || b > 0);
963 
964    unsigned a_log2 = ffsll(a) - 1;
965    unsigned b_log2 = ffsll(b) - 1;
966 
967    /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
968     * case, the MIN2() will take the other one.  If both are 0 then we will
969     * hit the assert above.
970     */
971    return 1 << MIN2(a_log2, b_log2);
972 }
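/* Illustrative example (not part of the original source): gcd_pow2_u64(24, 16)
 * computes a_log2 == 3 and b_log2 == 4, so it returns 1 << 3 == 8, the
 * largest power of two that divides both inputs.
 */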
973 
974 /* This is the maximum possible width/height our HW can handle */
975 #define MAX_SURFACE_DIM (1ull << 14)
976 
977 static void
978 copy_buffer(struct anv_device *device,
979             struct blorp_batch *batch,
980             struct anv_buffer *src_buffer,
981             struct anv_buffer *dst_buffer,
982             const VkBufferCopy2 *region)
983 {
984    struct blorp_address src = {
985       .buffer = src_buffer->address.bo,
986       .offset = src_buffer->address.offset + region->srcOffset,
987       .mocs = anv_mocs(device, src_buffer->address.bo,
988                        ISL_SURF_USAGE_TEXTURE_BIT),
989    };
990    struct blorp_address dst = {
991       .buffer = dst_buffer->address.bo,
992       .offset = dst_buffer->address.offset + region->dstOffset,
993       .mocs = anv_mocs(device, dst_buffer->address.bo,
994                        ISL_SURF_USAGE_RENDER_TARGET_BIT),
995    };
996 
997    blorp_buffer_copy(batch, src, dst, region->size);
998 }
999 
1000 void anv_CmdCopyBuffer2(
1001     VkCommandBuffer                             commandBuffer,
1002     const VkCopyBufferInfo2*                    pCopyBufferInfo)
1003 {
1004    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1005    ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
1006    ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
1007 
1008    struct blorp_batch batch;
1009    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1010 
1011    for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
1012       copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
1013                   &pCopyBufferInfo->pRegions[r]);
1014    }
1015 
1016    anv_add_buffer_write_pending_bits(cmd_buffer, "after copy buffer");
1017 
1018    anv_blorp_batch_finish(&batch);
1019 }
1020 
1021 
1022 void anv_CmdUpdateBuffer(
1023     VkCommandBuffer                             commandBuffer,
1024     VkBuffer                                    dstBuffer,
1025     VkDeviceSize                                dstOffset,
1026     VkDeviceSize                                dataSize,
1027     const void*                                 pData)
1028 {
1029    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1030    ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
1031 
1032    struct blorp_batch batch;
1033    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1034 
1035    /* We can't quite grab a full block because the state stream needs a
1036     * little data at the top to build its linked list.
1037     */
1038    const uint32_t max_update_size =
1039       cmd_buffer->device->dynamic_state_pool.block_size - 64;
1040 
1041    assert(max_update_size < MAX_SURFACE_DIM * 4);
1042 
1043    /* We're about to read data that was written from the CPU.  Flush the
1044     * texture cache so we don't get anything stale.
1045     */
1046    anv_add_pending_pipe_bits(cmd_buffer,
1047                              ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
1048                              "before UpdateBuffer");
1049 
1050    while (dataSize) {
1051       const uint32_t copy_size = MIN2(dataSize, max_update_size);
1052 
1053       struct anv_state tmp_data =
1054          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
1055 
1056       memcpy(tmp_data.map, pData, copy_size);
1057 
1058       struct blorp_address src = {
1059          .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
1060          .offset = tmp_data.offset,
1061          .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
1062                           ISL_SURF_USAGE_TEXTURE_BIT, false)
1063       };
1064       struct blorp_address dst = {
1065          .buffer = dst_buffer->address.bo,
1066          .offset = dst_buffer->address.offset + dstOffset,
1067          .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
1068                           ISL_SURF_USAGE_RENDER_TARGET_BIT),
1069       };
1070 
1071       blorp_buffer_copy(&batch, src, dst, copy_size);
1072 
1073       dataSize -= copy_size;
1074       dstOffset += copy_size;
1075       pData = (void *)pData + copy_size;
1076    }
1077 
1078    anv_add_buffer_write_pending_bits(cmd_buffer, "update buffer");
1079 
1080    anv_blorp_batch_finish(&batch);
1081 }
1082 
1083 void
1084 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
1085                          struct anv_address address,
1086                          VkDeviceSize size,
1087                          uint32_t data)
1088 {
1089    struct blorp_surf surf;
1090    struct isl_surf isl_surf;
1091 
1092    struct blorp_batch batch;
1093    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1094 
1095    /* First, we compute the biggest format that can be used with the
1096     * given offsets and size.
1097     */
1098    int bs = 16;
1099    uint64_t offset = address.offset;
1100    bs = gcd_pow2_u64(bs, offset);
1101    bs = gcd_pow2_u64(bs, size);
1102    enum isl_format isl_format = isl_format_for_size(bs);
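   /* Illustrative example (not part of the original source): with
    * address.offset == 4 and size == 20, bs ends up as
    * gcd_pow2_u64(gcd_pow2_u64(16, 4), 20) == 4, so the fill is done with
    * ISL_FORMAT_R32_UINT.
    */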
1103 
1104    union isl_color_value color = {
1105       .u32 = { data, data, data, data },
1106    };
1107 
1108    const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
1109    while (size >= max_fill_size) {
1110       get_blorp_surf_for_anv_address(cmd_buffer->device,
1111                                      (struct anv_address) {
1112                                         .bo = address.bo, .offset = offset,
1113                                      },
1114                                      MAX_SURFACE_DIM, MAX_SURFACE_DIM,
1115                                      MAX_SURFACE_DIM * bs, isl_format, true,
1116                                      &surf, &isl_surf);
1117 
1118       blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1119                   0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
1120                   color, 0 /* color_write_disable */);
1121       size -= max_fill_size;
1122       offset += max_fill_size;
1123    }
1124 
1125    uint64_t height = size / (MAX_SURFACE_DIM * bs);
1126    assert(height < MAX_SURFACE_DIM);
1127    if (height != 0) {
1128       const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
1129       get_blorp_surf_for_anv_address(cmd_buffer->device,
1130                                      (struct anv_address) {
1131                                         .bo = address.bo, .offset = offset,
1132                                      },
1133                                      MAX_SURFACE_DIM, height,
1134                                      MAX_SURFACE_DIM * bs, isl_format, true,
1135                                      &surf, &isl_surf);
1136 
1137       blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1138                   0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
1139                   color, 0 /* color_write_disable */);
1140       size -= rect_fill_size;
1141       offset += rect_fill_size;
1142    }
1143 
1144    if (size != 0) {
1145       const uint32_t width = size / bs;
1146       get_blorp_surf_for_anv_address(cmd_buffer->device,
1147                                      (struct anv_address) {
1148                                         .bo = address.bo, .offset = offset,
1149                                      },
1150                                      width, 1,
1151                                      width * bs, isl_format, true,
1152                                      &surf, &isl_surf);
1153 
1154       blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
1155                   0, 0, 1, 0, 0, width, 1,
1156                   color, 0 /* color_write_disable */);
1157    }
1158 
1159    anv_blorp_batch_finish(&batch);
1160 }
1161 
1162 void anv_CmdFillBuffer(
1163     VkCommandBuffer                             commandBuffer,
1164     VkBuffer                                    dstBuffer,
1165     VkDeviceSize                                dstOffset,
1166     VkDeviceSize                                fillSize,
1167     uint32_t                                    data)
1168 {
1169    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1170    ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
1171 
1172    fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);
1173 
1174    /* From the Vulkan spec:
1175     *
1176     *    "size is the number of bytes to fill, and must be either a multiple
1177     *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
1178     *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
1179     *    buffer is not a multiple of 4, then the nearest smaller multiple is
1180     *    used."
1181     */
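   /* For instance, a remaining range of 19 bytes is rounded down to 16 by the
    * masking below (illustrative note, not part of the original source).
    */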
1182    fillSize &= ~3ull;
1183 
1184    anv_cmd_buffer_fill_area(cmd_buffer,
1185                             anv_address_add(dst_buffer->address, dstOffset),
1186                             fillSize, data);
1187 
1188    anv_add_buffer_write_pending_bits(cmd_buffer, "after fill buffer");
1189 }
1190 
1191 void anv_CmdClearColorImage(
1192     VkCommandBuffer                             commandBuffer,
1193     VkImage                                     _image,
1194     VkImageLayout                               imageLayout,
1195     const VkClearColorValue*                    pColor,
1196     uint32_t                                    rangeCount,
1197     const VkImageSubresourceRange*              pRanges)
1198 {
1199    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1200    ANV_FROM_HANDLE(anv_image, image, _image);
1201 
1202    struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer;
1203    UNUSED struct anv_state rcs_done = ANV_STATE_NULL;
1204 
1205    if (anv_blorp_execute_on_companion(cmd_buffer, image)) {
1206       rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer);
1207       cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer;
1208    }
1209 
1210    struct blorp_batch batch;
1211    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1212 
1213    for (unsigned r = 0; r < rangeCount; r++) {
1214       if (pRanges[r].aspectMask == 0)
1215          continue;
1216 
1217       assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1218 
1219       struct blorp_surf surf;
1220       get_blorp_surf_for_anv_image(cmd_buffer,
1221                                    image, pRanges[r].aspectMask,
1222                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1223                                    imageLayout, ISL_AUX_USAGE_NONE, &surf);
1224 
1225       struct anv_format_plane src_format =
1226          anv_get_format_aspect(cmd_buffer->device->info, image->vk.format,
1227                                VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);
1228 
1229       unsigned base_layer = pRanges[r].baseArrayLayer;
1230       uint32_t layer_count =
1231          vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
1232       uint32_t level_count =
1233          vk_image_subresource_level_count(&image->vk, &pRanges[r]);
1234 
1235       for (uint32_t i = 0; i < level_count; i++) {
1236          const unsigned level = pRanges[r].baseMipLevel + i;
1237          const unsigned level_width = u_minify(image->vk.extent.width, level);
1238          const unsigned level_height = u_minify(image->vk.extent.height, level);
1239 
1240          if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
1241             base_layer = 0;
1242             layer_count = u_minify(image->vk.extent.depth, level);
1243          }
1244 
1245          anv_cmd_buffer_mark_image_written(cmd_buffer, image,
1246                                            pRanges[r].aspectMask,
1247                                            surf.aux_usage, level,
1248                                            base_layer, layer_count);
1249 
1250          blorp_clear(&batch, &surf,
1251                      src_format.isl_format, src_format.swizzle,
1252                      level, base_layer, layer_count,
1253                      0, 0, level_width, level_height,
1254                      vk_to_isl_color(*pColor), 0 /* color_write_disable */);
1255       }
1256    }
1257 
1258    anv_blorp_batch_finish(&batch);
1259 
1260    if (rcs_done.alloc_size)
1261       end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done);
1262 }
1263 
1264 void anv_CmdClearDepthStencilImage(
1265     VkCommandBuffer                             commandBuffer,
1266     VkImage                                     image_h,
1267     VkImageLayout                               imageLayout,
1268     const VkClearDepthStencilValue*             pDepthStencil,
1269     uint32_t                                    rangeCount,
1270     const VkImageSubresourceRange*              pRanges)
1271 {
1272    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1273    ANV_FROM_HANDLE(anv_image, image, image_h);
1274 
1275    struct blorp_batch batch;
1276    anv_blorp_batch_init(cmd_buffer, &batch, 0);
1277    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1278 
1279    struct blorp_surf depth, stencil;
1280    if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1281       get_blorp_surf_for_anv_image(cmd_buffer,
1282                                    image, VK_IMAGE_ASPECT_DEPTH_BIT,
1283                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1284                                    imageLayout, ISL_AUX_USAGE_NONE, &depth);
1285    } else {
1286       memset(&depth, 0, sizeof(depth));
1287    }
1288 
1289    if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1290       get_blorp_surf_for_anv_image(cmd_buffer,
1291                                    image, VK_IMAGE_ASPECT_STENCIL_BIT,
1292                                    VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1293                                    imageLayout, ISL_AUX_USAGE_NONE, &stencil);
1294    } else {
1295       memset(&stencil, 0, sizeof(stencil));
1296    }
1297 
1298    for (unsigned r = 0; r < rangeCount; r++) {
1299       if (pRanges[r].aspectMask == 0)
1300          continue;
1301 
1302       bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
1303       bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
1304 
1305       unsigned base_layer = pRanges[r].baseArrayLayer;
1306       uint32_t layer_count =
1307          vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
1308       uint32_t level_count =
1309          vk_image_subresource_level_count(&image->vk, &pRanges[r]);
1310 
1311       for (uint32_t i = 0; i < level_count; i++) {
1312          const unsigned level = pRanges[r].baseMipLevel + i;
1313          const unsigned level_width = u_minify(image->vk.extent.width, level);
1314          const unsigned level_height = u_minify(image->vk.extent.height, level);
1315 
1316          if (image->vk.image_type == VK_IMAGE_TYPE_3D)
1317             layer_count = u_minify(image->vk.extent.depth, level);
1318 
1319          blorp_clear_depth_stencil(&batch, &depth, &stencil,
1320                                    level, base_layer, layer_count,
1321                                    0, 0, level_width, level_height,
1322                                    clear_depth, pDepthStencil->depth,
1323                                    clear_stencil ? 0xff : 0,
1324                                    pDepthStencil->stencil);
1325       }
1326    }
1327 
1328    anv_blorp_batch_finish(&batch);
1329 }
1330 
1331 VkResult
1332 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
1333                                          uint32_t num_entries,
1334                                          uint32_t *state_offset,
1335                                          struct anv_state *bt_state)
1336 {
1337    *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1338                                                   state_offset);
1339    if (bt_state->map == NULL) {
1340       /* We ran out of space.  Grab a new binding table block. */
1341       VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
1342       if (result != VK_SUCCESS)
1343          return result;
1344 
1345       /* Re-emit state base addresses so we get the new surface state base
1346        * address before we start emitting binding tables etc.
1347        */
1348       anv_cmd_buffer_emit_state_base_address(cmd_buffer);
1349 
1350       *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1351                                                      state_offset);
1352       assert(bt_state->map != NULL);
1353    }
1354 
1355    return VK_SUCCESS;
1356 }
1357 
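/* Helper that builds a one-entry binding table whose single slot points at
 * the given surface state.  The returned binding table offset is what the
 * blorp_clear_attachments() calls further down take as their binding table.
 */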
1358 static VkResult
1359 binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
1360                                 struct anv_state surface_state,
1361                                 uint32_t *bt_offset)
1362 {
1363    uint32_t state_offset;
1364    struct anv_state bt_state;
1365 
1366    VkResult result =
1367       anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
1368                                                &bt_state);
1369    if (result != VK_SUCCESS)
1370       return result;
1371 
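   /* The single entry is the surface state offset, adjusted by the
    * state_offset the binding table allocator handed back above.
    */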
1372    uint32_t *bt_map = bt_state.map;
1373    bt_map[0] = surface_state.offset + state_offset;
1374 
1375    *bt_offset = bt_state.offset;
1376    return VK_SUCCESS;
1377 }
1378 
1379 static bool
1380 can_fast_clear_color_att(struct anv_cmd_buffer *cmd_buffer,
1381                          struct blorp_batch *batch,
1382                          const struct anv_attachment *att,
1383                          const VkClearAttachment *attachment,
1384                          uint32_t rectCount, const VkClearRect *pRects)
1385 {
1386    union isl_color_value clear_color =
1387       vk_to_isl_color(attachment->clearValue.color);
1388 
1389    if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
1390       return false;
1391 
1392    /* We don't support fast clearing with conditional rendering at the
1393     * moment. All the tracking done around fast clears (clear color updates
1394     * and fast-clear type updates) happens unconditionally.
1395     */
1396    if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
1397       return false;
1398 
1399    if (rectCount > 1) {
1400       anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
1401                     "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
1402       return false;
1403    }
1404 
1405    /* We only support fast-clears on the first layer */
1406    if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
1407       return false;
1408 
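   /* With multiview enabled, only a view mask covering view 0 alone
    * (mask == 1) can take the single-layer fast clear path below.
    */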
1409    bool is_multiview = cmd_buffer->state.gfx.view_mask != 0;
1410    if (is_multiview && (cmd_buffer->state.gfx.view_mask != 1))
1411       return false;
1412 
1413    return anv_can_fast_clear_color_view(cmd_buffer->device,
1414                                         (struct anv_image_view *)att->iview,
1415                                         att->layout,
1416                                         clear_color,
1417                                         pRects->layerCount,
1418                                         pRects->rect,
1419                                         cmd_buffer->queue_family->queueFlags);
1420 }
1421 
1422 static void
1423 exec_ccs_op(struct anv_cmd_buffer *cmd_buffer,
1424             struct blorp_batch *batch,
1425             const struct anv_image *image,
1426             enum isl_format format, struct isl_swizzle swizzle,
1427             VkImageAspectFlagBits aspect, uint32_t level,
1428             uint32_t base_layer, uint32_t layer_count,
1429             enum isl_aux_op ccs_op, union isl_color_value *clear_value)
1430 {
1431    assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1432    assert(image->vk.samples == 1);
1433    assert(level < anv_image_aux_levels(image, aspect));
1434    /* Multi-LOD YCbCr is not allowed */
1435    assert(image->n_planes == 1 || level == 0);
1436    assert(base_layer + layer_count <=
1437           anv_image_aux_layers(image, aspect, level));
1438 
1439    const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1440    const struct intel_device_info *devinfo = cmd_buffer->device->info;
1441 
1442    struct blorp_surf surf;
1443    get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
1444                                 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1445                                 image->planes[plane].aux_usage,
1446                                 &surf);
1447 
1448    uint32_t level_width = u_minify(surf.surf->logical_level0_px.w, level);
1449    uint32_t level_height = u_minify(surf.surf->logical_level0_px.h, level);
1450 
1451    /* Blorp will store the clear color for us if we provide the clear color
1452     * address and we are doing a fast clear. So we save the clear value into
1453     * the blorp surface.
1454     */
1455    if (clear_value)
1456       surf.clear_color = *clear_value;
1457 
1458    char flush_reason[64];
1459    int ret =
1460       snprintf(flush_reason, sizeof(flush_reason),
1461                "ccs op start: %s", isl_aux_op_to_name(ccs_op));
1462    assert(ret < sizeof(flush_reason));
1463 
1464    /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1465     *
1466     *    "After Render target fast clear, pipe-control with color cache
1467     *    write-flush must be issued before sending any DRAW commands on
1468     *    that render target."
1469     *
1470     * This comment is a bit cryptic and doesn't really tell you what's going
1471     * on or what's really needed.  It appears that fast clear ops are not
1472     * properly synchronized with other drawing.  This means that we cannot
1473     * have a fast clear operation in the pipe at the same time as other
1474     * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
1475     * that the contents of the previous draw hit the render target before we
1476     * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1477     * that it is completed before any additional drawing occurs.
1478     */
1479    anv_add_pending_pipe_bits(cmd_buffer,
1480                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1481                              ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1482                              (devinfo->verx10 == 120 ?
1483                                 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1484                              (devinfo->verx10 == 125 ?
1485                                 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
1486                                 ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
1487                              ANV_PIPE_PSS_STALL_SYNC_BIT |
1488                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1489                              flush_reason);
1490 
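   /* Dispatch the requested CCS operation.  For fast clears, BLORP also
    * writes the clear color saved in surf.clear_color above into the image's
    * clear color allocation.
    */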
1491    switch (ccs_op) {
1492    case ISL_AUX_OP_FAST_CLEAR:
1493       /* From the ICL PRMs, Volume 9: Render Engine, State Caching :
1494        *
1495        *    "Any values referenced by pointers within the RENDER_SURFACE_STATE
1496        *     or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
1497        *     Indirect State Pointer) are considered to be part of that state
1498        *     and any changes to these referenced values requires an
1499        *     invalidation of the L1 state cache to ensure the new values are
1500        *     being used as part of the state. In the case of surface data
1501        *     pointed to by the Surface Base Address in RENDER SURFACE STATE,
1502        *     the Texture Cache must be invalidated if the surface data
1503        *     changes."
1504        *
1505        * and from the Render Target Fast Clear section,
1506        *
1507        *   "HwManaged FastClear allows SW to store FastClearValue in separate
1508        *   graphics allocation, instead of keeping them in
1509        *   RENDER_SURFACE_STATE. This behavior can be enabled by setting
1510        *   ClearValueAddressEnable in RENDER_SURFACE_STATE.
1511        *
1512        *    Proper sequence of commands is as follows:
1513        *
1514        *       1. Storing clear color to allocation
1515        *       2. Ensuring that step 1. is finished and visible for TextureCache
1516        *       3. Performing FastClear
1517        *
1518        *    Step 2. is required on products with ClearColorConversion feature.
1519        *    This feature is enabled by setting ClearColorConversionEnable.
1520        *    This causes HW to read stored color from ClearColorAllocation and
1521        *    write back with the native format or RenderTarget - and clear
1522        *    color needs to be present and visible. Reading is done from
1523        *    TextureCache, writing is done to RenderCache."
1524        *
1525        * We're going to change the clear color. Invalidate the texture cache
1526        * now to ensure the clear color conversion feature works properly.
1527        * Although the docs seem to require invalidating the texture cache
1528        * after updating the clear color allocation, we can do this beforehand
1529        * so long as we ensure:
1530        *
1531        *    1. Step 1 is complete before the texture cache is accessed in step 3
1532        *    2. We don't access the texture cache between invalidation and step 3
1533        *
1534        * The second requirement is satisfied because we'll be performing steps
1535        * 1 and 3 right after invalidating. The first is satisfied because
1536        * BLORP updates the clear color before performing the fast clear and it
1537        * performs the synchronizations suggested by the Render Target Fast
1538        * Clear section (not quoted here) to ensure its completion.
1539        *
1540        * While we're here, also invalidate the state cache as suggested.
1541        */
1542       if (devinfo->ver >= 11) {
1543          anv_add_pending_pipe_bits(cmd_buffer,
1544                                    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
1545                                    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
1546                                    "before blorp clear color update");
1547       }
1548 
1549       blorp_fast_clear(batch, &surf, format, swizzle,
1550                        level, base_layer, layer_count,
1551                        0, 0, level_width, level_height);
1552       break;
1553    case ISL_AUX_OP_FULL_RESOLVE:
1554    case ISL_AUX_OP_PARTIAL_RESOLVE: {
1555       /* Wa_1508744258: Enable RHWO optimization for resolves */
1556       const bool enable_rhwo_opt =
1557          intel_needs_workaround(cmd_buffer->device->info, 1508744258);
1558 
1559       if (enable_rhwo_opt)
1560          cmd_buffer->state.pending_rhwo_optimization_enabled = true;
1561 
1562       blorp_ccs_resolve(batch, &surf, level, base_layer, layer_count,
1563                         format, ccs_op);
1564 
1565       if (enable_rhwo_opt)
1566          cmd_buffer->state.pending_rhwo_optimization_enabled = false;
1567       break;
1568    }
1569    case ISL_AUX_OP_AMBIGUATE:
1570       for (uint32_t a = 0; a < layer_count; a++) {
1571          const uint32_t layer = base_layer + a;
1572          blorp_ccs_ambiguate(batch, &surf, level, layer);
1573       }
1574       break;
1575    default:
1576       unreachable("Unsupported CCS operation");
1577    }
1578 
1579    anv_add_pending_pipe_bits(cmd_buffer,
1580                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1581                              (devinfo->verx10 == 120 ?
1582                                 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1583                                 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1584                              ANV_PIPE_PSS_STALL_SYNC_BIT |
1585                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1586                              "ccs op finish");
1587 }
1588 
1589 static void
1590 exec_mcs_op(struct anv_cmd_buffer *cmd_buffer,
1591             struct blorp_batch *batch,
1592             const struct anv_image *image,
1593             enum isl_format format, struct isl_swizzle swizzle,
1594             VkImageAspectFlagBits aspect,
1595             uint32_t base_layer, uint32_t layer_count,
1596             enum isl_aux_op mcs_op, union isl_color_value *clear_value)
1597 {
1598    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1599    assert(image->vk.samples > 1);
1600    assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
1601 
1602    /* Multisampling with multi-planar formats is not supported */
1603    assert(image->n_planes == 1);
1604 
1605    const struct intel_device_info *devinfo = cmd_buffer->device->info;
1606    struct blorp_surf surf;
1607    get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
1608                                 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1609                                 ISL_AUX_USAGE_MCS, &surf);
1610 
1611    /* Blorp will store the clear color for us if we provide the clear color
1612     * address and we are doing a fast clear. So we save the clear value into
1613     * the blorp surface.
1614     */
1615    if (clear_value)
1616       surf.clear_color = *clear_value;
1617 
1618    /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1619     *
1620     *    "After Render target fast clear, pipe-control with color cache
1621     *    write-flush must be issued before sending any DRAW commands on
1622     *    that render target."
1623     *
1624     * This comment is a bit cryptic and doesn't really tell you what's going
1625     * on or what's really needed.  It appears that fast clear ops are not
1626     * properly synchronized with other drawing.  This means that we cannot
1627     * have a fast clear operation in the pipe at the same time as other
1628     * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
1629     * that the contents of the previous draw hit the render target before we
1630     * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1631     * that it is completed before any additional drawing occurs.
1632     */
1633    anv_add_pending_pipe_bits(cmd_buffer,
1634                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1635                              ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1636                              (devinfo->verx10 == 120 ?
1637                                 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1638                              (devinfo->verx10 == 125 ?
1639                                 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
1640                                 ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
1641                              ANV_PIPE_PSS_STALL_SYNC_BIT |
1642                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1643                              "before fast clear mcs");
1644 
1645    switch (mcs_op) {
1646    case ISL_AUX_OP_FAST_CLEAR:
1647       /* From the ICL PRMs, Volume 9: Render Engine, State Caching :
1648        *
1649        *    "Any values referenced by pointers within the RENDER_SURFACE_STATE
1650        *     or SAMPLER_STATE (e.g. Clear Color Pointer, Border Color or
1651        *     Indirect State Pointer) are considered to be part of that state
1652        *     and any changes to these referenced values requires an
1653        *     invalidation of the L1 state cache to ensure the new values are
1654        *     being used as part of the state. In the case of surface data
1655        *     pointed to by the Surface Base Address in RENDER SURFACE STATE,
1656        *     the Texture Cache must be invalidated if the surface data
1657        *     changes."
1658        *
1659        * and from the Render Target Fast Clear section,
1660        *
1661        *   "HwManaged FastClear allows SW to store FastClearValue in separate
1662        *   graphics allocation, instead of keeping them in
1663        *   RENDER_SURFACE_STATE. This behavior can be enabled by setting
1664        *   ClearValueAddressEnable in RENDER_SURFACE_STATE.
1665        *
1666        *    Proper sequence of commands is as follows:
1667        *
1668        *       1. Storing clear color to allocation
1669        *       2. Ensuring that step 1. is finished and visible for TextureCache
1670        *       3. Performing FastClear
1671        *
1672        *    Step 2. is required on products with ClearColorConversion feature.
1673        *    This feature is enabled by setting ClearColorConversionEnable.
1674        *    This causes HW to read stored color from ClearColorAllocation and
1675        *    write back with the native format or RenderTarget - and clear
1676        *    color needs to be present and visible. Reading is done from
1677        *    TextureCache, writing is done to RenderCache."
1678        *
1679        * We're going to change the clear color. Invalidate the texture cache
1680        * now to ensure the clear color conversion feature works properly.
1681        * Although the docs seem to require invalidating the texture cache
1682        * after updating the clear color allocation, we can do this beforehand
1683        * so long as we ensure:
1684        *
1685        *    1. Step 1 is complete before the texture cache is accessed in step 3
1686        *    2. We don't access the texture cache between invalidation and step 3
1687        *
1688        * The second requirement is satisfied because we'll be performing steps
1689        * 1 and 3 right after invalidating. The first is satisfied because
1690        * BLORP updates the clear color before performing the fast clear and it
1691        * performs the synchronizations suggested by the Render Target Fast
1692        * Clear section (not quoted here) to ensure its completion.
1693        *
1694        * While we're here, also invalidate the state cache as suggested.
1695        */
1696       if (devinfo->ver >= 11) {
1697          anv_add_pending_pipe_bits(cmd_buffer,
1698                                    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
1699                                    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
1700                                    "before blorp clear color update");
1701       }
1702 
1703       blorp_fast_clear(batch, &surf, format, swizzle,
1704                        0, base_layer, layer_count,
1705                        0, 0, image->vk.extent.width, image->vk.extent.height);
1706       break;
1707    case ISL_AUX_OP_PARTIAL_RESOLVE:
1708       blorp_mcs_partial_resolve(batch, &surf, format,
1709                                 base_layer, layer_count);
1710       break;
1711    case ISL_AUX_OP_AMBIGUATE:
1712       blorp_mcs_ambiguate(batch, &surf, base_layer, layer_count);
1713       break;
1714    case ISL_AUX_OP_FULL_RESOLVE:
1715    default:
1716       unreachable("Unsupported MCS operation");
1717    }
1718 
1719    anv_add_pending_pipe_bits(cmd_buffer,
1720                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1721                              (devinfo->verx10 == 120 ?
1722                                 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1723                                 ANV_PIPE_DEPTH_STALL_BIT : 0) |
1724                              ANV_PIPE_PSS_STALL_SYNC_BIT |
1725                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1726                              "after fast clear mcs");
1727 }
1728 
1729 static void
1730 clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
1731                        struct blorp_batch *batch,
1732                        const VkClearAttachment *attachment,
1733                        uint32_t rectCount, const VkClearRect *pRects)
1734 {
1735    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1736    const uint32_t att_idx = attachment->colorAttachment;
1737    assert(att_idx < gfx->color_att_count);
1738    const struct anv_attachment *att = &gfx->color_att[att_idx];
1739 
1740    if (att->vk_format == VK_FORMAT_UNDEFINED)
1741       return;
1742 
1743    union isl_color_value clear_color =
1744       vk_to_isl_color(attachment->clearValue.color);
1745 
1746    const struct anv_image_view *iview = att->iview;
1747    if (iview &&
1748        can_fast_clear_color_att(cmd_buffer, batch, att,
1749                                 attachment, rectCount, pRects)) {
1750       if (iview->image->vk.samples == 1) {
1751          exec_ccs_op(cmd_buffer, batch, iview->image,
1752                      iview->planes[0].isl.format,
1753                      iview->planes[0].isl.swizzle,
1754                      VK_IMAGE_ASPECT_COLOR_BIT,
1755                      0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
1756                      &clear_color);
1757       } else {
1758          exec_mcs_op(cmd_buffer, batch, iview->image,
1759                      iview->planes[0].isl.format,
1760                      iview->planes[0].isl.swizzle,
1761                      VK_IMAGE_ASPECT_COLOR_BIT,
1762                      0, 1, ISL_AUX_OP_FAST_CLEAR,
1763                      &clear_color);
1764       }
1765 
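      /* Track the new fast clear color on the image and refresh the
       * attachment's surface state with the clear color stored on the image
       * so later rendering picks up the updated value.
       */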
1766       anv_cmd_buffer_mark_image_fast_cleared(cmd_buffer, iview->image,
1767                                              iview->planes[0].isl.format,
1768                                              clear_color);
1769       anv_cmd_buffer_load_clear_color_from_image(cmd_buffer,
1770                                                  att->surface_state.state,
1771                                                  iview->image);
1772       return;
1773    }
1774 
1775    uint32_t binding_table;
1776    VkResult result =
1777       binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
1778                                       &binding_table);
1779    if (result != VK_SUCCESS)
1780       return;
1781 
1782    /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1783    if (gfx->view_mask) {
1784       u_foreach_bit(view_idx, gfx->view_mask) {
1785          for (uint32_t r = 0; r < rectCount; ++r) {
1786             const VkOffset2D offset = pRects[r].rect.offset;
1787             const VkExtent2D extent = pRects[r].rect.extent;
1788             blorp_clear_attachments(batch, binding_table,
1789                                     ISL_FORMAT_UNSUPPORTED,
1790                                     gfx->samples,
1791                                     view_idx, 1,
1792                                     offset.x, offset.y,
1793                                     offset.x + extent.width,
1794                                     offset.y + extent.height,
1795                                     true, clear_color, false, 0.0f, 0, 0);
1796          }
1797       }
1798       return;
1799    }
1800 
1801    for (uint32_t r = 0; r < rectCount; ++r) {
1802       const VkOffset2D offset = pRects[r].rect.offset;
1803       const VkExtent2D extent = pRects[r].rect.extent;
1804       assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
1805       blorp_clear_attachments(batch, binding_table,
1806                               ISL_FORMAT_UNSUPPORTED,
1807                               gfx->samples,
1808                               pRects[r].baseArrayLayer,
1809                               pRects[r].layerCount,
1810                               offset.x, offset.y,
1811                               offset.x + extent.width, offset.y + extent.height,
1812                               true, clear_color, false, 0.0f, 0, 0);
1813    }
1814 }
1815 
1816 static void
1817 anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
1818                              struct blorp_batch *batch,
1819                              const struct anv_image *image,
1820                              VkImageAspectFlags aspects,
1821                              uint32_t level,
1822                              uint32_t base_layer, uint32_t layer_count,
1823                              VkRect2D area, uint8_t stencil_value)
1824 {
1825    assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1826                                VK_IMAGE_ASPECT_STENCIL_BIT));
1827 
1828    struct blorp_surf depth = {};
1829    if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1830       const uint32_t plane =
1831          anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
1832       assert(base_layer + layer_count <=
1833              anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
1834       get_blorp_surf_for_anv_image(cmd_buffer,
1835                                    image, VK_IMAGE_ASPECT_DEPTH_BIT,
1836                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1837                                    image->planes[plane].aux_usage, &depth);
1838    }
1839 
1840    struct blorp_surf stencil = {};
1841    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1842       const uint32_t plane =
1843          anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
1844       get_blorp_surf_for_anv_image(cmd_buffer,
1845                                    image, VK_IMAGE_ASPECT_STENCIL_BIT,
1846                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1847                                    image->planes[plane].aux_usage, &stencil);
1848    }
1849 
1850    /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
1851     *
1852     *    "The following is required when performing a depth buffer clear with
1853     *    using the WM_STATE or 3DSTATE_WM:
1854     *
1855     *       * If other rendering operations have preceded this clear, a
1856     *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1857     *         enabled must be issued before the rectangle primitive used for
1858     *         the depth buffer clear operation.
1859     *       * [...]"
1860     *
1861     * Even though the PRM only says that this is required if using 3DSTATE_WM
1862     * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
1863     * hangs when doing a clear with WM_HZ_OP.
1864     */
1865    anv_add_pending_pipe_bits(cmd_buffer,
1866                              ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1867                              ANV_PIPE_DEPTH_STALL_BIT,
1868                              "before clear hiz");
1869 
1870    if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1871        depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
1872       /* From Bspec 47010 (Depth Buffer Clear):
1873        *
1874        *    Since the fast clear cycles to CCS are not cached in TileCache,
1875        *    any previous depth buffer writes to overlapping pixels must be
1876        *    flushed out of TileCache before a succeeding Depth Buffer Clear.
1877        *    This restriction only applies to Depth Buffer with write-thru
1878        *    enabled, since fast clears to CCS only occur for write-thru mode.
1879        *
1880        * There may have been a write to this depth buffer. Flush it from the
1881        * tile cache just in case.
1882        *
1883        * Set CS stall bit to guarantee that the fast clear starts the execution
1884        * after the tile cache flush completed.
1885        *
1886        * There is no Bspec requirement to flush the data cache but the
1887        * experiment shows that flusing the data cache helps to resolve the
1888        * corruption.
1889        */
1890       unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ?
1891                           ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
1892       anv_add_pending_pipe_bits(cmd_buffer,
1893                                 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1894                                 ANV_PIPE_CS_STALL_BIT |
1895                                 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1896                                 wa_flush,
1897                                 "before clear hiz_ccs_wt");
1898    }
1899 
1900    blorp_hiz_clear_depth_stencil(batch, &depth, &stencil,
1901                                  level, base_layer, layer_count,
1902                                  area.offset.x, area.offset.y,
1903                                  area.offset.x + area.extent.width,
1904                                  area.offset.y + area.extent.height,
1905                                  aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
1906                                  ANV_HZ_FC_VAL,
1907                                  aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
1908                                  stencil_value);
1909 
1910    /* From the SKL PRM, Depth Buffer Clear:
1911     *
1912     *    "Depth Buffer Clear Workaround
1913     *
1914     *    Depth buffer clear pass using any of the methods (WM_STATE,
1915     *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
1916     *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
1917     *    starting to render.  DepthStall and DepthFlush are not needed between
1918     *    consecutive depth clear passes nor is it required if the depth-clear
1919     *    pass was done with “full_surf_clear” bit set in the
1920     *    3DSTATE_WM_HZ_OP."
1921     *
1922     * Even though the PRM provides a bunch of conditions under which this is
1923     * supposedly unnecessary, we choose to perform the flush unconditionally
1924     * just to be safe.
1925     *
1926     * From Bspec 46959, a programming note applicable to Gfx12+:
1927     *
1928     *    "Since HZ_OP has to be sent twice (first time set the clear/resolve state
1929     *    and 2nd time to clear the state), and HW internally flushes the depth
1930     *    cache on HZ_OP, there is no need to explicitly send a Depth Cache flush
1931     *    after Clear or Resolve."
1932     */
1933    if (cmd_buffer->device->info->verx10 < 120) {
1934       anv_add_pending_pipe_bits(cmd_buffer,
1935                                 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1936                                 ANV_PIPE_DEPTH_STALL_BIT,
1937                                 "after clear hiz");
1938    }
1939 }
1940 
1941 static bool
1942 can_hiz_clear_att(struct anv_cmd_buffer *cmd_buffer,
1943                   struct blorp_batch *batch,
1944                   const struct anv_attachment *ds_att,
1945                   const VkClearAttachment *attachment,
1946                   uint32_t rectCount, const VkClearRect *pRects)
1947 {
1948    if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
1949       return false;
1950 
1951    /* From Bspec's section MI_PREDICATE:
1952     *
1953     *    "The MI_PREDICATE command is used to control the Predicate state bit,
1954     *    which in turn can be used to enable/disable the processing of
1955     *    3DPRIMITIVE commands."
1956     *
1957     * Also from BDW/CHV Bspec's 3DSTATE_WM_HZ_OP programming notes:
1958     *
1959     *    "This command does NOT support predication from the use of the
1960     *    MI_PREDICATE register. To predicate depth clears and resolves on you
1961     *    must fall back to using the 3D_PRIMITIVE or GPGPU_WALKER commands."
1962     *
1963     * Since BLORP's predication is currently dependent on MI_PREDICATE, fall
1964     * back to the slow depth clear path when the BLORP_BATCH_PREDICATE_ENABLE
1965     * flag is set.
1966     */
1967    if (batch->flags & BLORP_BATCH_PREDICATE_ENABLE)
1968       return false;
1969 
1970    if (rectCount > 1) {
1971       anv_perf_warn(VK_LOG_OBJS(&cmd_buffer->device->vk.base),
1972                     "Fast clears for vkCmdClearAttachments supported only for rectCount == 1");
1973       return false;
1974    }
1975 
1976    /* When the BLORP_BATCH_NO_EMIT_DEPTH_STENCIL flag is set, BLORP can only
1977     * clear the first slice of the currently configured depth/stencil view.
1978     */
1979    assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1980    if (pRects[0].layerCount > 1 || pRects[0].baseArrayLayer > 0)
1981       return false;
1982 
1983    return anv_can_hiz_clear_ds_view(cmd_buffer->device, ds_att->iview,
1984                                     ds_att->layout,
1985                                     attachment->aspectMask,
1986                                     attachment->clearValue.depthStencil.depth,
1987                                     pRects->rect,
1988                                     cmd_buffer->queue_family->queueFlags);
1989 }
1990 
1991 static void
1992 clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
1993                                struct blorp_batch *batch,
1994                                const VkClearAttachment *attachment,
1995                                uint32_t rectCount, const VkClearRect *pRects)
1996 {
1997    static const union isl_color_value color_value = { .u32 = { 0, } };
1998    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1999    const struct anv_attachment *d_att = &gfx->depth_att;
2000    const struct anv_attachment *s_att = &gfx->stencil_att;
2001    if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
2002        s_att->vk_format == VK_FORMAT_UNDEFINED)
2003       return;
2004 
2005    const struct anv_attachment *ds_att = d_att->iview ? d_att : s_att;
2006    if (ds_att->iview &&
2007        can_hiz_clear_att(cmd_buffer, batch, ds_att, attachment, rectCount, pRects)) {
2008       anv_fast_clear_depth_stencil(cmd_buffer, batch, ds_att->iview->image,
2009                                    attachment->aspectMask,
2010                                    ds_att->iview->planes[0].isl.base_level,
2011                                    ds_att->iview->planes[0].isl.base_array_layer,
2012                                    pRects[0].layerCount, pRects->rect,
2013                                    attachment->clearValue.depthStencil.stencil);
2014       return;
2015    }
2016 
2017    bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
2018    bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
2019 
2020    enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
2021    if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
2022       depth_format = anv_get_isl_format(cmd_buffer->device->info,
2023                                         d_att->vk_format,
2024                                         VK_IMAGE_ASPECT_DEPTH_BIT,
2025                                         VK_IMAGE_TILING_OPTIMAL);
2026    }
2027 
2028    uint32_t binding_table;
2029    VkResult result =
2030       binding_table_for_surface_state(cmd_buffer,
2031                                       gfx->null_surface_state,
2032                                       &binding_table);
2033    if (result != VK_SUCCESS)
2034       return;
2035 
2036    /* If multiview is enabled we ignore baseArrayLayer and layerCount */
2037    if (gfx->view_mask) {
2038       u_foreach_bit(view_idx, gfx->view_mask) {
2039          for (uint32_t r = 0; r < rectCount; ++r) {
2040             const VkOffset2D offset = pRects[r].rect.offset;
2041             const VkExtent2D extent = pRects[r].rect.extent;
2042             VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
2043             blorp_clear_attachments(batch, binding_table,
2044                                     depth_format,
2045                                     gfx->samples,
2046                                     view_idx, 1,
2047                                     offset.x, offset.y,
2048                                     offset.x + extent.width,
2049                                     offset.y + extent.height,
2050                                     false, color_value,
2051                                     clear_depth, value.depth,
2052                                     clear_stencil ? 0xff : 0, value.stencil);
2053          }
2054       }
2055       return;
2056    }
2057 
2058    for (uint32_t r = 0; r < rectCount; ++r) {
2059       const VkOffset2D offset = pRects[r].rect.offset;
2060       const VkExtent2D extent = pRects[r].rect.extent;
2061       VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
2062       assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
2063       blorp_clear_attachments(batch, binding_table,
2064                               depth_format,
2065                               gfx->samples,
2066                               pRects[r].baseArrayLayer,
2067                               pRects[r].layerCount,
2068                               offset.x, offset.y,
2069                               offset.x + extent.width, offset.y + extent.height,
2070                               false, color_value,
2071                               clear_depth, value.depth,
2072                               clear_stencil ? 0xff : 0, value.stencil);
2073    }
2074 }
2075 
2076 void anv_CmdClearAttachments(
2077     VkCommandBuffer                             commandBuffer,
2078     uint32_t                                    attachmentCount,
2079     const VkClearAttachment*                    pAttachments,
2080     uint32_t                                    rectCount,
2081     const VkClearRect*                          pRects)
2082 {
2083    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2084 
2085    /* Because this gets called within a render pass, we tell blorp not to
2086     * trash our depth and stencil buffers.
2087     */
2088    struct blorp_batch batch;
2089    enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
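   /* If conditional rendering is active, emit the predicate now so the
    * attachment clears below can be predicated on it.
    */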
2090    if (cmd_buffer->state.conditional_render_enabled) {
2091       anv_cmd_emit_conditional_render_predicate(cmd_buffer);
2092       flags |= BLORP_BATCH_PREDICATE_ENABLE;
2093    }
2094    anv_blorp_batch_init(cmd_buffer, &batch, flags);
2095 
2096    for (uint32_t a = 0; a < attachmentCount; ++a) {
2097       if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
2098          assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
2099          clear_color_attachment(cmd_buffer, &batch,
2100                                 &pAttachments[a],
2101                                 rectCount, pRects);
2102       } else {
2103          clear_depth_stencil_attachment(cmd_buffer, &batch,
2104                                         &pAttachments[a],
2105                                         rectCount, pRects);
2106       }
2107    }
2108 
2109    anv_blorp_batch_finish(&batch);
2110 }
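
/* A minimal, hypothetical usage sketch from the application side, clearing
 * color attachment 0 over a 64x64 region of the current render pass (the
 * attachment index and extent are made up for illustration):
 *
 *    VkClearAttachment att = {
 *       .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 *       .colorAttachment = 0,
 *       .clearValue.color.float32 = { 0.0f, 0.0f, 0.0f, 1.0f },
 *    };
 *    VkClearRect rect = {
 *       .rect = { .offset = { 0, 0 }, .extent = { 64, 64 } },
 *       .baseArrayLayer = 0,
 *       .layerCount = 1,
 *    };
 *    vkCmdClearAttachments(cmd, 1, &att, 1, &rect);
 */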
2111 
2112 void
2113 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
2114                        const struct anv_image *src_image,
2115                        enum isl_aux_usage src_aux_usage,
2116                        uint32_t src_level, uint32_t src_base_layer,
2117                        const struct anv_image *dst_image,
2118                        enum isl_aux_usage dst_aux_usage,
2119                        uint32_t dst_level, uint32_t dst_base_layer,
2120                        VkImageAspectFlagBits aspect,
2121                        uint32_t src_x, uint32_t src_y,
2122                        uint32_t dst_x, uint32_t dst_y,
2123                        uint32_t width, uint32_t height,
2124                        uint32_t layer_count,
2125                        enum blorp_filter filter)
2126 {
2127    struct blorp_batch batch;
2128    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2129    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2130 
2131    assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
2132    assert(src_image->vk.samples > 1);
2133    assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
2134    assert(dst_image->vk.samples == 1);
2135 
2136    struct blorp_surf src_surf, dst_surf;
2137    get_blorp_surf_for_anv_image(cmd_buffer, src_image, aspect,
2138                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2139                                 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2140                                 src_aux_usage, &src_surf);
2141    if (src_aux_usage == ISL_AUX_USAGE_MCS) {
2142       src_surf.clear_color_addr = anv_to_blorp_address(
2143          anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
2144                                         VK_IMAGE_ASPECT_COLOR_BIT));
2145    }
2146    get_blorp_surf_for_anv_image(cmd_buffer, dst_image, aspect,
2147                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2148                                 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2149                                 dst_aux_usage, &dst_surf);
2150    anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
2151                                      aspect, dst_aux_usage,
2152                                      dst_level, dst_base_layer, layer_count);
2153 
2154    if (filter == BLORP_FILTER_NONE) {
2155       /* If no explicit filter is provided, then it's implied by the type of
2156        * the source image.
2157        */
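      /* Averaging is only meaningful for normalized or floating-point color
       * data; depth, stencil and integer formats instead resolve by taking
       * sample 0.
       */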
2158       if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
2159           (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
2160           isl_format_has_int_channel(src_surf.surf->format)) {
2161          filter = BLORP_FILTER_SAMPLE_0;
2162       } else {
2163          filter = BLORP_FILTER_AVERAGE;
2164       }
2165    }
2166 
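   /* BLORP blits a single layer at a time, so emit one resolve blit per
    * array layer in the requested range.
    */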
2167    for (uint32_t l = 0; l < layer_count; l++) {
2168       blorp_blit(&batch,
2169                  &src_surf, src_level, src_base_layer + l,
2170                  ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
2171                  &dst_surf, dst_level, dst_base_layer + l,
2172                  ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
2173                  src_x, src_y, src_x + width, src_y + height,
2174                  dst_x, dst_y, dst_x + width, dst_y + height,
2175                  filter, false, false);
2176    }
2177 
2178    anv_blorp_batch_finish(&batch);
2179 }
2180 
2181 static void
2182 resolve_image(struct anv_cmd_buffer *cmd_buffer,
2183               struct anv_image *src_image,
2184               VkImageLayout src_image_layout,
2185               struct anv_image *dst_image,
2186               VkImageLayout dst_image_layout,
2187               const VkImageResolve2 *region)
2188 {
2189    assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
2190    assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
2191           vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));
2192 
2193    const uint32_t layer_count =
2194       vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);
2195 
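   /* Resolve each requested aspect separately, translating the source and
    * destination image layouts into the aux usages BLORP should assume for
    * the transfer.
    */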
2196    anv_foreach_image_aspect_bit(aspect_bit, src_image,
2197                                 region->srcSubresource.aspectMask) {
2198       enum isl_aux_usage src_aux_usage =
2199          anv_layout_to_aux_usage(cmd_buffer->device->info, src_image,
2200                                  (1 << aspect_bit),
2201                                  VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2202                                  src_image_layout,
2203                                  cmd_buffer->queue_family->queueFlags);
2204       enum isl_aux_usage dst_aux_usage =
2205          anv_layout_to_aux_usage(cmd_buffer->device->info, dst_image,
2206                                  (1 << aspect_bit),
2207                                  VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2208                                  dst_image_layout,
2209                                  cmd_buffer->queue_family->queueFlags);
2210 
2211       anv_image_msaa_resolve(cmd_buffer,
2212                              src_image, src_aux_usage,
2213                              region->srcSubresource.mipLevel,
2214                              region->srcSubresource.baseArrayLayer,
2215                              dst_image, dst_aux_usage,
2216                              region->dstSubresource.mipLevel,
2217                              region->dstSubresource.baseArrayLayer,
2218                              (1 << aspect_bit),
2219                              region->srcOffset.x,
2220                              region->srcOffset.y,
2221                              region->dstOffset.x,
2222                              region->dstOffset.y,
2223                              region->extent.width,
2224                              region->extent.height,
2225                              layer_count, BLORP_FILTER_NONE);
2226    }
2227 }
2228 
2229 void anv_CmdResolveImage2(
2230     VkCommandBuffer                             commandBuffer,
2231     const VkResolveImageInfo2*                  pResolveImageInfo)
2232 {
2233    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2234    ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
2235    ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);
2236 
2237    for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
2238       resolve_image(cmd_buffer,
2239                     src_image, pResolveImageInfo->srcImageLayout,
2240                     dst_image, pResolveImageInfo->dstImageLayout,
2241                     &pResolveImageInfo->pRegions[r]);
2242    }
2243 }
2244 
2245 void
2246 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
2247                       const struct anv_image *image,
2248                       VkImageAspectFlagBits aspect,
2249                       enum isl_aux_usage aux_usage,
2250                       enum isl_format format, struct isl_swizzle swizzle,
2251                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
2252                       VkRect2D area, union isl_color_value clear_color)
2253 {
2254    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
2255 
2256    /* We don't support planar images with multisampling yet */
2257    assert(image->n_planes == 1);
2258 
2259    struct blorp_batch batch;
2260    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2261 
2262    struct blorp_surf surf;
2263    get_blorp_surf_for_anv_image(cmd_buffer, image, aspect,
2264                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2265                                 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2266                                 aux_usage, &surf);
2267    anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
2268                                      level, base_layer, layer_count);
2269 
2270    blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
2271                level, base_layer, layer_count,
2272                area.offset.x, area.offset.y,
2273                area.offset.x + area.extent.width,
2274                area.offset.y + area.extent.height,
2275                clear_color, 0 /* color_write_disable */);
2276 
2277    anv_blorp_batch_finish(&batch);
2278 }
2279 
2280 void
2281 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
2282                               const struct anv_image *image,
2283                               VkImageAspectFlags aspects,
2284                               enum isl_aux_usage depth_aux_usage,
2285                               uint32_t level,
2286                               uint32_t base_layer, uint32_t layer_count,
2287                               VkRect2D area,
2288                               float depth_value, uint8_t stencil_value)
2289 {
2290    assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
2291                                VK_IMAGE_ASPECT_STENCIL_BIT));
2292 
2293    struct blorp_batch batch;
2294    anv_blorp_batch_init(cmd_buffer, &batch, 0);
2295    assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
2296 
2297    struct blorp_surf depth = {};
2298    if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
2299       get_blorp_surf_for_anv_image(cmd_buffer,
2300                                    image, VK_IMAGE_ASPECT_DEPTH_BIT,
2301                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2302                                    depth_aux_usage, &depth);
2303    }
2304 
2305    struct blorp_surf stencil = {};
2306    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2307       const uint32_t plane =
2308          anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
2309       get_blorp_surf_for_anv_image(cmd_buffer,
2310                                    image, VK_IMAGE_ASPECT_STENCIL_BIT,
2311                                    0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
2312                                    image->planes[plane].aux_usage, &stencil);
2313    }
2314 
2315    /* Blorp may choose to clear stencil using RGBA32_UINT for better
2316     * performance.  If it does this, we need to flush it out of the depth
2317     * cache before rendering to it.
2318     */
2319    anv_add_pending_pipe_bits(cmd_buffer,
2320                              ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2321                              ANV_PIPE_END_OF_PIPE_SYNC_BIT,
2322                              "before clear DS");
2323 
   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             depth_value,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             stencil_value);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the render
    * cache before anything else uses the image as a stencil buffer.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after clear DS");

   anv_blorp_batch_finish(&batch);
}

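/* A minimal usage sketch (hypothetical caller, not taken from this file):
 * a full HiZ resolve of level 0, layer 0 of a depth image could be
 * requested as
 *
 *    anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
 *                     0, 0, 1, ISL_AUX_OP_FULL_RESOLVE);
 */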
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(plane == 0);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage, &surf);

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   anv_fast_clear_depth_stencil(cmd_buffer, &batch, image, aspects, level,
                                base_layer, layer_count, area, stencil_value);

   anv_blorp_batch_finish(&batch);
}

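/* The two wrappers below set up a blorp batch and then delegate to
 * exec_mcs_op() and exec_ccs_op() respectively.  Their setup is identical,
 * except that the CCS variant also takes a mip level; MCS operations always
 * target the single level of a multisampled surface.
 */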
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   struct blorp_batch batch;
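   /* The batch flags are built arithmetically: multiplying a flag by a
    * boolean yields the flag when the condition is true and zero otherwise.
    * Predication is therefore enabled only when the caller requested it,
    * and the clear-color update is skipped when no clear value is given.
    */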
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate +
                        BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   exec_mcs_op(cmd_buffer, &batch, image, format, swizzle, aspect,
               base_layer, layer_count, mcs_op, clear_value);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate +
                        BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   exec_ccs_op(cmd_buffer, &batch, image, format, swizzle, aspect, level,
               base_layer, layer_count, ccs_op, clear_value);

   anv_blorp_batch_finish(&batch);
}
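
/* A minimal usage sketch (hypothetical caller, not taken from this file):
 * a predicated partial resolve of the first layer of a color image, where
 * `format` holds the image's render format, could be issued as
 *
 *    anv_image_ccs_op(cmd_buffer, image, format, ISL_SWIZZLE_IDENTITY,
 *                     VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1,
 *                     ISL_AUX_OP_PARTIAL_RESOLVE, NULL, true);
 */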