/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct elk_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const void *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, stage,
                               key, key_size, kernel, kernel_size,
                               prog_data, prog_data_size,
                               NULL, 0, NULL, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct elk_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {};

   blorp_init_elk(&device->blorp, device, &device->isl_dev,
                  device->physical->compiler, &config);
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info->verx10) {
   case 70:
      device->blorp.exec = gfx7_blorp_exec;
      break;
   case 75:
      device->blorp.exec = gfx75_blorp_exec;
      break;
   case 80:
      device->blorp.exec = gfx8_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
}

static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
   if (!(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
      assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
      flags |= BLORP_BATCH_USE_COMPUTE;
   }

   blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   bool ok UNUSED;

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->address.bo,
         .offset = buffer->address.offset + offset,
         .mocs = anv_mocs(device, buffer->address.bo,
                          is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                  : ISL_SURF_USAGE_TEXTURE_BIT),
      },
   };

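   /* Bind the buffer range as a linear 2-D surface so blorp can operate on
    * it as if it were an image.
    */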
   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch_B = row_pitch,
                      .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                       : ISL_SURF_USAGE_TEXTURE_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

/* Pick something high enough that it won't be used in core and low enough it
 * will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

static void
get_blorp_surf_for_anv_image(const struct anv_device *device,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

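   /* Unless the caller explicitly requested an aux usage via
    * ANV_IMAGE_LAYOUT_EXPLICIT_AUX, derive it from the image layout.
    */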
   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(device->info, image,
                                          aspect, usage, layout);
   }

   isl_surf_usage_flags_t mocs_usage =
      (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
      ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;

   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, mocs_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, 0),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color. If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp, otherwise it will
       * only stomp the CCS to a particular value and won't care about format
       * or clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = (union isl_color_value) {
            .f32 = { ANV_HZ_FC_VAL },
         };
      }
   }
}

static bool
get_blorp_surf_for_anv_shadow_image(const struct anv_device *device,
                                    const struct anv_image *image,
                                    VkImageAspectFlags aspect,
                                    struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   if (!anv_surface_is_valid(&image->planes[plane].shadow_surface))
      return false;

   const struct anv_surface *surface = &image->planes[plane].shadow_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT),
      },
   };

   return true;
}

static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
                      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }

         struct blorp_surf dst_shadow_surf;
         if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                                 dst_image,
                                                 1UL << aspect_bit,
                                                 &dst_shadow_surf)) {
            for (unsigned i = 0; i < layer_count; i++) {
               blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                          &dst_shadow_surf, dst_level, dst_base_layer + i,
                          srcOffset.x, srcOffset.y,
                          dstOffset.x, dstOffset.y,
                          extent.width, extent.height);
            }
         }
      }
   } else {
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }

      struct blorp_surf dst_shadow_surf;
      if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                              dst_image, dst_mask,
                                              &dst_shadow_surf)) {
         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_shadow_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R16_UINT;
   case 3:  return ISL_FORMAT_R8G8B8_UINT;
   case 4:  return ISL_FORMAT_R32_UINT;
   case 6:  return ISL_FORMAT_R16G16B16_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2* region,
                     bool buffer_to_image)
{
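   /* Describe both ends of the copy the same way so that a single
    * blorp_copy() loop below handles buffer-to-image as well as
    * image-to-buffer.
    */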
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us. For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT. Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   bool dst_has_shadow = false;
   struct blorp_surf dst_shadow_surf;
   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);

      dst_has_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                             anv_image, aspect,
                                             &dst_shadow_surf);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      if (dst_has_shadow) {
         blorp_copy(batch, &src->surf, src->level, src->offset.z,
                    &dst_shadow_surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y,
                    dst->offset.x, dst->offset.y,
                    extent.width, extent.height);
      }

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

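/* Sort each coordinate pair into ascending order and return whether the
 * source and destination pairs disagreed, i.e. whether the blit has to be
 * mirrored along this axis.
 */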
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, src_image->vk.format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here.
       */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case, the MIN2() will take the other one. If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}

/* This is the maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       ISL_SURF_USAGE_TEXTURE_BIT),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       ISL_SURF_USAGE_RENDER_TARGET_BIT),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU. Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

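   /* Copy in chunks of at most max_update_size, staging each chunk through a
    * dynamic state allocation the GPU can read.
    */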
   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                          ISL_SURF_USAGE_TEXTURE_BIT, false)
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          ISL_SURF_USAGE_RENDER_TARGET_BIT),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
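   /* For example, dstOffset = 12 and fillSize = 104 give bs = 4 (R32_UINT),
    * while a 16-byte-aligned offset and size give bs = 16
    * (R32G32B32A32_UINT).
    */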
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
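   /* Fill as many full MAX_SURFACE_DIM x MAX_SURFACE_DIM rectangles as we
    * can, each with a single clear.
    */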
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, 0 /* color_write_disable */);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

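   /* Fill any remaining full rows of MAX_SURFACE_DIM pixels. */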
   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, 0 /* color_write_disable */);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

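   /* Fill whatever is left with a single one-row clear. */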
   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, image->vk.format,
                               VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = u_minify(image->vk.extent.width, level);
         const unsigned level_height = u_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = u_minify(image->vk.extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), 0 /* color_write_disable */);
      }
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth, stencil, stencil_shadow;
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   bool has_stencil_shadow = false;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);

      has_stencil_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                             VK_IMAGE_ASPECT_STENCIL_BIT,
                                             &stencil_shadow);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = u_minify(image->vk.extent.width, level);
         const unsigned level_height = u_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D)
            layer_count = u_minify(image->vk.extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);

         if (clear_stencil && has_stencil_shadow) {
            union isl_color_value stencil_color = {
               .u32 = { pDepthStencil->stencil, },
            };
            blorp_clear(&batch, &stencil_shadow,
                        ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                        level, base_layer, layer_count,
                        0, 0, level_width, level_height,
                        stencil_color, 0 /* color_write_disable */);
         }
      }
   }

   anv_blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space. Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

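/* Allocate a one-entry binding table pointing at the given surface state and
 * return its offset.
 */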
1154 static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer * cmd_buffer,struct anv_state surface_state,uint32_t * bt_offset)1155 binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
1156 struct anv_state surface_state,
1157 uint32_t *bt_offset)
1158 {
1159 uint32_t state_offset;
1160 struct anv_state bt_state;
1161
1162 VkResult result =
1163 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
1164 &bt_state);
1165 if (result != VK_SUCCESS)
1166 return result;
1167
1168 uint32_t *bt_map = bt_state.map;
1169 bt_map[0] = surface_state.offset + state_offset;
1170
1171 *bt_offset = bt_state.offset;
1172 return VK_SUCCESS;
1173 }
1174
1175 static void
clear_color_attachment(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,const VkClearAttachment * attachment,uint32_t rectCount,const VkClearRect * pRects)1176 clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
1177 struct blorp_batch *batch,
1178 const VkClearAttachment *attachment,
1179 uint32_t rectCount, const VkClearRect *pRects)
1180 {
1181 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1182 const uint32_t att_idx = attachment->colorAttachment;
1183 assert(att_idx < gfx->color_att_count);
1184 const struct anv_attachment *att = &gfx->color_att[att_idx];
1185
1186 if (att->vk_format == VK_FORMAT_UNDEFINED)
1187 return;
1188
1189 uint32_t binding_table;
1190 VkResult result =
1191 binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
1192 &binding_table);
1193 if (result != VK_SUCCESS)
1194 return;
1195
1196 union isl_color_value clear_color =
1197 vk_to_isl_color(attachment->clearValue.color);
1198
1199 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1200 if (gfx->view_mask) {
1201 u_foreach_bit(view_idx, gfx->view_mask) {
1202 for (uint32_t r = 0; r < rectCount; ++r) {
1203 const VkOffset2D offset = pRects[r].rect.offset;
1204 const VkExtent2D extent = pRects[r].rect.extent;
1205 blorp_clear_attachments(batch, binding_table,
1206 ISL_FORMAT_UNSUPPORTED,
1207 gfx->samples,
1208 view_idx, 1,
1209 offset.x, offset.y,
1210 offset.x + extent.width,
1211 offset.y + extent.height,
1212 true, clear_color, false, 0.0f, 0, 0);
1213 }
1214 }
1215 return;
1216 }
1217
1218 for (uint32_t r = 0; r < rectCount; ++r) {
1219 const VkOffset2D offset = pRects[r].rect.offset;
1220 const VkExtent2D extent = pRects[r].rect.extent;
1221 assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
1222 blorp_clear_attachments(batch, binding_table,
1223 ISL_FORMAT_UNSUPPORTED,
1224 gfx->samples,
1225 pRects[r].baseArrayLayer,
1226 pRects[r].layerCount,
1227 offset.x, offset.y,
1228 offset.x + extent.width, offset.y + extent.height,
1229 true, clear_color, false, 0.0f, 0, 0);
1230 }
1231 }
1232
1233 static void
clear_depth_stencil_attachment(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,const VkClearAttachment * attachment,uint32_t rectCount,const VkClearRect * pRects)1234 clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
1235 struct blorp_batch *batch,
1236 const VkClearAttachment *attachment,
1237 uint32_t rectCount, const VkClearRect *pRects)
1238 {
1239 static const union isl_color_value color_value = { .u32 = { 0, } };
1240 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1241 const struct anv_attachment *d_att = &gfx->depth_att;
1242 const struct anv_attachment *s_att = &gfx->stencil_att;
1243 if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
1244 s_att->vk_format == VK_FORMAT_UNDEFINED)
1245 return;
1246
1247 bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
1248 bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
1249
1250 enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
1251 if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
1252 depth_format = anv_get_isl_format(cmd_buffer->device->info,
1253 d_att->vk_format,
1254 VK_IMAGE_ASPECT_DEPTH_BIT,
1255 VK_IMAGE_TILING_OPTIMAL);
1256 }
1257
1258 uint32_t binding_table;
1259 VkResult result =
1260 binding_table_for_surface_state(cmd_buffer,
1261 gfx->null_surface_state,
1262 &binding_table);
1263 if (result != VK_SUCCESS)
1264 return;
1265
1266 /* If multiview is enabled we ignore baseArrayLayer and layerCount */
1267 if (gfx->view_mask) {
1268 u_foreach_bit(view_idx, gfx->view_mask) {
1269 for (uint32_t r = 0; r < rectCount; ++r) {
1270 const VkOffset2D offset = pRects[r].rect.offset;
1271 const VkExtent2D extent = pRects[r].rect.extent;
1272 VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
1273 blorp_clear_attachments(batch, binding_table,
1274 depth_format,
1275 gfx->samples,
1276 view_idx, 1,
1277 offset.x, offset.y,
1278 offset.x + extent.width,
1279 offset.y + extent.height,
1280 false, color_value,
1281 clear_depth, value.depth,
1282 clear_stencil ? 0xff : 0, value.stencil);
1283 }
1284 }
1285 return;
1286 }
1287
1288 for (uint32_t r = 0; r < rectCount; ++r) {
1289 const VkOffset2D offset = pRects[r].rect.offset;
1290 const VkExtent2D extent = pRects[r].rect.extent;
1291 VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
1292 assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
1293 blorp_clear_attachments(batch, binding_table,
1294 depth_format,
1295 gfx->samples,
1296 pRects[r].baseArrayLayer,
1297 pRects[r].layerCount,
1298 offset.x, offset.y,
1299 offset.x + extent.width, offset.y + extent.height,
1300 false, color_value,
1301 clear_depth, value.depth,
1302 clear_stencil ? 0xff : 0, value.stencil);
1303 }
1304 }
1305
anv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1306 void anv_CmdClearAttachments(
1307 VkCommandBuffer commandBuffer,
1308 uint32_t attachmentCount,
1309 const VkClearAttachment* pAttachments,
1310 uint32_t rectCount,
1311 const VkClearRect* pRects)
1312 {
1313 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1314
1315 /* Because this gets called within a render pass, we tell blorp not to
1316 * trash our depth and stencil buffers.
1317 */
1318 struct blorp_batch batch;
1319 enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
1320 if (cmd_buffer->state.conditional_render_enabled) {
1321 anv_cmd_emit_conditional_render_predicate(cmd_buffer);
1322 flags |= BLORP_BATCH_PREDICATE_ENABLE;
1323 }
1324 anv_blorp_batch_init(cmd_buffer, &batch, flags);
1325
1326 for (uint32_t a = 0; a < attachmentCount; ++a) {
1327 if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
1328 assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
1329 clear_color_attachment(cmd_buffer, &batch,
1330 &pAttachments[a],
1331 rectCount, pRects);
1332 } else {
1333 clear_depth_stencil_attachment(cmd_buffer, &batch,
1334 &pAttachments[a],
1335 rectCount, pRects);
1336 }
1337 }
1338
1339 anv_blorp_batch_finish(&batch);
1340 }
1341
1342 static void
anv_image_msaa_resolve(struct anv_cmd_buffer * cmd_buffer,const struct anv_image * src_image,enum isl_format src_format_override,enum isl_aux_usage src_aux_usage,uint32_t src_level,uint32_t src_base_layer,const struct anv_image * dst_image,enum isl_format dst_format_override,enum isl_aux_usage dst_aux_usage,uint32_t dst_level,uint32_t dst_base_layer,VkImageAspectFlagBits aspect,uint32_t src_x,uint32_t src_y,uint32_t dst_x,uint32_t dst_y,uint32_t width,uint32_t height,uint32_t layer_count,enum blorp_filter filter)1343 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
1344 const struct anv_image *src_image,
1345 enum isl_format src_format_override,
1346 enum isl_aux_usage src_aux_usage,
1347 uint32_t src_level, uint32_t src_base_layer,
1348 const struct anv_image *dst_image,
1349 enum isl_format dst_format_override,
1350 enum isl_aux_usage dst_aux_usage,
1351 uint32_t dst_level, uint32_t dst_base_layer,
1352 VkImageAspectFlagBits aspect,
1353 uint32_t src_x, uint32_t src_y,
1354 uint32_t dst_x, uint32_t dst_y,
1355 uint32_t width, uint32_t height,
1356 uint32_t layer_count,
1357 enum blorp_filter filter)
1358 {
1359 struct blorp_batch batch;
1360 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1361 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1362
1363 assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
1364 assert(src_image->vk.samples > 1);
1365 assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
1366 assert(dst_image->vk.samples == 1);
1367
1368 struct blorp_surf src_surf, dst_surf;
1369 get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
1370 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
1371 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1372 src_aux_usage, &src_surf);
1373 if (src_aux_usage == ISL_AUX_USAGE_MCS) {
1374 src_surf.clear_color_addr = anv_to_blorp_address(
1375 anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
1376 VK_IMAGE_ASPECT_COLOR_BIT));
1377 }
1378 get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
1379 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1380 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1381 dst_aux_usage, &dst_surf);
1382 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
1383 aspect, dst_aux_usage,
1384 dst_level, dst_base_layer, layer_count);
1385
1386 if (filter == BLORP_FILTER_NONE) {
1387 /* If no explicit filter is provided, then it's implied by the type of
1388 * the source image.
1389 */
1390 if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
1391 (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
1392 isl_format_has_int_channel(src_surf.surf->format)) {
1393 filter = BLORP_FILTER_SAMPLE_0;
1394 } else {
1395 filter = BLORP_FILTER_AVERAGE;
1396 }
1397 }
1398
1399 for (uint32_t l = 0; l < layer_count; l++) {
1400 blorp_blit(&batch,
1401 &src_surf, src_level, src_base_layer + l,
1402 src_format_override, ISL_SWIZZLE_IDENTITY,
1403 &dst_surf, dst_level, dst_base_layer + l,
1404 dst_format_override, ISL_SWIZZLE_IDENTITY,
1405 src_x, src_y, src_x + width, src_y + height,
1406 dst_x, dst_y, dst_x + width, dst_y + height,
1407 filter, false, false);
1408 }
1409
1410 anv_blorp_batch_finish(&batch);
1411 }
1412
1413 static enum blorp_filter
vk_to_blorp_resolve_mode(VkResolveModeFlagBits vk_mode)1414 vk_to_blorp_resolve_mode(VkResolveModeFlagBits vk_mode)
1415 {
1416 switch (vk_mode) {
1417 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
1418 return BLORP_FILTER_SAMPLE_0;
1419 case VK_RESOLVE_MODE_AVERAGE_BIT:
1420 return BLORP_FILTER_AVERAGE;
1421 case VK_RESOLVE_MODE_MIN_BIT:
1422 return BLORP_FILTER_MIN_SAMPLE;
1423 case VK_RESOLVE_MODE_MAX_BIT:
1424 return BLORP_FILTER_MAX_SAMPLE;
1425 default:
1426 return BLORP_FILTER_NONE;
1427 }
1428 }
1429
1430 void
anv_attachment_msaa_resolve(struct anv_cmd_buffer * cmd_buffer,const struct anv_attachment * att,VkImageLayout layout,VkImageAspectFlagBits aspect)1431 anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
1432 const struct anv_attachment *att,
1433 VkImageLayout layout,
1434 VkImageAspectFlagBits aspect)
1435 {
1436 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1437 const struct anv_image_view *src_iview = att->iview;
1438 const struct anv_image_view *dst_iview = att->resolve_iview;
1439
1440 enum isl_aux_usage src_aux_usage =
1441 anv_layout_to_aux_usage(cmd_buffer->device->info,
1442 src_iview->image, aspect,
1443 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
1444 layout);
1445
1446 enum isl_aux_usage dst_aux_usage =
1447 anv_layout_to_aux_usage(cmd_buffer->device->info,
1448 dst_iview->image, aspect,
1449 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1450 att->resolve_layout);
1451
1452 enum blorp_filter filter = vk_to_blorp_resolve_mode(att->resolve_mode);
1453
1454 /* Depth/stencil should not use their view format for resolve because they
1455 * go in pairs.
1456 */
1457 enum isl_format src_format = ISL_FORMAT_UNSUPPORTED;
1458 enum isl_format dst_format = ISL_FORMAT_UNSUPPORTED;
1459 if (!(aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
1460 src_format = src_iview->planes[0].isl.format;
1461 dst_format = dst_iview->planes[0].isl.format;
1462 }
1463
1464 const VkRect2D render_area = gfx->render_area;
1465 if (gfx->view_mask == 0) {
1466 anv_image_msaa_resolve(cmd_buffer,
1467 src_iview->image, src_format, src_aux_usage,
1468 src_iview->planes[0].isl.base_level,
1469 src_iview->planes[0].isl.base_array_layer,
1470 dst_iview->image, dst_format, dst_aux_usage,
1471 dst_iview->planes[0].isl.base_level,
1472 dst_iview->planes[0].isl.base_array_layer,
1473 aspect,
1474 render_area.offset.x, render_area.offset.y,
1475 render_area.offset.x, render_area.offset.y,
1476 render_area.extent.width,
1477 render_area.extent.height,
1478 gfx->layer_count, filter);
1479 } else {
1480 uint32_t res_view_mask = gfx->view_mask;
1481 while (res_view_mask) {
1482 int i = u_bit_scan(&res_view_mask);
1483
1484 anv_image_msaa_resolve(cmd_buffer,
1485 src_iview->image, src_format, src_aux_usage,
1486 src_iview->planes[0].isl.base_level,
1487 src_iview->planes[0].isl.base_array_layer + i,
1488 dst_iview->image, dst_format, dst_aux_usage,
1489 dst_iview->planes[0].isl.base_level,
1490 dst_iview->planes[0].isl.base_array_layer + i,
1491 aspect,
1492 render_area.offset.x, render_area.offset.y,
1493 render_area.offset.x, render_area.offset.y,
1494 render_area.extent.width,
1495 render_area.extent.height,
1496 1, filter);
1497 }
1498 }
1499 }
1500
1501 static void
resolve_image(struct anv_cmd_buffer * cmd_buffer,struct anv_image * src_image,VkImageLayout src_image_layout,struct anv_image * dst_image,VkImageLayout dst_image_layout,const VkImageResolve2 * region)1502 resolve_image(struct anv_cmd_buffer *cmd_buffer,
1503 struct anv_image *src_image,
1504 VkImageLayout src_image_layout,
1505 struct anv_image *dst_image,
1506 VkImageLayout dst_image_layout,
1507 const VkImageResolve2 *region)
1508 {
1509 assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
1510 assert(vk_image_subresource_layer_count(&src_image->vk, ®ion->srcSubresource) ==
1511 vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource));
1512
1513 const uint32_t layer_count =
1514 vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource);
1515
1516 anv_foreach_image_aspect_bit(aspect_bit, src_image,
1517 region->srcSubresource.aspectMask) {
1518 enum isl_aux_usage src_aux_usage =
1519 anv_layout_to_aux_usage(cmd_buffer->device->info, src_image,
1520 (1 << aspect_bit),
1521 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
1522 src_image_layout);
1523 enum isl_aux_usage dst_aux_usage =
1524 anv_layout_to_aux_usage(cmd_buffer->device->info, dst_image,
1525 (1 << aspect_bit),
1526 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1527 dst_image_layout);
1528
1529 anv_image_msaa_resolve(cmd_buffer,
1530 src_image, ISL_FORMAT_UNSUPPORTED, src_aux_usage,
1531 region->srcSubresource.mipLevel,
1532 region->srcSubresource.baseArrayLayer,
1533 dst_image, ISL_FORMAT_UNSUPPORTED, dst_aux_usage,
1534 region->dstSubresource.mipLevel,
1535 region->dstSubresource.baseArrayLayer,
1536 (1 << aspect_bit),
1537 region->srcOffset.x,
1538 region->srcOffset.y,
1539 region->dstOffset.x,
1540 region->dstOffset.y,
1541 region->extent.width,
1542 region->extent.height,
1543 layer_count, BLORP_FILTER_NONE);
1544 }
1545 }
1546
void anv_CmdResolveImage2(
    VkCommandBuffer commandBuffer,
    const VkResolveImageInfo2* pResolveImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);

   for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
      resolve_image(cmd_buffer,
                    src_image, pResolveImageInfo->srcImageLayout,
                    dst_image, pResolveImageInfo->dstImageLayout,
                    &pResolveImageInfo->pRegions[r]);
   }
}

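/* Keep an image's shadow surface in sync with its main surface by copying
 * every requested level/layer with blorp_copy. For 3D images the per-level
 * depth replaces the caller's layer count.
 */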
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        /* If the sample count is set, we are in a render pass
                         * and don't want blorp to overwrite depth/stencil
                         * state
                         */
                        cmd_buffer->state.gfx.samples ? BLORP_BATCH_NO_EMIT_DEPTH_STENCIL : 0);

   /* We don't know who touched the main surface last so flush a bunch of
    * caches to ensure we get good data.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before copy_to_shadow");

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                VK_IMAGE_LAYOUT_GENERAL,
                                ISL_AUX_USAGE_NONE, &surf);
   assert(surf.aux_usage == ISL_AUX_USAGE_NONE);

   struct blorp_surf shadow_surf;
   get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                       image, aspect, &shadow_surf);

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = vk_image_mip_level_extent(&image->vk, level);

      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   /* We just wrote to the buffer with the render cache. Flush it. */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
                             "after copy_to_shadow");

   anv_blorp_batch_finish(&batch);
}

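/* Clear a rectangle of a (single-plane) color image via blorp_clear and
 * record the write so aux state tracking sees the new contents.
 */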
void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   /* We don't support planar images with multisampling yet */
   assert(image->n_planes == 1);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                aux_usage, &surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
                                     level, base_layer, layer_count);

   blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
               level, base_layer, layer_count,
               area.offset.x, area.offset.y,
               area.offset.x + area.extent.width,
               area.offset.y + area.extent.height,
               clear_color, 0 /* color_write_disable */);

   anv_blorp_batch_finish(&batch);
}

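/* Slow-path (non-HiZ) depth/stencil clear. Both aspects are cleared in a
 * single blorp_clear_depth_stencil call; a stencil shadow surface, if
 * present, is cleared afterwards to match.
 */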
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   depth_aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance. If it does this, we need to flush it out of the depth
    * cache before rendering to it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before clear DS");

   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             depth_value,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             stencil_value);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance. If it does this, we need to flush it out of the render
    * cache before someone starts trying to do stencil on it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after clear DS");

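   /* Keep the stencil shadow surface (when one exists) in sync by clearing
    * it to the same value. The shadow copy is presumably there for sampling
    * stencil on hardware that cannot texture from the real stencil buffer
    * directly.
    */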
   struct blorp_surf stencil_shadow;
   if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
       get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                           &stencil_shadow)) {
      union isl_color_value stencil_color = {
         .u32 = { stencil_value },
      };
      blorp_clear(&batch, &stencil_shadow,
                  ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                  level, base_layer, layer_count,
                  area.offset.x, area.offset.y,
                  area.offset.x + area.extent.width,
                  area.offset.y + area.extent.height,
                  stencil_color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);
}

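/* Run a single HiZ operation (resolve, ambiguate, etc.) over a layer range
 * of the depth aspect.
 */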
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(plane == 0);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage, &surf);

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   anv_blorp_batch_finish(&batch);
}

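/* Fast depth/stencil clear through the HiZ path, wrapped in the depth
 * stall/flush workarounds the PRMs require around depth buffer clears (see
 * the comments below).
 */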
void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
      assert(base_layer + layer_count <=
             anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
    *
    *    "The following is required when performing a depth buffer clear with
    *    using the WM_STATE or 3DSTATE_WM:
    *
    *       * If other rendering operations have preceded this clear, a
    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *         enabled must be issued before the rectangle primitive used for
    *         the depth buffer clear operation.
    *       * [...]"
    *
    * Even though the PRM only says that this is required if using 3DSTATE_WM
    * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
    * hangs when doing a clear with WM_HZ_OP.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "before clear hiz");

   blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                 level, base_layer, layer_count,
                                 area.offset.x, area.offset.y,
                                 area.offset.x + area.extent.width,
                                 area.offset.y + area.extent.height,
                                 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                                 ANV_HZ_FC_VAL,
                                 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 stencil_value);

   anv_blorp_batch_finish(&batch);

   /* From the SKL PRM, Depth Buffer Clear:
    *
    *    "Depth Buffer Clear Workaround
    *
    *    Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
    *    starting to render. DepthStall and DepthFlush are not needed between
    *    consecutive depth clear passes nor is it required if the depth-clear
    *    pass was done with “full_surf_clear” bit set in the
    *    3DSTATE_WM_HZ_OP."
    *
    * Even though the PRM provides a bunch of conditions under which this is
    * supposedly unnecessary, we choose to perform the flush unconditionally
    * just to be safe.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "after clear hiz");
}

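/* Perform an MCS operation (fast clear or partial resolve) on a
 * multisampled color image. Fast clears have to be fenced off from regular
 * rendering with PIPE_CONTROLs; see the comments in the body.
 */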
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(image->vk.samples > 1);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

   /* Multisampling with multi-planar formats is not supported */
   assert(image->n_planes == 1);

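   /* BLORP_BATCH_PREDICATE_ENABLE * predicate is a branch-free way to set
    * the flag only when predication is requested: the bool promotes to 0 or
    * 1, so the product is either 0 or exactly the flag value.
    */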
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                ISL_AUX_USAGE_MCS, &surf);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed. It appears that fast clear ops are not
    * properly synchronized with other drawing. This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations. We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before fast clear mcs");

   if (!blorp_address_is_null(surf.clear_color_addr)) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
                                "before blorp clear color edit");
   }

   switch (mcs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format, swizzle,
                       0, base_layer, layer_count,
                       0, 0, image->vk.extent.width, image->vk.extent.height);
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_mcs_partial_resolve(&batch, &surf, format,
                                base_layer, layer_count);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_AMBIGUATE:
   default:
      unreachable("Unsupported MCS operation");
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after fast clear mcs");

   anv_blorp_batch_finish(&batch);
}

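/* Perform a CCS operation (fast clear, resolve, or ambiguate) on a
 * single-sampled color image, with the same fast-clear fencing as the MCS
 * path above.
 */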
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
   assert(image->vk.samples == 1);
   assert(level < anv_image_aux_levels(image, aspect));
   /* Multi-LOD YCbCr is not allowed */
   assert(image->n_planes == 1 || level == 0);
   assert(base_layer + layer_count <=
          anv_image_aux_layers(image, aspect, level));

   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage,
                                &surf);

   uint32_t level_width = u_minify(surf.surf->logical_level0_px.w, level);
   uint32_t level_height = u_minify(surf.surf->logical_level0_px.h, level);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed. It appears that fast clear ops are not
    * properly synchronized with other drawing. This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations. We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before fast clear ccs");

   if (!blorp_address_is_null(surf.clear_color_addr)) {
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
                                "before blorp clear color edit");
   }

   switch (ccs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format, swizzle,
                       level, base_layer, layer_count,
                       0, 0, level_width, level_height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
                        format, ccs_op);
      break;
   case ISL_AUX_OP_AMBIGUATE:
      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;
         blorp_ccs_ambiguate(&batch, &surf, level, layer);
      }
      break;
   default:
      unreachable("Unsupported CCS operation");
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after fast clear ccs");

   anv_blorp_batch_finish(&batch);
}
