1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vulkan/runtime/vk_common_entrypoints.h"
30
static uint32_t
meta_blit_key_hash(const void *key)
33 {
34 return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
35 }
36
static bool
meta_blit_key_compare(const void *key1, const void *key2)
39 {
40 return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
41 }
42
43 static bool
44 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
45 VkImageAspectFlags aspect,
46 struct v3dv_image *image,
47 VkFormat dst_format,
48 VkFormat src_format,
49 struct v3dv_buffer *buffer,
50 uint32_t buffer_bpp,
51 VkColorComponentFlags cmask,
52 VkComponentMapping *cswizzle,
53 uint32_t region_count,
54 const VkBufferImageCopy2 *regions);
55
static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
60 {
61 VkResult result;
62
63 if (*descriptor_set_layout == 0) {
64 VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
65 .binding = 0,
66 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
67 .descriptorCount = 1,
68 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
69 };
70 VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
71 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
72 .bindingCount = 1,
73 .pBindings = &descriptor_set_layout_binding,
74 };
75 result =
76 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
77 &descriptor_set_layout_info,
78 &device->vk.alloc,
79 descriptor_set_layout);
80 if (result != VK_SUCCESS)
81 return false;
82 }
83
84 assert(*pipeline_layout == 0);
85 VkPipelineLayoutCreateInfo pipeline_layout_info = {
86 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
87 .setLayoutCount = 1,
88 .pSetLayouts = descriptor_set_layout,
89 .pushConstantRangeCount = 1,
90 .pPushConstantRanges =
91 &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
92 };
93
94 result =
95 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
96 &pipeline_layout_info,
97 &device->vk.alloc,
98 pipeline_layout);
99 return result == VK_SUCCESS;
100 }
101
void
v3dv_meta_blit_init(struct v3dv_device *device)
104 {
105 for (uint32_t i = 0; i < 3; i++) {
106 device->meta.blit.cache[i] =
107 _mesa_hash_table_create(NULL,
108 meta_blit_key_hash,
109 meta_blit_key_compare);
110 }
111
112 create_blit_pipeline_layout(device,
113 &device->meta.blit.ds_layout,
114 &device->meta.blit.p_layout);
115 }
116
void
v3dv_meta_blit_finish(struct v3dv_device *device)
119 {
120 VkDevice _device = v3dv_device_to_handle(device);
121
122 for (uint32_t i = 0; i < 3; i++) {
123 hash_table_foreach(device->meta.blit.cache[i], entry) {
124 struct v3dv_meta_blit_pipeline *item = entry->data;
125 v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
126 v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
127 v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
128 vk_free(&device->vk.alloc, item);
129 }
130 _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
131 }
132
133 if (device->meta.blit.p_layout) {
134 v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
135 &device->vk.alloc);
136 }
137
138 if (device->meta.blit.ds_layout) {
139 v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
140 &device->vk.alloc);
141 }
142 }
143
static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
146 {
147 return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
148 }
149
static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
152 {
153 return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
154 }
155
static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
160 {
161 VkResult result;
162
163 if (*ds_layout == 0) {
164 VkDescriptorSetLayoutBinding ds_layout_binding = {
165 .binding = 0,
166 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
167 .descriptorCount = 1,
168 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
169 };
170 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
171 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
172 .bindingCount = 1,
173 .pBindings = &ds_layout_binding,
174 };
175 result =
176 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
177 &ds_layout_info,
178 &device->vk.alloc,
179 ds_layout);
180 if (result != VK_SUCCESS)
181 return false;
182 }
183
184 assert(*p_layout == 0);
   /* FIXME: this is abusing the API a bit, since not all of our copy
    * pipelines have a geometry shader. We could create 2 different pipeline
    * layouts, but this works for us for now.
    */
189 #define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
190 #define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
191 #define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
192 #define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
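   /* Push constant layout sketch (illustrative, derived from the offsets
    * above): the fragment range covers the box (offsets 0-15), the stride
    * (offset 16) and the buffer offset (offset 20); the geometry range
    * carries the layer index at offset 24.
    */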
193 VkPushConstantRange ranges[2] = {
194 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
195 { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
196 };
197
198 VkPipelineLayoutCreateInfo p_layout_info = {
199 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
200 .setLayoutCount = 1,
201 .pSetLayouts = ds_layout,
202 .pushConstantRangeCount = 2,
203 .pPushConstantRanges = ranges,
204 };
205
206 result =
207 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
208 &p_layout_info,
209 &device->vk.alloc,
210 p_layout);
211 return result == VK_SUCCESS;
212 }
213
void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
216 {
217 for (uint32_t i = 0; i < 3; i++) {
218 device->meta.texel_buffer_copy.cache[i] =
219 _mesa_hash_table_create(NULL,
220 meta_texel_buffer_copy_key_hash,
221 meta_texel_buffer_copy_key_compare);
222 }
223
224 create_texel_buffer_copy_pipeline_layout(
225 device,
226 &device->meta.texel_buffer_copy.ds_layout,
227 &device->meta.texel_buffer_copy.p_layout);
228 }
229
void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
232 {
233 VkDevice _device = v3dv_device_to_handle(device);
234
235 for (uint32_t i = 0; i < 3; i++) {
236 hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
237 struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
238 v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
239 v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
240 v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
241 vk_free(&device->vk.alloc, item);
242 }
243 _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
244 }
245
246 if (device->meta.texel_buffer_copy.p_layout) {
247 v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
248 &device->vk.alloc);
249 }
250
251 if (device->meta.texel_buffer_copy.ds_layout) {
252 v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
253 &device->vk.alloc);
254 }
255 }
256
static VkFormat
get_compatible_tlb_format(VkFormat format)
259 {
260 switch (format) {
261 case VK_FORMAT_R8G8B8A8_SNORM:
262 return VK_FORMAT_R8G8B8A8_UINT;
263
264 case VK_FORMAT_R8G8_SNORM:
265 return VK_FORMAT_R8G8_UINT;
266
267 case VK_FORMAT_R8_SNORM:
268 return VK_FORMAT_R8_UINT;
269
270 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
271 return VK_FORMAT_A8B8G8R8_UINT_PACK32;
272
273 case VK_FORMAT_R16_UNORM:
274 case VK_FORMAT_R16_SNORM:
275 return VK_FORMAT_R16_UINT;
276
277 case VK_FORMAT_R16G16_UNORM:
278 case VK_FORMAT_R16G16_SNORM:
279 return VK_FORMAT_R16G16_UINT;
280
281 case VK_FORMAT_R16G16B16A16_UNORM:
282 case VK_FORMAT_R16G16B16A16_SNORM:
283 return VK_FORMAT_R16G16B16A16_UINT;
284
285 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
286 return VK_FORMAT_R32_SFLOAT;
287
288 /* We can't render to compressed formats using the TLB so instead we use
289 * a compatible format with the same bpp as the compressed format. Because
290 * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
291 * case of ETC), when we implement copies with the compatible format we
292 * will have to divide offsets and dimensions on the compressed image by
293 * the compressed block size.
294 */
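   /* For example (illustrative only): a 64x64 region of an ETC2 RGBA8 image
    * (4x4 blocks) aliased as VK_FORMAT_R32G32B32A32_UINT is processed as a
    * 16x16 job, since offsets and dimensions are divided by the block size,
    * i.e. DIV_ROUND_UP(64, 4) = 16.
    */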
295 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
296 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
297 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
298 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
299 case VK_FORMAT_BC2_UNORM_BLOCK:
300 case VK_FORMAT_BC2_SRGB_BLOCK:
301 case VK_FORMAT_BC3_SRGB_BLOCK:
302 case VK_FORMAT_BC3_UNORM_BLOCK:
303 case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
304 case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
305 case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
306 case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
307 case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
308 case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
309 case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
310 case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
311 case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
312 case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
313 case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
314 case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
315 case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
316 case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
317 case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
318 case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
319 case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
320 case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
321 case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
322 case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
323 case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
324 case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
325 case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
326 case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
327 case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
328 case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
329 case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
330 case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
331 return VK_FORMAT_R32G32B32A32_UINT;
332
333 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
334 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
335 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
336 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
337 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
338 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
339 case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
340 case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
341 case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
342 case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
343 return VK_FORMAT_R16G16B16A16_UINT;
344
345 default:
346 return VK_FORMAT_UNDEFINED;
347 }
348 }
349
350 /**
351 * Checks if we can implement an image copy or clear operation using the TLB
352 * hardware.
353 *
 * The extent and miplevel are only used to validate tile stores (to match the
 * region to store against the miplevel dimensions to avoid cases where
 * the region to store is not aligned to tile boundaries). If extent is
357 * NULL no checks are done (which is fine if the image will only be used for a
358 * TLB load or when we know in advance that the store will be for the entire
359 * size of the image miplevel).
360 *
 * For TLB copies we are doing a per-plane copy, so for multi-plane formats,
362 * the compatible format will be single-plane.
363 */
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
                      uint8_t plane,
                      uint8_t miplevel,
                      const VkOffset3D *offset,
                      const VkExtent3D *extent,
                      VkFormat *compat_format)
371 {
372 if (offset->x != 0 || offset->y != 0)
373 return false;
374
375 /* FIXME: this is suboptimal, what we really want to check is that the
376 * extent of the region to copy is the full slice or a multiple of the
377 * tile size.
378 */
379 if (extent) {
380 struct v3d_resource_slice *slice = &image->planes[plane].slices[miplevel];
381 if (slice->width != extent->width || slice->height != extent->height)
382 return false;
383 }
384
385 if (image->format->planes[plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
386 if (compat_format)
387 *compat_format = image->planes[plane].vk_format;
388 return true;
389 }
390
391 /* If the image format is not TLB-supported, then check if we can use
392 * a compatible format instead.
393 */
394 if (compat_format) {
395 *compat_format = get_compatible_tlb_format(image->planes[plane].vk_format);
396 if (*compat_format != VK_FORMAT_UNDEFINED) {
397 assert(vk_format_get_plane_count(*compat_format) == 1);
398 return true;
399 }
400 }
401
402 return false;
403 }
404
405 /* Implements a copy using the TLB.
406 *
407 * This only works if we are copying from offset (0,0), since a TLB store for
408 * tile (x,y) will be written at the same tile offset into the destination.
409 * When this requirement is not met, we need to use a blit instead.
410 *
411 * Returns true if the implementation supports the requested operation (even if
412 * it failed to process it, for example, due to an out-of-memory error).
413 *
414 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2 *region)
420 {
421 VkFormat fb_format;
422 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
423 assert(plane < image->plane_count);
424
   if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
                              &region->imageOffset, &region->imageExtent,
                              &fb_format)) {
428 return false;
429 }
430
431 uint32_t internal_type, internal_bpp;
432 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
433 (fb_format, region->imageSubresource.aspectMask,
434 &internal_type, &internal_bpp);
435
436 uint32_t num_layers;
437 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
438 num_layers = region->imageSubresource.layerCount;
439 else
440 num_layers = region->imageExtent.depth;
441 assert(num_layers > 0);
442
443 struct v3dv_job *job =
444 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
445 if (!job)
446 return true;
447
448 /* Handle copy from compressed format using a compatible format */
449 const uint32_t block_w =
450 vk_format_get_blockwidth(image->planes[plane].vk_format);
451 const uint32_t block_h =
452 vk_format_get_blockheight(image->planes[plane].vk_format);
453 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
454 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
455
456 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
457 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
458 false);
459
460 struct v3dv_meta_framebuffer framebuffer;
461 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
462 internal_type, &job->frame_tiling);
463
464 v3dv_X(job->device, job_emit_binning_flush)(job);
465 v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
466 (job, buffer, image, &framebuffer, region);
467
468 v3dv_cmd_buffer_finish_job(cmd_buffer);
469
470 return true;
471 }
472
473 static bool
474 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
475 struct v3dv_image *dst,
476 VkFormat dst_format,
477 struct v3dv_image *src,
478 VkFormat src_format,
479 VkColorComponentFlags cmask,
480 VkComponentMapping *cswizzle,
481 const VkImageBlit2 *region,
482 VkFilter filter,
483 bool dst_is_padded_image);
484
485
486 /**
487 * A structure that contains all the information we may need in various
488 * processes involving image to buffer copies implemented with blit paths.
489 */
490 struct image_to_buffer_info {
491 /* Source image info */
492 VkFormat src_format;
493 uint8_t plane;
494 VkColorComponentFlags cmask;
495 VkComponentMapping cswizzle;
496 VkImageAspectFlags src_copy_aspect;
497 uint32_t block_width;
498 uint32_t block_height;
499
500 /* Destination buffer info */
501 VkFormat dst_format;
502 uint32_t buf_width;
503 uint32_t buf_height;
504 uint32_t buf_bpp;
505 VkImageAspectFlags dst_copy_aspect;
506 };
507
static VkImageBlit2
blit_region_for_image_to_buffer(const VkOffset3D *offset,
                                const VkExtent3D *extent,
                                uint32_t mip_level,
                                uint32_t base_layer,
                                uint32_t layer_offset,
                                struct image_to_buffer_info *info)
515 {
516 VkImageBlit2 output = {
517 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
518 .srcSubresource = {
519 .aspectMask = info->src_copy_aspect,
520 .mipLevel = mip_level,
521 .baseArrayLayer = base_layer + layer_offset,
522 .layerCount = 1,
523 },
524 .srcOffsets = {
525 {
526 DIV_ROUND_UP(offset->x, info->block_width),
527 DIV_ROUND_UP(offset->y, info->block_height),
528 offset->z + layer_offset,
529 },
530 {
531 DIV_ROUND_UP(offset->x + extent->width, info->block_width),
532 DIV_ROUND_UP(offset->y + extent->height, info->block_height),
533 offset->z + layer_offset + 1,
534 },
535 },
536 .dstSubresource = {
537 .aspectMask = info->dst_copy_aspect,
538 .mipLevel = 0,
539 .baseArrayLayer = 0,
540 .layerCount = 1,
541 },
542 .dstOffsets = {
543 { 0, 0, 0 },
544 {
545 DIV_ROUND_UP(extent->width, info->block_width),
546 DIV_ROUND_UP(extent->height, info->block_height),
547 1
548 },
549 },
550 };
551
552 return output;
553 }
554
555 /**
556 * Produces an image_to_buffer_info struct from a VkBufferImageCopy2 that we can
557 * use to implement buffer to image copies with blit paths.
558 *
559 * Returns false if the copy operation can't be implemented with a blit.
560 */
static bool
gather_image_to_buffer_info(struct v3dv_cmd_buffer *cmd_buffer,
                            struct v3dv_image *image,
                            const VkBufferImageCopy2 *region,
                            struct image_to_buffer_info *out_info)
566 {
567 bool supported = false;
568
569 VkImageAspectFlags dst_copy_aspect = region->imageSubresource.aspectMask;
570 /* For multi-planar images we copy one plane at a time using an image alias
571 * with a color aspect for each plane.
572 */
573 if (image->plane_count > 1)
574 dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
575
576 VkImageAspectFlags src_copy_aspect = region->imageSubresource.aspectMask;
577 uint8_t plane = v3dv_plane_from_aspect(src_copy_aspect);
578 assert(plane < image->plane_count);
579
   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is copying the stencil aspect out of
    * a combined d24s8 image (32bpp), where the buffer data is 8bpp.
    */
584 uint32_t buffer_bpp = image->planes[plane].cpp;
585
586 /* Because we are going to implement the copy as a blit, we need to create
587 * a linear image from the destination buffer and we also want our blit
588 * source and destination formats to be the same (to avoid any format
589 * conversions), so we choose a canonical format that matches the
590 * source image bpp.
591 *
592 * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to set up
594 * our formats, color write mask and source swizzle mask to match that.
595 */
596 VkFormat dst_format;
597 VkFormat src_format;
598 VkColorComponentFlags cmask = 0; /* All components */
599 VkComponentMapping cswizzle = {
600 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
601 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
602 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
603 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
604 };
605 switch (buffer_bpp) {
606 case 16:
607 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
608 dst_format = VK_FORMAT_R32G32B32A32_UINT;
609 src_format = dst_format;
610 break;
611 case 8:
612 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
613 dst_format = VK_FORMAT_R16G16B16A16_UINT;
614 src_format = dst_format;
615 break;
616 case 4:
617 switch (dst_copy_aspect) {
618 case VK_IMAGE_ASPECT_COLOR_BIT:
619 src_format = VK_FORMAT_R8G8B8A8_UINT;
620 dst_format = VK_FORMAT_R8G8B8A8_UINT;
621 break;
622 case VK_IMAGE_ASPECT_DEPTH_BIT:
623 assert(image->plane_count == 1);
624 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
625 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
626 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
627 if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
628 src_format = VK_FORMAT_R32_UINT;
629 dst_format = VK_FORMAT_R32_UINT;
630 } else {
            /* We want to write depth in the buffer in the first 24 bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
             */
636 src_format = VK_FORMAT_R8G8B8A8_UINT;
637 dst_format = VK_FORMAT_R8G8B8A8_UINT;
638 cmask = VK_COLOR_COMPONENT_R_BIT |
639 VK_COLOR_COMPONENT_G_BIT |
640 VK_COLOR_COMPONENT_B_BIT;
641 cswizzle.r = VK_COMPONENT_SWIZZLE_G;
642 cswizzle.g = VK_COMPONENT_SWIZZLE_B;
643 cswizzle.b = VK_COMPONENT_SWIZZLE_A;
644 cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
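            /* Illustrative mapping: reading a d24s8 texel as RGBA8UI gives
             * stencil in R and the three depth bytes in G/B/A; the swizzle
             * above moves the depth bytes into R/G/B of the written texel
             * and the color mask above leaves destination byte 3 alone.
             */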
645 }
646 break;
647 case VK_IMAGE_ASPECT_STENCIL_BIT:
648 assert(image->plane_count == 1);
649 assert(dst_copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
650 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
651 /* Copying from S8D24. We want to write 8-bit stencil values only,
652 * so adjust the buffer bpp for that. Since the hardware stores stencil
653 * in the LSB, we can just do a RGBA8UI to R8UI blit.
654 */
655 src_format = VK_FORMAT_R8G8B8A8_UINT;
656 dst_format = VK_FORMAT_R8_UINT;
657 buffer_bpp = 1;
658 break;
659 default:
660 unreachable("unsupported aspect");
661 return supported;
662 };
663 break;
664 case 2:
665 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
666 dst_copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
667 dst_format = VK_FORMAT_R16_UINT;
668 src_format = dst_format;
669 break;
670 case 1:
671 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
672 dst_format = VK_FORMAT_R8_UINT;
673 src_format = dst_format;
674 break;
675 default:
676 unreachable("unsupported bit-size");
677 return supported;
678 };
679
680 /* The hardware doesn't support linear depth/stencil stores, so we
681 * implement copies of depth/stencil aspect as color copies using a
682 * compatible color format.
683 */
684 assert(vk_format_is_color(src_format));
685 assert(vk_format_is_color(dst_format));
686 dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
687
688 /* We should be able to handle the blit if we got this far */
689 supported = true;
690
691 /* Obtain the 2D buffer region spec */
692 uint32_t buf_width, buf_height;
693 if (region->bufferRowLength == 0)
694 buf_width = region->imageExtent.width;
695 else
696 buf_width = region->bufferRowLength;
697
698 if (region->bufferImageHeight == 0)
699 buf_height = region->imageExtent.height;
700 else
701 buf_height = region->bufferImageHeight;
702
703 /* If the image is compressed, the bpp refers to blocks, not pixels */
704 uint32_t block_width =
705 vk_format_get_blockwidth(image->planes[plane].vk_format);
706 uint32_t block_height =
707 vk_format_get_blockheight(image->planes[plane].vk_format);
708 buf_width = DIV_ROUND_UP(buf_width, block_width);
709 buf_height = DIV_ROUND_UP(buf_height, block_height);
710
711 out_info->src_format = src_format;
712 out_info->dst_format = dst_format;
713 out_info->src_copy_aspect = src_copy_aspect;
714 out_info->dst_copy_aspect = dst_copy_aspect;
715 out_info->buf_width = buf_width;
716 out_info->buf_height = buf_height;
717 out_info->buf_bpp = buffer_bpp;
718 out_info->block_width = block_width;
719 out_info->block_height = block_height;
720 out_info->cmask = cmask;
721 out_info->cswizzle = cswizzle;
722 out_info->plane = plane;
723
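   /* Worked example (illustrative): for a stencil-aspect copy out of a
    * VK_FORMAT_D24_UNORM_S8_UINT image the code above selects
    * src_format = VK_FORMAT_R8G8B8A8_UINT, dst_format = VK_FORMAT_R8_UINT
    * and buf_bpp = 1, so the blit reads the packed d24s8 texels but only
    * the stencil byte reaches the buffer.
    */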
724 return supported;
725 }
726
/* Creates a linear image that aliases the buffer memory. It also adds that
 * image as a private object to the cmd_buffer.
 *
 * This is used for cases where we want to implement an image to buffer copy,
 * but we need to rely on a mechanism that uses an image as destination, like
 * blitting.
 */
static VkResult
create_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2 *region,
                         struct image_to_buffer_info *info,
                         uint32_t layer,
                         VkImage *out_image)
741 {
742 VkImageCreateInfo image_info = {
743 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
744 .imageType = VK_IMAGE_TYPE_2D,
745 .format = info->dst_format,
746 .extent = { info->buf_width, info->buf_height, 1 },
747 .mipLevels = 1,
748 .arrayLayers = 1,
749 .samples = VK_SAMPLE_COUNT_1_BIT,
750 .tiling = VK_IMAGE_TILING_LINEAR,
751 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
752 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
753 .queueFamilyIndexCount = 0,
754 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
755 };
756
757 VkResult result;
758 struct v3dv_device *device = cmd_buffer->device;
759 VkDevice _device = v3dv_device_to_handle(device);
760
761 VkImage buffer_image;
762 result =
763 v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
764 if (result != VK_SUCCESS)
765 return result;
766
767 *out_image = buffer_image;
768
769 v3dv_cmd_buffer_add_private_obj(
770 cmd_buffer, (uintptr_t)buffer_image,
771 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
772
773 /* Bind the buffer memory to the image
774 */
775 VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
776 layer * info->buf_width * info->buf_height * info->buf_bpp;
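   /* For example (illustrative only): for layer 2 of a 256x128 buffer region
    * with buf_bpp = 4 the image is bound 2 * 256 * 128 * 4 = 262144 bytes
    * past buffer->mem_offset + region->bufferOffset.
    */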
777
778 result =
779 vk_common_BindImageMemory(_device, buffer_image,
780 v3dv_device_memory_to_handle(buffer->mem),
781 buffer_offset);
782 return result;
783 }
784
785 /**
786 * Creates an image with a single mip level that aliases the memory of a
787 * mip level in another image, re-interpreting the memory with an uncompressed
788 * format. The image is added to the command buffer as a private object for
789 * disposal.
790 */
static bool
create_image_mip_level_alias(struct v3dv_cmd_buffer *cmd_buffer,
                             struct v3dv_image *image,
                             VkFormat format,
                             uint32_t plane,
                             uint32_t mip_level,
                             uint32_t layer,
                             VkImage *alias)
799 {
800 VkResult result;
801 assert(!vk_format_is_compressed(format));
802
803 struct v3dv_device *device = cmd_buffer->device;
804 VkDevice vk_device = v3dv_device_to_handle(device);
805 uint32_t mip_width = image->planes[plane].slices[mip_level].width;
806 uint32_t mip_height = image->planes[plane].slices[mip_level].height;
807
808 uint32_t block_width =
809 vk_format_get_blockwidth(image->planes[plane].vk_format);
810 uint32_t block_height =
811 vk_format_get_blockheight(image->planes[plane].vk_format);
812
813 VkImageCreateInfo info = {
814 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
815 .imageType = image->vk.image_type,
816 .format = format,
817 .extent = { DIV_ROUND_UP(mip_width, block_width),
818 DIV_ROUND_UP(mip_height, block_height),
819 1 },
820 .mipLevels = 1,
821 .arrayLayers = 1,
822 .samples = image->vk.samples,
823 .tiling = image->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
824 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
825 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
826 .queueFamilyIndexCount = 0,
827 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
828 };
829 result = v3dv_CreateImage(vk_device, &info, &device->vk.alloc, alias);
830 if (result != VK_SUCCESS)
831 return false;
832
833 /* The alias we have just created has just one mip, but we may be aliasing
834 * any mip in the original image. Because the slice setup changes based on
835 * the mip (particularly, for mips >= 2 it uses power of 2 sizes internally)
836 * and this can influence the tiling layout selected for the slice, we want
837 * to make sure we copy the slice description from the actual mip level in
838 * the original image, and then rewrite any fields that we need for the
839 * alias. Particularly, we want to make the offset 0 because we are going to
840 * bind the underlying image memory exactly at the start of the selected mip.
841 * We also want to relax the image alignment requirements to the minimum
842 * (the one imposed by the Texture Base Address field) since we may not be
843 * aliasing a level 0 (for which we typically want a page alignment for
844 * optimal performance).
845 */
846 V3DV_FROM_HANDLE(v3dv_image, v3dv_alias, *alias);
847 v3dv_alias->planes[plane].slices[0] = image->planes[plane].slices[mip_level];
848 v3dv_alias->planes[plane].slices[0].width = info.extent.width;
849 v3dv_alias->planes[plane].slices[0].height = info.extent.height;
850 v3dv_alias->planes[plane].slices[0].offset = 0;
851 v3dv_alias->planes[plane].alignment = 64;
852
853 v3dv_cmd_buffer_add_private_obj(
854 cmd_buffer, (uintptr_t)*alias,
855 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
856
857 result =
858 vk_common_BindImageMemory(vk_device, *alias,
859 v3dv_device_memory_to_handle(image->planes[plane].mem),
860 v3dv_layer_offset(image, mip_level, layer, plane));
861 return result == VK_SUCCESS;
862 }
863
864 /**
865 * Returns true if the implementation supports the requested operation (even if
866 * it failed to process it, for example, due to an out-of-memory error).
867 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy2 *region)
873 {
874 bool handled = false;
875 struct image_to_buffer_info info;
876
877 /* This path uses a shader blit which doesn't support linear images. Return
878 * early to avoid all the heavy lifting in preparation for the
879 * blit_shader() call that is bound to fail in that scenario.
880 */
881 if (!image->tiled && image->vk.image_type != VK_IMAGE_TYPE_1D) {
882 return handled;
883 }
884
885 handled = gather_image_to_buffer_info(cmd_buffer, image, region,
886 &info);
887
888 if (!handled)
889 return handled;
890
891 /* We should be able to handle the blit if we got this far */
892 handled = true;
893
894 /* Compute layers to copy */
895 uint32_t num_layers;
896 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
897 num_layers = region->imageSubresource.layerCount;
898 else
899 num_layers = region->imageExtent.depth;
900 assert(num_layers > 0);
901
902 /* Copy requested layers */
903 VkResult result;
904 VkImageBlit2 blit_region;
905 uint32_t mip_level = region->imageSubresource.mipLevel;
906 uint32_t base_layer = region->imageSubresource.baseArrayLayer;
907 for (uint32_t i = 0; i < num_layers; i++) {
908 uint32_t layer_offset = i;
909
910 if (vk_format_is_compressed(image->vk.format)) {
911 /* Our blit interface can see the real format of the images to detect
912 * copies between compressed and uncompressed images and adapt the
913 * blit region accordingly. Here we are just doing a raw copy of
914 * compressed data, but we are passing an uncompressed view of the
915 * buffer for the blit destination image (since compressed formats are
916 * not renderable), so we also want to provide an uncompressed view of
917 * the source image.
918 *
919 * It is important that we create the alias over the selected mip
920 * level (instead of aliasing the entire image) because an uncompressed
921 * view of the image won't have the same number of mip levels as the
922 * original image and the implicit mip size calculations the hw will
923 * do to sample from a non-zero mip level may not match exactly between
924 * compressed and uncompressed views.
925 */
926 VkImage alias;
927 if (!create_image_mip_level_alias(cmd_buffer, image, info.dst_format,
928 info.plane, mip_level,
929 base_layer + layer_offset,
930 &alias)) {
931 return handled;
932 }
933
934 /* We are aliasing the selected mip level and layer with a
935 * single-mip and single-layer image.
936 */
937 image = v3dv_image_from_handle(alias);
938 mip_level = 0;
939 base_layer = 0;
940 layer_offset = 0;
941 }
942
943 /* Create the destination blit image from the destination buffer */
944 VkImage buffer_image;
945 result =
946 create_image_from_buffer(cmd_buffer, buffer, region, &info,
947 i, &buffer_image);
948 if (result != VK_SUCCESS)
949 return handled;
950
951 /* Blit-copy the requested image extent.
952 *
953 * Since we are copying, the blit must use the same format on the
954 * destination and source images to avoid format conversions. The
955 * only exception is copying stencil, which we upload to a R8UI source
956 * image, but that we need to blit to a S8D24 destination (the only
957 * stencil format we support).
958 */
      blit_region =
         blit_region_for_image_to_buffer(&region->imageOffset,
                                         &region->imageExtent,
                                         mip_level, base_layer, layer_offset,
                                         &info);
964
965 handled = blit_shader(cmd_buffer,
966 v3dv_image_from_handle(buffer_image),
967 info.dst_format,
968 image, info.src_format,
969 info.cmask, &info.cswizzle,
970 &blit_region, VK_FILTER_NEAREST, false);
971 if (!handled) {
972 /* This is unexpected, we should have a supported blit spec */
973 unreachable("Unable to blit buffer to destination image");
974 return false;
975 }
976 }
977
978 assert(handled);
979 return true;
980 }
981
982 static bool
983 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
984 struct v3dv_image *dst,
985 struct v3dv_image *src,
986 const VkImageCopy2 *region);
987
static VkImageCopy2
image_copy_region_for_image_to_buffer(const VkBufferImageCopy2 *region,
                                      struct image_to_buffer_info *info,
                                      uint32_t layer)
992 {
993 VkImageCopy2 output = {
994 .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
995 .srcSubresource = {
996 .aspectMask = info->src_copy_aspect,
997 .mipLevel = region->imageSubresource.mipLevel,
998 .baseArrayLayer = region->imageSubresource.baseArrayLayer + layer,
999 .layerCount = 1,
1000 },
1001 .srcOffset = {
1002 DIV_ROUND_UP(region->imageOffset.x, info->block_width),
1003 DIV_ROUND_UP(region->imageOffset.y, info->block_height),
1004 region->imageOffset.z,
1005 },
1006 .dstSubresource = {
1007 .aspectMask = info->dst_copy_aspect,
1008 .mipLevel = 0,
1009 .baseArrayLayer = 0,
1010 .layerCount = 1,
1011 },
1012 .dstOffset = { 0, 0, 0 },
1013 .extent = {
1014 DIV_ROUND_UP(region->imageExtent.width, info->block_width),
1015 DIV_ROUND_UP(region->imageExtent.height, info->block_height),
1016 1
1017 },
1018 };
1019
1020 return output;
1021 }
1022
1023 /**
1024 * Returns true if the implementation supports the requested operation (even if
1025 * it failed to process it, for example, due to an out-of-memory error).
1026 */
static bool
copy_image_to_buffer_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                                  struct v3dv_buffer *dst_buffer,
                                  struct v3dv_image *src_image,
                                  const VkBufferImageCopy2 *region)
1032 {
1033 bool handled = false;
1034 VkImage dst_buffer_image;
1035 struct image_to_buffer_info info;
1036
   /* This is a requirement for copy_image_linear_texel_buffer below. We check
    * it in advance so we can do an early return.
    */
1040 if (src_image->tiled)
1041 return false;
1042
1043 handled =
1044 gather_image_to_buffer_info(cmd_buffer, src_image, region,
1045 &info);
1046 if (!handled)
1047 return handled;
1048
   /* At this point the implementation should support the copy; any possible
    * errors below are due to other reasons, such as an out-of-memory error.
    */
1052 handled = true;
1053
1054 uint32_t num_layers;
1055 if (src_image->vk.image_type != VK_IMAGE_TYPE_3D)
1056 num_layers = region->imageSubresource.layerCount;
1057 else
1058 num_layers = region->imageExtent.depth;
1059 assert(num_layers > 0);
1060
1061 VkResult result;
1062 VkImageCopy2 image_region;
1063 for (uint32_t layer = 0; layer < num_layers; layer++) {
1064 /* Create the destination image from the destination buffer */
1065 result =
1066 create_image_from_buffer(cmd_buffer, dst_buffer, region, &info,
1067 layer, &dst_buffer_image);
1068 if (result != VK_SUCCESS)
1069 return handled;
1070
1071 image_region =
1072 image_copy_region_for_image_to_buffer(region, &info, layer);
1073
1074 handled =
1075 copy_image_linear_texel_buffer(cmd_buffer,
1076 v3dv_image_from_handle(dst_buffer_image),
1077 src_image, &image_region);
1078 }
1079
1080 return handled;
1081 }
1082
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                           const VkCopyImageToBufferInfo2 *info)
1086
1087 {
1088 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1089 V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
1090 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
1091
1092 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1093
1094 cmd_buffer->state.is_transfer = true;
1095
1096 for (uint32_t i = 0; i < info->regionCount; i++) {
1097 const VkBufferImageCopy2 *region = &info->pRegions[i];
1098
1099 if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, region))
1100 continue;
1101
1102 if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, region))
1103 continue;
1104
1105 if (copy_image_to_buffer_texel_buffer(cmd_buffer, buffer, image, region))
1106 continue;
1107
1108 unreachable("Unsupported image to buffer copy.");
1109 }
1110 cmd_buffer->state.is_transfer = false;
1111 }
1112
1113 /**
1114 * Returns true if the implementation supports the requested operation (even if
1115 * it failed to process it, for example, due to an out-of-memory error).
1116 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2 *region)
1122 {
1123 if (V3D_DBG(DISABLE_TFU)) {
1124 perf_debug("Copy images: TFU disabled, fallbacks could be slower.\n");
1125 return false;
1126 }
1127
1128 /* Destination can't be raster format */
1129 if (!dst->tiled)
1130 return false;
1131
1132 /* We can only do full copies, so if the format is D24S8 both aspects need
1133 * to be copied. We only need to check the dst format because the spec
1134 * states that depth/stencil formats must match exactly.
1135 */
1136 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
1137 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
1138 VK_IMAGE_ASPECT_STENCIL_BIT;
1139 if (region->dstSubresource.aspectMask != ds_aspects)
1140 return false;
1141 }
1142
1143 /* Don't handle copies between uncompressed and compressed formats for now.
1144 *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that makes such copies with full images (which we
1147 * require here), only partial copies. Also, in that case the code below that
1148 * checks for "dst image complete" requires some changes, since it is
1149 * checking against the region dimensions, which are in units of the source
1150 * image format.
1151 */
1152 if (vk_format_is_compressed(dst->vk.format) !=
1153 vk_format_is_compressed(src->vk.format)) {
1154 return false;
1155 }
1156
1157 /* Source region must start at (0,0) */
1158 if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
1159 return false;
1160
1161 /* Destination image must be complete */
1162 if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
1163 return false;
1164
1165 uint8_t src_plane =
1166 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1167 uint8_t dst_plane =
1168 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1169
1170 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
1171 uint32_t dst_width = u_minify(dst->planes[dst_plane].width, dst_mip_level);
1172 uint32_t dst_height = u_minify(dst->planes[dst_plane].height, dst_mip_level);
1173 if (region->extent.width != dst_width || region->extent.height != dst_height)
1174 return false;
1175
1176 /* From vkCmdCopyImage:
1177 *
1178 * "When copying between compressed and uncompressed formats the extent
1179 * members represent the texel dimensions of the source image and not
1180 * the destination."
1181 */
1182 const uint32_t block_w =
1183 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1184 const uint32_t block_h =
1185 vk_format_get_blockheight(src->planes[src_plane].vk_format);
1186 uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1187 uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1188
1189 /* Account for sample count */
1190 assert(dst->vk.samples == src->vk.samples);
1191 if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
1192 assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
1193 width *= 2;
1194 height *= 2;
1195 }
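   /* (A 4x multisampled surface is effectively laid out as a 2x2 expanded
    * single-sample image, hence the doubling of the copy dimensions above.)
    */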
1196
1197 /* The TFU unit doesn't handle format conversions so we need the formats to
1198 * match. On the other hand, vkCmdCopyImage allows different color formats
1199 * on the source and destination images, but only if they are texel
1200 * compatible. For us, this means that we can effectively ignore different
1201 * formats and just make the copy using either of them, since we are just
1202 * moving raw data and not making any conversions.
1203 *
1204 * Also, the formats supported by the TFU unit are limited, but again, since
1205 * we are only doing raw copies here without interpreting or converting
1206 * the underlying pixel data according to its format, we can always choose
1207 * to use compatible formats that are supported with the TFU unit.
1208 */
1209 assert(dst->planes[dst_plane].cpp == src->planes[src_plane].cpp);
1210 const struct v3dv_format *format =
1211 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1212 dst->planes[dst_plane].cpp, NULL);
1213
1214 /* Emit a TFU job for each layer to blit */
1215 const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1216 region->dstSubresource.layerCount :
1217 region->extent.depth;
1218 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
1219
1220 const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
1221 region->srcSubresource.baseArrayLayer : region->srcOffset.z;
1222 const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1223 region->dstSubresource.baseArrayLayer : region->dstOffset.z;
1224 for (uint32_t i = 0; i < layer_count; i++) {
1225 const uint32_t dst_offset =
1226 dst->planes[dst_plane].mem->bo->offset +
1227 v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i, dst_plane);
1228 const uint32_t src_offset =
1229 src->planes[src_plane].mem->bo->offset +
1230 v3dv_layer_offset(src, src_mip_level, base_src_layer + i, src_plane);
1231
1232 const struct v3d_resource_slice *dst_slice =
1233 &dst->planes[dst_plane].slices[dst_mip_level];
1234 const struct v3d_resource_slice *src_slice =
1235 &src->planes[src_plane].slices[src_mip_level];
1236
1237 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
1238 cmd_buffer,
1239 dst->planes[dst_plane].mem->bo->handle,
1240 dst_offset,
1241 dst_slice->tiling,
1242 dst_slice->padded_height,
1243 dst->planes[dst_plane].cpp,
1244 src->planes[src_plane].mem->bo->handle,
1245 src_offset,
1246 src_slice->tiling,
1247 src_slice->tiling == V3D_TILING_RASTER ?
1248 src_slice->stride : src_slice->padded_height,
1249 src->planes[src_plane].cpp,
1250 /* All compatible TFU formats are single-plane */
1251 width, height, &format->planes[0]);
1252 }
1253
1254 return true;
1255 }
1256
inline bool
v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_image *dst,
                               struct v3dv_image *src,
                               const VkImageCopy2 *region)
1262 {
1263 return copy_image_tfu(cmd_buffer, dst, src, region);
1264 }
1265
1266 /**
1267 * Returns true if the implementation supports the requested operation (even if
1268 * it failed to process it, for example, due to an out-of-memory error).
1269 */
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2 *region)
1275 {
1276 uint8_t src_plane =
1277 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1278 assert(src_plane < src->plane_count);
1279 uint8_t dst_plane =
1280 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1281 assert(dst_plane < dst->plane_count);
1282
1283 VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(src, src_plane, region->srcSubresource.mipLevel,
                              &region->srcOffset, NULL, &fb_format) ||
       !v3dv_meta_can_use_tlb(dst, dst_plane, region->dstSubresource.mipLevel,
                              &region->dstOffset, &region->extent, &fb_format)) {
1288 return false;
1289 }
1290
1291 /* From the Vulkan spec, VkImageCopy valid usage:
1292 *
1293 * "If neither the calling command’s srcImage nor the calling command’s
1294 * dstImage has a multi-planar image format then the aspectMask member
1295 * of srcSubresource and dstSubresource must match."
1296 */
1297 assert(src->plane_count != 1 || dst->plane_count != 1 ||
1298 region->dstSubresource.aspectMask ==
1299 region->srcSubresource.aspectMask);
1300 uint32_t internal_type, internal_bpp;
1301 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
1302 (fb_format, region->dstSubresource.aspectMask,
1303 &internal_type, &internal_bpp);
1304
1305 /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
1306 *
1307 * "The number of slices of the extent (for 3D) or layers of the
1308 * srcSubresource (for non-3D) must match the number of slices of the
1309 * extent (for 3D) or layers of the dstSubresource (for non-3D)."
1310 */
1311 assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
1312 region->srcSubresource.layerCount : region->extent.depth) ==
1313 (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1314 region->dstSubresource.layerCount : region->extent.depth));
1315 uint32_t num_layers;
1316 if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
1317 num_layers = region->dstSubresource.layerCount;
1318 else
1319 num_layers = region->extent.depth;
1320 assert(num_layers > 0);
1321
1322 struct v3dv_job *job =
1323 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1324 if (!job)
1325 return true;
1326
1327 /* Handle copy to compressed image using compatible format */
1328 const uint32_t block_w =
1329 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1330 const uint32_t block_h =
1331 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1332 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1333 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1334
1335 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
1336 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1337 src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
1338
1339 struct v3dv_meta_framebuffer framebuffer;
1340 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
1341 internal_type, &job->frame_tiling);
1342
1343 v3dv_X(job->device, job_emit_binning_flush)(job);
1344 v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
1345
1346 v3dv_cmd_buffer_finish_job(cmd_buffer);
1347
1348 return true;
1349 }
1350
1351 /**
1352 * Takes the image provided as argument and creates a new image that has
1353 * the same specification and aliases the same memory storage, except that:
1354 *
1355 * - It has the uncompressed format passed in.
1356 * - Its original width/height are scaled by the factors passed in.
1357 *
1358 * This is useful to implement copies from compressed images using the blit
1359 * path. The idea is that we create uncompressed "image views" of both the
1360 * source and destination images using the uncompressed format and then we
1361 * define the copy blit in terms of that format.
1362 */
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *src,
                   float width_scale,
                   float height_scale,
                   VkFormat format)
1369 {
1370 assert(!vk_format_is_compressed(format));
1371 /* We don't support ycbcr compressed formats */
1372 assert(src->plane_count == 1);
1373
1374 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
1375
1376 VkImageCreateInfo info = {
1377 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1378 .imageType = src->vk.image_type,
1379 .format = format,
1380 .extent = {
1381 .width = src->vk.extent.width * width_scale,
1382 .height = src->vk.extent.height * height_scale,
1383 .depth = src->vk.extent.depth,
1384 },
1385 .mipLevels = src->vk.mip_levels,
1386 .arrayLayers = src->vk.array_layers,
1387 .samples = src->vk.samples,
1388 .tiling = src->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
1389 .usage = src->vk.usage,
1390 };
1391
1392 VkImage _image;
1393 VkResult result =
1394 v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
1395 if (result != VK_SUCCESS) {
1396 v3dv_flag_oom(cmd_buffer, NULL);
1397 return NULL;
1398 }
1399
1400 struct v3dv_image *image = v3dv_image_from_handle(_image);
1401 image->planes[0].mem = src->planes[0].mem;
1402 image->planes[0].mem_offset = src->planes[0].mem_offset;
1403 return image;
1404 }
1405
1406 /**
1407 * Returns true if the implementation supports the requested operation (even if
1408 * it failed to process it, for example, due to an out-of-memory error).
1409 */
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *dst,
                struct v3dv_image *src,
                const VkImageCopy2 *region)
1415 {
1416 if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D)
1417 return false;
1418
1419 uint8_t src_plane =
1420 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1421 assert(src_plane < src->plane_count);
1422 uint8_t dst_plane =
1423 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1424 assert(dst_plane < dst->plane_count);
1425
1426 const uint32_t src_block_w =
1427 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1428 const uint32_t src_block_h =
1429 vk_format_get_blockheight(src->planes[src_plane].vk_format);
1430 const uint32_t dst_block_w =
1431 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1432 const uint32_t dst_block_h =
1433 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1434 const float block_scale_w = (float)src_block_w / (float)dst_block_w;
1435 const float block_scale_h = (float)src_block_h / (float)dst_block_h;
1436
   /* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are no format conversions going on. Since we
    * are going to blit, we need to make sure that the selected format can be
    * both rendered to and textured from.
    */
1442 VkFormat format;
1443 float src_scale_w = 1.0f;
1444 float src_scale_h = 1.0f;
1445 float dst_scale_w = block_scale_w;
1446 float dst_scale_h = block_scale_h;
1447 if (vk_format_is_compressed(src->vk.format)) {
1448 /* If we are copying from a compressed format we should be aware that we
1449 * are going to texture from the source image, and the texture setup
1450 * knows the actual size of the image, so we need to choose a format
1451 * that has a per-texel (not per-block) bpp that is compatible for that
1452 * image size. For example, for a source image with size Bw*WxBh*H
1453 * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
1454 * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
1455 * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
1456 * so we could specify a blit with size Bw*WxBh*H and a format with
1457 * a bpp of 8-bit per texel (R8_UINT).
1458 *
1459 * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
1460 * which is 64-bit per texel, then we would need a 4-bit format, which
1461 * we don't have, so instead we still choose an 8-bit format, but we
1462 * apply a divisor to the row dimensions of the blit, since we are
1463 * copying two texels per item.
1464 *
1465 * Generally, we can choose any format so long as we compute appropriate
1466 * divisors for the width and height depending on the source image's
1467 * bpp.
1468 */
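      /* Illustrative example: for an ETC2_R8G8B8A8 to ETC2_R8G8B8A8 copy,
       * both blocks are 4x4 and 16 bytes, so the aliases below use
       * VK_FORMAT_R32G32B32A32_UINT with width/height scaled by 1/4 and
       * each compressed block moves as a single RGBA32 texel.
       */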
1469 assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1470
1471 format = VK_FORMAT_R32G32_UINT;
1472 switch (src->planes[src_plane].cpp) {
1473 case 16:
1474 format = VK_FORMAT_R32G32B32A32_UINT;
1475 break;
1476 case 8:
1477 format = VK_FORMAT_R16G16B16A16_UINT;
1478 break;
1479 default:
1480 unreachable("Unsupported compressed format");
1481 }
1482
1483 /* Create image views of the src/dst images that we can interpret in
1484 * terms of the canonical format.
1485 */
1486 src_scale_w /= src_block_w;
1487 src_scale_h /= src_block_h;
1488 dst_scale_w /= src_block_w;
1489 dst_scale_h /= src_block_h;
1490
1491 src = create_image_alias(cmd_buffer, src,
1492 src_scale_w, src_scale_h, format);
1493
1494 dst = create_image_alias(cmd_buffer, dst,
1495 dst_scale_w, dst_scale_h, format);
1496 } else {
1497 format = src->format->planes[src_plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
1498 src->planes[src_plane].vk_format :
1499 get_compatible_tlb_format(src->planes[src_plane].vk_format);
1500 if (format == VK_FORMAT_UNDEFINED)
1501 return false;
1502
1503 const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
1504 assert(f->plane_count < 2);
1505 if (!f->plane_count || f->planes[0].tex_type == TEXTURE_DATA_FORMAT_NO)
1506 return false;
1507 }
1508
1509 /* Given an uncompressed image with size WxH, if we copy it to a compressed
1510 * image, it will result in an image with size W*bWxH*bH, where bW and bH
1511 * are the compressed format's block width and height. This means that
1512 * copies between compressed and uncompressed images involve different
1513 * image sizes, and therefore, we need to take that into account when
1514 * setting up the source and destination blit regions below, so they are
1515 * consistent from the point of view of the single compatible format
1516 * selected for the copy.
1517 *
1518 * We should take into account that the dimensions of the region provided
1519 * to the copy command are specified in terms of the source image. With that
1520 * in mind, below we adjust the blit destination region to be consistent with
1521 * the source region for the compatible format, so basically, we apply
1522 * the block scale factor to the destination offset provided by the copy
1523 * command (because it is specified in terms of the destination image, not
1524 * the source), and then we just add the region copy dimensions to that
1525 * (since the region dimensions are already specified in terms of the source
1526 * image).
1527 */
1528 uint32_t region_width = region->extent.width * src_scale_w;
1529 uint32_t region_height = region->extent.height * src_scale_h;
1530 if (src_block_w > 1)
1531 region_width = util_next_power_of_two(region_width);
1532 if (src_block_h > 1)
1533 region_height = util_next_power_of_two(region_height);
1534
1535 const VkOffset3D src_start = {
1536 region->srcOffset.x * src_scale_w,
1537 region->srcOffset.y * src_scale_h,
1538 region->srcOffset.z,
1539 };
1540 const VkOffset3D src_end = {
1541 src_start.x + region_width,
1542 src_start.y + region_height,
1543 src_start.z + region->extent.depth,
1544 };
1545
1546 const VkOffset3D dst_start = {
1547 region->dstOffset.x * dst_scale_w,
1548 region->dstOffset.y * dst_scale_h,
1549 region->dstOffset.z,
1550 };
1551 const VkOffset3D dst_end = {
1552 dst_start.x + region_width,
1553 dst_start.y + region_height,
1554 dst_start.z + region->extent.depth,
1555 };
1556
1557 const VkImageBlit2 blit_region = {
1558 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
1559 .srcSubresource = region->srcSubresource,
1560 .srcOffsets = { src_start, src_end },
1561 .dstSubresource = region->dstSubresource,
1562 .dstOffsets = { dst_start, dst_end },
1563 };
1564 bool handled = blit_shader(cmd_buffer,
1565 dst, format,
1566 src, format,
1567 0, NULL,
1568 &blit_region, VK_FILTER_NEAREST, true);
1569
1570 /* We should have selected formats that we can blit */
1571 assert(handled);
1572 return handled;
1573 }
1574
1575 static bool
1576 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1577 struct v3dv_image *dst,
1578 struct v3dv_image *src,
1579 const VkImageCopy2 *region)
1580 {
1581 if (src->tiled)
1582 return false;
1583
1584 /* Implementations are allowed to restrict linear images like this */
1585 assert(region->srcOffset.z == 0);
1586 assert(region->dstOffset.z == 0);
1587 assert(region->srcSubresource.mipLevel == 0);
1588 assert(region->srcSubresource.baseArrayLayer == 0);
1589 assert(region->srcSubresource.layerCount == 1);
1590 assert(region->dstSubresource.mipLevel == 0);
1591 assert(region->dstSubresource.baseArrayLayer == 0);
1592 assert(region->dstSubresource.layerCount == 1);
1593
1594 uint8_t src_plane =
1595 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1596 uint8_t dst_plane =
1597 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1598
1599 assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1600 const uint32_t bpp = src->planes[src_plane].cpp;
1601
1602 VkFormat format;
1603 switch (bpp) {
1604 case 16:
1605 format = VK_FORMAT_R32G32B32A32_UINT;
1606 break;
1607 case 8:
1608 format = VK_FORMAT_R16G16B16A16_UINT;
1609 break;
1610 case 4:
1611 format = VK_FORMAT_R8G8B8A8_UINT;
1612 break;
1613 case 2:
1614 format = VK_FORMAT_R16_UINT;
1615 break;
1616 case 1:
1617 format = VK_FORMAT_R8_UINT;
1618 break;
1619 default:
1620 unreachable("unsupported bit-size");
1621 return false;
1622 }
1623
1624 VkComponentMapping ident_swizzle = {
1625 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1626 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
1627 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
1628 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
1629 };
1630
1631 const uint32_t buf_stride = src->planes[src_plane].slices[0].stride;
1632 const VkDeviceSize buf_offset =
1633 region->srcOffset.y * buf_stride + region->srcOffset.x * bpp;
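/* Illustrative example: for a linear 256x256 RGBA8 source plane (bpp = 4,
 * stride = 1024 bytes) and srcOffset = (16, 32), the copy region below
 * starts at byte 32 * 1024 + 16 * 4 = 32832 into the plane.
 */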
1634
1635 struct v3dv_buffer src_buffer;
1636 vk_object_base_init(&cmd_buffer->device->vk, &src_buffer.base,
1637 VK_OBJECT_TYPE_BUFFER);
1638
1639 const struct VkBufferCreateInfo buf_create_info = {
1640 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1641 .size = src->planes[src_plane].size,
1642 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1643 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1644 };
1645 v3dv_buffer_init(cmd_buffer->device, &buf_create_info, &src_buffer,
1646 src->planes[src_plane].alignment);
1647
1648 const VkBindBufferMemoryInfo buf_bind_info = {
1649 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1650 .buffer = v3dv_buffer_to_handle(&src_buffer),
1651 .memory = v3dv_device_memory_to_handle(src->planes[src_plane].mem),
1652 .memoryOffset = src->planes[src_plane].mem_offset +
1653 v3dv_layer_offset(src, 0, 0, src_plane),
1654 };
1655 v3dv_buffer_bind_memory(&buf_bind_info);
1656
1657 const VkBufferImageCopy2 copy_region = {
1658 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
1659 .pNext = NULL,
1660 .bufferOffset = buf_offset,
1661 .bufferRowLength = buf_stride / bpp,
1662 .bufferImageHeight = src->vk.extent.height,
1663 .imageSubresource = region->dstSubresource,
1664 .imageOffset = region->dstOffset,
1665 .imageExtent = region->extent,
1666 };
1667
1668 return texel_buffer_shader_copy(cmd_buffer,
1669 region->dstSubresource.aspectMask,
1670 dst,
1671 format,
1672 format,
1673 &src_buffer,
1674 src->planes[src_plane].cpp,
1675 0 /* color mask: full */, &ident_swizzle,
1676 1, &copy_region);
1677 }
1678
1679 VKAPI_ATTR void VKAPI_CALL
1680 v3dv_CmdCopyImage2(VkCommandBuffer commandBuffer,
1681 const VkCopyImageInfo2 *info)
1682
1683 {
1684 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1685 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
1686 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
1687
1688 assert(src->vk.samples == dst->vk.samples);
1689
1690 cmd_buffer->state.is_transfer = true;
1691
1692 for (uint32_t i = 0; i < info->regionCount; i++) {
1693 const VkImageCopy2 *region = &info->pRegions[i];
1694 if (copy_image_tfu(cmd_buffer, dst, src, region))
1695 continue;
1696 if (copy_image_tlb(cmd_buffer, dst, src, region))
1697 continue;
1698 if (copy_image_blit(cmd_buffer, dst, src, region))
1699 continue;
1700 if (copy_image_linear_texel_buffer(cmd_buffer, dst, src, region))
1701 continue;
1702 unreachable("Image copy not supported");
1703 }
1704
1705 cmd_buffer->state.is_transfer = false;
1706 }
1707
1708 VKAPI_ATTR void VKAPI_CALL
1709 v3dv_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1710 const VkCopyBufferInfo2 *pCopyBufferInfo)
1711 {
1712 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1713 V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
1714 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
1715
1716 cmd_buffer->state.is_transfer = true;
1717
1718 for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
1719 v3dv_X(cmd_buffer->device, meta_copy_buffer)
1720 (cmd_buffer,
1721 dst_buffer->mem->bo, dst_buffer->mem_offset,
1722 src_buffer->mem->bo, src_buffer->mem_offset,
1723 &pCopyBufferInfo->pRegions[i]);
1724 }
1725
1726 cmd_buffer->state.is_transfer = false;
1727 }
1728
1729 static void
1730 destroy_update_buffer_cb(VkDevice _device,
1731 uint64_t pobj,
1732 VkAllocationCallbacks *alloc)
1733 {
1734 V3DV_FROM_HANDLE(v3dv_device, device, _device);
1735 struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
1736 v3dv_bo_free(device, bo);
1737 }
1738
1739 VKAPI_ATTR void VKAPI_CALL
1740 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
1741 VkBuffer dstBuffer,
1742 VkDeviceSize dstOffset,
1743 VkDeviceSize dataSize,
1744 const void *pData)
1745 {
1746 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1747 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1748
1749 struct v3dv_bo *src_bo =
1750 v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
1751 if (!src_bo) {
1752 fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
1753 return;
1754 }
1755
1756 bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
1757 if (!ok) {
1758 fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
1759 return;
1760 }
1761
1762 cmd_buffer->state.is_transfer = true;
1763
1764 memcpy(src_bo->map, pData, dataSize);
1765
1766 v3dv_bo_unmap(cmd_buffer->device, src_bo);
1767
1768 VkBufferCopy2 region = {
1769 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
1770 .srcOffset = 0,
1771 .dstOffset = dstOffset,
1772 .size = dataSize,
1773 };
1774 struct v3dv_job *copy_job =
1775 v3dv_X(cmd_buffer->device, meta_copy_buffer)
1776 (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
1777 src_bo, 0, &region);
1778
1779 if (copy_job) {
1780 v3dv_cmd_buffer_add_private_obj(
1781 cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
1782 }
1783
1784 cmd_buffer->state.is_transfer = false;
1785 }
1786
1787 VKAPI_ATTR void VKAPI_CALL
1788 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
1789 VkBuffer dstBuffer,
1790 VkDeviceSize dstOffset,
1791 VkDeviceSize size,
1792 uint32_t data)
1793 {
1794 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1795 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1796
1797 cmd_buffer->state.is_transfer = true;
1798
1799 struct v3dv_bo *bo = dst_buffer->mem->bo;
1800
1801 /* From the Vulkan spec:
1802 *
1803 * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
1804 * a multiple of 4, then the nearest smaller multiple is used."
1805 */
1806 if (size == VK_WHOLE_SIZE) {
1807 size = dst_buffer->size - dstOffset;
1808 size -= size % 4;
1809 }
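/* E.g. filling a 1000-byte buffer from dstOffset 10 leaves 990 bytes,
 * which rounds down to 988 so the fill covers a whole number of words.
 */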
1810
1811 v3dv_X(cmd_buffer->device, meta_fill_buffer)
1812 (cmd_buffer, bo, dstOffset, size, data);
1813
1814 cmd_buffer->state.is_transfer = false;
1815 }
1816
1817 /**
1818 * Returns true if the implementation supports the requested operation (even if
1819 * it failed to process it, for example, due to an out-of-memory error).
1820 */
1821 static bool
1822 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1823 struct v3dv_image *image,
1824 struct v3dv_buffer *buffer,
1825 const VkBufferImageCopy2 *region)
1826 {
1827 if (V3D_DBG(DISABLE_TFU)) {
1828 perf_debug("Copy buffer to image: TFU disabled, fallbacks could be slower.\n");
1829 return false;
1830 }
1831
1832 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1833
1834 /* Destination can't be raster format */
1835 if (!image->tiled)
1836 return false;
1837
1838 /* We can't copy D24S8 because buffer to image copies only copy one aspect
1839 * at a time, and the TFU copies full images. Also, V3D stores the depth
1840 * bits for both D24S8 and D24X8 in the 24 MSB of each 32-bit word, but
1841 * the Vulkan spec specifies the buffer data the other way around, so this
1842 * is not a straight copy: we would have to swizzle the channels, which the
1843 * TFU can't do.
1844 */
1845 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
1846 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
1847 return false;
1848 }
1849
1850 /* Region must include full slice */
1851 const uint32_t offset_x = region->imageOffset.x;
1852 const uint32_t offset_y = region->imageOffset.y;
1853 if (offset_x != 0 || offset_y != 0)
1854 return false;
1855
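/* Per the Vulkan spec, a bufferRowLength/bufferImageHeight of 0 means the
 * buffer is tightly packed to imageExtent; e.g. with bufferRowLength = 0
 * and imageExtent.width = 100 each buffer row holds 100 texels, while
 * bufferRowLength = 128 would add 28 texels of padding per row.
 */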
1856 uint32_t width, height;
1857 if (region->bufferRowLength == 0)
1858 width = region->imageExtent.width;
1859 else
1860 width = region->bufferRowLength;
1861
1862 if (region->bufferImageHeight == 0)
1863 height = region->imageExtent.height;
1864 else
1865 height = region->bufferImageHeight;
1866
1867 const uint8_t plane =
1868 v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1869
1870 const uint32_t mip_level = region->imageSubresource.mipLevel;
1871 const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
1872
1873 if (width != slice->width || height != slice->height)
1874 return false;
1875
1876 /* Handle region semantics for compressed images */
1877 const uint32_t block_w =
1878 vk_format_get_blockwidth(image->planes[plane].vk_format);
1879 const uint32_t block_h =
1880 vk_format_get_blockheight(image->planes[plane].vk_format);
1881 width = DIV_ROUND_UP(width, block_w);
1882 height = DIV_ROUND_UP(height, block_h);
1883
1884 /* Format must be supported for texturing via the TFU. Since we are just
1885 * copying raw data and not converting between pixel formats, we can ignore
1886 * the image's format and choose a compatible TFU format for the image
1887 * texel size instead, which expands the list of formats we can handle here.
1888 */
1889 const struct v3dv_format *format =
1890 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1891 image->planes[plane].cpp, NULL);
1892 /* We only use single-plane formats with the TFU */
1893 assert(format->plane_count == 1);
1894 const struct v3dv_format_plane *format_plane = &format->planes[0];
1895
1896 uint32_t num_layers;
1897 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1898 num_layers = region->imageSubresource.layerCount;
1899 else
1900 num_layers = region->imageExtent.depth;
1901 assert(num_layers > 0);
1902
1903 assert(image->planes[plane].mem && image->planes[plane].mem->bo);
1904 const struct v3dv_bo *dst_bo = image->planes[plane].mem->bo;
1905
1906 assert(buffer->mem && buffer->mem->bo);
1907 const struct v3dv_bo *src_bo = buffer->mem->bo;
1908
1909 /* Emit a TFU job per layer to copy */
1910 const uint32_t buffer_stride = width * image->planes[plane].cpp;
1911 for (int i = 0; i < num_layers; i++) {
1912 uint32_t layer;
1913 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1914 layer = region->imageSubresource.baseArrayLayer + i;
1915 else
1916 layer = region->imageOffset.z + i;
1917
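/* Each layer in the buffer starts height * buffer_stride bytes after the
 * previous one (the buffer's layer pitch); e.g. for a 256x256 RGBA8 image
 * (buffer_stride = 1024 bytes) layer 2 starts 524288 bytes past
 * bufferOffset.
 */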
1918 const uint32_t buffer_offset =
1919 buffer->mem_offset + region->bufferOffset +
1920 height * buffer_stride * i;
1921 const uint32_t src_offset = src_bo->offset + buffer_offset;
1922
1923 const uint32_t dst_offset =
1924 dst_bo->offset + v3dv_layer_offset(image, mip_level, layer, plane);
1925
1926 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
1927 cmd_buffer,
1928 dst_bo->handle,
1929 dst_offset,
1930 slice->tiling,
1931 slice->padded_height,
1932 image->planes[plane].cpp,
1933 src_bo->handle,
1934 src_offset,
1935 V3D_TILING_RASTER,
1936 width,
1937 1,
1938 width, height, format_plane);
1939 }
1940
1941 return true;
1942 }
1943
1944 /**
1945 * Returns true if the implementation supports the requested operation (even if
1946 * it failed to process it, for example, due to an out-of-memory error).
1947 */
1948 static bool
1949 copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1950 struct v3dv_image *image,
1951 struct v3dv_buffer *buffer,
1952 const VkBufferImageCopy2 *region)
1953 {
1954 VkFormat fb_format;
1955 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1956 assert(plane < image->plane_count);
1957
1958 if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
1959 &region->imageOffset, &region->imageExtent,
1960 &fb_format)) {
1961 return false;
1962 }
1963
1964 uint32_t internal_type, internal_bpp;
1965 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
1966 (fb_format, region->imageSubresource.aspectMask,
1967 &internal_type, &internal_bpp);
1968
1969 uint32_t num_layers;
1970 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1971 num_layers = region->imageSubresource.layerCount;
1972 else
1973 num_layers = region->imageExtent.depth;
1974 assert(num_layers > 0);
1975
1976 struct v3dv_job *job =
1977 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1978 if (!job)
1979 return true;
1980
1981 /* Handle copy to compressed format using a compatible format */
1982 const uint32_t block_w =
1983 vk_format_get_blockwidth(image->planes[plane].vk_format);
1984 const uint32_t block_h =
1985 vk_format_get_blockheight(image->planes[plane].vk_format);
1986 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
1987 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
1988
1989 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
1990 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1991 false);
1992
1993 struct v3dv_meta_framebuffer framebuffer;
1994 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
1995 internal_type, &job->frame_tiling);
1996
1997 v3dv_X(job->device, job_emit_binning_flush)(job);
1998 v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
1999 (job, image, buffer, &framebuffer, region);
2000
2001 v3dv_cmd_buffer_finish_job(cmd_buffer);
2002
2003 return true;
2004 }
2005
2006 static bool
2007 create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
2008 struct v3dv_image *image,
2009 struct v3dv_buffer *buffer,
2010 const VkBufferImageCopy2 *region)
2011 {
2012 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
2013 return true;
2014 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
2015 return true;
2016 return false;
2017 }
2018
2019 static VkResult
2020 create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
2021 {
2022 /* If this is not the first pool we create for this command buffer,
2023 * size it based on the size of the currently exhausted pool.
2024 */
2025 uint32_t descriptor_count = 64;
2026 if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
2027 struct v3dv_descriptor_pool *exhausted_pool =
2028 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
2029 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
2030 }
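/* Pool sizes therefore double (64, 128, 256, ...) up to a cap of 1024 sets
 * per pool.
 */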
2031
2032 /* Create the descriptor pool */
2033 cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
2034 VkDescriptorPoolSize pool_size = {
2035 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2036 .descriptorCount = descriptor_count,
2037 };
2038 VkDescriptorPoolCreateInfo info = {
2039 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2040 .maxSets = descriptor_count,
2041 .poolSizeCount = 1,
2042 .pPoolSizes = &pool_size,
2043 .flags = 0,
2044 };
2045 VkResult result =
2046 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
2047 &info,
2048 &cmd_buffer->device->vk.alloc,
2049 &cmd_buffer->meta.texel_buffer_copy.dspool);
2050
2051 if (result == VK_SUCCESS) {
2052 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2053 const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
2054
2055 v3dv_cmd_buffer_add_private_obj(
2056 cmd_buffer, (uintptr_t) _pool,
2057 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
2058
2059 struct v3dv_descriptor_pool *pool =
2060 v3dv_descriptor_pool_from_handle(_pool);
2061 pool->is_driver_internal = true;
2062 }
2063
2064 return result;
2065 }
2066
2067 static VkResult
2068 allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
2069 VkDescriptorSet *set)
2070 {
2071 /* Make sure we have a descriptor pool */
2072 VkResult result;
2073 if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
2074 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2075 if (result != VK_SUCCESS)
2076 return result;
2077 }
2078 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2079
2080 /* Allocate descriptor set */
2081 struct v3dv_device *device = cmd_buffer->device;
2082 VkDevice _device = v3dv_device_to_handle(device);
2083 VkDescriptorSetAllocateInfo info = {
2084 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2085 .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
2086 .descriptorSetCount = 1,
2087 .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
2088 };
2089 result = v3dv_AllocateDescriptorSets(_device, &info, set);
2090
2091 /* If we ran out of pool space, grow the pool and try again */
2092 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
2093 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2094 if (result == VK_SUCCESS) {
2095 info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
2096 result = v3dv_AllocateDescriptorSets(_device, &info, set);
2097 }
2098 }
2099
2100 return result;
2101 }
2102
2103 static void
2104 get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
2105 VkColorComponentFlags cmask,
2106 VkComponentMapping *cswizzle,
2107 bool is_layered,
2108 uint8_t *key)
2109 {
2110 memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2111
2112 uint32_t *p = (uint32_t *) key;
2113
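/* Key layout: format (4 bytes) | cmask (4) | is_layered (4) |
 * VkComponentMapping (16), i.e. 28 bytes in total, which the assert at the
 * end checks against V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE.
 */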
2114 *p = format;
2115 p++;
2116
2117 *p = cmask;
2118 p++;
2119
2120 /* Note that we are using a single byte for this, so we could pack
2121 * more data into this 32-bit slot in the future.
2122 */
2123 *p = is_layered ? 1 : 0;
2124 p++;
2125
2126 memcpy(p, cswizzle, sizeof(VkComponentMapping));
2127 p += sizeof(VkComponentMapping) / sizeof(uint32_t);
2128
2129 assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2130 }
2131
2132 static bool
2133 create_blit_render_pass(struct v3dv_device *device,
2134 VkFormat dst_format,
2135 VkFormat src_format,
2136 VkRenderPass *pass_load,
2137 VkRenderPass *pass_no_load);
2138
2139 static bool
2140 create_pipeline(struct v3dv_device *device,
2141 struct v3dv_render_pass *pass,
2142 struct nir_shader *vs_nir,
2143 struct nir_shader *gs_nir,
2144 struct nir_shader *fs_nir,
2145 const VkPipelineVertexInputStateCreateInfo *vi_state,
2146 const VkPipelineDepthStencilStateCreateInfo *ds_state,
2147 const VkPipelineColorBlendStateCreateInfo *cb_state,
2148 const VkPipelineMultisampleStateCreateInfo *ms_state,
2149 const VkPipelineLayout layout,
2150 VkPipeline *pipeline);
2151
2152 static nir_shader *
2153 get_texel_buffer_copy_vs()
2154 {
2155 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2156 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
2157 "meta texel buffer copy vs");
2158 nir_variable *vs_out_pos =
2159 nir_variable_create(b.shader, nir_var_shader_out,
2160 glsl_vec4_type(), "gl_Position");
2161 vs_out_pos->data.location = VARYING_SLOT_POS;
2162
2163 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
2164 nir_store_var(&b, vs_out_pos, pos, 0xf);
2165
2166 return b.shader;
2167 }
2168
2169 static nir_shader *
2170 get_texel_buffer_copy_gs()
2171 {
2172 /* FIXME: this creates a geometry shader that takes the index of a single
2173 * layer to copy from push constants, so we need to emit a draw call for
2174 * each layer that we want to copy. We could actually do better and have it
2175 * take a range of layers; however, if we were to do this, we would need to
2176 * be careful not to exceed the maximum number of output vertices allowed in
2177 * a geometry shader.
2178 */
2179 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2180 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2181 "meta texel buffer copy gs");
2182 nir_shader *nir = b.shader;
2183 nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
2184 nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
2185 (1ull << VARYING_SLOT_LAYER);
2186 nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
2187 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
2188 nir->info.gs.vertices_in = 3;
2189 nir->info.gs.vertices_out = 3;
2190 nir->info.gs.invocations = 1;
2191 nir->info.gs.active_stream_mask = 0x1;
2192
2193 /* in vec4 gl_Position[3] */
2194 nir_variable *gs_in_pos =
2195 nir_variable_create(b.shader, nir_var_shader_in,
2196 glsl_array_type(glsl_vec4_type(), 3, 0),
2197 "in_gl_Position");
2198 gs_in_pos->data.location = VARYING_SLOT_POS;
2199
2200 /* out vec4 gl_Position */
2201 nir_variable *gs_out_pos =
2202 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
2203 "out_gl_Position");
2204 gs_out_pos->data.location = VARYING_SLOT_POS;
2205
2206 /* out float gl_Layer */
2207 nir_variable *gs_out_layer =
2208 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
2209 "out_gl_Layer");
2210 gs_out_layer->data.location = VARYING_SLOT_LAYER;
2211
2212 /* Emit output triangle */
2213 for (uint32_t i = 0; i < 3; i++) {
2214 /* gl_Position from shader input */
2215 nir_deref_instr *in_pos_i =
2216 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
2217 nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
2218
2219 /* gl_Layer from push constants */
2220 nir_def *layer =
2221 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2222 .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
2223 .range = 4);
2224 nir_store_var(&b, gs_out_layer, layer, 0x1);
2225
2226 nir_emit_vertex(&b, 0);
2227 }
2228
2229 nir_end_primitive(&b, 0);
2230
2231 return nir;
2232 }
2233
2234 static nir_def *
2235 load_frag_coord(nir_builder *b)
2236 {
2237 nir_foreach_shader_in_variable(var, b->shader) {
2238 if (var->data.location == VARYING_SLOT_POS)
2239 return nir_load_var(b, var);
2240 }
2241 nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
2242 glsl_vec4_type(), NULL);
2243 pos->data.location = VARYING_SLOT_POS;
2244 return nir_load_var(b, pos);
2245 }
2246
2247 static uint32_t
2248 component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
2249 {
2250 if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
2251 swz = comp;
2252
2253 switch (swz) {
2254 case VK_COMPONENT_SWIZZLE_R:
2255 return 0;
2256 case VK_COMPONENT_SWIZZLE_G:
2257 return 1;
2258 case VK_COMPONENT_SWIZZLE_B:
2259 return 2;
2260 case VK_COMPONENT_SWIZZLE_A:
2261 return 3;
2262 default:
2263 unreachable("Invalid swizzle");
2264 };
2265 }
2266
2267 static nir_shader *
2268 get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
2269 VkComponentMapping *cswizzle)
2270 {
2271 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2272 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
2273 "meta texel buffer copy fs");
2274
2275 /* We only use the copy from texel buffer shader to implement
2276 * copy_buffer_to_image_shader, which always selects a compatible integer
2277 * format for the copy.
2278 */
2279 assert(vk_format_is_int(format));
2280
2281 /* Fragment shader output color */
2282 nir_variable *fs_out_color =
2283 nir_variable_create(b.shader, nir_var_shader_out,
2284 glsl_uvec4_type(), "out_color");
2285 fs_out_color->data.location = FRAG_RESULT_DATA0;
2286
2287 /* Texel buffer input */
2288 const struct glsl_type *sampler_type =
2289 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
2290 nir_variable *sampler =
2291 nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
2292 sampler->data.descriptor_set = 0;
2293 sampler->data.binding = 0;
2294
2295 /* Load the box describing the pixel region we want to copy from the
2296 * texel buffer.
2297 */
2298 nir_def *box =
2299 nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
2300 .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
2301 .range = 16);
2302
2303 /* Load the buffer stride (this comes in texel units) */
2304 nir_def *stride =
2305 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2306 .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
2307 .range = 4);
2308
2309 /* Load the buffer offset (this comes in texel units) */
2310 nir_def *offset =
2311 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2312 .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
2313 .range = 4);
2314
2315 nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
2316
2317 /* Load pixel data from texel buffer based on the x,y offset of the pixel
2318 * within the box. Texel buffers are 1D arrays of texels.
2319 *
2320 * Notice that we already make sure that we only generate fragments that are
2321 * inside the box through the scissor/viewport state, so our offset into the
2322 * texel buffer should always be within its bounds and we don't need
2323 * to add a check for that here.
2324 */
2325 nir_def *x_offset =
2326 nir_isub(&b, nir_channel(&b, coord, 0),
2327 nir_channel(&b, box, 0));
2328 nir_def *y_offset =
2329 nir_isub(&b, nir_channel(&b, coord, 1),
2330 nir_channel(&b, box, 1));
2331 nir_def *texel_offset =
2332 nir_iadd(&b, nir_iadd(&b, offset, x_offset),
2333 nir_imul(&b, y_offset, stride));
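/* Worked example: a fragment at window coords (12, 7) copying a region
 * whose box starts at (10, 5), with a stride of 256 texels and an offset of
 * 64 texels, fetches texel 64 + (12 - 10) + (7 - 5) * 256 = 578 below.
 */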
2334
2335 nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->def;
2336 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
2337 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
2338 tex->op = nir_texop_txf;
2339 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, texel_offset);
2340 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
2341 tex->dest_type = nir_type_uint32;
2342 tex->is_array = false;
2343 tex->coord_components = 1;
2344 nir_def_init(&tex->instr, &tex->def, 4, 32);
2345 nir_builder_instr_insert(&b, &tex->instr);
2346
2347 uint32_t swiz[4];
2348 swiz[0] =
2349 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
2350 swiz[1] =
2351 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
2352 swiz[2] =
2353 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
2354 swiz[3] =
2355 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
2356 nir_def *s = nir_swizzle(&b, &tex->def, swiz, 4);
2357 nir_store_var(&b, fs_out_color, s, 0xf);
2358
2359 return b.shader;
2360 }
2361
2362 static bool
2363 create_texel_buffer_copy_pipeline(struct v3dv_device *device,
2364 VkFormat format,
2365 VkColorComponentFlags cmask,
2366 VkComponentMapping *cswizzle,
2367 bool is_layered,
2368 VkRenderPass _pass,
2369 VkPipelineLayout pipeline_layout,
2370 VkPipeline *pipeline)
2371 {
2372 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
2373
2374 assert(vk_format_is_color(format));
2375
2376 nir_shader *vs_nir = get_texel_buffer_copy_vs();
2377 nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
2378 nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
2379
2380 const VkPipelineVertexInputStateCreateInfo vi_state = {
2381 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
2382 .vertexBindingDescriptionCount = 0,
2383 .vertexAttributeDescriptionCount = 0,
2384 };
2385
2386 VkPipelineDepthStencilStateCreateInfo ds_state = {
2387 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
2388 };
2389
2390 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
2391 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
2392 .blendEnable = false,
2393 .colorWriteMask = cmask,
2394 };
2395
2396 const VkPipelineColorBlendStateCreateInfo cb_state = {
2397 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
2398 .logicOpEnable = false,
2399 .attachmentCount = 1,
2400 .pAttachments = blend_att_state
2401 };
2402
2403 const VkPipelineMultisampleStateCreateInfo ms_state = {
2404 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
2405 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
2406 .sampleShadingEnable = false,
2407 .pSampleMask = NULL,
2408 .alphaToCoverageEnable = false,
2409 .alphaToOneEnable = false,
2410 };
2411
2412 return create_pipeline(device,
2413 pass,
2414 vs_nir, gs_nir, fs_nir,
2415 &vi_state,
2416 &ds_state,
2417 &cb_state,
2418 &ms_state,
2419 pipeline_layout,
2420 pipeline);
2421 }
2422
2423 static bool
2424 get_copy_texel_buffer_pipeline(
2425 struct v3dv_device *device,
2426 VkFormat format,
2427 VkColorComponentFlags cmask,
2428 VkComponentMapping *cswizzle,
2429 VkImageType image_type,
2430 bool is_layered,
2431 struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
2432 {
2433 bool ok = true;
2434
2435 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
2436 get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
2437 key);
2438
2439 mtx_lock(&device->meta.mtx);
2440 struct hash_entry *entry =
2441 _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
2442 key);
2443 if (entry) {
2444 mtx_unlock(&device->meta.mtx);
2445 *pipeline = entry->data;
2446 return true;
2447 }
2448
2449 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
2450 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2451
2452 if (*pipeline == NULL)
2453 goto fail;
2454
2455 /* The blit render pass is compatible */
2456 ok = create_blit_render_pass(device, format, format,
2457 &(*pipeline)->pass,
2458 &(*pipeline)->pass_no_load);
2459 if (!ok)
2460 goto fail;
2461
2462 ok =
2463 create_texel_buffer_copy_pipeline(device,
2464 format, cmask, cswizzle, is_layered,
2465 (*pipeline)->pass,
2466 device->meta.texel_buffer_copy.p_layout,
2467 &(*pipeline)->pipeline);
2468 if (!ok)
2469 goto fail;
2470
2471 uint8_t *dupkey = malloc(V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2472 memcpy(dupkey, key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2473 _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
2474 dupkey, *pipeline);
2475
2476 mtx_unlock(&device->meta.mtx);
2477 return true;
2478
2479 fail:
2480 mtx_unlock(&device->meta.mtx);
2481
2482 VkDevice _device = v3dv_device_to_handle(device);
2483 if (*pipeline) {
2484 if ((*pipeline)->pass)
2485 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
2486 if ((*pipeline)->pipeline)
2487 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
2488 vk_free(&device->vk.alloc, *pipeline);
2489 *pipeline = NULL;
2490 }
2491
2492 return false;
2493 }
2494
2495 static bool
2496 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
2497 VkImageAspectFlags aspect,
2498 struct v3dv_image *image,
2499 VkFormat dst_format,
2500 VkFormat src_format,
2501 struct v3dv_buffer *buffer,
2502 uint32_t buffer_bpp,
2503 VkColorComponentFlags cmask,
2504 VkComponentMapping *cswizzle,
2505 uint32_t region_count,
2506 const VkBufferImageCopy2 *regions)
2507 {
2508 VkResult result;
2509 bool handled = false;
2510
2511 assert(cswizzle);
2512
2513 /* This is a copy path, so we don't handle format conversions. The only
2514 * exception is stencil to D24S8 copies, which are handled as a color
2515 * masked R8->RGBA8 copy.
2516 */
2517 assert(src_format == dst_format ||
2518 (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
2519 src_format == VK_FORMAT_R8_UINT &&
2520 cmask == VK_COLOR_COMPONENT_R_BIT));
2521
2522 /* We only handle color copies. Callers can copy D/S aspects by using
2523 * a compatible color format and maybe a cmask/cswizzle for D24 formats.
2524 */
2525 if (!vk_format_is_color(dst_format) || !vk_format_is_color(src_format))
2526 return handled;
2527
2528 /* FIXME: we only handle uncompressed images for now. */
2529 if (vk_format_is_compressed(image->vk.format))
2530 return handled;
2531
2532 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
2533 VK_COLOR_COMPONENT_G_BIT |
2534 VK_COLOR_COMPONENT_B_BIT |
2535 VK_COLOR_COMPONENT_A_BIT;
2536 if (cmask == 0)
2537 cmask = full_cmask;
2538
2539 /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
2540 * so we can bind it as a texel buffer. Otherwise, the buffer view
2541 * we create below won't set up the texture state that we need for this.
2542 */
2543 if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
2544 if (v3dv_buffer_format_supports_features(
2545 cmd_buffer->device, src_format,
2546 VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT)) {
2547 buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
2548 } else {
2549 return handled;
2550 }
2551 }
2552
2553 /* At this point we should be able to handle the copy unless an unexpected
2554 * error occurs, such as an OOM.
2555 */
2556 handled = true;
2557
2558
2559 /* Compute the number of layers to copy.
2560 *
2561 * If we are batching (region_count > 1) all our regions have the same
2562 * image subresource so we can take this from the first region. For 3D
2563 * images we require the same depth extent.
2564 */
2565 const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2566 uint32_t num_layers;
2567 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2568 num_layers = resource->layerCount;
2569 } else {
2570 assert(region_count == 1);
2571 num_layers = regions[0].imageExtent.depth;
2572 }
2573 assert(num_layers > 0);
2574
2575 /* Get the texel buffer copy pipeline */
2576 struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2577 bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
2578 dst_format, cmask, cswizzle,
2579 image->vk.image_type, num_layers > 1,
2580 &pipeline);
2581 if (!ok)
2582 return handled;
2583 assert(pipeline && pipeline->pipeline && pipeline->pass);
2584
2585 /* Setup descriptor set for the source texel buffer. We don't have to
2586 * register the descriptor as a private command buffer object since
2587 * all descriptors will be freed automatically with the descriptor
2588 * pool.
2589 */
2590 VkDescriptorSet set;
2591 result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2592 if (result != VK_SUCCESS)
2593 return handled;
2594
2595 /* We can't pass region->bufferOffset here for the offset field because
2596 * the texture base pointer in the texture shader state must be a 64-byte
2597 * aligned value. Instead, we use 0 here and we pass the offset in texels
2598 * as a push constant to the shader.
2599 */
2600 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2601 VkBufferViewCreateInfo buffer_view_info = {
2602 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2603 .buffer = v3dv_buffer_to_handle(buffer),
2604 .format = src_format,
2605 .offset = 0,
2606 .range = VK_WHOLE_SIZE,
2607 };
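/* E.g. a bufferOffset of 100 bytes with a 4-byte texel format would not be
 * 64-byte aligned, so the view keeps offset 0 and the shader instead
 * receives 100 / 4 = 25 texels via the per-region push constants below.
 */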
2608
2609 VkBufferView texel_buffer_view;
2610 result = v3dv_CreateBufferView(_device, &buffer_view_info,
2611 &cmd_buffer->device->vk.alloc,
2612 &texel_buffer_view);
2613 if (result != VK_SUCCESS)
2614 return handled;
2615
2616 v3dv_cmd_buffer_add_private_obj(
2617 cmd_buffer, (uintptr_t)texel_buffer_view,
2618 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2619
2620 VkWriteDescriptorSet write = {
2621 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2622 .dstSet = set,
2623 .dstBinding = 0,
2624 .dstArrayElement = 0,
2625 .descriptorCount = 1,
2626 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2627 .pTexelBufferView = &texel_buffer_view,
2628 };
2629 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2630
2631 /* Push command buffer state before starting meta operation */
2632 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2633
2634 /* Bind common state for all layers and regions */
2635 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2636 v3dv_CmdBindPipeline(_cmd_buffer,
2637 VK_PIPELINE_BIND_POINT_GRAPHICS,
2638 pipeline->pipeline);
2639
2640 v3dv_CmdBindDescriptorSets(_cmd_buffer,
2641 VK_PIPELINE_BIND_POINT_GRAPHICS,
2642 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2643 0, 1, &set,
2644 0, NULL);
2645
2646 /* Setup framebuffer.
2647 *
2648 * For 3D images, this creates a layered framebuffer with a number of
2649 * layers matching the depth extent of the 3D image.
2650 */
2651 uint8_t plane = v3dv_plane_from_aspect(aspect);
2652 uint32_t fb_width = u_minify(image->planes[plane].width, resource->mipLevel);
2653 uint32_t fb_height = u_minify(image->planes[plane].height, resource->mipLevel);
2654
2655 VkImageViewCreateInfo image_view_info = {
2656 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2657 .image = v3dv_image_to_handle(image),
2658 .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
2659 .format = dst_format,
2660 .subresourceRange = {
2661 .aspectMask = aspect,
2662 .baseMipLevel = resource->mipLevel,
2663 .levelCount = 1,
2664 .baseArrayLayer = resource->baseArrayLayer,
2665 .layerCount = num_layers,
2666 },
2667 };
2668 VkImageView image_view;
2669 result = v3dv_create_image_view(cmd_buffer->device,
2670 &image_view_info, &image_view);
2671 if (result != VK_SUCCESS)
2672 goto fail;
2673
2674 v3dv_cmd_buffer_add_private_obj(
2675 cmd_buffer, (uintptr_t)image_view,
2676 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2677
2678 VkFramebufferCreateInfo fb_info = {
2679 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2680 .renderPass = pipeline->pass,
2681 .attachmentCount = 1,
2682 .pAttachments = &image_view,
2683 .width = fb_width,
2684 .height = fb_height,
2685 .layers = num_layers,
2686 };
2687
2688 VkFramebuffer fb;
2689 result = v3dv_CreateFramebuffer(_device, &fb_info,
2690 &cmd_buffer->device->vk.alloc, &fb);
2691 if (result != VK_SUCCESS)
2692 goto fail;
2693
2694 v3dv_cmd_buffer_add_private_obj(
2695 cmd_buffer, (uintptr_t)fb,
2696 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2697
2698 /* For each layer */
2699 for (uint32_t l = 0; l < num_layers; l++) {
2700 /* Start render pass for this layer.
2701 *
2702 * If we only have one region to copy, then we might be able to
2703 * skip the TLB load if it is aligned to tile boundaries. All layers
2704 * copy the same area, so we only need to check this once.
2705 */
2706 bool can_skip_tlb_load = false;
2707 VkRect2D render_area;
2708 if (region_count == 1) {
2709 render_area.offset.x = regions[0].imageOffset.x;
2710 render_area.offset.y = regions[0].imageOffset.y;
2711 render_area.extent.width = regions[0].imageExtent.width;
2712 render_area.extent.height = regions[0].imageExtent.height;
2713
2714 if (l == 0) {
2715 struct v3dv_render_pass *pipeline_pass =
2716 v3dv_render_pass_from_handle(pipeline->pass);
2717 can_skip_tlb_load =
2718 cmask == full_cmask &&
2719 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2720 v3dv_framebuffer_from_handle(fb),
2721 pipeline_pass, 0);
2722 }
2723 } else {
2724 render_area.offset.x = 0;
2725 render_area.offset.y = 0;
2726 render_area.extent.width = fb_width;
2727 render_area.extent.height = fb_height;
2728 }
2729
2730 VkRenderPassBeginInfo rp_info = {
2731 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2732 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2733 pipeline->pass,
2734 .framebuffer = fb,
2735 .renderArea = render_area,
2736 .clearValueCount = 0,
2737 };
2738
2739 VkSubpassBeginInfo sp_info = {
2740 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
2741 .contents = VK_SUBPASS_CONTENTS_INLINE,
2742 };
2743
2744 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
2745 struct v3dv_job *job = cmd_buffer->state.job;
2746 if (!job)
2747 goto fail;
2748
2749 /* If we are using a layered copy we need to specify the layer for the
2750 * Geometry Shader.
2751 */
2752 if (num_layers > 1) {
2753 uint32_t layer = resource->baseArrayLayer + l;
2754 v3dv_CmdPushConstants(_cmd_buffer,
2755 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2756 VK_SHADER_STAGE_GEOMETRY_BIT,
2757 24, 4, &layer);
2758 }
2759
2760 /* For each region */
2761 for (uint32_t r = 0; r < region_count; r++) {
2762 const VkBufferImageCopy2 *region = &regions[r];
2763
2764 /* Obtain the 2D buffer region spec */
2765 uint32_t buf_width, buf_height;
2766 if (region->bufferRowLength == 0)
2767 buf_width = region->imageExtent.width;
2768 else
2769 buf_width = region->bufferRowLength;
2770
2771 if (region->bufferImageHeight == 0)
2772 buf_height = region->imageExtent.height;
2773 else
2774 buf_height = region->bufferImageHeight;
2775
2776 const VkViewport viewport = {
2777 .x = region->imageOffset.x,
2778 .y = region->imageOffset.y,
2779 .width = region->imageExtent.width,
2780 .height = region->imageExtent.height,
2781 .minDepth = 0.0f,
2782 .maxDepth = 1.0f
2783 };
2784 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2785 const VkRect2D scissor = {
2786 .offset = { region->imageOffset.x, region->imageOffset.y },
2787 .extent = { region->imageExtent.width, region->imageExtent.height }
2788 };
2789 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2790
2791 const VkDeviceSize buf_offset =
2792 region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2793 uint32_t push_data[6] = {
2794 region->imageOffset.x,
2795 region->imageOffset.y,
2796 region->imageOffset.x + region->imageExtent.width - 1,
2797 region->imageOffset.y + region->imageExtent.height - 1,
2798 buf_width,
2799 buf_offset,
2800 };
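/* push_data mirrors the fragment shader push-constant layout: box
 * (x0, y0, x1, y1), then buffer stride and buffer offset in texels
 * (assuming the FS *_PC_OFFSET defines are 0, 16 and 20, consistent with
 * this 24-byte push at offset 0 and the GS layer constant at offset 24).
 */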
2801
2802 v3dv_CmdPushConstants(_cmd_buffer,
2803 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2804 VK_SHADER_STAGE_FRAGMENT_BIT,
2805 0, sizeof(push_data), &push_data);
2806
2807 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2808 } /* For each region */
2809
2810 VkSubpassEndInfo sp_end_info = {
2811 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
2812 };
2813
2814 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
2815 } /* For each layer */
2816
2817 fail:
2818 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
2819 return handled;
2820 }
2821
2822 /**
2823 * Returns true if the implementation supports the requested operation (even if
2824 * it failed to process it, for example, due to an out-of-memory error).
2825 */
2826 static bool
2827 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2828 VkImageAspectFlags aspect,
2829 struct v3dv_image *image,
2830 VkFormat dst_format,
2831 VkFormat src_format,
2832 struct v3dv_buffer *buffer,
2833 uint32_t buffer_bpp,
2834 VkColorComponentFlags cmask,
2835 VkComponentMapping *cswizzle,
2836 uint32_t region_count,
2837 const VkBufferImageCopy2 *regions)
2838 {
2839 /* Since we can't sample linear images we need to upload the linear
2840 * buffer to a tiled image that we can use as a blit source, which
2841 * is slow.
2842 */
2843 perf_debug("Falling back to blit path for buffer to image copy.\n");
2844
2845 struct v3dv_device *device = cmd_buffer->device;
2846 VkDevice _device = v3dv_device_to_handle(device);
2847 bool handled = true;
2848
2849 /* Allocate memory for the tiled image. Since we copy layer by layer
2850 * we allocate memory to hold a full layer, which is the worst case.
2851 * For that we create a dummy image with that spec, get memory requirements
2852 * for it and use that information to create the memory allocation.
2853 * We will then reuse this memory store for all the regions we want to
2854 * copy.
2855 */
2856 VkImage dummy_image;
2857 VkImageCreateInfo dummy_info = {
2858 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2859 .imageType = VK_IMAGE_TYPE_2D,
2860 .format = src_format,
2861 .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
2862 .mipLevels = 1,
2863 .arrayLayers = 1,
2864 .samples = VK_SAMPLE_COUNT_1_BIT,
2865 .tiling = VK_IMAGE_TILING_OPTIMAL,
2866 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2867 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2868 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2869 .queueFamilyIndexCount = 0,
2870 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2871 };
2872 VkResult result =
2873 v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2874 if (result != VK_SUCCESS)
2875 return handled;
2876
2877 VkMemoryRequirements reqs;
2878 vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2879 v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2880
2881 VkDeviceMemory mem;
2882 VkMemoryAllocateInfo alloc_info = {
2883 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2884 .allocationSize = reqs.size,
2885 .memoryTypeIndex = 0,
2886 };
2887 result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2888 if (result != VK_SUCCESS)
2889 return handled;
2890
2891 v3dv_cmd_buffer_add_private_obj(
2892 cmd_buffer, (uintptr_t)mem,
2893 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2894
2895 /* Obtain the layer count.
2896 *
2897 * If we are batching (region_count > 1) all our regions have the same
2898 * image subresource so we can take this from the first region.
2899 */
2900 uint32_t num_layers;
2901 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
2902 num_layers = regions[0].imageSubresource.layerCount;
2903 else
2904 num_layers = regions[0].imageExtent.depth;
2905 assert(num_layers > 0);
2906
2907 /* Sanity check: we can only batch multiple regions together if they have
2908 * the same framebuffer (so the same layer).
2909 */
2910 assert(num_layers == 1 || region_count == 1);
2911
2912 uint8_t plane = v3dv_plane_from_aspect(aspect);
2913 assert(plane < image->plane_count);
2914
2915 const uint32_t block_width =
2916 vk_format_get_blockwidth(image->planes[plane].vk_format);
2917 const uint32_t block_height =
2918 vk_format_get_blockheight(image->planes[plane].vk_format);
2919
2920 /* Copy regions by uploading each region to a temporary tiled image using
2921 * the memory we have just allocated as storage.
2922 */
2923 for (uint32_t r = 0; r < region_count; r++) {
2924 const VkBufferImageCopy2 *region = &regions[r];
2925
2926 /* Obtain the 2D buffer region spec */
2927 uint32_t buf_width, buf_height;
2928 if (region->bufferRowLength == 0)
2929 buf_width = region->imageExtent.width;
2930 else
2931 buf_width = region->bufferRowLength;
2932
2933 if (region->bufferImageHeight == 0)
2934 buf_height = region->imageExtent.height;
2935 else
2936 buf_height = region->bufferImageHeight;
2937
2938 /* If the image is compressed, the bpp refers to blocks, not pixels */
2939 buf_width = buf_width / block_width;
2940 buf_height = buf_height / block_height;
2941
2942 for (uint32_t i = 0; i < num_layers; i++) {
2943 /* Create the tiled image */
2944 VkImageCreateInfo image_info = {
2945 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2946 .imageType = VK_IMAGE_TYPE_2D,
2947 .format = src_format,
2948 .extent = { buf_width, buf_height, 1 },
2949 .mipLevels = 1,
2950 .arrayLayers = 1,
2951 .samples = VK_SAMPLE_COUNT_1_BIT,
2952 .tiling = VK_IMAGE_TILING_OPTIMAL,
2953 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2954 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2955 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2956 .queueFamilyIndexCount = 0,
2957 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2958 };
2959
2960 VkImage buffer_image;
2961 VkResult result =
2962 v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
2963 &buffer_image);
2964 if (result != VK_SUCCESS)
2965 return handled;
2966
2967 v3dv_cmd_buffer_add_private_obj(
2968 cmd_buffer, (uintptr_t)buffer_image,
2969 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
2970
2971 result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
2972 if (result != VK_SUCCESS)
2973 return handled;
2974
2975 /* When copying a multi-plane image the aspect indicates the plane to
2976 * copy. For these, we only copy one plane at a time, which is always
2977 * a color plane.
2978 */
2979 VkImageAspectFlags copy_aspect =
2980 image->plane_count == 1 ? aspect : VK_IMAGE_ASPECT_COLOR_BIT;
2981
2982 /* Upload buffer contents for the selected layer */
2983 const VkDeviceSize buf_offset_bytes =
2984 region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
2985 const VkBufferImageCopy2 buffer_image_copy = {
2986 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
2987 .bufferOffset = buf_offset_bytes,
2988 .bufferRowLength = region->bufferRowLength / block_width,
2989 .bufferImageHeight = region->bufferImageHeight / block_height,
2990 .imageSubresource = {
2991 .aspectMask = copy_aspect,
2992 .mipLevel = 0,
2993 .baseArrayLayer = 0,
2994 .layerCount = 1,
2995 },
2996 .imageOffset = { 0, 0, 0 },
2997 .imageExtent = { buf_width, buf_height, 1 }
2998 };
2999 handled =
3000 create_tiled_image_from_buffer(cmd_buffer,
3001 v3dv_image_from_handle(buffer_image),
3002 buffer, &buffer_image_copy);
3003 if (!handled) {
3004 /* This is unexpected, we should have set up the upload to be
3005 * compatible with a TFU or TLB copy.
3006 */
3007 unreachable("Unable to copy buffer to image through TLB");
3008 return false;
3009 }
3010
3011 /* Blit-copy the requested image extent from the buffer image to the
3012 * destination image.
3013 *
3014 * Since we are copying, the blit must use the same format on the
3015 * destination and source images to avoid format conversions. The
3016 * only exception is copying stencil, which we upload to an R8UI source
3017 * image but then need to blit to a S8D24 destination (the only
3018 * stencil format we support).
3019 */
3020 const VkImageBlit2 blit_region = {
3021 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3022 .srcSubresource = {
3023 .aspectMask = copy_aspect,
3024 .mipLevel = 0,
3025 .baseArrayLayer = 0,
3026 .layerCount = 1,
3027 },
3028 .srcOffsets = {
3029 { 0, 0, 0 },
3030 { region->imageExtent.width, region->imageExtent.height, 1 },
3031 },
3032 .dstSubresource = {
3033 .aspectMask = aspect,
3034 .mipLevel = region->imageSubresource.mipLevel,
3035 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
3036 .layerCount = 1,
3037 },
3038 .dstOffsets = {
3039 {
3040 DIV_ROUND_UP(region->imageOffset.x, block_width),
3041 DIV_ROUND_UP(region->imageOffset.y, block_height),
3042 region->imageOffset.z + i,
3043 },
3044 {
3045 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
3046 block_width),
3047 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
3048 block_height),
3049 region->imageOffset.z + i + 1,
3050 },
3051 },
3052 };
3053
3054 handled = blit_shader(cmd_buffer,
3055 image, dst_format,
3056 v3dv_image_from_handle(buffer_image), src_format,
3057 cmask, cswizzle,
3058 &blit_region, VK_FILTER_NEAREST, true);
3059 if (!handled) {
3060 /* This is unexpected, we should have a supported blit spec */
3061 unreachable("Unable to blit buffer to destination image");
3062 return false;
3063 }
3064 }
3065 }
3066
3067 return handled;
3068 }
3069
3070 /**
3071 * Returns true if the implementation supports the requested operation (even if
3072 * it failed to process it, for example, due to an out-of-memory error).
3073 */
3074 static bool
3075 copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
3076 struct v3dv_image *image,
3077 struct v3dv_buffer *buffer,
3078 uint32_t region_count,
3079 const VkBufferImageCopy2 *regions,
3080 bool use_texel_buffer)
3081 {
3082 /* We can only call this with region_count > 1 if we can batch the regions
3083 * together, in which case they share the same image subresource, and so
3084 * the same aspect.
3085 */
3086 VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
3087 const VkImageAspectFlagBits any_plane_aspect =
3088 VK_IMAGE_ASPECT_PLANE_0_BIT |
3089 VK_IMAGE_ASPECT_PLANE_1_BIT |
3090 VK_IMAGE_ASPECT_PLANE_2_BIT;
3091
3092 bool is_plane_aspect = aspect & any_plane_aspect;
3093
3094 /* Generally, the bpp of the data in the buffer matches that of the
3095 * destination image. The exception is the case where we are uploading
3096 * stencil (8bpp) to a combined d24s8 image (32bpp).
3097 */
3098 uint8_t plane = v3dv_plane_from_aspect(aspect);
3099 assert(plane < image->plane_count);
3100 uint32_t buf_bpp = image->planes[plane].cpp;
3101
3102 /* We are about to upload the buffer data to an image so we can then
3103 * blit that to our destination region. Because we are going to implement
3104 * the copy as a blit, we want our blit source and destination formats to be
3105 * the same (to avoid any format conversions), so we choose a canonical
3106 * format that matches the destination image bpp.
3107 */
3108 VkComponentMapping ident_swizzle = {
3109 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
3110 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
3111 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
3112 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
3113 };
3114
3115 VkComponentMapping cswizzle = ident_swizzle;
3116 VkColorComponentFlags cmask = 0; /* Write all components */
3117 VkFormat src_format;
3118 VkFormat dst_format;
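/* Pick a canonical UINT format for the given bpp so that the blit source and
 * destination formats match exactly: 16 bpp -> RGBA32_UINT, 8 -> RGBA16_UINT,
 * 4 -> RGBA8_UINT, 2 -> R16_UINT, 1 -> R8_UINT. Depth and stencil aspects are
 * handled as special cases below.
 */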
3119 switch (buf_bpp) {
3120 case 16:
3121 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3122 src_format = VK_FORMAT_R32G32B32A32_UINT;
3123 dst_format = src_format;
3124 break;
3125 case 8:
3126 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3127 src_format = VK_FORMAT_R16G16B16A16_UINT;
3128 dst_format = src_format;
3129 break;
3130 case 4:
3131 switch (aspect) {
3132 case VK_IMAGE_ASPECT_COLOR_BIT:
3133 case VK_IMAGE_ASPECT_PLANE_0_BIT:
3134 case VK_IMAGE_ASPECT_PLANE_1_BIT:
3135 case VK_IMAGE_ASPECT_PLANE_2_BIT:
3136 src_format = VK_FORMAT_R8G8B8A8_UINT;
3137 dst_format = src_format;
3138 break;
3139 case VK_IMAGE_ASPECT_DEPTH_BIT:
3140 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
3141 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3142 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
3143 src_format = VK_FORMAT_R8G8B8A8_UINT;
3144 dst_format = src_format;
3145
3146 /* For D24 formats, the Vulkan spec states that the depth component
3147 * in the buffer is stored in the 24 LSBs, but V3D wants it in the
3148 * 24 MSBs.
3149 */
3150 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3151 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
3152 cmask = VK_COLOR_COMPONENT_G_BIT |
3153 VK_COLOR_COMPONENT_B_BIT |
3154 VK_COLOR_COMPONENT_A_BIT;
3155 cswizzle.r = VK_COMPONENT_SWIZZLE_R;
3156 cswizzle.g = VK_COMPONENT_SWIZZLE_R;
3157 cswizzle.b = VK_COMPONENT_SWIZZLE_G;
3158 cswizzle.a = VK_COMPONENT_SWIZZLE_B;
3159 }
3160 break;
3161 case VK_IMAGE_ASPECT_STENCIL_BIT:
3162 /* Since we don't support separate stencil images, this is always a
3163 * stencil copy to a combined depth/stencil image. We interpret the
3164 * buffer data as a color R8UI image and implement the blit as a
3165 * compatible color blit to an RGBA8UI destination, masking out
3166 * writes to components GBA (which map to the D24 component of an
3167 * S8D24 image).
3168 */
3169 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
3170 buf_bpp = 1;
3171 src_format = VK_FORMAT_R8_UINT;
3172 dst_format = VK_FORMAT_R8G8B8A8_UINT;
3173 cmask = VK_COLOR_COMPONENT_R_BIT;
3174 break;
3175 default:
3176 unreachable("unsupported aspect");
3177 return false;
3178 };
3179 break;
3180 case 2:
3181 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
3182 aspect == VK_IMAGE_ASPECT_DEPTH_BIT ||
3183 is_plane_aspect);
3184 src_format = VK_FORMAT_R16_UINT;
3185 dst_format = src_format;
3186 break;
3187 case 1:
3188 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT || is_plane_aspect);
3189 src_format = VK_FORMAT_R8_UINT;
3190 dst_format = src_format;
3191 break;
3192 default:
3193 unreachable("unsupported bit-size");
3194 return false;
3195 }
3196
3197 if (use_texel_buffer) {
3198 return texel_buffer_shader_copy(cmd_buffer, aspect, image,
3199 dst_format, src_format,
3200 buffer, buf_bpp,
3201 cmask, &cswizzle,
3202 region_count, regions);
3203 } else {
3204 return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
3205 dst_format, src_format,
3206 buffer, buf_bpp,
3207 cmask, &cswizzle,
3208 region_count, regions);
3209 }
3210 }
3211
3212 VKAPI_ATTR void VKAPI_CALL
3213 v3dv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
3214 const VkCopyBufferToImageInfo2 *info)
3215 {
3216 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
3217 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
3218 V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
3219
3220 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3221
3222 cmd_buffer->state.is_transfer = true;
3223
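/* Copy strategy: try the TFU and TLB hardware paths first (one full-image
 * region at a time), then fall back to shader copies via a texel buffer
 * (batching compatible regions), and finally to shader blit copies.
 */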
3224 uint32_t r = 0;
3225 while (r < info->regionCount) {
3226 /* The TFU and TLB paths can only copy one region at a time and the region
3227 * needs to start at the origin. We try these first for the common case
3228 * where we are copying full images, since they should be the fastest.
3229 */
3230 uint32_t batch_size = 1;
3231 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
3232 goto handled;
3233
3234 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
3235 goto handled;
3236
3237 /* Otherwise, we are copying subrects, so we fall back to copying
3238 * via shader and texel buffers and we try to batch the regions
3239 * if possible. We can only batch copies if they have the same
3240 * framebuffer spec, which is mostly determined by the image
3241 * subresource of the region.
3242 */
3243 const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
3244 for (uint32_t s = r + 1; s < info->regionCount; s++) {
3245 const VkImageSubresourceLayers *rsc_s =
3246 &info->pRegions[s].imageSubresource;
3247
3248 if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
3249 break;
3250
3251 /* For 3D images we also need to check the depth extent */
3252 if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
3253 info->pRegions[s].imageExtent.depth !=
3254 info->pRegions[r].imageExtent.depth) {
3255 break;
3256 }
3257
3258 batch_size++;
3259 }
3260
3261 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3262 batch_size, &info->pRegions[r], true)) {
3263 goto handled;
3264 }
3265
3266 /* If we still could not copy, fall back to slower paths.
3267 *
3268 * FIXME: we could try to batch these too, but since they are bound to be
3269 * slow it might not be worth it and we should instead put more effort
3270 * in handling more cases with the other paths.
3271 */
3272 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3273 batch_size, &info->pRegions[r], false)) {
3274 goto handled;
3275 }
3276
3277 unreachable("Unsupported buffer to image copy.");
3278
3279 handled:
3280 r += batch_size;
3281 }
3282
3283 cmd_buffer->state.is_transfer = false;
3284 }
3285
3286 static void
3287 compute_blit_3d_layers(const VkOffset3D *offsets,
3288 uint32_t *min_layer, uint32_t *max_layer,
3289 bool *mirror_z);
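/* For 3D images the blit "layers" are the Z slices selected by the blit
 * offsets: compute_blit_3d_layers() returns the [min, max) slice range and
 * whether the Z axis is mirrored (offsets[1].z < offsets[0].z).
 */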
3290
3291 /**
3292 * Returns true if the implementation supports the requested operation (even if
3293 * it failed to process it, for example, due to an out-of-memory error).
3294 *
3295 * The TFU blit path doesn't handle scaling so the blit filter parameter can
3296 * be ignored.
3297 */
3298 static bool
3299 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
3300 struct v3dv_image *dst,
3301 struct v3dv_image *src,
3302 const VkImageBlit2 *region)
3303 {
3304 if (V3D_DBG(DISABLE_TFU)) {
3305 perf_debug("Blit: TFU disabled, fallbacks could be slower.");
3306 return false;
3307 }
3308
3309 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3310 assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3311
3312 /* From vkCmdBlitImage:
3313 * "srcImage must not use a format that requires a sampler YCBCR
3314 * conversion"
3315 * "dstImage must not use a format that requires a sampler YCBCR
3316 * conversion"
3317 */
3318 assert(dst->plane_count == 1);
3319 assert(src->plane_count == 1);
3320
3321 /* Format must match */
3322 if (src->vk.format != dst->vk.format)
3323 return false;
3324
3325 /* Destination can't be raster format */
3326 if (!dst->tiled)
3327 return false;
3328
3329 /* Source region must start at (0,0) */
3330 if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
3331 return false;
3332
3333 /* Destination image must be complete */
3334 if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
3335 return false;
3336
3337 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
3338 const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
3339 const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
3340 if (region->dstOffsets[1].x < dst_width - 1 ||
3341 region->dstOffsets[1].y < dst_height - 1) {
3342 return false;
3343 }
3344
3345 /* No XY scaling */
3346 if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
3347 region->srcOffsets[1].y != region->dstOffsets[1].y) {
3348 return false;
3349 }
3350
3351 /* If the format is D24S8, both aspects need to be copied, since the TFU
3352 * can't be programmed to copy only one aspect of the image.
3353 */
3354 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
3355 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
3356 VK_IMAGE_ASPECT_STENCIL_BIT;
3357 if (region->dstSubresource.aspectMask != ds_aspects)
3358 return false;
3359 }
3360
3361 /* Our TFU blits only handle exact copies (they require the same format
3362 * on input and output, no scaling, etc.), so there are no pixel format
3363 * conversions and we can rewrite the format to use one that is TFU
3364 * compatible based on its texel size.
3365 */
3366 const struct v3dv_format *format =
3367 v3dv_get_compatible_tfu_format(cmd_buffer->device,
3368 dst->planes[0].cpp, NULL);
3369
3370 /* Emit a TFU job for each layer to blit */
3371 assert(region->dstSubresource.layerCount ==
3372 region->srcSubresource.layerCount);
3373
3374 uint32_t min_dst_layer;
3375 uint32_t max_dst_layer;
3376 bool dst_mirror_z = false;
3377 if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
3378 compute_blit_3d_layers(region->dstOffsets,
3379 &min_dst_layer, &max_dst_layer,
3380 &dst_mirror_z);
3381 } else {
3382 min_dst_layer = region->dstSubresource.baseArrayLayer;
3383 max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
3384 }
3385
3386 uint32_t min_src_layer;
3387 uint32_t max_src_layer;
3388 bool src_mirror_z = false;
3389 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
3390 compute_blit_3d_layers(region->srcOffsets,
3391 &min_src_layer, &max_src_layer,
3392 &src_mirror_z);
3393 } else {
3394 min_src_layer = region->srcSubresource.baseArrayLayer;
3395 max_src_layer = min_src_layer + region->srcSubresource.layerCount;
3396 }
3397
3398 /* No Z scaling for 3D images (for non-3D images both src and dst must
3399 * have the same layerCount).
3400 */
3401 if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
3402 return false;
3403
3404 const uint32_t layer_count = max_dst_layer - min_dst_layer;
3405 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
3406 for (uint32_t i = 0; i < layer_count; i++) {
3407 /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
3408 * only involves reversing the order of the slices.
3409 */
3410 const uint32_t dst_layer =
3411 dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
3412 const uint32_t src_layer =
3413 src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
3414
3415 const uint32_t dst_offset =
3416 dst->planes[0].mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level,
3417 dst_layer, 0);
3418 const uint32_t src_offset =
3419 src->planes[0].mem->bo->offset + v3dv_layer_offset(src, src_mip_level,
3420 src_layer, 0);
3421
3422 const struct v3d_resource_slice *dst_slice = &dst->planes[0].slices[dst_mip_level];
3423 const struct v3d_resource_slice *src_slice = &src->planes[0].slices[src_mip_level];
3424
3425 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
3426 cmd_buffer,
3427 dst->planes[0].mem->bo->handle,
3428 dst_offset,
3429 dst_slice->tiling,
3430 dst_slice->padded_height,
3431 dst->planes[0].cpp,
3432 src->planes[0].mem->bo->handle,
3433 src_offset,
3434 src_slice->tiling,
3435 src_slice->tiling == V3D_TILING_RASTER ?
3436 src_slice->stride : src_slice->padded_height,
3437 src->planes[0].cpp,
3438 dst_width, dst_height, &format->planes[0]);
3439 }
3440
3441 return true;
3442 }
3443
3444 static bool
3445 format_needs_software_int_clamp(VkFormat format)
3446 {
3447 switch (format) {
3448 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3449 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
3450 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3451 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
3452 return true;
3453 default:
3454 return false;
3455 };
3456 }
3457
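/* The blit pipeline cache key packs four 32-bit words (see the size assert
 * below): the dst format, the src format (only when a software integer clamp
 * is needed, 0 otherwise), the color write mask, and the dst/src sample
 * counts.
 */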
3458 static void
3459 get_blit_pipeline_cache_key(VkFormat dst_format,
3460 VkFormat src_format,
3461 VkColorComponentFlags cmask,
3462 VkSampleCountFlagBits dst_samples,
3463 VkSampleCountFlagBits src_samples,
3464 uint8_t *key)
3465 {
3466 memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
3467
3468 uint32_t *p = (uint32_t *) key;
3469
3470 *p = dst_format;
3471 p++;
3472
3473 /* Generally, when blitting from a larger format to a smaller format
3474 * the hardware takes care of clamping the source to the RT range.
3475 * Specifically, for integer formats, this is done by using
3476 * V3D_RENDER_TARGET_CLAMP_INT in the render target setup. However, this
3477 * clamps to the bit-size of the render type, and some formats, such as
3478 * rgb10a2_uint, have a 16-bit type, so it won't do what we need and we
3479 * have to clamp in software. In these cases, we need to amend the blit
3480 * shader with clamp code that depends on both the src and dst formats, so
3481 * we need the src format to be part of the key.
3482 */
3483 *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
3484 p++;
3485
3486 *p = cmask;
3487 p++;
3488
3489 *p = (dst_samples << 8) | src_samples;
3490 p++;
3491
3492 assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
3493 }
3494
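/* Creates two compatible render passes for blitting to dst_format that only
 * differ in the attachment load op: "load" preserves the existing destination
 * contents (needed for masked or non tile-aligned blits), while "no_load"
 * skips the TLB load when the blit overwrites whole tiles.
 */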
3495 static bool
3496 create_blit_render_pass(struct v3dv_device *device,
3497 VkFormat dst_format,
3498 VkFormat src_format,
3499 VkRenderPass *pass_load,
3500 VkRenderPass *pass_no_load)
3501 {
3502 const bool is_color_blit = vk_format_is_color(dst_format);
3503
3504 /* Attachment load operation is specified below */
3505 VkAttachmentDescription2 att = {
3506 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
3507 .format = dst_format,
3508 .samples = VK_SAMPLE_COUNT_1_BIT,
3509 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3510 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3511 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3512 };
3513
3514 VkAttachmentReference2 att_ref = {
3515 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
3516 .attachment = 0,
3517 .layout = VK_IMAGE_LAYOUT_GENERAL,
3518 };
3519
3520 VkSubpassDescription2 subpass = {
3521 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
3522 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3523 .inputAttachmentCount = 0,
3524 .colorAttachmentCount = is_color_blit ? 1 : 0,
3525 .pColorAttachments = is_color_blit ? &att_ref : NULL,
3526 .pResolveAttachments = NULL,
3527 .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3528 .preserveAttachmentCount = 0,
3529 .pPreserveAttachments = NULL,
3530 };
3531
3532 VkRenderPassCreateInfo2 info = {
3533 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
3534 .attachmentCount = 1,
3535 .pAttachments = &att,
3536 .subpassCount = 1,
3537 .pSubpasses = &subpass,
3538 .dependencyCount = 0,
3539 .pDependencies = NULL,
3540 };
3541
3542 VkResult result;
3543 att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3544 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3545 &info, &device->vk.alloc, pass_load);
3546 if (result != VK_SUCCESS)
3547 return false;
3548
3549 att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3550 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3551 &info, &device->vk.alloc, pass_no_load);
3552 return result == VK_SUCCESS;
3553 }
3554
3555 static nir_def *
3556 gen_tex_coords(nir_builder *b)
3557 {
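/* The blit vertex shader reads 20 bytes of vertex push constants (see the
 * v3dv_CmdPushConstants call in blit_shader): four floats with the source
 * texture box (x0, y0, x1, y1) at offset 0 and the source Z coordinate at
 * offset 16.
 */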
3558 nir_def *tex_box =
3559 nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3560
3561 nir_def *tex_z =
3562 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3563
3564 nir_def *vertex_id = nir_load_vertex_id(b);
3565
3566 /* vertex 0: src0_x, src0_y
3567 * vertex 1: src0_x, src1_y
3568 * vertex 2: src1_x, src0_y
3569 * vertex 3: src1_x, src1_y
3570 *
3571 * So:
3572 *
3573 * channel 0 is vertex_id < 2 ? src0_x : src1_x
3574 * channel 1 is vertex_id & 1 ? src1_y : src0_y
3575 */
3576
3577 nir_def *one = nir_imm_int(b, 1);
3578 nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
3579 nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3580
3581 nir_def *comp[4];
3582 comp[0] = nir_bcsel(b, c0cmp,
3583 nir_channel(b, tex_box, 0),
3584 nir_channel(b, tex_box, 2));
3585
3586 comp[1] = nir_bcsel(b, c1cmp,
3587 nir_channel(b, tex_box, 3),
3588 nir_channel(b, tex_box, 1));
3589 comp[2] = tex_z;
3590 comp[3] = nir_imm_float(b, 1.0f);
3591 return nir_vec(b, comp, 4);
3592 }
3593
3594 static nir_def *
3595 build_nir_tex_op_read(struct nir_builder *b,
3596 nir_def *tex_pos,
3597 enum glsl_base_type tex_type,
3598 enum glsl_sampler_dim dim)
3599 {
3600 assert(dim != GLSL_SAMPLER_DIM_MS);
3601
3602 const struct glsl_type *sampler_type =
3603 glsl_sampler_type(dim, false, false, tex_type);
3604 nir_variable *sampler =
3605 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3606 sampler->data.descriptor_set = 0;
3607 sampler->data.binding = 0;
3608
3609 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3610 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3611 tex->sampler_dim = dim;
3612 tex->op = nir_texop_tex;
3613 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3614 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3615 tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref, tex_deref);
3616 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3617 tex->is_array = glsl_sampler_type_is_array(sampler_type);
3618 tex->coord_components = tex_pos->num_components;
3619
3620 nir_def_init(&tex->instr, &tex->def, 4, 32);
3621 nir_builder_instr_insert(b, &tex->instr);
3622 return &tex->def;
3623 }
3624
3625 static nir_def *
3626 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3627 nir_variable *sampler,
3628 nir_def *tex_deref,
3629 enum glsl_base_type tex_type,
3630 nir_def *tex_pos,
3631 nir_def *sample_idx)
3632 {
3633 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3634 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3635 tex->op = nir_texop_txf_ms;
3636 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3637 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3638 tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_idx);
3639 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3640 tex->is_array = false;
3641 tex->coord_components = tex_pos->num_components;
3642
3643 nir_def_init(&tex->instr, &tex->def, 4, 32);
3644 nir_builder_instr_insert(b, &tex->instr);
3645 return &tex->def;
3646 }
3647
3648 /* Fetches all samples at the given position and averages them */
3649 static nir_def *
3650 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3651 nir_def *tex_pos,
3652 enum glsl_base_type tex_type,
3653 VkSampleCountFlagBits src_samples)
3654 {
3655 assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3656 const struct glsl_type *sampler_type =
3657 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3658 nir_variable *sampler =
3659 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3660 sampler->data.descriptor_set = 0;
3661 sampler->data.binding = 0;
3662
3663 const bool is_int = glsl_base_type_is_integer(tex_type);
3664
3665 nir_def *tmp = NULL;
3666 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3667 for (uint32_t i = 0; i < src_samples; i++) {
3668 nir_def *s =
3669 build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3670 tex_type, tex_pos,
3671 nir_imm_int(b, i));
3672
3673 /* For integer formats, the multisample resolve operation is expected to
3674 * return one of the samples; we just return the first one.
3675 */
3676 if (is_int)
3677 return s;
3678
3679 tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3680 }
3681
3682 assert(!is_int);
3683 return nir_fmul_imm(b, tmp, 1.0f / src_samples);
3684 }
3685
3686 /* Fetches the current sample (gl_SampleID) at the given position */
3687 static nir_def *
3688 build_nir_tex_op_ms_read(struct nir_builder *b,
3689 nir_def *tex_pos,
3690 enum glsl_base_type tex_type)
3691 {
3692 const struct glsl_type *sampler_type =
3693 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3694 nir_variable *sampler =
3695 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3696 sampler->data.descriptor_set = 0;
3697 sampler->data.binding = 0;
3698
3699 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3700
3701 return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3702 tex_type, tex_pos,
3703 nir_load_sample_id(b));
3704 }
3705
3706 static nir_def *
3707 build_nir_tex_op(struct nir_builder *b,
3708 struct v3dv_device *device,
3709 nir_def *tex_pos,
3710 enum glsl_base_type tex_type,
3711 VkSampleCountFlagBits dst_samples,
3712 VkSampleCountFlagBits src_samples,
3713 enum glsl_sampler_dim dim)
3714 {
3715 switch (dim) {
3716 case GLSL_SAMPLER_DIM_MS:
3717 assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3718 /* For multisampled texture sources we need to use fetching instead of
3719 * normalized texture coordinates. We already configured our blit
3720 * coordinates to be in texel units, but here we still need to convert
3721 * them from floating point to integer.
3722 */
3723 tex_pos = nir_f2i32(b, tex_pos);
3724
3725 if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3726 return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3727 else
3728 return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3729 default:
3730 assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3731 return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3732 }
3733 }
3734
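/* Builds the blit vertex shader: a 4-vertex triangle strip covering the
 * destination rectangle, with gl_Position coming from nir_gen_rect_vertices()
 * and the texture coordinates generated from the push constants above.
 */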
3735 static nir_shader *
3736 get_blit_vs()
3737 {
3738 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3739 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3740 "meta blit vs");
3741
3742 const struct glsl_type *vec4 = glsl_vec4_type();
3743
3744 nir_variable *vs_out_pos =
3745 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3746 vs_out_pos->data.location = VARYING_SLOT_POS;
3747
3748 nir_variable *vs_out_tex_coord =
3749 nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3750 vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3751 vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3752
3753 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
3754 nir_store_var(&b, vs_out_pos, pos, 0xf);
3755
3756 nir_def *tex_coord = gen_tex_coords(&b);
3757 nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3758
3759 return b.shader;
3760 }
3761
3762 static uint32_t
3763 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3764 {
3765 switch (sampler_dim) {
3766 case GLSL_SAMPLER_DIM_1D: return 0x1;
3767 case GLSL_SAMPLER_DIM_2D: return 0x3;
3768 case GLSL_SAMPLER_DIM_MS: return 0x3;
3769 case GLSL_SAMPLER_DIM_3D: return 0x7;
3770 default:
3771 unreachable("invalid sampler dim");
3772 };
3773 }
3774
3775 static nir_shader *
3776 get_color_blit_fs(struct v3dv_device *device,
3777 VkFormat dst_format,
3778 VkFormat src_format,
3779 VkSampleCountFlagBits dst_samples,
3780 VkSampleCountFlagBits src_samples,
3781 enum glsl_sampler_dim sampler_dim)
3782 {
3783 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3784 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
3785 "meta blit fs");
3786
3787 const struct glsl_type *vec4 = glsl_vec4_type();
3788
3789 nir_variable *fs_in_tex_coord =
3790 nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3791 fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3792
3793 const struct glsl_type *fs_out_type =
3794 vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3795 vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3796 glsl_vec4_type();
3797
3798 enum glsl_base_type src_base_type =
3799 vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3800 vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3801 GLSL_TYPE_FLOAT;
3802
3803 nir_variable *fs_out_color =
3804 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3805 fs_out_color->data.location = FRAG_RESULT_DATA0;
3806
3807 nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3808 const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3809 tex_coord = nir_channels(&b, tex_coord, channel_mask);
3810
3811 nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3812 dst_samples, src_samples, sampler_dim);
3813
3814 /* For integer textures, if the bit-size of the destination is too small to
3815 * hold the source value, Vulkan (CTS) expects the implementation to clamp to the
3816 * maximum value the destination can hold. The hardware can clamp to the
3817 * render target type, which usually matches the component bit-size, but
3818 * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3819 * render target type, so in these cases we need to clamp manually.
3820 */
3821 if (format_needs_software_int_clamp(dst_format)) {
3822 assert(vk_format_is_int(dst_format));
3823 enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3824 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3825
3826 nir_def *c[4];
3827 for (uint32_t i = 0; i < 4; i++) {
3828 c[i] = nir_channel(&b, color, i);
3829
3830 const uint32_t src_bit_size =
3831 util_format_get_component_bits(src_pformat,
3832 UTIL_FORMAT_COLORSPACE_RGB,
3833 i);
3834 const uint32_t dst_bit_size =
3835 util_format_get_component_bits(dst_pformat,
3836 UTIL_FORMAT_COLORSPACE_RGB,
3837 i);
3838
3839 if (dst_bit_size >= src_bit_size)
3840 continue;
3841
3842 assert(dst_bit_size > 0);
3843 if (util_format_is_pure_uint(dst_pformat)) {
3844 nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3845 c[i] = nir_umin(&b, c[i], max);
3846 } else {
3847 nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3848 nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3849 c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3850 }
3851 }
3852
3853 color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3854 }
3855
3856 nir_store_var(&b, fs_out_color, color, 0xf);
3857
3858 return b.shader;
3859 }
3860
3861 static bool
3862 create_pipeline(struct v3dv_device *device,
3863 struct v3dv_render_pass *pass,
3864 struct nir_shader *vs_nir,
3865 struct nir_shader *gs_nir,
3866 struct nir_shader *fs_nir,
3867 const VkPipelineVertexInputStateCreateInfo *vi_state,
3868 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3869 const VkPipelineColorBlendStateCreateInfo *cb_state,
3870 const VkPipelineMultisampleStateCreateInfo *ms_state,
3871 const VkPipelineLayout layout,
3872 VkPipeline *pipeline)
3873 {
3874 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
3875 struct vk_shader_module fs_m = vk_shader_module_from_nir(fs_nir);
3876 struct vk_shader_module gs_m;
3877
3878 uint32_t num_stages = gs_nir ? 3 : 2;
3879
3880
3881 VkPipelineShaderStageCreateInfo stages[3] = {
3882 {
3883 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3884 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3885 .module = vk_shader_module_to_handle(&vs_m),
3886 .pName = "main",
3887 },
3888 {
3889 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3890 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3891 .module = vk_shader_module_to_handle(&fs_m),
3892 .pName = "main",
3893 },
3894 {
3895 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3896 .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
3897 .module = VK_NULL_HANDLE,
3898 .pName = "main",
3899 },
3900 };
3901
3902 if (gs_nir) {
3903 gs_m = vk_shader_module_from_nir(gs_nir);
3904 stages[2].module = vk_shader_module_to_handle(&gs_m);
3905 }
3906
3907 VkGraphicsPipelineCreateInfo info = {
3908 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3909
3910 .stageCount = num_stages,
3911 .pStages = stages,
3912
3913 .pVertexInputState = vi_state,
3914
3915 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3916 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3917 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3918 .primitiveRestartEnable = false,
3919 },
3920
3921 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3922 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3923 .viewportCount = 1,
3924 .scissorCount = 1,
3925 },
3926
3927 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
3928 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
3929 .rasterizerDiscardEnable = false,
3930 .polygonMode = VK_POLYGON_MODE_FILL,
3931 .cullMode = VK_CULL_MODE_NONE,
3932 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
3933 .depthBiasEnable = false,
3934 },
3935
3936 .pMultisampleState = ms_state,
3937
3938 .pDepthStencilState = ds_state,
3939
3940 .pColorBlendState = cb_state,
3941
3942 /* The meta clear pipeline declares all state as dynamic.
3943 * As a consequence, vkCmdBindPipeline writes no dynamic state
3944 * to the cmd buffer. Therefore, at the end of the meta clear,
3945 * we need only restore dynamic state that was vkCmdSet.
3946 */
3947 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
3948 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
3949 .dynamicStateCount = 6,
3950 .pDynamicStates = (VkDynamicState[]) {
3951 VK_DYNAMIC_STATE_VIEWPORT,
3952 VK_DYNAMIC_STATE_SCISSOR,
3953 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
3954 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
3955 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
3956 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
3957 VK_DYNAMIC_STATE_DEPTH_BIAS,
3958 VK_DYNAMIC_STATE_LINE_WIDTH,
3959 },
3960 },
3961
3962 .flags = 0,
3963 .layout = layout,
3964 .renderPass = v3dv_render_pass_to_handle(pass),
3965 .subpass = 0,
3966 };
3967
3968 VkResult result =
3969 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
3970 VK_NULL_HANDLE,
3971 1, &info,
3972 &device->vk.alloc,
3973 pipeline);
3974
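/* The NIR shaders are only wrapped in stack-allocated vk_shader_module
 * structs for the pipeline creation call above, so they can be freed here
 * whether or not creation succeeded.
 */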
3975 ralloc_free(vs_nir);
3976 ralloc_free(gs_nir);
3977 ralloc_free(fs_nir);
3978
3979 return result == VK_SUCCESS;
3980 }
3981
3982 static enum glsl_sampler_dim
3983 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
3984 {
3985 /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
3986 *
3987 * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
3988 * VK_IMAGE_TYPE_2D, ..."
3989 */
3990 assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
3991
3992 switch (type) {
3993 case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
3994 case VK_IMAGE_TYPE_2D:
3995 return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
3996 GLSL_SAMPLER_DIM_MS;
3997 case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
3998 default:
3999 unreachable("Invalid image type");
4000 }
4001 }
4002
4003 static bool
4004 create_blit_pipeline(struct v3dv_device *device,
4005 VkFormat dst_format,
4006 VkFormat src_format,
4007 VkColorComponentFlags cmask,
4008 VkImageType src_type,
4009 VkSampleCountFlagBits dst_samples,
4010 VkSampleCountFlagBits src_samples,
4011 VkRenderPass _pass,
4012 VkPipelineLayout pipeline_layout,
4013 VkPipeline *pipeline)
4014 {
4015 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
4016
4017 /* We always rewrite depth/stencil blits to compatible color blits */
4018 assert(vk_format_is_color(dst_format));
4019 assert(vk_format_is_color(src_format));
4020
4021 const enum glsl_sampler_dim sampler_dim =
4022 get_sampler_dim(src_type, src_samples);
4023
4024 nir_shader *vs_nir = get_blit_vs();
4025 nir_shader *fs_nir =
4026 get_color_blit_fs(device, dst_format, src_format,
4027 dst_samples, src_samples, sampler_dim);
4028
4029 const VkPipelineVertexInputStateCreateInfo vi_state = {
4030 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
4031 .vertexBindingDescriptionCount = 0,
4032 .vertexAttributeDescriptionCount = 0,
4033 };
4034
4035 VkPipelineDepthStencilStateCreateInfo ds_state = {
4036 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
4037 };
4038
4039 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
4040 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
4041 .blendEnable = false,
4042 .colorWriteMask = cmask,
4043 };
4044
4045 const VkPipelineColorBlendStateCreateInfo cb_state = {
4046 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
4047 .logicOpEnable = false,
4048 .attachmentCount = 1,
4049 .pAttachments = blend_att_state
4050 };
4051
4052 const VkPipelineMultisampleStateCreateInfo ms_state = {
4053 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
4054 .rasterizationSamples = dst_samples,
4055 .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
4056 .pSampleMask = NULL,
4057 .alphaToCoverageEnable = false,
4058 .alphaToOneEnable = false,
4059 };
4060
4061 return create_pipeline(device,
4062 pass,
4063 vs_nir, NULL, fs_nir,
4064 &vi_state,
4065 &ds_state,
4066 &cb_state,
4067 &ms_state,
4068 pipeline_layout,
4069 pipeline);
4070 }
4071
4072 /**
4073 * Return a pipeline suitable for blitting the requested aspect given the
4074 * destination and source formats.
4075 */
4076 static bool
4077 get_blit_pipeline(struct v3dv_device *device,
4078 VkFormat dst_format,
4079 VkFormat src_format,
4080 VkColorComponentFlags cmask,
4081 VkImageType src_type,
4082 VkSampleCountFlagBits dst_samples,
4083 VkSampleCountFlagBits src_samples,
4084 struct v3dv_meta_blit_pipeline **pipeline)
4085 {
4086 bool ok = true;
4087
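/* Look up (or create) the pipeline in the per-source-image-type blit cache,
 * protected by the device meta mutex.
 */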
4088 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
4089 get_blit_pipeline_cache_key(dst_format, src_format, cmask,
4090 dst_samples, src_samples, key);
4091 mtx_lock(&device->meta.mtx);
4092 struct hash_entry *entry =
4093 _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
4094 if (entry) {
4095 mtx_unlock(&device->meta.mtx);
4096 *pipeline = entry->data;
4097 return true;
4098 }
4099
4100 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
4101 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
4102
4103 if (*pipeline == NULL)
4104 goto fail;
4105
4106 ok = create_blit_render_pass(device, dst_format, src_format,
4107 &(*pipeline)->pass,
4108 &(*pipeline)->pass_no_load);
4109 if (!ok)
4110 goto fail;
4111
4112 /* Create the pipeline using one of the render passes; they are both
4113 * compatible, so we don't care which one we use here.
4114 */
4115 ok = create_blit_pipeline(device,
4116 dst_format,
4117 src_format,
4118 cmask,
4119 src_type,
4120 dst_samples,
4121 src_samples,
4122 (*pipeline)->pass,
4123 device->meta.blit.p_layout,
4124 &(*pipeline)->pipeline);
4125 if (!ok)
4126 goto fail;
4127
4128 memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
4129 _mesa_hash_table_insert(device->meta.blit.cache[src_type],
4130 &(*pipeline)->key, *pipeline);
4131
4132 mtx_unlock(&device->meta.mtx);
4133 return true;
4134
4135 fail:
4136 mtx_unlock(&device->meta.mtx);
4137
4138 VkDevice _device = v3dv_device_to_handle(device);
4139 if (*pipeline) {
4140 if ((*pipeline)->pass)
4141 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
4142 if ((*pipeline)->pass_no_load)
4143 v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
4144 if ((*pipeline)->pipeline)
4145 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
4146 vk_free(&device->vk.alloc, *pipeline);
4147 *pipeline = NULL;
4148 }
4149
4150 return false;
4151 }
4152
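/* Converts a pair of blit offsets into an origin, a size clamped to the image
 * dimensions, and per-axis mirroring flags (set when offsets[1] < offsets[0]).
 */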
4153 static void
4154 compute_blit_box(const VkOffset3D *offsets,
4155 uint32_t image_w, uint32_t image_h,
4156 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
4157 bool *mirror_x, bool *mirror_y)
4158 {
4159 if (offsets[1].x >= offsets[0].x) {
4160 *mirror_x = false;
4161 *x = MIN2(offsets[0].x, image_w - 1);
4162 *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
4163 } else {
4164 *mirror_x = true;
4165 *x = MIN2(offsets[1].x, image_w - 1);
4166 *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
4167 }
4168 if (offsets[1].y >= offsets[0].y) {
4169 *mirror_y = false;
4170 *y = MIN2(offsets[0].y, image_h - 1);
4171 *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
4172 } else {
4173 *mirror_y = true;
4174 *y = MIN2(offsets[1].y, image_h - 1);
4175 *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
4176 }
4177 }
4178
4179 static void
4180 compute_blit_3d_layers(const VkOffset3D *offsets,
4181 uint32_t *min_layer, uint32_t *max_layer,
4182 bool *mirror_z)
4183 {
4184 if (offsets[1].z >= offsets[0].z) {
4185 *mirror_z = false;
4186 *min_layer = offsets[0].z;
4187 *max_layer = offsets[1].z;
4188 } else {
4189 *mirror_z = true;
4190 *min_layer = offsets[1].z;
4191 *max_layer = offsets[0].z;
4192 }
4193 }
4194
4195 static VkResult
4196 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
4197 {
4198 /* If this is not the first pool we create for this command buffer,
4199 * size it based on the size of the currently exhausted pool.
4200 */
4201 uint32_t descriptor_count = 64;
4202 if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
4203 struct v3dv_descriptor_pool *exhausted_pool =
4204 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
4205 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
4206 }
4207
4208 /* Create the descriptor pool */
4209 cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
4210 VkDescriptorPoolSize pool_size = {
4211 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4212 .descriptorCount = descriptor_count,
4213 };
4214 VkDescriptorPoolCreateInfo info = {
4215 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
4216 .maxSets = descriptor_count,
4217 .poolSizeCount = 1,
4218 .pPoolSizes = &pool_size,
4219 .flags = 0,
4220 };
4221 VkResult result =
4222 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
4223 &info,
4224 &cmd_buffer->device->vk.alloc,
4225 &cmd_buffer->meta.blit.dspool);
4226
4227 if (result == VK_SUCCESS) {
4228 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4229 const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
4230
4231 v3dv_cmd_buffer_add_private_obj(
4232 cmd_buffer, (uintptr_t) _pool,
4233 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
4234
4235 struct v3dv_descriptor_pool *pool =
4236 v3dv_descriptor_pool_from_handle(_pool);
4237 pool->is_driver_internal = true;
4238 }
4239
4240 return result;
4241 }
4242
4243 static VkResult
4244 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
4245 VkDescriptorSet *set)
4246 {
4247 /* Make sure we have a descriptor pool */
4248 VkResult result;
4249 if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
4250 result = create_blit_descriptor_pool(cmd_buffer);
4251 if (result != VK_SUCCESS)
4252 return result;
4253 }
4254 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4255
4256 /* Allocate descriptor set */
4257 struct v3dv_device *device = cmd_buffer->device;
4258 VkDevice _device = v3dv_device_to_handle(device);
4259 VkDescriptorSetAllocateInfo info = {
4260 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
4261 .descriptorPool = cmd_buffer->meta.blit.dspool,
4262 .descriptorSetCount = 1,
4263 .pSetLayouts = &device->meta.blit.ds_layout,
4264 };
4265 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4266
4267 /* If we ran out of pool space, grow the pool and try again */
4268 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
4269 result = create_blit_descriptor_pool(cmd_buffer);
4270 if (result == VK_SUCCESS) {
4271 info.descriptorPool = cmd_buffer->meta.blit.dspool;
4272 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4273 }
4274 }
4275
4276 return result;
4277 }
4278
4279 /**
4280 * Returns true if the implementation supports the requested operation (even if
4281 * it failed to process it, for example, due to an out-of-memory error).
4282 *
4283 * The caller can specify the channels on the destination to be written via the
4284 * cmask parameter (which can be 0 to default to all channels), as well as a
4285 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
4286 * to use the default identity swizzle).
4287 *
4288 * Supports multi-plane formats too.
4289 */
4290 static bool
4291 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
4292 struct v3dv_image *dst,
4293 VkFormat dst_format,
4294 struct v3dv_image *src,
4295 VkFormat src_format,
4296 VkColorComponentFlags cmask,
4297 VkComponentMapping *cswizzle,
4298 const VkImageBlit2 *region,
4299 VkFilter filter,
4300 bool dst_is_padded_image)
4301 {
4302 bool handled = true;
4303 VkResult result;
4304
4305 /* We don't support rendering to linear depth/stencil; this should have
4306 * been rewritten to a compatible color blit by the caller.
4307 */
4308 assert(dst->tiled || !vk_format_is_depth_or_stencil(dst_format));
4309
4310 /* Can't sample from linear images */
4311 if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D) {
4312 return false;
4313 }
4314
4315 /* Rewrite combined D/S blits to compatible color blits */
4316 if (vk_format_is_depth_or_stencil(dst_format)) {
4317 assert(src_format == dst_format);
4318 assert(cmask == 0);
4319 switch(dst_format) {
4320 case VK_FORMAT_D16_UNORM:
4321 dst_format = VK_FORMAT_R16_UINT;
4322 break;
4323 case VK_FORMAT_D32_SFLOAT:
4324 dst_format = VK_FORMAT_R32_UINT;
4325 break;
4326 case VK_FORMAT_X8_D24_UNORM_PACK32:
4327 case VK_FORMAT_D24_UNORM_S8_UINT:
4328 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
4329 cmask |= VK_COLOR_COMPONENT_G_BIT |
4330 VK_COLOR_COMPONENT_B_BIT |
4331 VK_COLOR_COMPONENT_A_BIT;
4332 }
4333 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
4334 assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
4335 cmask |= VK_COLOR_COMPONENT_R_BIT;
4336 }
4337 dst_format = VK_FORMAT_R8G8B8A8_UINT;
4338 break;
4339 default:
4340 unreachable("Unsupported depth/stencil format");
4341 };
4342 src_format = dst_format;
4343 }
4344
4345 uint8_t src_plane =
4346 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
4347 assert(src_plane < src->plane_count);
4348 uint8_t dst_plane =
4349 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
4350 assert(dst_plane < dst->plane_count);
4351
4352 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
4353 VK_COLOR_COMPONENT_G_BIT |
4354 VK_COLOR_COMPONENT_B_BIT |
4355 VK_COLOR_COMPONENT_A_BIT;
4356 if (cmask == 0)
4357 cmask = full_cmask;
4358
4359 VkComponentMapping ident_swizzle = {
4360 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
4361 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
4362 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
4363 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
4364 };
4365 if (!cswizzle)
4366 cswizzle = &ident_swizzle;
4367
4368 /* When we get here from a copy between compressed / uncompressed images
4369 * we choose to specify the destination blit region based on the size
4370 * semantics of the source image of the copy (see copy_image_blit), so we
4371 * need to apply those same semantics here when we compute the size of the
4372 * destination image level.
4373 */
4374 const uint32_t dst_block_w =
4375 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
4376 const uint32_t dst_block_h =
4377 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
4378 const uint32_t src_block_w =
4379 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
4380 const uint32_t src_block_h =
4381 vk_format_get_blockheight(src->planes[src_plane].vk_format);
4382 const uint32_t dst_level_w =
4383 u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
4384 region->dstSubresource.mipLevel);
4385 const uint32_t dst_level_h =
4386 u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
4387 region->dstSubresource.mipLevel);
4388
4389 const uint32_t src_level_w =
4390 u_minify(src->planes[src_plane].width, region->srcSubresource.mipLevel);
4391 const uint32_t src_level_h =
4392 u_minify(src->planes[src_plane].height, region->srcSubresource.mipLevel);
4393
4394 assert(src->plane_count == 1 || src->vk.image_type != VK_IMAGE_TYPE_3D);
4395 const uint32_t src_level_d =
4396 u_minify(src->vk.extent.depth, region->srcSubresource.mipLevel);
4397
4398 uint32_t dst_x, dst_y, dst_w, dst_h;
4399 bool dst_mirror_x, dst_mirror_y;
4400 compute_blit_box(region->dstOffsets,
4401 dst_level_w, dst_level_h,
4402 &dst_x, &dst_y, &dst_w, &dst_h,
4403 &dst_mirror_x, &dst_mirror_y);
4404
4405 uint32_t src_x, src_y, src_w, src_h;
4406 bool src_mirror_x, src_mirror_y;
4407 compute_blit_box(region->srcOffsets,
4408 src_level_w, src_level_h,
4409 &src_x, &src_y, &src_w, &src_h,
4410 &src_mirror_x, &src_mirror_y);
4411
4412 uint32_t min_dst_layer;
4413 uint32_t max_dst_layer;
4414 bool dst_mirror_z = false;
4415 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
4416 min_dst_layer = region->dstSubresource.baseArrayLayer;
4417 max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
4418 } else {
4419 compute_blit_3d_layers(region->dstOffsets,
4420 &min_dst_layer, &max_dst_layer,
4421 &dst_mirror_z);
4422 }
4423
4424 uint32_t min_src_layer;
4425 uint32_t max_src_layer;
4426 bool src_mirror_z = false;
4427 if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
4428 min_src_layer = region->srcSubresource.baseArrayLayer;
4429 max_src_layer = min_src_layer + region->srcSubresource.layerCount;
4430 } else {
4431 compute_blit_3d_layers(region->srcOffsets,
4432 &min_src_layer, &max_src_layer,
4433 &src_mirror_z);
4434 }
4435
4436 uint32_t layer_count = max_dst_layer - min_dst_layer;
4437
4438 /* Translate source blit coordinates to normalized texture coordinates for
4439 * single sampled textures. For multisampled textures we require
4440 * unnormalized coordinates, since we can only do texelFetch on them.
4441 */
4442 float coords[4] = {
4443 (float)src_x,
4444 (float)src_y,
4445 (float)(src_x + src_w),
4446 (float)(src_y + src_h),
4447 };
4448
4449 if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
4450 coords[0] /= (float)src_level_w;
4451 coords[1] /= (float)src_level_h;
4452 coords[2] /= (float)src_level_w;
4453 coords[3] /= (float)src_level_h;
4454 }
4455
4456 /* Handle mirroring */
4457 const bool mirror_x = dst_mirror_x != src_mirror_x;
4458 const bool mirror_y = dst_mirror_y != src_mirror_y;
4459 const bool mirror_z = dst_mirror_z != src_mirror_z;
4460 float tex_coords[5] = {
4461 !mirror_x ? coords[0] : coords[2],
4462 !mirror_y ? coords[1] : coords[3],
4463 !mirror_x ? coords[2] : coords[0],
4464 !mirror_y ? coords[3] : coords[1],
4465 /* Z coordinate for 3D blit sources, to be filled for each
4466 * destination layer
4467 */
4468 0.0f
4469 };
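/* These five floats are uploaded as the 20-byte vertex push constant range
 * and consumed by gen_tex_coords() in the blit vertex shader.
 */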
4470
4471 /* For blits from 3D images we also need to compute the slice coordinate to
4472 * sample from, which will change for each layer in the destination.
4473 * Compute the step by which it increases on each iteration.
4474 */
4475 const float src_z_step =
4476 (float)(max_src_layer - min_src_layer) / (float)layer_count;
4477
4478 /* Get the blit pipeline */
4479 struct v3dv_meta_blit_pipeline *pipeline = NULL;
4480 bool ok = get_blit_pipeline(cmd_buffer->device,
4481 dst_format, src_format, cmask, src->vk.image_type,
4482 dst->vk.samples, src->vk.samples,
4483 &pipeline);
4484 if (!ok)
4485 return handled;
4486 assert(pipeline && pipeline->pipeline &&
4487 pipeline->pass && pipeline->pass_no_load);
4488
4489 struct v3dv_device *device = cmd_buffer->device;
4490 assert(device->meta.blit.ds_layout);
4491
4492 VkDevice _device = v3dv_device_to_handle(device);
4493 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
4494
4495 /* Create sampler for blit source image */
4496 VkSamplerCreateInfo sampler_info = {
4497 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
4498 .magFilter = filter,
4499 .minFilter = filter,
4500 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4501 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4502 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4503 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
4504 };
4505 VkSampler sampler;
4506 result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
4507 &sampler);
4508 if (result != VK_SUCCESS)
4509 goto fail;
4510
4511 v3dv_cmd_buffer_add_private_obj(
4512 cmd_buffer, (uintptr_t)sampler,
4513 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
4514
4515 /* Push command buffer state before starting meta operation */
4516 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
4517
4518 /* Push state that is common for all layers */
4519 v3dv_CmdBindPipeline(_cmd_buffer,
4520 VK_PIPELINE_BIND_POINT_GRAPHICS,
4521 pipeline->pipeline);
4522
4523 const VkViewport viewport = {
4524 .x = dst_x,
4525 .y = dst_y,
4526 .width = dst_w,
4527 .height = dst_h,
4528 .minDepth = 0.0f,
4529 .maxDepth = 1.0f
4530 };
4531 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
4532
4533 const VkRect2D scissor = {
4534 .offset = { dst_x, dst_y },
4535 .extent = { dst_w, dst_h }
4536 };
4537 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4538
4539 bool can_skip_tlb_load = false;
4540 const VkRect2D render_area = {
4541 .offset = { dst_x, dst_y },
4542 .extent = { dst_w, dst_h },
4543 };
4544
4545 /* Record per-layer commands */
4546 for (uint32_t i = 0; i < layer_count; i++) {
4547 /* Setup framebuffer */
4548 VkImageViewCreateInfo dst_image_view_info = {
4549 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4550 .image = v3dv_image_to_handle(dst),
4551 .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
4552 .format = dst_format,
4553 .subresourceRange = {
4554 .aspectMask = region->dstSubresource.aspectMask,
4555 .baseMipLevel = region->dstSubresource.mipLevel,
4556 .levelCount = 1,
4557 .baseArrayLayer = min_dst_layer + i,
4558 .layerCount = 1
4559 },
4560 };
4561 VkImageView dst_image_view;
4562 result = v3dv_create_image_view(device, &dst_image_view_info,
4563 &dst_image_view);
4564 if (result != VK_SUCCESS)
4565 goto fail;
4566
4567 v3dv_cmd_buffer_add_private_obj(
4568 cmd_buffer, (uintptr_t)dst_image_view,
4569 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4570
4571 VkFramebufferCreateInfo fb_info = {
4572 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4573 .renderPass = pipeline->pass,
4574 .attachmentCount = 1,
4575 .pAttachments = &dst_image_view,
4576 .width = dst_x + dst_w,
4577 .height = dst_y + dst_h,
4578 .layers = 1,
4579 };
4580
4581 VkFramebuffer fb;
4582 result = v3dv_CreateFramebuffer(_device, &fb_info,
4583 &cmd_buffer->device->vk.alloc, &fb);
4584 if (result != VK_SUCCESS)
4585 goto fail;
4586
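/* Only treat the framebuffer as having edge padding when it covers the full
 * destination mip level and the caller indicated the destination image uses
 * a padded layout.
 */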
4587 struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4588 framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4589 fb_info.height == dst_level_h &&
4590 dst_is_padded_image;
4591
4592 v3dv_cmd_buffer_add_private_obj(
4593 cmd_buffer, (uintptr_t)fb,
4594 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4595
4596 /* Setup descriptor set for blit source texture. We don't have to
4597 * register the descriptor as a private command buffer object since
4598 * all descriptors will be freed automatically with the descriptor
4599 * pool.
4600 */
4601 VkDescriptorSet set;
4602 result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4603 if (result != VK_SUCCESS)
4604 goto fail;
4605
4606 VkImageViewCreateInfo src_image_view_info = {
4607 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4608 .image = v3dv_image_to_handle(src),
4609 .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
4610 .format = src_format,
4611 .components = *cswizzle,
4612 .subresourceRange = {
4613 .aspectMask = region->srcSubresource.aspectMask,
4614 .baseMipLevel = region->srcSubresource.mipLevel,
4615 .levelCount = 1,
4616 .baseArrayLayer =
4617 src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4618 .layerCount = 1
4619 },
4620 };
4621 VkImageView src_image_view;
4622 result = v3dv_create_image_view(device, &src_image_view_info,
4623 &src_image_view);
4624 if (result != VK_SUCCESS)
4625 goto fail;
4626
4627 v3dv_cmd_buffer_add_private_obj(
4628 cmd_buffer, (uintptr_t)src_image_view,
4629 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4630
4631 VkDescriptorImageInfo image_info = {
4632 .sampler = sampler,
4633 .imageView = src_image_view,
4634 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
4635 };
4636 VkWriteDescriptorSet write = {
4637 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4638 .dstSet = set,
4639 .dstBinding = 0,
4640 .dstArrayElement = 0,
4641 .descriptorCount = 1,
4642 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4643 .pImageInfo = &image_info,
4644 };
4645 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4646
4647 v3dv_CmdBindDescriptorSets(_cmd_buffer,
4648 VK_PIPELINE_BIND_POINT_GRAPHICS,
4649 device->meta.blit.p_layout,
4650 0, 1, &set,
4651 0, NULL);
4652
4653 /* If the region we are about to blit is tile-aligned, then we can
4654 * use the render pass version that won't pre-load the tile buffer
4655 * with the dst image contents before the blit. The exception is when we
4656 * don't have a full color mask, since in that case we need to preserve
4657 * the original value of some of the color components.
4658 *
4659 * Since all layers have the same area, we only need to compute this for
4660 * the first.
4661 */
4662 if (i == 0) {
4663 struct v3dv_render_pass *pipeline_pass =
4664 v3dv_render_pass_from_handle(pipeline->pass);
4665 can_skip_tlb_load =
4666 cmask == full_cmask &&
4667 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
4668 framebuffer, pipeline_pass, 0);
4669 }
4670
4671 /* Record blit */
4672 VkRenderPassBeginInfo rp_info = {
4673 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4674 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4675 pipeline->pass,
4676 .framebuffer = fb,
4677 .renderArea = render_area,
4678 .clearValueCount = 0,
4679 };
4680
4681 VkSubpassBeginInfo sp_info = {
4682 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
4683 .contents = VK_SUBPASS_CONTENTS_INLINE,
4684 };
4685
4686 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
4687 struct v3dv_job *job = cmd_buffer->state.job;
4688 if (!job)
4689 goto fail;
4690
4691 /* For 3D blits we need to compute the source slice to blit from (the Z
4692 * coordinate of the source sample operation). We want to choose this
4693 * based on the ratio of the depth of the source and the destination
4694 * images, picking the coordinate in the middle of each step.
4695 */
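      /* Illustrative example (assuming src_z_step is the ratio between the
       * source depth range and the number of destination layers): blitting
       * a 16-slice source range into 8 destination layers gives
       * src_z_step = 2, so layer i samples the source at
       * (min_src_layer + (i + 0.5) * 2) / src_level_d, i.e. the center of
       * each 2-slice span.
       */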
      if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
         tex_coords[4] =
            !mirror_z ?
            (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
            (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
      }

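      /* The 20 bytes of vertex push constants appear to hold five floats:
       * the source texture coordinate box for the blit quad plus the 3D
       * source slice selected above.
       */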
      v3dv_CmdPushConstants(_cmd_buffer,
                            device->meta.blit.p_layout,
                            VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
                            &tex_coords);

      v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);

      VkSubpassEndInfo sp_end_info = {
         .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
      };

      v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
   }

fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);

   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBlitImage2(VkCommandBuffer commandBuffer,
                   const VkBlitImageInfo2 *pBlitImageInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);

   /* From vkCmdBlitImage:
    *   "srcImage must not use a format that requires a sampler YCBCR
    *    conversion"
    *   "dstImage must not use a format that requires a sampler YCBCR
    *    conversion"
    */
   assert(src->plane_count == 1);
   assert(dst->plane_count == 1);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
          src->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
   assert(!vk_format_is_compressed(dst->vk.format));

   cmd_buffer->state.is_transfer = true;

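   /* Try the fixed-function TFU path for each region first; if it cannot
    * handle the blit (e.g. it requires scaling or other features the TFU
    * does not support), fall back to the shader-based blit.
    */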
   for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];

      if (blit_tfu(cmd_buffer, dst, src, region))
         continue;
      if (blit_shader(cmd_buffer,
                      dst, dst->vk.format,
                      src, src->vk.format,
                      0, NULL,
                      region,
                      pBlitImageInfo->filter, true)) {
         continue;
      }
      unreachable("Unsupported blit operation");
   }

   cmd_buffer->state.is_transfer = false;
}

static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                  struct v3dv_image *dst,
                  struct v3dv_image *src,
                  const VkImageResolve2 *region)
{
   /* No resolve for multi-planar images. Using plane 0 */
   assert(dst->plane_count == 1);
   assert(src->plane_count == 1);

   if (!v3dv_meta_can_use_tlb(src, 0, region->srcSubresource.mipLevel,
                              &region->srcOffset, NULL, NULL) ||
       !v3dv_meta_can_use_tlb(dst, 0, region->dstSubresource.mipLevel,
                              &region->dstOffset, &region->extent, NULL)) {
      return false;
   }

   if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
      return false;

   const VkFormat fb_format = src->vk.format;

   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

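   /* Emit a single CL job whose RCL loads each multisampled source layer
    * into the tile buffer and stores it back resolved to the destination.
    */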
   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   const uint32_t block_w =
      vk_format_get_blockwidth(dst->planes[0].vk_format);
   const uint32_t block_h =
      vk_format_get_blockheight(dst->planes[0].vk_format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->srcSubresource.aspectMask,
       &internal_type, &internal_bpp);

   v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
                        internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
                        true);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type,
                                              &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
                                                    &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);
   return true;
}

static bool
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *dst,
                   struct v3dv_image *src,
                   const VkImageResolve2 *region)
{
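   /* Fallback path: express the resolve as a same-size blit of the region
    * and let the shader blit handle reading from the multisampled source.
    */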
   const VkImageBlit2 blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = {
         region->srcOffset,
         {
            region->srcOffset.x + region->extent.width,
            region->srcOffset.y + region->extent.height,
         }
      },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = {
         region->dstOffset,
         {
            region->dstOffset.x + region->extent.width,
            region->dstOffset.y + region->extent.height,
         }
      },
   };
   return blit_shader(cmd_buffer,
                      dst, dst->vk.format,
                      src, src->vk.format,
                      0, NULL,
                      &blit_region, VK_FILTER_NEAREST, true);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResolveImage2(VkCommandBuffer commandBuffer,
                      const VkResolveImageInfo2 *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
   assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* We don't support multi-sampled multi-plane images */
   assert(src->plane_count == 1);
   assert(dst->plane_count == 1);

   cmd_buffer->state.is_transfer = true;

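   /* Prefer the TLB resolve path for each region and fall back to a
    * shader blit when the TLB path can't handle it.
    */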
   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Unsupported multisample resolve operation");
   }

   cmd_buffer->state.is_transfer = false;
}