1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vk_common_entrypoints.h"
30
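/* Hash/compare callbacks for the meta blit pipeline cache; the cache key is
 * the raw V3DV_META_BLIT_CACHE_KEY_SIZE byte blob.
 */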
31 static uint32_t
32 meta_blit_key_hash(const void *key)
33 {
34 return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
35 }
36
37 static bool
38 meta_blit_key_compare(const void *key1, const void *key2)
39 {
40 return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
41 }
42
43 static bool
44 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
45 VkImageAspectFlags aspect,
46 struct v3dv_image *image,
47 VkFormat dst_format,
48 VkFormat src_format,
49 struct v3dv_buffer *buffer,
50 uint32_t buffer_bpp,
51 VkColorComponentFlags cmask,
52 VkComponentMapping *cswizzle,
53 uint32_t region_count,
54 const VkBufferImageCopy2 *regions);
55
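/* Creates (on first use) the descriptor set layout with a single combined
 * image/sampler binding and the pipeline layout (with a 20-byte vertex-stage
 * push constant range) shared by all meta blit pipelines.
 */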
56 static bool
57 create_blit_pipeline_layout(struct v3dv_device *device,
58 VkDescriptorSetLayout *descriptor_set_layout,
59 VkPipelineLayout *pipeline_layout)
60 {
61 VkResult result;
62
63 if (*descriptor_set_layout == 0) {
64 VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
65 .binding = 0,
66 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
67 .descriptorCount = 1,
68 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
69 };
70 VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
71 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
72 .bindingCount = 1,
73 .pBindings = &descriptor_set_layout_binding,
74 };
75 result =
76 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
77 &descriptor_set_layout_info,
78 &device->vk.alloc,
79 descriptor_set_layout);
80 if (result != VK_SUCCESS)
81 return false;
82 }
83
84 assert(*pipeline_layout == 0);
85 VkPipelineLayoutCreateInfo pipeline_layout_info = {
86 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
87 .setLayoutCount = 1,
88 .pSetLayouts = descriptor_set_layout,
89 .pushConstantRangeCount = 1,
90 .pPushConstantRanges =
91 &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
92 };
93
94 result =
95 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
96 &pipeline_layout_info,
97 &device->vk.alloc,
98 pipeline_layout);
99 return result == VK_SUCCESS;
100 }
101
102 void
103 v3dv_meta_blit_init(struct v3dv_device *device)
104 {
105 for (uint32_t i = 0; i < 3; i++) {
106 device->meta.blit.cache[i] =
107 _mesa_hash_table_create(NULL,
108 meta_blit_key_hash,
109 meta_blit_key_compare);
110 }
111
112 create_blit_pipeline_layout(device,
113 &device->meta.blit.ds_layout,
114 &device->meta.blit.p_layout);
115 }
116
117 static void
118 destroy_meta_blit_pipeline(VkDevice vk_device,
119 uint64_t obj,
120 VkAllocationCallbacks *alloc)
121 {
122 struct v3dv_meta_blit_pipeline *p =
123 (struct v3dv_meta_blit_pipeline *)(uintptr_t) obj;
124 v3dv_DestroyPipeline(vk_device, p->pipeline, alloc);
125 v3dv_DestroyRenderPass(vk_device, p->pass, alloc);
126 v3dv_DestroyRenderPass(vk_device, p->pass_no_load, alloc);
127 vk_free(alloc, p);
128 }
129
130 void
131 v3dv_meta_blit_finish(struct v3dv_device *device)
132 {
133 VkDevice _device = v3dv_device_to_handle(device);
134
135 for (uint32_t i = 0; i < 3; i++) {
136 hash_table_foreach(device->meta.blit.cache[i], entry) {
137 destroy_meta_blit_pipeline(_device, (uintptr_t)entry->data,
138 &device->vk.alloc);
139 }
140 _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
141 }
142
143 if (device->meta.blit.p_layout) {
144 v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
145 &device->vk.alloc);
146 }
147
148 if (device->meta.blit.ds_layout) {
149 v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
150 &device->vk.alloc);
151 }
152 }
153
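/* Hash/compare callbacks for the meta texel buffer copy pipeline cache. */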
154 static uint32_t
155 meta_texel_buffer_copy_key_hash(const void *key)
156 {
157 return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
158 }
159
160 static bool
161 meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
162 {
163 return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
164 }
165
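/* Creates (on first use) the descriptor set layout with a single uniform
 * texel buffer binding and the pipeline layout (with fragment and geometry
 * push constant ranges) shared by all meta texel buffer copy pipelines.
 */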
166 static bool
167 create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
168 VkDescriptorSetLayout *ds_layout,
169 VkPipelineLayout *p_layout)
170 {
171 VkResult result;
172
173 if (*ds_layout == 0) {
174 VkDescriptorSetLayoutBinding ds_layout_binding = {
175 .binding = 0,
176 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
177 .descriptorCount = 1,
178 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
179 };
180 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
181 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
182 .bindingCount = 1,
183 .pBindings = &ds_layout_binding,
184 };
185 result =
186 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
187 &ds_layout_info,
188 &device->vk.alloc,
189 ds_layout);
190 if (result != VK_SUCCESS)
191 return false;
192 }
193
194 assert(*p_layout == 0);
195 /* FIXME: this is abusing the API a bit, since not all of our copy
196 * pipelines have a geometry shader. We could create 2 different pipeline
197 * layouts, but this works for us for now.
198 */
199 #define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
200 #define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
201 #define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
202 #define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
203 VkPushConstantRange ranges[2] = {
204 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
205 { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
206 };
207
208 VkPipelineLayoutCreateInfo p_layout_info = {
209 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
210 .setLayoutCount = 1,
211 .pSetLayouts = ds_layout,
212 .pushConstantRangeCount = 2,
213 .pPushConstantRanges = ranges,
214 };
215
216 result =
217 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
218 &p_layout_info,
219 &device->vk.alloc,
220 p_layout);
221 return result == VK_SUCCESS;
222 }
223
224 void
225 v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
226 {
227 for (uint32_t i = 0; i < 3; i++) {
228 device->meta.texel_buffer_copy.cache[i] =
229 _mesa_hash_table_create(NULL,
230 meta_texel_buffer_copy_key_hash,
231 meta_texel_buffer_copy_key_compare);
232 }
233
234 create_texel_buffer_copy_pipeline_layout(
235 device,
236 &device->meta.texel_buffer_copy.ds_layout,
237 &device->meta.texel_buffer_copy.p_layout);
238 }
239
240 static void
241 destroy_meta_texel_buffer_copy_pipeline(VkDevice vk_device,
242 uint64_t obj,
243 VkAllocationCallbacks *alloc)
244 {
245 struct v3dv_meta_texel_buffer_copy_pipeline *p =
246 (struct v3dv_meta_texel_buffer_copy_pipeline *)(uintptr_t) obj;
247 v3dv_DestroyPipeline(vk_device, p->pipeline, alloc);
248 v3dv_DestroyRenderPass(vk_device, p->pass, alloc);
249 v3dv_DestroyRenderPass(vk_device, p->pass_no_load, alloc);
250 vk_free(alloc, p);
251 }
252
253 void
254 v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
255 {
256 VkDevice _device = v3dv_device_to_handle(device);
257
258 for (uint32_t i = 0; i < 3; i++) {
259 hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
260 destroy_meta_texel_buffer_copy_pipeline(_device, (uintptr_t)entry->data,
261 &device->vk.alloc);
262 }
263 _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
264 }
265
266 if (device->meta.texel_buffer_copy.p_layout) {
267 v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
268 &device->vk.alloc);
269 }
270
271 if (device->meta.texel_buffer_copy.ds_layout) {
272 v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
273 &device->vk.alloc);
274 }
275 }
276
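/* Returns a TLB-renderable format with the same bpp as the given format so
 * we can still implement copies through the TLB when the original format
 * cannot be written by it, or VK_FORMAT_UNDEFINED if there is no alternative.
 */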
277 static VkFormat
278 get_compatible_tlb_format(VkFormat format)
279 {
280 switch (format) {
281 case VK_FORMAT_R8G8B8A8_SNORM:
282 return VK_FORMAT_R8G8B8A8_UINT;
283
284 case VK_FORMAT_R8G8_SNORM:
285 return VK_FORMAT_R8G8_UINT;
286
287 case VK_FORMAT_R8_SNORM:
288 return VK_FORMAT_R8_UINT;
289
290 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
291 return VK_FORMAT_A8B8G8R8_UINT_PACK32;
292
293 case VK_FORMAT_R16_UNORM:
294 case VK_FORMAT_R16_SNORM:
295 return VK_FORMAT_R16_UINT;
296
297 case VK_FORMAT_R16G16_UNORM:
298 case VK_FORMAT_R16G16_SNORM:
299 return VK_FORMAT_R16G16_UINT;
300
301 case VK_FORMAT_R16G16B16A16_UNORM:
302 case VK_FORMAT_R16G16B16A16_SNORM:
303 return VK_FORMAT_R16G16B16A16_UINT;
304
305 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
306 return VK_FORMAT_R32_SFLOAT;
307
308 /* We can't render to compressed formats using the TLB so instead we use
309 * a compatible format with the same bpp as the compressed format. Because
310 * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
311 * case of ETC), when we implement copies with the compatible format we
312 * will have to divide offsets and dimensions on the compressed image by
313 * the compressed block size.
314 */
315 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
316 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
317 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
318 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
319 case VK_FORMAT_BC2_UNORM_BLOCK:
320 case VK_FORMAT_BC2_SRGB_BLOCK:
321 case VK_FORMAT_BC3_SRGB_BLOCK:
322 case VK_FORMAT_BC3_UNORM_BLOCK:
323 case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
324 case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
325 case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
326 case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
327 case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
328 case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
329 case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
330 case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
331 case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
332 case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
333 case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
334 case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
335 case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
336 case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
337 case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
338 case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
339 case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
340 case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
341 case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
342 case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
343 case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
344 case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
345 case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
346 case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
347 case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
348 case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
349 case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
350 case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
351 return VK_FORMAT_R32G32B32A32_UINT;
352
353 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
354 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
355 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
356 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
357 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
358 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
359 case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
360 case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
361 case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
362 case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
363 return VK_FORMAT_R16G16B16A16_UINT;
364
365 default:
366 return VK_FORMAT_UNDEFINED;
367 }
368 }
369
370 /**
371 * Checks if we can implement an image copy or clear operation using the TLB
372 * hardware.
373 *
374 * The extent and miplevel are only used to validate tile stores (to match the
375 * region to store against the miplevel dimensions to avoid cases where
376 * the region to store is not aligned to tile boundaries). If extent is
377 * NULL no checks are done (which is fine if the image will only be used for a
378 * TLB load or when we know in advance that the store will be for the entire
379 * size of the image miplevel).
380 *
381 * For TLB copies we are doing a per-plane copy, so for multi-plane formats,
382 * the compatible format will be single-plane.
383 */
384 bool
385 v3dv_meta_can_use_tlb(struct v3dv_image *image,
386 uint8_t plane,
387 uint8_t miplevel,
388 const VkOffset3D *offset,
389 const VkExtent3D *extent,
390 VkFormat *compat_format)
391 {
392 if (offset->x != 0 || offset->y != 0)
393 return false;
394
395 /* FIXME: this is suboptimal; what we really want to check is that the
396 * extent of the region to copy is the full slice or a multiple of the
397 * tile size.
398 */
399 if (extent) {
400 struct v3d_resource_slice *slice = &image->planes[plane].slices[miplevel];
401 if (slice->width != extent->width || slice->height != extent->height)
402 return false;
403 }
404
405 if (image->format->planes[plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
406 if (compat_format)
407 *compat_format = image->planes[plane].vk_format;
408 return true;
409 }
410
411 /* If the image format is not TLB-supported, then check if we can use
412 * a compatible format instead.
413 */
414 if (compat_format) {
415 *compat_format = get_compatible_tlb_format(image->planes[plane].vk_format);
416 if (*compat_format != VK_FORMAT_UNDEFINED) {
417 assert(vk_format_get_plane_count(*compat_format) == 1);
418 return true;
419 }
420 }
421
422 return false;
423 }
424
425 /* Implements a copy using the TLB.
426 *
427 * This only works if we are copying from offset (0,0), since a TLB store for
428 * tile (x,y) will be written at the same tile offset into the destination.
429 * When this requirement is not met, we need to use a blit instead.
430 *
431 * Returns true if the implementation supports the requested operation (even if
432 * it failed to process it, for example, due to an out-of-memory error).
433 *
434 */
435 static bool
436 copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
437 struct v3dv_buffer *buffer,
438 struct v3dv_image *image,
439 const VkBufferImageCopy2 *region)
440 {
441 VkFormat fb_format;
442 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
443 assert(plane < image->plane_count);
444
445 if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
446 &region->imageOffset, &region->imageExtent,
447 &fb_format)) {
448 return false;
449 }
450
451 uint32_t internal_type, internal_bpp;
452 v3d_X((&cmd_buffer->device->devinfo), get_internal_type_bpp_for_image_aspects)
453 (fb_format, region->imageSubresource.aspectMask,
454 &internal_type, &internal_bpp);
455
456 uint32_t num_layers;
457 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
458 num_layers = vk_image_subresource_layer_count(&image->vk,
459 &region->imageSubresource);
460 } else {
461 num_layers = region->imageExtent.depth;
462 }
463 assert(num_layers > 0);
464
465 struct v3dv_job *job =
466 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
467 if (!job)
468 return true;
469
470 /* Handle copy from compressed format using a compatible format */
471 const uint32_t block_w =
472 vk_format_get_blockwidth(image->planes[plane].vk_format);
473 const uint32_t block_h =
474 vk_format_get_blockheight(image->planes[plane].vk_format);
475 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
476 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
477
478 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
479 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
480 false);
481
482 struct v3dv_meta_framebuffer framebuffer;
483 v3d_X((&job->device->devinfo), meta_framebuffer_init)(&framebuffer, fb_format,
484 internal_type, &job->frame_tiling);
485
486 v3d_X((&job->device->devinfo), job_emit_binning_flush)(job);
487 v3d_X((&job->device->devinfo), meta_emit_copy_image_to_buffer_rcl)
488 (job, buffer, image, &framebuffer, region);
489
490 v3dv_cmd_buffer_finish_job(cmd_buffer);
491
492 return true;
493 }
494
495 static bool
496 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
497 struct v3dv_image *dst,
498 VkFormat dst_format,
499 struct v3dv_image *src,
500 VkFormat src_format,
501 VkColorComponentFlags cmask,
502 VkComponentMapping *cswizzle,
503 const VkImageBlit2 *region,
504 VkFilter filter,
505 bool dst_is_padded_image);
506
507
508 /**
509 * A structure that contains all the information we may need in various
510 * processes involving image to buffer copies implemented with blit paths.
511 */
512 struct image_to_buffer_info {
513 /* Source image info */
514 VkFormat src_format;
515 uint8_t plane;
516 VkColorComponentFlags cmask;
517 VkComponentMapping cswizzle;
518 VkImageAspectFlags src_copy_aspect;
519 uint32_t block_width;
520 uint32_t block_height;
521
522 /* Destination buffer info */
523 VkFormat dst_format;
524 uint32_t buf_width;
525 uint32_t buf_height;
526 uint32_t buf_bpp;
527 VkImageAspectFlags dst_copy_aspect;
528 };
529
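/* Builds the VkImageBlit2 used to copy a single layer of the source image
 * into the linear image aliasing the destination buffer, with offsets and
 * extents expressed in block units of the source format.
 */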
530 static VkImageBlit2
531 blit_region_for_image_to_buffer(const VkOffset3D *offset,
532 const VkExtent3D *extent,
533 uint32_t mip_level,
534 uint32_t base_layer,
535 uint32_t layer_offset,
536 struct image_to_buffer_info *info)
537 {
538 VkImageBlit2 output = {
539 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
540 .srcSubresource = {
541 .aspectMask = info->src_copy_aspect,
542 .mipLevel = mip_level,
543 .baseArrayLayer = base_layer + layer_offset,
544 .layerCount = 1,
545 },
546 .srcOffsets = {
547 {
548 DIV_ROUND_UP(offset->x, info->block_width),
549 DIV_ROUND_UP(offset->y, info->block_height),
550 offset->z + layer_offset,
551 },
552 {
553 DIV_ROUND_UP(offset->x + extent->width, info->block_width),
554 DIV_ROUND_UP(offset->y + extent->height, info->block_height),
555 offset->z + layer_offset + 1,
556 },
557 },
558 .dstSubresource = {
559 .aspectMask = info->dst_copy_aspect,
560 .mipLevel = 0,
561 .baseArrayLayer = 0,
562 .layerCount = 1,
563 },
564 .dstOffsets = {
565 { 0, 0, 0 },
566 {
567 DIV_ROUND_UP(extent->width, info->block_width),
568 DIV_ROUND_UP(extent->height, info->block_height),
569 1
570 },
571 },
572 };
573
574 return output;
575 }
576
577 /**
578 * Produces an image_to_buffer_info struct from a VkBufferImageCopy2 that we can
579 * use to implement buffer to image copies with blit paths.
580 *
581 * Returns false if the copy operation can't be implemented with a blit.
582 */
583 static bool
584 gather_image_to_buffer_info(struct v3dv_cmd_buffer *cmd_buffer,
585 struct v3dv_image *image,
586 const VkBufferImageCopy2 *region,
587 struct image_to_buffer_info *out_info)
588 {
589 bool supported = false;
590
591 VkImageAspectFlags dst_copy_aspect = region->imageSubresource.aspectMask;
592 /* For multi-planar images we copy one plane at a time using an image alias
593 * with a color aspect for each plane.
594 */
595 if (image->plane_count > 1)
596 dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
597
598 VkImageAspectFlags src_copy_aspect = region->imageSubresource.aspectMask;
599 uint8_t plane = v3dv_plane_from_aspect(src_copy_aspect);
600 assert(plane < image->plane_count);
601
602 /* Generally, the bpp of the data in the buffer matches that of the
603 * source image. The exception is the case where we are copying
604 * stencil (8bpp) from a combined d24s8 image (32bpp).
605 */
606 uint32_t buffer_bpp = image->planes[plane].cpp;
607
608 /* Because we are going to implement the copy as a blit, we need to create
609 * a linear image from the destination buffer and we also want our blit
610 * source and destination formats to be the same (to avoid any format
611 * conversions), so we choose a canonical format that matches the
612 * source image bpp.
613 *
614 * The exception to the above is copying from combined depth/stencil images
615 * because we are copying only one aspect of the image, so we need to setup
616 * our formats, color write mask and source swizzle mask to match that.
617 */
618 VkFormat dst_format;
619 VkFormat src_format;
620 VkColorComponentFlags cmask = 0; /* All components */
621 VkComponentMapping cswizzle = {
622 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
623 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
624 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
625 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
626 };
627 switch (buffer_bpp) {
628 case 16:
629 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
630 dst_format = VK_FORMAT_R32G32B32A32_UINT;
631 src_format = dst_format;
632 break;
633 case 8:
634 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
635 dst_format = VK_FORMAT_R16G16B16A16_UINT;
636 src_format = dst_format;
637 break;
638 case 4:
639 switch (dst_copy_aspect) {
640 case VK_IMAGE_ASPECT_COLOR_BIT:
641 src_format = VK_FORMAT_R8G8B8A8_UINT;
642 dst_format = VK_FORMAT_R8G8B8A8_UINT;
643 break;
644 case VK_IMAGE_ASPECT_DEPTH_BIT:
645 assert(image->plane_count == 1);
646 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
647 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
648 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
649 if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
650 src_format = VK_FORMAT_R32_UINT;
651 dst_format = VK_FORMAT_R32_UINT;
652 } else {
653 /* We want to write depth in the buffer in the first 24-bits,
654 * however, the hardware has depth in bits 8-31, so swizzle the
655 * source components to match what we want. Also, we don't
656 * want to write bits 24-31 in the destination.
657 */
658 src_format = VK_FORMAT_R8G8B8A8_UINT;
659 dst_format = VK_FORMAT_R8G8B8A8_UINT;
660 cmask = VK_COLOR_COMPONENT_R_BIT |
661 VK_COLOR_COMPONENT_G_BIT |
662 VK_COLOR_COMPONENT_B_BIT;
663 cswizzle.r = VK_COMPONENT_SWIZZLE_G;
664 cswizzle.g = VK_COMPONENT_SWIZZLE_B;
665 cswizzle.b = VK_COMPONENT_SWIZZLE_A;
666 cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
667 }
668 break;
669 case VK_IMAGE_ASPECT_STENCIL_BIT:
670 assert(image->plane_count == 1);
671 assert(dst_copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
672 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
673 /* Copying from S8D24. We want to write 8-bit stencil values only,
674 * so adjust the buffer bpp for that. Since the hardware stores stencil
675 * in the LSB, we can just do a RGBA8UI to R8UI blit.
676 */
677 src_format = VK_FORMAT_R8G8B8A8_UINT;
678 dst_format = VK_FORMAT_R8_UINT;
679 buffer_bpp = 1;
680 break;
681 default:
682 unreachable("unsupported aspect");
683 return supported;
684 };
685 break;
686 case 2:
687 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
688 dst_copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
689 dst_format = VK_FORMAT_R16_UINT;
690 src_format = dst_format;
691 break;
692 case 1:
693 assert(dst_copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
694 dst_format = VK_FORMAT_R8_UINT;
695 src_format = dst_format;
696 break;
697 default:
698 unreachable("unsupported bit-size");
699 return supported;
700 };
701
702 /* The hardware doesn't support linear depth/stencil stores, so we
703 * implement copies of depth/stencil aspect as color copies using a
704 * compatible color format.
705 */
706 assert(vk_format_is_color(src_format));
707 assert(vk_format_is_color(dst_format));
708 dst_copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
709
710 /* We should be able to handle the blit if we got this far */
711 supported = true;
712
713 /* Obtain the 2D buffer region spec */
714 uint32_t buf_width, buf_height;
715 if (region->bufferRowLength == 0)
716 buf_width = region->imageExtent.width;
717 else
718 buf_width = region->bufferRowLength;
719
720 if (region->bufferImageHeight == 0)
721 buf_height = region->imageExtent.height;
722 else
723 buf_height = region->bufferImageHeight;
724
725 /* If the image is compressed, the bpp refers to blocks, not pixels */
726 uint32_t block_width =
727 vk_format_get_blockwidth(image->planes[plane].vk_format);
728 uint32_t block_height =
729 vk_format_get_blockheight(image->planes[plane].vk_format);
730 buf_width = DIV_ROUND_UP(buf_width, block_width);
731 buf_height = DIV_ROUND_UP(buf_height, block_height);
732
733 out_info->src_format = src_format;
734 out_info->dst_format = dst_format;
735 out_info->src_copy_aspect = src_copy_aspect;
736 out_info->dst_copy_aspect = dst_copy_aspect;
737 out_info->buf_width = buf_width;
738 out_info->buf_height = buf_height;
739 out_info->buf_bpp = buffer_bpp;
740 out_info->block_width = block_width;
741 out_info->block_height = block_height;
742 out_info->cmask = cmask;
743 out_info->cswizzle = cswizzle;
744 out_info->plane = plane;
745
746 return supported;
747 }
748
749 /* Creates a linear image to alias buffer memory. It also includes that image
750 * as a private object in the cmd_buffer.
751 *
752 * This is used for cases where we want to implement an image to buffer copy,
753 * but we need to rely on a mechanism that uses an image as destination, like
754 * blitting.
755 */
756 static VkResult
757 create_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
758 struct v3dv_buffer *buffer,
759 const VkBufferImageCopy2 *region,
760 struct image_to_buffer_info *info,
761 uint32_t layer,
762 VkImage *out_image)
763 {
764 VkImageCreateInfo image_info = {
765 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
766 .imageType = VK_IMAGE_TYPE_2D,
767 .format = info->dst_format,
768 .extent = { info->buf_width, info->buf_height, 1 },
769 .mipLevels = 1,
770 .arrayLayers = 1,
771 .samples = VK_SAMPLE_COUNT_1_BIT,
772 .tiling = VK_IMAGE_TILING_LINEAR,
773 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
774 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
775 .queueFamilyIndexCount = 0,
776 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
777 };
778
779 VkResult result;
780 struct v3dv_device *device = cmd_buffer->device;
781 VkDevice _device = v3dv_device_to_handle(device);
782
783 VkImage buffer_image;
784 result =
785 v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
786 if (result != VK_SUCCESS)
787 return result;
788
789 *out_image = buffer_image;
790
791 v3dv_cmd_buffer_add_private_obj(
792 cmd_buffer, (uintptr_t)buffer_image,
793 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
794
795 /* Bind the buffer memory to the image
796 */
797 VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
798 layer * info->buf_width * info->buf_height * info->buf_bpp;
799
800 result =
801 vk_common_BindImageMemory(_device, buffer_image,
802 v3dv_device_memory_to_handle(buffer->mem),
803 buffer_offset);
804 return result;
805 }
806
807 /**
808 * Creates an image with a single mip level that aliases the memory of a
809 * mip level in another image, re-interpreting the memory with an uncompressed
810 * format. The image is added to the command buffer as a private object for
811 * disposal.
812 */
813 static bool
814 create_image_mip_level_alias(struct v3dv_cmd_buffer *cmd_buffer,
815 struct v3dv_image *image,
816 VkFormat format,
817 uint32_t plane,
818 uint32_t mip_level,
819 uint32_t layer,
820 VkImage *alias)
821 {
822 VkResult result;
823 assert(!vk_format_is_compressed(format));
824
825 struct v3dv_device *device = cmd_buffer->device;
826 VkDevice vk_device = v3dv_device_to_handle(device);
827 uint32_t mip_width = image->planes[plane].slices[mip_level].width;
828 uint32_t mip_height = image->planes[plane].slices[mip_level].height;
829
830 uint32_t block_width =
831 vk_format_get_blockwidth(image->planes[plane].vk_format);
832 uint32_t block_height =
833 vk_format_get_blockheight(image->planes[plane].vk_format);
834
835 VkImageCreateInfo info = {
836 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
837 .imageType = image->vk.image_type,
838 .format = format,
839 .extent = { DIV_ROUND_UP(mip_width, block_width),
840 DIV_ROUND_UP(mip_height, block_height),
841 1 },
842 .mipLevels = 1,
843 .arrayLayers = 1,
844 .samples = image->vk.samples,
845 .tiling = image->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
846 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
847 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
848 .queueFamilyIndexCount = 0,
849 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
850 };
851 result = v3dv_CreateImage(vk_device, &info, &device->vk.alloc, alias);
852 if (result != VK_SUCCESS)
853 return false;
854
855 /* The alias we have just created has just one mip, but we may be aliasing
856 * any mip in the original image. Because the slice setup changes based on
857 * the mip (particularly, for mips >= 2 it uses power of 2 sizes internally)
858 * and this can influence the tiling layout selected for the slice, we want
859 * to make sure we copy the slice description from the actual mip level in
860 * the original image, and then rewrite any fields that we need for the
861 * alias. Particularly, we want to make the offset 0 because we are going to
862 * bind the underlying image memory exactly at the start of the selected mip.
863 * We also want to relax the image alignment requirements to the minimum
864 * (the one imposed by the Texture Base Address field) since we may not be
865 * aliasing a level 0 (for which we typically want a page alignment for
866 * optimal performance).
867 */
868 V3DV_FROM_HANDLE(v3dv_image, v3dv_alias, *alias);
869 v3dv_alias->planes[plane].slices[0] = image->planes[plane].slices[mip_level];
870 v3dv_alias->planes[plane].slices[0].width = info.extent.width;
871 v3dv_alias->planes[plane].slices[0].height = info.extent.height;
872 v3dv_alias->planes[plane].slices[0].offset = 0;
873 v3dv_alias->planes[plane].alignment = 64;
874
875 v3dv_cmd_buffer_add_private_obj(
876 cmd_buffer, (uintptr_t)*alias,
877 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
878
879 result =
880 vk_common_BindImageMemory(vk_device, *alias,
881 v3dv_device_memory_to_handle(image->planes[plane].mem),
882 v3dv_layer_offset(image, mip_level, layer, plane));
883 return result == VK_SUCCESS;
884 }
885
886 /**
887 * Returns true if the implementation supports the requested operation (even if
888 * it failed to process it, for example, due to an out-of-memory error).
889 */
890 static bool
891 copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
892 struct v3dv_buffer *buffer,
893 struct v3dv_image *image,
894 const VkBufferImageCopy2 *region)
895 {
896 bool handled = false;
897 struct image_to_buffer_info info;
898
899 /* This path uses a shader blit which doesn't support linear images. Return
900 * early to avoid all the heavy lifting in preparation for the
901 * blit_shader() call that is bound to fail in that scenario.
902 */
903 if (!image->tiled && image->vk.image_type != VK_IMAGE_TYPE_1D) {
904 return handled;
905 }
906
907 handled = gather_image_to_buffer_info(cmd_buffer, image, region,
908 &info);
909
910 if (!handled)
911 return handled;
912
913 /* We should be able to handle the blit if we got this far */
914 handled = true;
915
916 /* Compute layers to copy */
917 uint32_t num_layers;
918 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
919 num_layers = vk_image_subresource_layer_count(&image->vk,
920 &region->imageSubresource);
921 } else {
922 num_layers = region->imageExtent.depth;
923 }
924 assert(num_layers > 0);
925
926 /* Copy requested layers */
927 VkResult result;
928 VkImageBlit2 blit_region;
929 uint32_t mip_level = region->imageSubresource.mipLevel;
930 uint32_t base_layer = region->imageSubresource.baseArrayLayer;
931 for (uint32_t i = 0; i < num_layers; i++) {
932 uint32_t layer_offset = i;
933
934 if (vk_format_is_compressed(image->vk.format)) {
935 /* Our blit interface can see the real format of the images to detect
936 * copies between compressed and uncompressed images and adapt the
937 * blit region accordingly. Here we are just doing a raw copy of
938 * compressed data, but we are passing an uncompressed view of the
939 * buffer for the blit destination image (since compressed formats are
940 * not renderable), so we also want to provide an uncompressed view of
941 * the source image.
942 *
943 * It is important that we create the alias over the selected mip
944 * level (instead of aliasing the entire image) because an uncompressed
945 * view of the image won't have the same number of mip levels as the
946 * original image and the implicit mip size calculations the hw will
947 * do to sample from a non-zero mip level may not match exactly between
948 * compressed and uncompressed views.
949 */
950 VkImage alias;
951 if (!create_image_mip_level_alias(cmd_buffer, image, info.dst_format,
952 info.plane, mip_level,
953 base_layer + layer_offset,
954 &alias)) {
955 return handled;
956 }
957
958 /* We are aliasing the selected mip level and layer with a
959 * single-mip and single-layer image.
960 */
961 image = v3dv_image_from_handle(alias);
962 mip_level = 0;
963 base_layer = 0;
964 layer_offset = 0;
965 }
966
967 /* Create the destination blit image from the destination buffer */
968 VkImage buffer_image;
969 result =
970 create_image_from_buffer(cmd_buffer, buffer, region, &info,
971 i, &buffer_image);
972 if (result != VK_SUCCESS)
973 return handled;
974
975 /* Blit-copy the requested image extent.
976 *
977 * Since we are copying, the blit must use the same format on the
978 * destination and source images to avoid format conversions. The
979 * only exception is copying stencil, which we upload to a R8UI source
980 * image, but that we need to blit to a S8D24 destination (the only
981 * stencil format we support).
982 */
983 blit_region =
984 blit_region_for_image_to_buffer(&region->imageOffset,
985 &region->imageExtent,
986 mip_level, base_layer, layer_offset,
987 &info);
988
989 handled = blit_shader(cmd_buffer,
990 v3dv_image_from_handle(buffer_image),
991 info.dst_format,
992 image, info.src_format,
993 info.cmask, &info.cswizzle,
994 &blit_region, VK_FILTER_NEAREST, false);
995 if (!handled) {
996 /* This is unexpected, we should have a supported blit spec */
997 unreachable("Unable to blit buffer to destination image");
998 return false;
999 }
1000 }
1001
1002 assert(handled);
1003 return true;
1004 }
1005
1006 static bool
1007 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1008 struct v3dv_image *dst,
1009 struct v3dv_image *src,
1010 const VkImageCopy2 *region);
1011
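/* Builds the VkImageCopy2 used by the texel buffer path to copy a single
 * layer of the source image into the linear image aliasing the destination
 * buffer, with offsets and extents expressed in block units.
 */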
1012 static VkImageCopy2
1013 image_copy_region_for_image_to_buffer(const VkBufferImageCopy2 *region,
1014 struct image_to_buffer_info *info,
1015 uint32_t layer)
1016 {
1017 VkImageCopy2 output = {
1018 .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
1019 .srcSubresource = {
1020 .aspectMask = info->src_copy_aspect,
1021 .mipLevel = region->imageSubresource.mipLevel,
1022 .baseArrayLayer = region->imageSubresource.baseArrayLayer + layer,
1023 .layerCount = 1,
1024 },
1025 .srcOffset = {
1026 DIV_ROUND_UP(region->imageOffset.x, info->block_width),
1027 DIV_ROUND_UP(region->imageOffset.y, info->block_height),
1028 region->imageOffset.z,
1029 },
1030 .dstSubresource = {
1031 .aspectMask = info->dst_copy_aspect,
1032 .mipLevel = 0,
1033 .baseArrayLayer = 0,
1034 .layerCount = 1,
1035 },
1036 .dstOffset = { 0, 0, 0 },
1037 .extent = {
1038 DIV_ROUND_UP(region->imageExtent.width, info->block_width),
1039 DIV_ROUND_UP(region->imageExtent.height, info->block_height),
1040 1
1041 },
1042 };
1043
1044 return output;
1045 }
1046
1047 /**
1048 * Returns true if the implementation supports the requested operation (even if
1049 * it failed to process it, for example, due to an out-of-memory error).
1050 */
1051 static bool
1052 copy_image_to_buffer_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1053 struct v3dv_buffer *dst_buffer,
1054 struct v3dv_image *src_image,
1055 const VkBufferImageCopy2 *region)
1056 {
1057 bool handled = false;
1058 VkImage dst_buffer_image;
1059 struct image_to_buffer_info info;
1060
1061 /* This is a requirement for copy_image_linear_texel_buffer below. We check
1062 * it in advance in order to do an early return.
1063 */
1064 if (src_image->tiled)
1065 return false;
1066
1067 handled =
1068 gather_image_to_buffer_info(cmd_buffer, src_image, region,
1069 &info);
1070 if (!handled)
1071 return handled;
1072
1073 /* At this point the implementation should support the copy; any possible
1074 * errors below are for different reasons, like an out-of-memory error.
1075 */
1076 handled = true;
1077
1078 uint32_t num_layers;
1079 if (src_image->vk.image_type != VK_IMAGE_TYPE_3D) {
1080 num_layers = vk_image_subresource_layer_count(&src_image->vk,
1081 &region->imageSubresource);
1082 } else {
1083 num_layers = region->imageExtent.depth;
1084 }
1085 assert(num_layers > 0);
1086
1087 VkResult result;
1088 VkImageCopy2 image_region;
1089 for (uint32_t layer = 0; layer < num_layers; layer++) {
1090 /* Create the destination image from the destination buffer */
1091 result =
1092 create_image_from_buffer(cmd_buffer, dst_buffer, region, &info,
1093 layer, &dst_buffer_image);
1094 if (result != VK_SUCCESS)
1095 return handled;
1096
1097 image_region =
1098 image_copy_region_for_image_to_buffer(region, &info, layer);
1099
1100 handled =
1101 copy_image_linear_texel_buffer(cmd_buffer,
1102 v3dv_image_from_handle(dst_buffer_image),
1103 src_image, &image_region);
1104 }
1105
1106 return handled;
1107 }
1108
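/* Implements vkCmdCopyImageToBuffer2 by trying, for each region, a TLB copy
 * first, then a shader blit, and finally a texel buffer copy.
 */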
1109 VKAPI_ATTR void VKAPI_CALL
1110 v3dv_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
1111 const VkCopyImageToBufferInfo2 *info)
1112
1113 {
1114 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1115 V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
1116 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
1117
1118 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1119
1120 cmd_buffer->state.is_transfer = true;
1121
1122 for (uint32_t i = 0; i < info->regionCount; i++) {
1123 const VkBufferImageCopy2 *region = &info->pRegions[i];
1124
1125 if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, region))
1126 continue;
1127
1128 if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, region))
1129 continue;
1130
1131 if (copy_image_to_buffer_texel_buffer(cmd_buffer, buffer, image, region))
1132 continue;
1133
1134 unreachable("Unsupported image to buffer copy.");
1135 }
1136 cmd_buffer->state.is_transfer = false;
1137 }
1138
1139 /**
1140 * Returns true if the implementation supports the requested operation (even if
1141 * it failed to process it, for example, due to an out-of-memory error).
1142 */
1143 static bool
1144 copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1145 struct v3dv_image *dst,
1146 struct v3dv_image *src,
1147 const VkImageCopy2 *region)
1148 {
1149 if (V3D_DBG(DISABLE_TFU)) {
1150 perf_debug("Copy images: TFU disabled, fallbacks could be slower.\n");
1151 return false;
1152 }
1153
1154 /* Destination can't be raster format */
1155 if (!dst->tiled)
1156 return false;
1157
1158 /* We can only do full copies, so if the format is D24S8 both aspects need
1159 * to be copied. We only need to check the dst format because the spec
1160 * states that depth/stencil formats must match exactly.
1161 */
1162 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
1163 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
1164 VK_IMAGE_ASPECT_STENCIL_BIT;
1165 if (region->dstSubresource.aspectMask != ds_aspects)
1166 return false;
1167 }
1168
1169 /* Don't handle copies between uncompressed and compressed formats for now.
1170 *
1171 * FIXME: we should be able to handle these easily but there is no coverage
1172 * in CTS at the moment that makes such copies with full images (which we
1173 * require here), only partial copies. Also, in that case the code below that
1174 * checks for "dst image complete" requires some changes, since it is
1175 * checking against the region dimensions, which are in units of the source
1176 * image format.
1177 */
1178 if (vk_format_is_compressed(dst->vk.format) !=
1179 vk_format_is_compressed(src->vk.format)) {
1180 return false;
1181 }
1182
1183 /* Source region must start at (0,0) */
1184 if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
1185 return false;
1186
1187 /* Destination image must be complete */
1188 if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
1189 return false;
1190
1191 uint8_t src_plane =
1192 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1193 uint8_t dst_plane =
1194 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1195
1196 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
1197 uint32_t dst_width = u_minify(dst->planes[dst_plane].width, dst_mip_level);
1198 uint32_t dst_height = u_minify(dst->planes[dst_plane].height, dst_mip_level);
1199 if (region->extent.width != dst_width || region->extent.height != dst_height)
1200 return false;
1201
1202 /* From vkCmdCopyImage:
1203 *
1204 * "When copying between compressed and uncompressed formats the extent
1205 * members represent the texel dimensions of the source image and not
1206 * the destination."
1207 */
1208 const uint32_t block_w =
1209 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1210 const uint32_t block_h =
1211 vk_format_get_blockheight(src->planes[src_plane].vk_format);
1212 uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1213 uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1214
1215 /* Account for sample count */
1216 assert(dst->vk.samples == src->vk.samples);
1217 if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
1218 assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
1219 width *= 2;
1220 height *= 2;
1221 }
1222
1223 /* The TFU unit doesn't handle format conversions so we need the formats to
1224 * match. On the other hand, vkCmdCopyImage allows different color formats
1225 * on the source and destination images, but only if they are texel
1226 * compatible. For us, this means that we can effectively ignore different
1227 * formats and just make the copy using either of them, since we are just
1228 * moving raw data and not making any conversions.
1229 *
1230 * Also, the formats supported by the TFU unit are limited, but again, since
1231 * we are only doing raw copies here without interpreting or converting
1232 * the underlying pixel data according to its format, we can always choose
1233 * to use compatible formats that are supported with the TFU unit.
1234 */
1235 assert(dst->planes[dst_plane].cpp == src->planes[src_plane].cpp);
1236 const struct v3dv_format *format =
1237 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1238 dst->planes[dst_plane].cpp, NULL);
1239
1240 /* Emit a TFU job for each layer to blit */
1241 const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1242 vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource) :
1243 region->extent.depth;
1244 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
1245
1246 const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
1247 region->srcSubresource.baseArrayLayer : region->srcOffset.z;
1248 const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1249 region->dstSubresource.baseArrayLayer : region->dstOffset.z;
1250 for (uint32_t i = 0; i < layer_count; i++) {
1251 const uint32_t dst_offset =
1252 dst->planes[dst_plane].mem->bo->offset +
1253 v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i, dst_plane);
1254 const uint32_t src_offset =
1255 src->planes[src_plane].mem->bo->offset +
1256 v3dv_layer_offset(src, src_mip_level, base_src_layer + i, src_plane);
1257
1258 const struct v3d_resource_slice *dst_slice =
1259 &dst->planes[dst_plane].slices[dst_mip_level];
1260 const struct v3d_resource_slice *src_slice =
1261 &src->planes[src_plane].slices[src_mip_level];
1262
1263 v3d_X((&cmd_buffer->device->devinfo), meta_emit_tfu_job)(
1264 cmd_buffer,
1265 dst->planes[dst_plane].mem->bo->handle,
1266 dst_offset,
1267 dst_slice->tiling,
1268 dst_slice->padded_height,
1269 dst->planes[dst_plane].cpp,
1270 src->planes[src_plane].mem->bo->handle,
1271 src_offset,
1272 src_slice->tiling,
1273 src_slice->tiling == V3D_TILING_RASTER ?
1274 src_slice->stride : src_slice->padded_height,
1275 src->planes[src_plane].cpp,
1276 /* All compatible TFU formats are single-plane */
1277 width, height, &format->planes[0]);
1278 }
1279
1280 return true;
1281 }
1282
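/* Non-static wrapper around copy_image_tfu so other parts of the driver can
 * attempt a TFU image copy directly.
 */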
1283 inline bool
1284 v3dv_cmd_buffer_copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1285 struct v3dv_image *dst,
1286 struct v3dv_image *src,
1287 const VkImageCopy2 *region)
1288 {
1289 return copy_image_tfu(cmd_buffer, dst, src, region);
1290 }
1291
1292 /**
1293 * Returns true if the implementation supports the requested operation (even if
1294 * it failed to process it, for example, due to an out-of-memory error).
1295 */
1296 static bool
1297 copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1298 struct v3dv_image *dst,
1299 struct v3dv_image *src,
1300 const VkImageCopy2 *region)
1301 {
1302 uint8_t src_plane =
1303 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1304 assert(src_plane < src->plane_count);
1305 uint8_t dst_plane =
1306 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1307 assert(dst_plane < dst->plane_count);
1308
1309 VkFormat fb_format;
1310 if (!v3dv_meta_can_use_tlb(src, src_plane, region->srcSubresource.mipLevel,
1311 &region->srcOffset, NULL, &fb_format) ||
1312 !v3dv_meta_can_use_tlb(dst, dst_plane, region->dstSubresource.mipLevel,
1313 &region->dstOffset, &region->extent, &fb_format)) {
1314 return false;
1315 }
1316
1317 /* We can't do TLB stores of linear D/S */
1318 if (!dst->tiled && vk_format_is_depth_or_stencil(fb_format))
1319 return false;
1320
1321 /* From the Vulkan spec, VkImageCopy valid usage:
1322 *
1323 * "If neither the calling command’s srcImage nor the calling command’s
1324 * dstImage has a multi-planar image format then the aspectMask member
1325 * of srcSubresource and dstSubresource must match."
1326 */
1327 assert(src->plane_count != 1 || dst->plane_count != 1 ||
1328 region->dstSubresource.aspectMask ==
1329 region->srcSubresource.aspectMask);
1330 uint32_t internal_type, internal_bpp;
1331 v3d_X((&cmd_buffer->device->devinfo), get_internal_type_bpp_for_image_aspects)
1332 (fb_format, region->dstSubresource.aspectMask,
1333 &internal_type, &internal_bpp);
1334
1335 /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
1336 *
1337 * "The number of slices of the extent (for 3D) or layers of the
1338 * srcSubresource (for non-3D) must match the number of slices of the
1339 * extent (for 3D) or layers of the dstSubresource (for non-3D)."
1340 */
1341 assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
1342 vk_image_subresource_layer_count(&src->vk, &region->srcSubresource) :
1343 region->extent.depth) ==
1344 (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
1345 vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource) :
1346 region->extent.depth));
1347 uint32_t num_layers;
1348 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
1349 num_layers = vk_image_subresource_layer_count(&dst->vk,
1350 &region->dstSubresource);
1351 } else {
1352 num_layers = region->extent.depth;
1353 }
1354 assert(num_layers > 0);
1355
1356 struct v3dv_job *job =
1357 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1358 if (!job)
1359 return true;
1360
1361 /* Handle copy to compressed image using compatible format */
1362 const uint32_t block_w =
1363 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1364 const uint32_t block_h =
1365 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1366 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1367 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1368
1369 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
1370 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1371 src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
1372
1373 struct v3dv_meta_framebuffer framebuffer;
1374 v3d_X((&job->device->devinfo), meta_framebuffer_init)(&framebuffer, fb_format,
1375 internal_type, &job->frame_tiling);
1376
1377 v3d_X((&job->device->devinfo), job_emit_binning_flush)(job);
1378 v3d_X((&job->device->devinfo), meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
1379
1380 v3dv_cmd_buffer_finish_job(cmd_buffer);
1381
1382 return true;
1383 }
1384
1385 /**
1386 * Takes the image provided as argument and creates a new image that has
1387 * the same specification and aliases the same memory storage, except that:
1388 *
1389 * - It has the uncompressed format passed in.
1390 * - Its original width/height are scaled by the factors passed in.
1391 *
1392 * This is useful to implement copies from compressed images using the blit
1393 * path. The idea is that we create uncompressed "image views" of both the
1394 * source and destination images using the uncompressed format and then we
1395 * define the copy blit in terms of that format.
1396 */
1397 static struct v3dv_image *
1398 create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
1399 struct v3dv_image *src,
1400 float width_scale,
1401 float height_scale,
1402 VkFormat format)
1403 {
1404 assert(!vk_format_is_compressed(format));
1405 /* We don't support ycbcr compressed formats */
1406 assert(src->plane_count == 1);
1407
1408 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
1409
1410 VkImageCreateInfo info = {
1411 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1412 .imageType = src->vk.image_type,
1413 .format = format,
1414 .extent = {
1415 .width = src->vk.extent.width * width_scale,
1416 .height = src->vk.extent.height * height_scale,
1417 .depth = src->vk.extent.depth,
1418 },
1419 .mipLevels = src->vk.mip_levels,
1420 .arrayLayers = src->vk.array_layers,
1421 .samples = src->vk.samples,
1422 .tiling = src->tiled ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
1423 .usage = src->vk.usage,
1424 };
1425
1426 VkImage _image;
1427 VkResult result =
1428 v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
1429 if (result != VK_SUCCESS) {
1430 v3dv_flag_oom(cmd_buffer, NULL);
1431 return NULL;
1432 }
1433
1434 v3dv_cmd_buffer_add_private_obj(
1435 cmd_buffer, (uintptr_t)_image,
1436 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
1437
1438 struct v3dv_image *image = v3dv_image_from_handle(_image);
1439 image->planes[0].mem = src->planes[0].mem;
1440 image->planes[0].mem_offset = src->planes[0].mem_offset;
1441 return image;
1442 }
1443
1444 /**
1445 * Returns true if the implementation supports the requested operation (even if
1446 * it failed to process it, for example, due to an out-of-memory error).
1447 */
1448 static bool
1449 copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
1450 struct v3dv_image *dst,
1451 struct v3dv_image *src,
1452 const VkImageCopy2 *region)
1453 {
1454 if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D)
1455 return false;
1456
1457 uint8_t src_plane =
1458 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1459 assert(src_plane < src->plane_count);
1460 uint8_t dst_plane =
1461 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1462 assert(dst_plane < dst->plane_count);
1463
1464 const uint32_t src_block_w =
1465 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
1466 const uint32_t src_block_h =
1467 vk_format_get_blockheight(src->planes[src_plane].vk_format);
1468 const uint32_t dst_block_w =
1469 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
1470 const uint32_t dst_block_h =
1471 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
1472 const float block_scale_w = (float)src_block_w / (float)dst_block_w;
1473 const float block_scale_h = (float)src_block_h / (float)dst_block_h;
1474
1475 /* We need to choose a single format for the blit to ensure that this is
1476 * really a copy and there are no format conversions going on. Since we are
1477 * going to blit, we need to make sure that the selected format can be
1478 * both rendered to and textured from.
1479 */
1480 VkFormat format;
1481 float src_scale_w = 1.0f;
1482 float src_scale_h = 1.0f;
1483 float dst_scale_w = block_scale_w;
1484 float dst_scale_h = block_scale_h;
1485 if (vk_format_is_compressed(src->vk.format)) {
1486 /* If we are copying from a compressed format we should be aware that we
1487 * are going to texture from the source image, and the texture setup
1488 * knows the actual size of the image, so we need to choose a format
1489 * that has a per-texel (not per-block) bpp that is compatible for that
1490 * image size. For example, for a source image with size Bw*WxBh*H
1491 * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
1492 * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
1493 * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
1494 * so we could specify a blit with size Bw*WxBh*H and a format with
1495 * a bpp of 8-bit per texel (R8_UINT).
1496 *
1497 * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
1498 * which is 64-bit per texel, then we would need a 4-bit format, which
1499 * we don't have, so instead we still choose an 8-bit format, but we
1500 * apply a divisor to the row dimensions of the blit, since we are
1501 * copying two texels per item.
1502 *
1503 * Generally, we can choose any format so long as we compute appropriate
1504 * divisors for the width and height depending on the source image's
1505 * bpp.
1506 */
1507 assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1508
1509 switch (src->planes[src_plane].cpp) {
1510 case 16:
1511 format = VK_FORMAT_R32G32B32A32_UINT;
1512 break;
1513 case 8:
1514 format = VK_FORMAT_R16G16B16A16_UINT;
1515 break;
1516 default:
1517 unreachable("Unsupported compressed format");
1518 }
1519
1520 /* Create image views of the src/dst images that we can interpret in
1521 * terms of the canonical format.
1522 */
1523 src_scale_w /= src_block_w;
1524 src_scale_h /= src_block_h;
1525 dst_scale_w /= src_block_w;
1526 dst_scale_h /= src_block_h;
1527
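      /* Worked example (illustrative only): for a 64x64 ETC2_RGBA8 source
       * (4x4 blocks, 16 bytes per block, so cpp is 16) we select
       * VK_FORMAT_R32G32B32A32_UINT above and divide both dimensions by the
       * block size, so the source alias becomes a 16x16 RGBA32UI image in
       * which each texel holds exactly one compressed block.
       */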
1528 src = create_image_alias(cmd_buffer, src,
1529 src_scale_w, src_scale_h, format);
1530
1531 dst = create_image_alias(cmd_buffer, dst,
1532 dst_scale_w, dst_scale_h, format);
1533 } else {
1534 format = src->format->planes[src_plane].rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
1535 src->planes[src_plane].vk_format :
1536 get_compatible_tlb_format(src->planes[src_plane].vk_format);
1537 if (format == VK_FORMAT_UNDEFINED)
1538 return false;
1539
1540 const struct v3dv_format *f = v3d_X((&cmd_buffer->device->devinfo), get_format)(format);
1541 assert(f->plane_count < 2);
1542 if (!f->plane_count || f->planes[0].tex_type == TEXTURE_DATA_FORMAT_NO)
1543 return false;
1544 }
1545
1546 /* Given an uncompressed image with size WxH, if we copy it to a compressed
1547 * image, it will result in an image with size W*bWxH*bH, where bW and bH
1548 * are the compressed format's block width and height. This means that
1549 * copies between compressed and uncompressed images involve different
1550 * image sizes, and therefore, we need to take that into account when
1551 * setting up the source and destination blit regions below, so they are
1552 * consistent from the point of view of the single compatible format
1553 * selected for the copy.
1554 *
1555 * We should take into account that the dimensions of the region provided
1556 * to the copy command are specified in terms of the source image. With that
1557 * in mind, below we adjust the blit destination region to be consistent with
1558 * the source region for the compatible format, so basically, we apply
1559 * the block scale factor to the destination offset provided by the copy
1560 * command (because it is specified in terms of the destination image, not
1561 * the source), and then we just add the region copy dimensions to that
1562 * (since the region dimensions are already specified in terms of the source
1563 * image).
1564 */
1565 uint32_t region_width = region->extent.width * src_scale_w;
1566 uint32_t region_height = region->extent.height * src_scale_h;
1567 if (src_block_w > 1)
1568 region_width = util_next_power_of_two(region_width);
1569 if (src_block_h > 1)
1570 region_height = util_next_power_of_two(region_height);
1571
1572 const VkOffset3D src_start = {
1573 region->srcOffset.x * src_scale_w,
1574 region->srcOffset.y * src_scale_h,
1575 region->srcOffset.z,
1576 };
1577 const VkOffset3D src_end = {
1578 src_start.x + region_width,
1579 src_start.y + region_height,
1580 src_start.z + region->extent.depth,
1581 };
1582
1583 const VkOffset3D dst_start = {
1584 region->dstOffset.x * dst_scale_w,
1585 region->dstOffset.y * dst_scale_h,
1586 region->dstOffset.z,
1587 };
1588 const VkOffset3D dst_end = {
1589 dst_start.x + region_width,
1590 dst_start.y + region_height,
1591 dst_start.z + region->extent.depth,
1592 };
1593
1594 const VkImageBlit2 blit_region = {
1595 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
1596 .srcSubresource = region->srcSubresource,
1597 .srcOffsets = { src_start, src_end },
1598 .dstSubresource = region->dstSubresource,
1599 .dstOffsets = { dst_start, dst_end },
1600 };
1601 bool handled = blit_shader(cmd_buffer,
1602 dst, format,
1603 src, format,
1604 0, NULL,
1605 &blit_region, VK_FILTER_NEAREST, true);
1606
1607 /* We should have selected formats that we can blit */
1608 assert(handled);
1609 return handled;
1610 }
1611
1612 static bool
1613 copy_image_linear_texel_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1614 struct v3dv_image *dst,
1615 struct v3dv_image *src,
1616 const VkImageCopy2 *region)
1617 {
1618 if (src->tiled)
1619 return false;
1620
1621 /* Implementations are allowed to restrict linear images like this */
1622 assert(region->srcOffset.z == 0);
1623 assert(region->dstOffset.z == 0);
1624 assert(region->srcSubresource.mipLevel == 0);
1625 assert(region->srcSubresource.baseArrayLayer == 0);
1626 assert(region->srcSubresource.layerCount == 1);
1627 assert(region->dstSubresource.mipLevel == 0);
1628 assert(region->dstSubresource.baseArrayLayer == 0);
1629 assert(region->dstSubresource.layerCount == 1);
1630
1631 uint8_t src_plane =
1632 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
1633 uint8_t dst_plane =
1634 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
1635
1636 assert(src->planes[src_plane].cpp == dst->planes[dst_plane].cpp);
1637 const uint32_t bpp = src->planes[src_plane].cpp;
1638
1639 VkFormat format;
1640 switch (bpp) {
1641 case 16:
1642 format = VK_FORMAT_R32G32B32A32_UINT;
1643 break;
1644 case 8:
1645 format = VK_FORMAT_R16G16B16A16_UINT;
1646 break;
1647 case 4:
1648 format = VK_FORMAT_R8G8B8A8_UINT;
1649 break;
1650 case 2:
1651 format = VK_FORMAT_R16_UINT;
1652 break;
1653 case 1:
1654 format = VK_FORMAT_R8_UINT;
1655 break;
1656 default:
1657 unreachable("unsupported bit-size");
1658 return false;
1659 }
1660
1661 VkComponentMapping ident_swizzle = {
1662 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1663 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
1664 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
1665 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
1666 };
1667
1668 const uint32_t buf_stride = src->planes[src_plane].slices[0].stride;
1669 const VkDeviceSize buf_offset =
1670 region->srcOffset.y * buf_stride + region->srcOffset.x * bpp;
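   /* Illustrative sketch: a linear image with row stride S bytes can be read
    * as a 1D texel buffer, where texel (x, y) of the source region lives at
    * byte offset (srcOffset.y + y) * S + (srcOffset.x + x) * bpp. buf_offset
    * above points at (srcOffset.x, srcOffset.y) and the texel buffer copy
    * shader adds the per-fragment x/y offsets on top of it.
    */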
1671
1672 struct v3dv_buffer src_buffer;
1673 vk_object_base_init(&cmd_buffer->device->vk, &src_buffer.base,
1674 VK_OBJECT_TYPE_BUFFER);
1675
1676 const struct VkBufferCreateInfo buf_create_info = {
1677 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1678 .size = src->planes[src_plane].size,
1679 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1680 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1681 };
1682 v3dv_buffer_init(cmd_buffer->device, &buf_create_info, &src_buffer,
1683 src->planes[src_plane].alignment);
1684
1685 const VkBindBufferMemoryInfo buf_bind_info = {
1686 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1687 .buffer = v3dv_buffer_to_handle(&src_buffer),
1688 .memory = v3dv_device_memory_to_handle(src->planes[src_plane].mem),
1689 .memoryOffset = src->planes[src_plane].mem_offset +
1690 v3dv_layer_offset(src, 0, 0, src_plane),
1691 };
1692 v3dv_buffer_bind_memory(&buf_bind_info);
1693
1694 const VkBufferImageCopy2 copy_region = {
1695 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
1696 .pNext = NULL,
1697 .bufferOffset = buf_offset,
1698 .bufferRowLength = buf_stride / bpp,
1699 .bufferImageHeight = src->vk.extent.height,
1700 .imageSubresource = region->dstSubresource,
1701 .imageOffset = region->dstOffset,
1702 .imageExtent = region->extent,
1703 };
1704
1705 return texel_buffer_shader_copy(cmd_buffer,
1706 region->dstSubresource.aspectMask,
1707 dst,
1708 format,
1709 format,
1710 &src_buffer,
1711 src->planes[src_plane].cpp,
1712 0 /* color mask: full */, &ident_swizzle,
1713 1, &copy_region);
1714 }
1715
1716 VKAPI_ATTR void VKAPI_CALL
1717 v3dv_CmdCopyImage2(VkCommandBuffer commandBuffer,
1718 const VkCopyImageInfo2 *info)
1719
1720 {
1721 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1722 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
1723 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
1724
1725 assert(src->vk.samples == dst->vk.samples);
1726
1727 cmd_buffer->state.is_transfer = true;
1728
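   /* Try the copy paths roughly from fastest to most general: TFU, TLB,
    * blit shader, and finally the texel buffer shader for linear sources
    * that none of the others can handle.
    */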
1729 for (uint32_t i = 0; i < info->regionCount; i++) {
1730 const VkImageCopy2 *region = &info->pRegions[i];
1731 if (copy_image_tfu(cmd_buffer, dst, src, region))
1732 continue;
1733 if (copy_image_tlb(cmd_buffer, dst, src, region))
1734 continue;
1735 if (copy_image_blit(cmd_buffer, dst, src, region))
1736 continue;
1737 if (copy_image_linear_texel_buffer(cmd_buffer, dst, src, region))
1738 continue;
1739 unreachable("Image copy not supported");
1740 }
1741
1742 cmd_buffer->state.is_transfer = false;
1743 }
1744
1745 VKAPI_ATTR void VKAPI_CALL
1746 v3dv_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
1747 const VkCopyBufferInfo2 *pCopyBufferInfo)
1748 {
1749 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1750 V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
1751 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
1752
1753 cmd_buffer->state.is_transfer = true;
1754
1755 for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
1756 v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
1757 (cmd_buffer,
1758 dst_buffer->mem->bo, dst_buffer->mem_offset,
1759 src_buffer->mem->bo, src_buffer->mem_offset,
1760 &pCopyBufferInfo->pRegions[i]);
1761 }
1762
1763 cmd_buffer->state.is_transfer = false;
1764 }
1765
1766 static void
1767 destroy_update_buffer_cb(VkDevice _device,
1768 uint64_t pobj,
1769 VkAllocationCallbacks *alloc)
1770 {
1771 V3DV_FROM_HANDLE(v3dv_device, device, _device);
1772 struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
1773 v3dv_bo_free(device, bo);
1774 }
1775
1776 VKAPI_ATTR void VKAPI_CALL
1777 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
1778 VkBuffer dstBuffer,
1779 VkDeviceSize dstOffset,
1780 VkDeviceSize dataSize,
1781 const void *pData)
1782 {
1783 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1784 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1785
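   /* Stage the inline data in a temporary BO at record time (as the spec
    * requires for vkCmdUpdateBuffer) and then copy it into the destination
    * buffer on the GPU; the staging BO is attached to the command buffer as
    * a private object so it is freed together with it.
    */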
1786 struct v3dv_bo *src_bo =
1787 v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
1788 if (!src_bo) {
1789 mesa_loge("Failed to allocate BO for vkCmdUpdateBuffer.\n");
1790 return;
1791 }
1792
1793 bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
1794 if (!ok) {
1795 mesa_loge("Failed to map BO for vkCmdUpdateBuffer.\n");
1796 return;
1797 }
1798
1799 cmd_buffer->state.is_transfer = true;
1800
1801 memcpy(src_bo->map, pData, dataSize);
1802
1803 v3dv_bo_unmap(cmd_buffer->device, src_bo);
1804
1805 VkBufferCopy2 region = {
1806 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
1807 .srcOffset = 0,
1808 .dstOffset = dstOffset,
1809 .size = dataSize,
1810 };
1811 struct v3dv_job *copy_job =
1812 v3d_X((&cmd_buffer->device->devinfo), meta_copy_buffer)
1813 (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
1814 src_bo, 0, &region);
1815
1816 if (copy_job) {
1817 v3dv_cmd_buffer_add_private_obj(
1818 cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
1819 }
1820
1821 cmd_buffer->state.is_transfer = false;
1822 }
1823
1824 VKAPI_ATTR void VKAPI_CALL
1825 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
1826 VkBuffer dstBuffer,
1827 VkDeviceSize dstOffset,
1828 VkDeviceSize size,
1829 uint32_t data)
1830 {
1831 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1832 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1833
1834 cmd_buffer->state.is_transfer = true;
1835
1836 struct v3dv_bo *bo = dst_buffer->mem->bo;
1837
1838 /* From the Vulkan spec:
1839 *
1840 * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
1841 * a multiple of 4, then the nearest smaller multiple is used."
1842 */
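   /* e.g. VK_WHOLE_SIZE from offset 6 of a 20-byte buffer leaves 14 bytes,
    * which is rounded down to 12 below.
    */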
1843 if (size == VK_WHOLE_SIZE) {
1844 size = dst_buffer->size - dstOffset;
1845 size -= size % 4;
1846 }
1847
1848 v3d_X((&cmd_buffer->device->devinfo), meta_fill_buffer)
1849 (cmd_buffer, bo, dstOffset, size, data);
1850
1851 cmd_buffer->state.is_transfer = false;
1852 }
1853
1854 /**
1855 * Returns true if the implementation supports the requested operation (even if
1856 * it failed to process it, for example, due to an out-of-memory error).
1857 */
1858 static bool
1859 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1860 struct v3dv_image *image,
1861 struct v3dv_buffer *buffer,
1862 const VkBufferImageCopy2 *region)
1863 {
1864 if (V3D_DBG(DISABLE_TFU)) {
1865 perf_debug("Copy buffer to image: TFU disabled, fallbacks could be slower.\n");
1866 return false;
1867 }
1868
1869 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1870
1871 /* Destination can't be raster format */
1872 if (!image->tiled)
1873 return false;
1874
1875 /* We can't copy D24S8 because buffer to image copies only copy one aspect
1876 * at a time, and the TFU copies full images. Also, V3D depth bits for
1877 * both D24S8 and D24X8 are stored in the 24-bit MSB of each 32-bit word, but
1878 * the Vulkan spec has the buffer data specified the other way around, so it
1879 * is not a straight copy; we would have to swizzle the channels, which the
1880 * TFU can't do.
1881 */
1882 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
1883 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
1884 return false;
1885 }
1886
1887 /* Region must include full slice */
1888 const uint32_t offset_x = region->imageOffset.x;
1889 const uint32_t offset_y = region->imageOffset.y;
1890 if (offset_x != 0 || offset_y != 0)
1891 return false;
1892
1893 uint32_t width, height;
1894 if (region->bufferRowLength == 0)
1895 width = region->imageExtent.width;
1896 else
1897 width = region->bufferRowLength;
1898
1899 if (region->bufferImageHeight == 0)
1900 height = region->imageExtent.height;
1901 else
1902 height = region->bufferImageHeight;
1903
1904 const uint8_t plane =
1905 v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1906
1907 const uint32_t mip_level = region->imageSubresource.mipLevel;
1908 const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
1909
1910 if (width != slice->width || height != slice->height)
1911 return false;
1912
1913 /* Handle region semantics for compressed images */
1914 const uint32_t block_w =
1915 vk_format_get_blockwidth(image->planes[plane].vk_format);
1916 const uint32_t block_h =
1917 vk_format_get_blockheight(image->planes[plane].vk_format);
1918 width = DIV_ROUND_UP(width, block_w);
1919 height = DIV_ROUND_UP(height, block_h);
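   /* After the division above, width/height are in block units for
    * compressed formats. Since cpp is the byte size of a block in that case,
    * the raster stride computed below (width * cpp) is simply the byte size
    * of one row of blocks in the source buffer.
    */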
1920
1921 /* Format must be supported for texturing via the TFU. Since we are just
1922 * copying raw data and not converting between pixel formats, we can ignore
1923 * the image's format and choose a compatible TFU format for the image
1924 * texel size instead, which expands the list of formats we can handle here.
1925 */
1926 const struct v3dv_format *format =
1927 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1928 image->planes[plane].cpp, NULL);
1929 /* We only use single-plane formats with the TFU */
1930 assert(format->plane_count == 1);
1931 const struct v3dv_format_plane *format_plane = &format->planes[0];
1932
1933 uint32_t num_layers;
1934 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
1935 num_layers = vk_image_subresource_layer_count(&image->vk,
1936 &region->imageSubresource);
1937 } else {
1938 num_layers = region->imageExtent.depth;
1939 }
1940 assert(num_layers > 0);
1941
1942 assert(image->planes[plane].mem && image->planes[plane].mem->bo);
1943 const struct v3dv_bo *dst_bo = image->planes[plane].mem->bo;
1944
1945 assert(buffer->mem && buffer->mem->bo);
1946 const struct v3dv_bo *src_bo = buffer->mem->bo;
1947
1948 /* Emit a TFU job per layer to copy */
1949 const uint32_t buffer_stride = width * image->planes[plane].cpp;
1950 for (int i = 0; i < num_layers; i++) {
1951 uint32_t layer;
1952 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1953 layer = region->imageSubresource.baseArrayLayer + i;
1954 else
1955 layer = region->imageOffset.z + i;
1956
1957 const uint32_t buffer_offset =
1958 buffer->mem_offset + region->bufferOffset +
1959 height * buffer_stride * i;
1960 const uint32_t src_offset = src_bo->offset + buffer_offset;
1961
1962 const uint32_t dst_offset =
1963 dst_bo->offset + v3dv_layer_offset(image, mip_level, layer, plane);
1964
1965 v3d_X((&cmd_buffer->device->devinfo), meta_emit_tfu_job)(
1966 cmd_buffer,
1967 dst_bo->handle,
1968 dst_offset,
1969 slice->tiling,
1970 slice->padded_height,
1971 image->planes[plane].cpp,
1972 src_bo->handle,
1973 src_offset,
1974 V3D_TILING_RASTER,
1975 width,
1976 1,
1977 width, height, format_plane);
1978 }
1979
1980 return true;
1981 }
1982
1983 /**
1984 * Returns true if the implementation supports the requested operation (even if
1985 * it failed to process it, for example, due to an out-of-memory error).
1986 */
1987 static bool
1988 copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1989 struct v3dv_image *image,
1990 struct v3dv_buffer *buffer,
1991 const VkBufferImageCopy2 *region)
1992 {
1993 VkFormat fb_format;
1994 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
1995 assert(plane < image->plane_count);
1996
1997 if (!v3dv_meta_can_use_tlb(image, plane, region->imageSubresource.mipLevel,
1998 &region->imageOffset, &region->imageExtent,
1999 &fb_format)) {
2000 return false;
2001 }
2002
2003 /* From the Vulkan spec for VkBufferImageCopy2:
2004 *
2005 * "The aspectMask member of imageSubresource must only have a
2006 * single bit set."
2007 *
2008 * For us this has relevant implications because we can't do TLB stores
2009 * of linear depth/stencil, so we work around this by loading D/S data to the
2010 * color tile buffer using a compatible color format (see
2011 * emit_copy_buffer_to_layer_per_tile_list and choose_tlb_format functions).
2012 * However, when we are copying a single aspect to a combined D/S image
2013 * we need to preserve the other aspect, and for that we will still use the
2014 * D/S tile buffer to load and store the aspect of the image we need to
2015 * preserve, so in this case we are still constrained by the hw restriction
2016 * for linear D/S stores.
2017 */
2018 assert(util_bitcount(region->imageSubresource.aspectMask) == 1);
2019 if (!image->tiled &&
2020 vk_format_has_depth(fb_format) &&
2021 vk_format_has_stencil(fb_format)) {
2022 return false;
2023 }
2024
2025 uint32_t internal_type, internal_bpp;
2026 v3d_X((&cmd_buffer->device->devinfo), get_internal_type_bpp_for_image_aspects)
2027 (fb_format, region->imageSubresource.aspectMask,
2028 &internal_type, &internal_bpp);
2029
2030 uint32_t num_layers;
2031 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2032 num_layers = vk_image_subresource_layer_count(&image->vk,
2033 &region->imageSubresource);
2034 } else {
2035 num_layers = region->imageExtent.depth;
2036 }
2037 assert(num_layers > 0);
2038
2039 struct v3dv_job *job =
2040 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
2041 if (!job)
2042 return true;
2043
2044 /* Handle copy to compressed format using a compatible format */
2045 const uint32_t block_w =
2046 vk_format_get_blockwidth(image->planes[plane].vk_format);
2047 const uint32_t block_h =
2048 vk_format_get_blockheight(image->planes[plane].vk_format);
2049 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
2050 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
2051
2052 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
2053 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
2054 false);
2055
2056 struct v3dv_meta_framebuffer framebuffer;
2057 v3d_X((&job->device->devinfo), meta_framebuffer_init)(&framebuffer, fb_format,
2058 internal_type, &job->frame_tiling);
2059
2060 v3d_X((&job->device->devinfo), job_emit_binning_flush)(job);
2061 v3d_X((&job->device->devinfo), meta_emit_copy_buffer_to_image_rcl)
2062 (job, image, buffer, &framebuffer, region);
2063
2064 v3dv_cmd_buffer_finish_job(cmd_buffer);
2065
2066 return true;
2067 }
2068
2069 static bool
2070 create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
2071 struct v3dv_image *image,
2072 struct v3dv_buffer *buffer,
2073 const VkBufferImageCopy2 *region)
2074 {
2075 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
2076 return true;
2077 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
2078 return true;
2079 return false;
2080 }
2081
2082 static VkResult
2083 create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
2084 {
2085 /* If this is not the first pool we create for this command buffer,
2086 * size it based on the size of the currently exhausted pool.
2087 */
2088 uint32_t descriptor_count = 64;
2089 if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
2090 struct v3dv_descriptor_pool *exhausted_pool =
2091 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
2092 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
2093 }
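   /* e.g. successive pools allocated for the same command buffer grow as
    * 64 -> 128 -> 256 -> 512 -> 1024 descriptors and then stay capped at
    * 1024.
    */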
2094
2095 /* Create the descriptor pool */
2096 cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
2097 VkDescriptorPoolSize pool_size = {
2098 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2099 .descriptorCount = descriptor_count,
2100 };
2101 VkDescriptorPoolCreateInfo info = {
2102 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
2103 .maxSets = descriptor_count,
2104 .poolSizeCount = 1,
2105 .pPoolSizes = &pool_size,
2106 .flags = 0,
2107 };
2108 VkResult result =
2109 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
2110 &info,
2111 &cmd_buffer->device->vk.alloc,
2112 &cmd_buffer->meta.texel_buffer_copy.dspool);
2113
2114 if (result == VK_SUCCESS) {
2115 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2116 const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
2117
2118 v3dv_cmd_buffer_add_private_obj(
2119 cmd_buffer, (uintptr_t) _pool,
2120 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
2121
2122 struct v3dv_descriptor_pool *pool =
2123 v3dv_descriptor_pool_from_handle(_pool);
2124 pool->is_driver_internal = true;
2125 }
2126
2127 return result;
2128 }
2129
2130 static VkResult
2131 allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
2132 VkDescriptorSet *set)
2133 {
2134 /* Make sure we have a descriptor pool */
2135 VkResult result;
2136 if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
2137 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2138 if (result != VK_SUCCESS)
2139 return result;
2140 }
2141 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
2142
2143 /* Allocate descriptor set */
2144 struct v3dv_device *device = cmd_buffer->device;
2145 VkDevice _device = v3dv_device_to_handle(device);
2146 VkDescriptorSetAllocateInfo info = {
2147 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2148 .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
2149 .descriptorSetCount = 1,
2150 .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
2151 };
2152 result = v3dv_AllocateDescriptorSets(_device, &info, set);
2153
2154 /* If we ran out of pool space, grow the pool and try again */
2155 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
2156 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
2157 if (result == VK_SUCCESS) {
2158 info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
2159 result = v3dv_AllocateDescriptorSets(_device, &info, set);
2160 }
2161 }
2162
2163 return result;
2164 }
2165
2166 static void
2167 get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
2168 VkColorComponentFlags cmask,
2169 VkComponentMapping *cswizzle,
2170 bool is_layered,
2171 uint8_t *key)
2172 {
2173 memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2174
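   /* Key layout, in bytes (must add up to
    * V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE, see the assert below):
    *   [ 0.. 3] VkFormat
    *   [ 4.. 7] VkColorComponentFlags cmask
    *   [ 8..11] is_layered flag
    *   [12..27] VkComponentMapping cswizzle
    */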
2175 uint32_t *p = (uint32_t *) key;
2176
2177 *p = format;
2178 p++;
2179
2180 *p = cmask;
2181 p++;
2182
2183 /* Note that we are using a single byte for this, so we could pack
2184 * more data into this 32-bit slot in the future.
2185 */
2186 *p = is_layered ? 1 : 0;
2187 p++;
2188
2189 memcpy(p, cswizzle, sizeof(VkComponentMapping));
2190 p += sizeof(VkComponentMapping) / sizeof(uint32_t);
2191
2192 assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
2193 }
2194
2195 static bool
2196 create_blit_render_pass(struct v3dv_device *device,
2197 VkFormat dst_format,
2198 VkFormat src_format,
2199 VkRenderPass *pass_load,
2200 VkRenderPass *pass_no_load);
2201
2202 static bool
2203 create_pipeline(struct v3dv_device *device,
2204 struct v3dv_render_pass *pass,
2205 struct nir_shader *vs_nir,
2206 struct nir_shader *gs_nir,
2207 struct nir_shader *fs_nir,
2208 const VkPipelineVertexInputStateCreateInfo *vi_state,
2209 const VkPipelineDepthStencilStateCreateInfo *ds_state,
2210 const VkPipelineColorBlendStateCreateInfo *cb_state,
2211 const VkPipelineMultisampleStateCreateInfo *ms_state,
2212 const VkPipelineLayout layout,
2213 VkPipeline *pipeline);
2214
2215 static nir_shader *
2216 get_texel_buffer_copy_vs(const nir_shader_compiler_options *options)
2217 {
2218 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
2219 "meta texel buffer copy vs");
2220 nir_variable *vs_out_pos =
2221 nir_variable_create(b.shader, nir_var_shader_out,
2222 glsl_vec4_type(), "gl_Position");
2223 vs_out_pos->data.location = VARYING_SLOT_POS;
2224
2225 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
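   /* Assumption: nir_gen_rect_vertices() with NULL z/w emits the standard
    * full-viewport rect positions from the vertex index; the region being
    * copied is then restricted per draw via viewport/scissor state.
    */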
2226 nir_store_var(&b, vs_out_pos, pos, 0xf);
2227
2228 return b.shader;
2229 }
2230
2231 static nir_shader *
2232 get_texel_buffer_copy_gs(const nir_shader_compiler_options *options)
2233 {
2234 /* FIXME: this creates a geometry shader that takes the index of a single
2235 * layer to copy from push constants, so we need to emit a draw call for
2236 * each layer that we want to copy. We could actually do better and have it
2237 * take a range of layers; however, if we were to do this, we would need to
2238 * be careful not to exceed the maximum number of output vertices allowed in
2239 * a geometry shader.
2240 */
2241 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2242 "meta texel buffer copy gs");
2243 nir_shader *nir = b.shader;
2244 nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
2245 nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
2246 (1ull << VARYING_SLOT_LAYER);
2247 nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
2248 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
2249 nir->info.gs.vertices_in = 3;
2250 nir->info.gs.vertices_out = 3;
2251 nir->info.gs.invocations = 1;
2252 nir->info.gs.active_stream_mask = 0x1;
2253
2254 /* in vec4 gl_Position[3] */
2255 nir_variable *gs_in_pos =
2256 nir_variable_create(b.shader, nir_var_shader_in,
2257 glsl_array_type(glsl_vec4_type(), 3, 0),
2258 "in_gl_Position");
2259 gs_in_pos->data.location = VARYING_SLOT_POS;
2260
2261 /* out vec4 gl_Position */
2262 nir_variable *gs_out_pos =
2263 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
2264 "out_gl_Position");
2265 gs_out_pos->data.location = VARYING_SLOT_POS;
2266
2267 /* out float gl_Layer */
2268 nir_variable *gs_out_layer =
2269 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
2270 "out_gl_Layer");
2271 gs_out_layer->data.location = VARYING_SLOT_LAYER;
2272
2273 /* Emit output triangle */
2274 for (uint32_t i = 0; i < 3; i++) {
2275 /* gl_Position from shader input */
2276 nir_deref_instr *in_pos_i =
2277 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
2278 nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
2279
2280 /* gl_Layer from push constants */
2281 nir_def *layer =
2282 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2283 .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
2284 .range = 4);
2285 nir_store_var(&b, gs_out_layer, layer, 0x1);
2286
2287 nir_emit_vertex(&b, 0);
2288 }
2289
2290 nir_end_primitive(&b, 0);
2291
2292 return nir;
2293 }
2294
2295 static nir_def *
2296 load_frag_coord(nir_builder *b)
2297 {
2298 nir_foreach_shader_in_variable(var, b->shader) {
2299 if (var->data.location == VARYING_SLOT_POS)
2300 return nir_load_var(b, var);
2301 }
2302 nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
2303 glsl_vec4_type(), NULL);
2304 pos->data.location = VARYING_SLOT_POS;
2305 return nir_load_var(b, pos);
2306 }
2307
2308 static uint32_t
2309 component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
2310 {
2311 if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
2312 swz = comp;
2313
2314 switch (swz) {
2315 case VK_COMPONENT_SWIZZLE_R:
2316 return 0;
2317 case VK_COMPONENT_SWIZZLE_G:
2318 return 1;
2319 case VK_COMPONENT_SWIZZLE_B:
2320 return 2;
2321 case VK_COMPONENT_SWIZZLE_A:
2322 return 3;
2323 default:
2324 unreachable("Invalid swizzle");
2325 };
2326 }
2327
2328 static nir_shader *
2329 get_texel_buffer_copy_fs(const nir_shader_compiler_options *options,
2330 VkFormat format, VkComponentMapping *cswizzle)
2331 {
2332 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
2333 "meta texel buffer copy fs");
2334
2335 /* We only use the copy from texel buffer shader to implement
2336 * copy_buffer_to_image_shader, which always selects a compatible integer
2337 * format for the copy.
2338 */
2339 assert(vk_format_is_int(format));
2340
2341 /* Fragment shader output color */
2342 nir_variable *fs_out_color =
2343 nir_variable_create(b.shader, nir_var_shader_out,
2344 glsl_uvec4_type(), "out_color");
2345 fs_out_color->data.location = FRAG_RESULT_DATA0;
2346
2347 /* Texel buffer input */
2348 const struct glsl_type *sampler_type =
2349 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
2350 nir_variable *sampler =
2351 nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
2352 sampler->data.descriptor_set = 0;
2353 sampler->data.binding = 0;
2354
2355 /* Load the box describing the pixel region we want to copy from the
2356 * texel buffer.
2357 */
2358 nir_def *box =
2359 nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
2360 .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
2361 .range = 16);
2362
2363 /* Load the buffer stride (this comes in texel units) */
2364 nir_def *stride =
2365 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2366 .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
2367 .range = 4);
2368
2369 /* Load the buffer offset (this comes in texel units) */
2370 nir_def *offset =
2371 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
2372 .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
2373 .range = 4);
2374
2375 nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
2376
2377 /* Load pixel data from texel buffer based on the x,y offset of the pixel
2378 * within the box. Texel buffers are 1D arrays of texels.
2379 *
2380 * Notice that we already make sure that we only generate fragments that are
2381 * inside the box through the scissor/viewport state, so our offset into the
2382 * texel buffer should always be within its bounds and we don't need
2383 * to add a check for that here.
2384 */
2385 nir_def *x_offset =
2386 nir_isub(&b, nir_channel(&b, coord, 0),
2387 nir_channel(&b, box, 0));
2388 nir_def *y_offset =
2389 nir_isub(&b, nir_channel(&b, coord, 1),
2390 nir_channel(&b, box, 1));
2391 nir_def *texel_offset =
2392 nir_iadd(&b, nir_iadd(&b, offset, x_offset),
2393 nir_imul(&b, y_offset, stride));
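   /* i.e. texel_offset = offset + (frag.x - box.x) + (frag.y - box.y) * stride,
    * the usual row-major addressing of the 2D region, with every term
    * expressed in texels.
    */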
2394
2395 nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->def;
2396 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
2397 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
2398 tex->op = nir_texop_txf;
2399 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, texel_offset);
2400 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
2401 tex->dest_type = nir_type_uint32;
2402 tex->is_array = false;
2403 tex->coord_components = 1;
2404 nir_def_init(&tex->instr, &tex->def, 4, 32);
2405 nir_builder_instr_insert(&b, &tex->instr);
2406
2407 uint32_t swiz[4];
2408 swiz[0] =
2409 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
2410 swiz[1] =
2411 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
2412 swiz[2] =
2413 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
2414 swiz[3] =
2415 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
2416 nir_def *s = nir_swizzle(&b, &tex->def, swiz, 4);
2417 nir_store_var(&b, fs_out_color, s, 0xf);
2418
2419 return b.shader;
2420 }
2421
2422 static bool
2423 create_texel_buffer_copy_pipeline(struct v3dv_device *device,
2424 VkFormat format,
2425 VkColorComponentFlags cmask,
2426 VkComponentMapping *cswizzle,
2427 bool is_layered,
2428 VkRenderPass _pass,
2429 VkPipelineLayout pipeline_layout,
2430 VkPipeline *pipeline)
2431 {
2432 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
2433
2434 assert(vk_format_is_color(format));
2435
2436 const nir_shader_compiler_options *options =
2437 v3dv_pipeline_get_nir_options(&device->devinfo);
2438
2439 nir_shader *vs_nir = get_texel_buffer_copy_vs(options);
2440 nir_shader *fs_nir = get_texel_buffer_copy_fs(options, format, cswizzle);
2441 nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs(options) : NULL;
2442
2443 const VkPipelineVertexInputStateCreateInfo vi_state = {
2444 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
2445 .vertexBindingDescriptionCount = 0,
2446 .vertexAttributeDescriptionCount = 0,
2447 };
2448
2449 VkPipelineDepthStencilStateCreateInfo ds_state = {
2450 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
2451 };
2452
2453 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
2454 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
2455 .blendEnable = false,
2456 .colorWriteMask = cmask,
2457 };
2458
2459 const VkPipelineColorBlendStateCreateInfo cb_state = {
2460 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
2461 .logicOpEnable = false,
2462 .attachmentCount = 1,
2463 .pAttachments = blend_att_state
2464 };
2465
2466 const VkPipelineMultisampleStateCreateInfo ms_state = {
2467 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
2468 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
2469 .sampleShadingEnable = false,
2470 .pSampleMask = NULL,
2471 .alphaToCoverageEnable = false,
2472 .alphaToOneEnable = false,
2473 };
2474
2475 return create_pipeline(device,
2476 pass,
2477 vs_nir, gs_nir, fs_nir,
2478 &vi_state,
2479 &ds_state,
2480 &cb_state,
2481 &ms_state,
2482 pipeline_layout,
2483 pipeline);
2484 }
2485
2486 static bool
2487 get_copy_texel_buffer_pipeline(
2488 struct v3dv_cmd_buffer *cmd_buffer,
2489 VkFormat format,
2490 VkColorComponentFlags cmask,
2491 VkComponentMapping *cswizzle,
2492 VkImageType image_type,
2493 bool is_layered,
2494 struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
2495 {
2496 bool ok = true;
2497 struct v3dv_device *device = cmd_buffer->device;
2498
2499 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
2500 if (device->instance->meta_cache_enabled) {
2501 get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
2502 key);
2503
2504 mtx_lock(&device->meta.mtx);
2505 struct hash_entry *entry =
2506 _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
2507 key);
2508 if (entry) {
2509 mtx_unlock(&device->meta.mtx);
2510 *pipeline = entry->data;
2511 return true;
2512 }
2513 }
2514
2515 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
2516 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2517
2518 if (*pipeline == NULL)
2519 goto fail;
2520
2521 /* The blit render pass is compatible */
2522 ok = create_blit_render_pass(device, format, format,
2523 &(*pipeline)->pass,
2524 &(*pipeline)->pass_no_load);
2525 if (!ok)
2526 goto fail;
2527
2528 ok =
2529 create_texel_buffer_copy_pipeline(device,
2530 format, cmask, cswizzle, is_layered,
2531 (*pipeline)->pass,
2532 device->meta.texel_buffer_copy.p_layout,
2533 &(*pipeline)->pipeline);
2534 if (!ok)
2535 goto fail;
2536
2537 if (device->instance->meta_cache_enabled) {
2538 _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
2539 key, *pipeline);
2540 mtx_unlock(&device->meta.mtx);
2541 } else {
2542 v3dv_cmd_buffer_add_private_obj(
2543 cmd_buffer, (uintptr_t)*pipeline,
2544 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_meta_texel_buffer_copy_pipeline);
2545 }
2546
2547 return true;
2548
2549 fail:
2550 if (device->instance->meta_cache_enabled)
2551 mtx_unlock(&device->meta.mtx);
2552
2553 VkDevice _device = v3dv_device_to_handle(device);
2554 if (*pipeline) {
2555 if ((*pipeline)->pass)
2556 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
2557 if ((*pipeline)->pipeline)
2558 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
2559 vk_free(&device->vk.alloc, *pipeline);
2560 *pipeline = NULL;
2561 }
2562
2563 return false;
2564 }
2565
2566 static bool
2567 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
2568 VkImageAspectFlags aspect,
2569 struct v3dv_image *image,
2570 VkFormat dst_format,
2571 VkFormat src_format,
2572 struct v3dv_buffer *buffer,
2573 uint32_t buffer_bpp,
2574 VkColorComponentFlags cmask,
2575 VkComponentMapping *cswizzle,
2576 uint32_t region_count,
2577 const VkBufferImageCopy2 *regions)
2578 {
2579 VkResult result;
2580 bool handled = false;
2581
2582 assert(cswizzle);
2583
2584 /* This is a copy path, so we don't handle format conversions. The only
2585 * exception are stencil to D24S8 copies, which are handled as a color
2586 * masked R8->RGBA8 copy.
2587 */
2588 assert(src_format == dst_format ||
2589 (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
2590 src_format == VK_FORMAT_R8_UINT &&
2591 cmask == VK_COLOR_COMPONENT_R_BIT));
2592
2593 /* We only handle color copies. Callers can copy D/S aspects by using
2594 * a compatible color format and maybe a cmask/cswizzle for D24 formats.
2595 */
2596 if (!vk_format_is_color(dst_format) || !vk_format_is_color(src_format))
2597 return handled;
2598
2599 /* FIXME: we only handle uncompressed images for now. */
2600 if (vk_format_is_compressed(image->vk.format))
2601 return handled;
2602
2603 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
2604 VK_COLOR_COMPONENT_G_BIT |
2605 VK_COLOR_COMPONENT_B_BIT |
2606 VK_COLOR_COMPONENT_A_BIT;
2607 if (cmask == 0)
2608 cmask = full_cmask;
2609
2610 /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
2611 * so we can bind it as a texel buffer. Otherwise, the buffer view
2612 * we create below won't set up the texture state that we need for this.
2613 */
2614 if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
2615 if (v3dv_buffer_format_supports_features(
2616 cmd_buffer->device, src_format,
2617 VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT)) {
2618 buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
2619 } else {
2620 return handled;
2621 }
2622 }
2623
2624 /* At this point we should be able to handle the copy unless an unexpected
2625 * error occurs, such as an OOM.
2626 */
2627 handled = true;
2628
2629
2630 /* Compute the number of layers to copy.
2631 *
2632 * If we are batching (region_count > 1) all our regions have the same
2633 * image subresource so we can take this from the first region. For 3D
2634 * images we require the same depth extent.
2635 */
2636 const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2637 uint32_t num_layers;
2638 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2639 num_layers = vk_image_subresource_layer_count(&image->vk, resource);
2640 } else {
2641 assert(region_count == 1);
2642 num_layers = regions[0].imageExtent.depth;
2643 }
2644 assert(num_layers > 0);
2645
2646 /* Get the texel buffer copy pipeline */
2647 struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2648 bool ok = get_copy_texel_buffer_pipeline(cmd_buffer,
2649 dst_format, cmask, cswizzle,
2650 image->vk.image_type, num_layers > 1,
2651 &pipeline);
2652 if (!ok)
2653 return handled;
2654 assert(pipeline && pipeline->pipeline && pipeline->pass);
2655
2656 /* Setup descriptor set for the source texel buffer. We don't have to
2657 * register the descriptor as a private command buffer object since
2658 * all descriptors will be freed automatically with the descriptor
2659 * pool.
2660 */
2661 VkDescriptorSet set;
2662 result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2663 if (result != VK_SUCCESS)
2664 return handled;
2665
2666 /* We can't pass region->bufferOffset here for the offset field because
2667 * the texture base pointer in the texture shader state must be a 64-byte
2668 * aligned value. Instead, we use 0 here and we pass the offset in texels
2669 * as a push constant to the shader.
2670 */
2671 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2672 VkBufferViewCreateInfo buffer_view_info = {
2673 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2674 .buffer = v3dv_buffer_to_handle(buffer),
2675 .format = src_format,
2676 .offset = 0,
2677 .range = VK_WHOLE_SIZE,
2678 };
2679
2680 VkBufferView texel_buffer_view;
2681 result = v3dv_CreateBufferView(_device, &buffer_view_info,
2682 &cmd_buffer->device->vk.alloc,
2683 &texel_buffer_view);
2684 if (result != VK_SUCCESS)
2685 return handled;
2686
2687 v3dv_cmd_buffer_add_private_obj(
2688 cmd_buffer, (uintptr_t)texel_buffer_view,
2689 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2690
2691 VkWriteDescriptorSet write = {
2692 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2693 .dstSet = set,
2694 .dstBinding = 0,
2695 .dstArrayElement = 0,
2696 .descriptorCount = 1,
2697 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2698 .pTexelBufferView = &texel_buffer_view,
2699 };
2700 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2701
2702 /* Push command buffer state before starting meta operation */
2703 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2704
2705 /* Bind common state for all layers and regions */
2706 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2707 v3dv_CmdBindPipeline(_cmd_buffer,
2708 VK_PIPELINE_BIND_POINT_GRAPHICS,
2709 pipeline->pipeline);
2710
2711 v3dv_CmdBindDescriptorSets(_cmd_buffer,
2712 VK_PIPELINE_BIND_POINT_GRAPHICS,
2713 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2714 0, 1, &set,
2715 0, NULL);
2716
2717 /* Setup framebuffer.
2718 *
2719 * For 3D images, this creates a layered framebuffer with a number of
2720 * layers matching the depth extent of the 3D image.
2721 */
2722 uint8_t plane = v3dv_plane_from_aspect(aspect);
2723 uint32_t fb_width = u_minify(image->planes[plane].width, resource->mipLevel);
2724 uint32_t fb_height = u_minify(image->planes[plane].height, resource->mipLevel);
2725
2726 VkImageViewCreateInfo image_view_info = {
2727 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2728 .image = v3dv_image_to_handle(image),
2729 .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
2730 .format = dst_format,
2731 .subresourceRange = {
2732 .aspectMask = aspect,
2733 .baseMipLevel = resource->mipLevel,
2734 .levelCount = 1,
2735 .baseArrayLayer = resource->baseArrayLayer,
2736 .layerCount = num_layers,
2737 },
2738 };
2739 VkImageView image_view;
2740 result = v3dv_create_image_view(cmd_buffer->device,
2741 &image_view_info, &image_view);
2742 if (result != VK_SUCCESS)
2743 goto fail;
2744
2745 v3dv_cmd_buffer_add_private_obj(
2746 cmd_buffer, (uintptr_t)image_view,
2747 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2748
2749 VkFramebufferCreateInfo fb_info = {
2750 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2751 .renderPass = pipeline->pass,
2752 .attachmentCount = 1,
2753 .pAttachments = &image_view,
2754 .width = fb_width,
2755 .height = fb_height,
2756 .layers = num_layers,
2757 };
2758
2759 VkFramebuffer fb;
2760 result = v3dv_CreateFramebuffer(_device, &fb_info,
2761 &cmd_buffer->device->vk.alloc, &fb);
2762 if (result != VK_SUCCESS)
2763 goto fail;
2764
2765 v3dv_cmd_buffer_add_private_obj(
2766 cmd_buffer, (uintptr_t)fb,
2767 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2768
2769 /* For each layer */
2770 for (uint32_t l = 0; l < num_layers; l++) {
2771 /* Start render pass for this layer.
2772 *
2773 * If we only have one region to copy, then we might be able to
2774 * skip the TLB load if it is aligned to tile boundaries. All layers
2775 * copy the same area, so we only need to check this once.
2776 */
2777 bool can_skip_tlb_load = false;
2778 VkRect2D render_area;
2779 if (region_count == 1) {
2780 render_area.offset.x = regions[0].imageOffset.x;
2781 render_area.offset.y = regions[0].imageOffset.y;
2782 render_area.extent.width = regions[0].imageExtent.width;
2783 render_area.extent.height = regions[0].imageExtent.height;
2784
2785 if (l == 0) {
2786 struct v3dv_render_pass *pipeline_pass =
2787 v3dv_render_pass_from_handle(pipeline->pass);
2788 can_skip_tlb_load =
2789 cmask == full_cmask &&
2790 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2791 v3dv_framebuffer_from_handle(fb),
2792 pipeline_pass, 0);
2793 }
2794 } else {
2795 render_area.offset.x = 0;
2796 render_area.offset.y = 0;
2797 render_area.extent.width = fb_width;
2798 render_area.extent.height = fb_height;
2799 }
2800
2801 VkRenderPassBeginInfo rp_info = {
2802 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2803 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2804 pipeline->pass,
2805 .framebuffer = fb,
2806 .renderArea = render_area,
2807 .clearValueCount = 0,
2808 };
2809
2810 VkSubpassBeginInfo sp_info = {
2811 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
2812 .contents = VK_SUBPASS_CONTENTS_INLINE,
2813 };
2814
2815 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
2816 struct v3dv_job *job = cmd_buffer->state.job;
2817 if (!job)
2818 goto fail;
2819
2820 /* If we are using a layered copy we need to specify the layer for the
2821 * Geometry Shader.
2822 */
2823 if (num_layers > 1) {
2824 uint32_t layer = resource->baseArrayLayer + l;
2825 v3dv_CmdPushConstants(_cmd_buffer,
2826 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2827 VK_SHADER_STAGE_GEOMETRY_BIT,
2828 24, 4, &layer);
2829 }
2830
2831 /* For each region */
2832 for (uint32_t r = 0; r < region_count; r++) {
2833 const VkBufferImageCopy2 *region = &regions[r];
2834
2835 /* Obtain the 2D buffer region spec */
2836 uint32_t buf_width, buf_height;
2837 if (region->bufferRowLength == 0)
2838 buf_width = region->imageExtent.width;
2839 else
2840 buf_width = region->bufferRowLength;
2841
2842 if (region->bufferImageHeight == 0)
2843 buf_height = region->imageExtent.height;
2844 else
2845 buf_height = region->bufferImageHeight;
2846
2847 const VkViewport viewport = {
2848 .x = region->imageOffset.x,
2849 .y = region->imageOffset.y,
2850 .width = region->imageExtent.width,
2851 .height = region->imageExtent.height,
2852 .minDepth = 0.0f,
2853 .maxDepth = 1.0f
2854 };
2855 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2856 const VkRect2D scissor = {
2857 .offset = { region->imageOffset.x, region->imageOffset.y },
2858 .extent = { region->imageExtent.width, region->imageExtent.height }
2859 };
2860 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2861
2862 const VkDeviceSize buf_offset =
2863 region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2864 uint32_t push_data[6] = {
2865 region->imageOffset.x,
2866 region->imageOffset.y,
2867 region->imageOffset.x + region->imageExtent.width - 1,
2868 region->imageOffset.y + region->imageExtent.height - 1,
2869 buf_width,
2870 buf_offset,
2871 };
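      /* Push constant layout consumed by the copy shaders (offsets in bytes,
       * presumably matching the TEXEL_BUFFER_COPY_*_PC_OFFSET defines):
       *   0..15 : copy box (x0, y0, x1, y1) in pixels (fragment shader)
       *   16    : buffer row stride, in texels (fragment shader)
       *   20    : texel offset of the region start (fragment shader)
       *   24    : gl_Layer for layered copies (geometry shader, pushed above)
       */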
2872
2873 v3dv_CmdPushConstants(_cmd_buffer,
2874 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2875 VK_SHADER_STAGE_FRAGMENT_BIT,
2876 0, sizeof(push_data), &push_data);
2877
2878 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2879 } /* For each region */
2880
2881 VkSubpassEndInfo sp_end_info = {
2882 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
2883 };
2884
2885 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
2886 } /* For each layer */
2887
2888 fail:
2889 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
2890 return handled;
2891 }
2892
2893 /**
2894 * Returns true if the implementation supports the requested operation (even if
2895 * it failed to process it, for example, due to an out-of-memory error).
2896 */
2897 static bool
2898 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2899 VkImageAspectFlags aspect,
2900 struct v3dv_image *image,
2901 VkFormat dst_format,
2902 VkFormat src_format,
2903 struct v3dv_buffer *buffer,
2904 uint32_t buffer_bpp,
2905 VkColorComponentFlags cmask,
2906 VkComponentMapping *cswizzle,
2907 uint32_t region_count,
2908 const VkBufferImageCopy2 *regions)
2909 {
2910 /* Since we can't sample linear images we need to upload the linear
2911 * buffer to a tiled image that we can use as a blit source, which
2912 * is slow.
2913 */
2914 perf_debug("Falling back to blit path for buffer to image copy.\n");
2915
2916 struct v3dv_device *device = cmd_buffer->device;
2917 VkDevice _device = v3dv_device_to_handle(device);
2918 bool handled = true;
2919
2920 /* Allocate memory for the tiled image. Since we copy layer by layer,
2921 * we allocate memory to hold a full layer, which is the worst case.
2922 * For that we create a dummy image with that spec, get memory requirements
2923 * for it and use that information to create the memory allocation.
2924 * We will then reuse this memory store for all the regions we want to
2925 * copy.
2926 */
2927 VkImage dummy_image;
2928 VkImageCreateInfo dummy_info = {
2929 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2930 .imageType = VK_IMAGE_TYPE_2D,
2931 .format = src_format,
2932 .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
2933 .mipLevels = 1,
2934 .arrayLayers = 1,
2935 .samples = VK_SAMPLE_COUNT_1_BIT,
2936 .tiling = VK_IMAGE_TILING_OPTIMAL,
2937 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2938 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2939 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2940 .queueFamilyIndexCount = 0,
2941 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2942 };
2943 VkResult result =
2944 v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2945 if (result != VK_SUCCESS)
2946 return handled;
2947
2948 VkMemoryRequirements reqs;
2949 vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2950 v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2951
2952 VkDeviceMemory mem;
2953 VkMemoryAllocateInfo alloc_info = {
2954 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2955 .allocationSize = reqs.size,
2956 .memoryTypeIndex = 0,
2957 };
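   /* memoryTypeIndex 0 is used directly here; this assumes the driver
    * exposes a single device-local memory type that is always suitable for
    * the staging image.
    */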
2958 result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2959 if (result != VK_SUCCESS)
2960 return handled;
2961
2962 v3dv_cmd_buffer_add_private_obj(
2963 cmd_buffer, (uintptr_t)mem,
2964 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2965
2966 /* Obtain the layer count.
2967 *
2968 * If we are batching (region_count > 1) all our regions have the same
2969 * image subresource so we can take this from the first region.
2970 */
2971 uint32_t num_layers;
2972 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2973 num_layers = vk_image_subresource_layer_count(&image->vk,
2974 &regions[0].imageSubresource);
2975 } else {
2976 num_layers = regions[0].imageExtent.depth;
2977 }
2978 assert(num_layers > 0);
2979
2980 /* Sanity check: we can only batch multiple regions together if they have
2981 * the same framebuffer (so the same layer).
2982 */
2983 assert(num_layers == 1 || region_count == 1);
2984
2985 uint8_t plane = v3dv_plane_from_aspect(aspect);
2986 assert(plane < image->plane_count);
2987
2988 const uint32_t block_width =
2989 vk_format_get_blockwidth(image->planes[plane].vk_format);
2990 const uint32_t block_height =
2991 vk_format_get_blockheight(image->planes[plane].vk_format);
2992
2993 /* Copy regions by uploading each region to a temporary tiled image using
2994 * the memory we have just allocated as storage.
2995 */
2996 for (uint32_t r = 0; r < region_count; r++) {
2997 const VkBufferImageCopy2 *region = &regions[r];
2998
2999 /* Obtain the 2D buffer region spec */
3000 uint32_t buf_width, buf_height;
3001 if (region->bufferRowLength == 0)
3002 buf_width = region->imageExtent.width;
3003 else
3004 buf_width = region->bufferRowLength;
3005
3006 if (region->bufferImageHeight == 0)
3007 buf_height = region->imageExtent.height;
3008 else
3009 buf_height = region->bufferImageHeight;
3010
3011 /* If the image is compressed, the bpp refers to blocks, not pixels */
3012 buf_width = buf_width / block_width;
3013 buf_height = buf_height / block_height;
3014
3015 for (uint32_t i = 0; i < num_layers; i++) {
3016 /* Create the tiled image */
3017 VkImageCreateInfo image_info = {
3018 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
3019 .imageType = VK_IMAGE_TYPE_2D,
3020 .format = src_format,
3021 .extent = { buf_width, buf_height, 1 },
3022 .mipLevels = 1,
3023 .arrayLayers = 1,
3024 .samples = VK_SAMPLE_COUNT_1_BIT,
3025 .tiling = VK_IMAGE_TILING_OPTIMAL,
3026 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3027 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
3028 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
3029 .queueFamilyIndexCount = 0,
3030 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3031 };
3032
3033 VkImage buffer_image;
3034 VkResult result =
3035 v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
3036 &buffer_image);
3037 if (result != VK_SUCCESS)
3038 return handled;
3039
3040 v3dv_cmd_buffer_add_private_obj(
3041 cmd_buffer, (uintptr_t)buffer_image,
3042 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
3043
3044 result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
3045 if (result != VK_SUCCESS)
3046 return handled;
3047
3048 /* When copying a multi-plane image the aspect indicates the plane to
3049 * copy. For these, we only copy one plane at a time, which is always
3050 * a color plane.
3051 */
3052 VkImageAspectFlags copy_aspect =
3053 image->plane_count == 1 ? aspect : VK_IMAGE_ASPECT_COLOR_BIT;
3054
3055 /* Upload buffer contents for the selected layer */
3056 const VkDeviceSize buf_offset_bytes =
3057 region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
3058 const VkBufferImageCopy2 buffer_image_copy = {
3059 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
3060 .bufferOffset = buf_offset_bytes,
3061 .bufferRowLength = region->bufferRowLength / block_width,
3062 .bufferImageHeight = region->bufferImageHeight / block_height,
3063 .imageSubresource = {
3064 .aspectMask = copy_aspect,
3065 .mipLevel = 0,
3066 .baseArrayLayer = 0,
3067 .layerCount = 1,
3068 },
3069 .imageOffset = { 0, 0, 0 },
3070 .imageExtent = { buf_width, buf_height, 1 }
3071 };
3072 handled =
3073 create_tiled_image_from_buffer(cmd_buffer,
3074 v3dv_image_from_handle(buffer_image),
3075 buffer, &buffer_image_copy);
3076 if (!handled) {
3077 /* This is unexpected; we should have set up the upload so it would be
3078 * handled by a TFU or TLB copy.
3079 */
3080 unreachable("Unable to copy buffer to image through TLB");
3081 return false;
3082 }
3083
3084 /* Blit-copy the requested image extent from the buffer image to the
3085 * destination image.
3086 *
3087 * Since we are copying, the blit must use the same format on the
3088 * destination and source images to avoid format conversions. The
3089 * only exception is copying stencil, which we upload to an R8UI source
3090 * image but then need to blit to an S8D24 destination (the only
3091 * stencil format we support).
3092 */
3093 const VkImageBlit2 blit_region = {
3094 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
3095 .srcSubresource = {
3096 .aspectMask = copy_aspect,
3097 .mipLevel = 0,
3098 .baseArrayLayer = 0,
3099 .layerCount = 1,
3100 },
3101 .srcOffsets = {
3102 { 0, 0, 0 },
3103 { region->imageExtent.width, region->imageExtent.height, 1 },
3104 },
3105 .dstSubresource = {
3106 .aspectMask = aspect,
3107 .mipLevel = region->imageSubresource.mipLevel,
3108 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
3109 .layerCount = 1,
3110 },
3111 .dstOffsets = {
3112 {
3113 DIV_ROUND_UP(region->imageOffset.x, block_width),
3114 DIV_ROUND_UP(region->imageOffset.y, block_height),
3115 region->imageOffset.z + i,
3116 },
3117 {
3118 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
3119 block_width),
3120 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
3121 block_height),
3122 region->imageOffset.z + i + 1,
3123 },
3124 },
3125 };
3126
3127 handled = blit_shader(cmd_buffer,
3128 image, dst_format,
3129 v3dv_image_from_handle(buffer_image), src_format,
3130 cmask, cswizzle,
3131 &blit_region, VK_FILTER_NEAREST, true);
3132 if (!handled) {
3133 /* This is unexpected; we should have a supported blit spec */
3134 unreachable("Unable to blit buffer to destination image");
3135 return false;
3136 }
3137 }
3138 }
3139
3140 return handled;
3141 }
3142
3143 /**
3144 * Returns true if the implementation supports the requested operation (even if
3145 * it failed to process it, for example, due to an out-of-memory error).
3146 */
3147 static bool
3148 copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
3149 struct v3dv_image *image,
3150 struct v3dv_buffer *buffer,
3151 uint32_t region_count,
3152 const VkBufferImageCopy2 *regions,
3153 bool use_texel_buffer)
3154 {
3155 /* We can only call this with region_count > 1 if we can batch the regions
3156 * together, in which case they share the same image subresource, and so
3157 * the same aspect.
3158 */
3159 VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
3160 const VkImageAspectFlagBits any_plane_aspect =
3161 VK_IMAGE_ASPECT_PLANE_0_BIT |
3162 VK_IMAGE_ASPECT_PLANE_1_BIT |
3163 VK_IMAGE_ASPECT_PLANE_2_BIT;
3164
3165 bool is_plane_aspect = aspect & any_plane_aspect;
3166
3167 /* Generally, the bpp of the data in the buffer matches that of the
3168 * destination image. The exception is the case where we are uploading
3169 * stencil (8bpp) to a combined d24s8 image (32bpp).
3170 */
3171 uint8_t plane = v3dv_plane_from_aspect(aspect);
3172 assert(plane < image->plane_count);
3173 uint32_t buf_bpp = image->planes[plane].cpp;
3174
3175 /* We are about to upload the buffer data to an image so we can then
3176 * blit that to our destination region. Because we are going to implement
3177 * the copy as a blit, we want our blit source and destination formats to be
3178 * the same (to avoid any format conversions), so we choose a canonical
3179 * format that matches the destination image bpp.
3180 */
3181 VkComponentMapping ident_swizzle = {
3182 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
3183 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
3184 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
3185 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
3186 };
3187
3188 VkComponentMapping cswizzle = ident_swizzle;
3189 VkColorComponentFlags cmask = 0; /* Write all components */
3190 VkFormat src_format;
3191 VkFormat dst_format;
3192 switch (buf_bpp) {
3193 case 16:
3194 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3195 src_format = VK_FORMAT_R32G32B32A32_UINT;
3196 dst_format = src_format;
3197 break;
3198 case 8:
3199 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
3200 src_format = VK_FORMAT_R16G16B16A16_UINT;
3201 dst_format = src_format;
3202 break;
3203 case 4:
3204 switch (aspect) {
3205 case VK_IMAGE_ASPECT_COLOR_BIT:
3206 case VK_IMAGE_ASPECT_PLANE_0_BIT:
3207 case VK_IMAGE_ASPECT_PLANE_1_BIT:
3208 case VK_IMAGE_ASPECT_PLANE_2_BIT:
3209 src_format = VK_FORMAT_R8G8B8A8_UINT;
3210 dst_format = src_format;
3211 break;
3212 case VK_IMAGE_ASPECT_DEPTH_BIT:
3213 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
3214 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3215 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
3216 src_format = VK_FORMAT_R8G8B8A8_UINT;
3217 dst_format = src_format;
3218
3219 /* For D24 formats, the Vulkan spec states that the depth component
3220 * in the buffer is stored in the 24-LSB, but V3D wants it in the
3221 * 24-MSB.
3222 */
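/* Sketch of the mapping: the buffer texel is read as RGBA8 with D24 in its
 * R,G,B bytes; the swizzle below moves those to the G,B,A channels of the
 * source sample, and the color mask restricts writes to G,B,A of the RGBA8
 * destination view, which alias the 24 depth bits of the S8D24 image,
 * leaving R (the stencil byte) untouched.
 */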
3223 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
3224 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
3225 cmask = VK_COLOR_COMPONENT_G_BIT |
3226 VK_COLOR_COMPONENT_B_BIT |
3227 VK_COLOR_COMPONENT_A_BIT;
3228 cswizzle.r = VK_COMPONENT_SWIZZLE_R;
3229 cswizzle.g = VK_COMPONENT_SWIZZLE_R;
3230 cswizzle.b = VK_COMPONENT_SWIZZLE_G;
3231 cswizzle.a = VK_COMPONENT_SWIZZLE_B;
3232 }
3233 break;
3234 case VK_IMAGE_ASPECT_STENCIL_BIT:
3235 /* Since we don't support separate stencil this is always a stencil
3236 * copy to a combined depth/stencil image. Because we don't support
3237 * separate stencil images, we interpret the buffer data as a
3238 * color R8UI image, and implement the blit as a compatible color
3239 * blit to an RGBA8UI destination masking out writes to components
3240 * GBA (which map to the D24 component of a S8D24 image).
3241 */
3242 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
3243 buf_bpp = 1;
3244 src_format = VK_FORMAT_R8_UINT;
3245 dst_format = VK_FORMAT_R8G8B8A8_UINT;
3246 cmask = VK_COLOR_COMPONENT_R_BIT;
3247 break;
3248 default:
3249 unreachable("unsupported aspect");
3250 return false;
3251 };
3252 break;
3253 case 2:
3254 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
3255 aspect == VK_IMAGE_ASPECT_DEPTH_BIT ||
3256 is_plane_aspect);
3257 src_format = VK_FORMAT_R16_UINT;
3258 dst_format = src_format;
3259 break;
3260 case 1:
3261 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT || is_plane_aspect);
3262 src_format = VK_FORMAT_R8_UINT;
3263 dst_format = src_format;
3264 break;
3265 default:
3266 unreachable("unsupported bit-size");
3267 return false;
3268 }
3269
3270 if (use_texel_buffer) {
3271 return texel_buffer_shader_copy(cmd_buffer, aspect, image,
3272 dst_format, src_format,
3273 buffer, buf_bpp,
3274 cmask, &cswizzle,
3275 region_count, regions);
3276 } else {
3277 return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
3278 dst_format, src_format,
3279 buffer, buf_bpp,
3280 cmask, &cswizzle,
3281 region_count, regions);
3282 }
3283 }
3284
3285 VKAPI_ATTR void VKAPI_CALL
3286 v3dv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
3287 const VkCopyBufferToImageInfo2 *info)
3288 {
3289 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
3290 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
3291 V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
3292
3293 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3294
3295 cmd_buffer->state.is_transfer = true;
3296
3297 uint32_t r = 0;
3298 while (r < info->regionCount) {
3299 /* The TFU and TLB paths can only copy one region at a time and the region
3300 * needs to start at the origin. We try these first for the common case
3301 * where we are copying full images, since they should be the fastest.
3302 */
3303 uint32_t batch_size = 1;
3304 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
3305 goto handled;
3306
3307 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
3308 goto handled;
3309
3310 /* Otherwise, we are copying subrects, so we fall back to copying
3311 * via shader and texel buffers and we try to batch the regions
3312 * if possible. We can only batch copies if they have the same
3313 * framebuffer spec, which is mostly determined by the image
3314 * subresource of the region.
3315 */
3316 const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
3317 for (uint32_t s = r + 1; s < info->regionCount; s++) {
3318 const VkImageSubresourceLayers *rsc_s =
3319 &info->pRegions[s].imageSubresource;
3320
3321 if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
3322 break;
3323
3324 /* For 3D images we also need to check the depth extent */
3325 if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
3326 info->pRegions[s].imageExtent.depth !=
3327 info->pRegions[r].imageExtent.depth) {
3328 break;
3329 }
3330
3331 batch_size++;
3332 }
3333
3334 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3335 batch_size, &info->pRegions[r], true)) {
3336 goto handled;
3337 }
3338
3339 /* If we still could not copy, fall back to slower paths.
3340 *
3341 * FIXME: we could try to batch these too, but since they are bound to be
3342 * slow it might not be worth it and we should instead put more effort
3343 * in handling more cases with the other paths.
3344 */
3345 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
3346 batch_size, &info->pRegions[r], false)) {
3347 goto handled;
3348 }
3349
3350 unreachable("Unsupported buffer to image copy.");
3351
3352 handled:
3353 r += batch_size;
3354 }
3355
3356 cmd_buffer->state.is_transfer = false;
3357 }
3358
3359 static void
3360 compute_blit_3d_layers(const VkOffset3D *offsets,
3361 uint32_t *min_layer, uint32_t *max_layer,
3362 bool *mirror_z);
3363
3364 /**
3365 * Returns true if the implementation supports the requested operation (even if
3366 * it failed to process it, for example, due to an out-of-memory error).
3367 *
3368 * The TFU blit path doesn't handle scaling so the blit filter parameter can
3369 * be ignored.
3370 */
3371 static bool
3372 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
3373 struct v3dv_image *dst,
3374 struct v3dv_image *src,
3375 const VkImageBlit2 *region)
3376 {
3377 if (V3D_DBG(DISABLE_TFU)) {
3378 perf_debug("Blit: TFU disabled, fallbacks could be slower.");
3379 return false;
3380 }
3381
3382 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3383 assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
3384
3385 /* From vkCmdBlitImage:
3386 * "srcImage must not use a format that requires a sampler YCBCR
3387 * conversion"
3388 * "dstImage must not use a format that requires a sampler YCBCR
3389 * conversion"
3390 */
3391 assert(dst->plane_count == 1);
3392 assert(src->plane_count == 1);
3393
3394 /* Format must match */
3395 if (src->vk.format != dst->vk.format)
3396 return false;
3397
3398 /* Destination can't be raster format */
3399 if (!dst->tiled)
3400 return false;
3401
3402 /* Source region must start at (0,0) */
3403 if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
3404 return false;
3405
3406 /* Destination image must be complete */
3407 if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
3408 return false;
3409
3410 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
3411 const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
3412 const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
3413 if (region->dstOffsets[1].x < dst_width - 1 ||
3414 region->dstOffsets[1].y < dst_height - 1) {
3415 return false;
3416 }
3417
3418 /* No XY scaling */
3419 if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
3420 region->srcOffsets[1].y != region->dstOffsets[1].y) {
3421 return false;
3422 }
3423
3424 /* If the format is D24S8 both aspects need to be copied, since the TFU
3425 * can't be programmed to copy only one aspect of the image.
3426 */
3427 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
3428 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
3429 VK_IMAGE_ASPECT_STENCIL_BIT;
3430 if (region->dstSubresource.aspectMask != ds_aspects)
3431 return false;
3432 }
3433
3434 /* Our TFU blits only handle exact copies (they require the same format
3435 * on input and output, no scaling, etc.), so there are no pixel format
3436 * conversions and we can rewrite the format to use one that is TFU
3437 * compatible based on its texel size.
3438 */
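/* For example (illustrative only): a 32 bpp destination such as
 * B8G8R8A8_UNORM can be copied as if it were any other TFU-capable 32 bpp
 * format, since the TFU path never converts texels and only the texel size
 * matters when picking the replacement format.
 */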
3439 const struct v3dv_format *format =
3440 v3dv_get_compatible_tfu_format(cmd_buffer->device,
3441 dst->planes[0].cpp, NULL);
3442
3443 /* Emit a TFU job for each layer to blit */
3444 assert(vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource) ==
3445 vk_image_subresource_layer_count(&src->vk, &region->srcSubresource));
3446
3447 uint32_t min_dst_layer;
3448 uint32_t max_dst_layer;
3449 bool dst_mirror_z = false;
3450 if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
3451 compute_blit_3d_layers(region->dstOffsets,
3452 &min_dst_layer, &max_dst_layer,
3453 &dst_mirror_z);
3454 } else {
3455 min_dst_layer = region->dstSubresource.baseArrayLayer;
3456 max_dst_layer = min_dst_layer +
3457 vk_image_subresource_layer_count(&dst->vk,
3458 &region->dstSubresource);
3459 }
3460
3461 uint32_t min_src_layer;
3462 uint32_t max_src_layer;
3463 bool src_mirror_z = false;
3464 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
3465 compute_blit_3d_layers(region->srcOffsets,
3466 &min_src_layer, &max_src_layer,
3467 &src_mirror_z);
3468 } else {
3469 min_src_layer = region->srcSubresource.baseArrayLayer;
3470 max_src_layer = min_src_layer +
3471 vk_image_subresource_layer_count(&src->vk,
3472 &region->srcSubresource);
3473 }
3474
3475 /* No Z scaling for 3D images (for non-3D images both src and dst must
3476 * have the same layerCount).
3477 */
3478 if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
3479 return false;
3480
3481 const uint32_t layer_count = max_dst_layer - min_dst_layer;
3482 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
3483 for (uint32_t i = 0; i < layer_count; i++) {
3484 /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
3485 * only involves reversing the order of the slices.
3486 */
3487 const uint32_t dst_layer =
3488 dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i;
3489 const uint32_t src_layer =
3490 src_mirror_z ? max_src_layer - i - 1: min_src_layer + i;
3491
3492 const uint32_t dst_offset =
3493 dst->planes[0].mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level,
3494 dst_layer, 0);
3495 const uint32_t src_offset =
3496 src->planes[0].mem->bo->offset + v3dv_layer_offset(src, src_mip_level,
3497 src_layer, 0);
3498
3499 const struct v3d_resource_slice *dst_slice = &dst->planes[0].slices[dst_mip_level];
3500 const struct v3d_resource_slice *src_slice = &src->planes[0].slices[src_mip_level];
3501
3502 v3d_X((&cmd_buffer->device->devinfo), meta_emit_tfu_job)(
3503 cmd_buffer,
3504 dst->planes[0].mem->bo->handle,
3505 dst_offset,
3506 dst_slice->tiling,
3507 dst_slice->padded_height,
3508 dst->planes[0].cpp,
3509 src->planes[0].mem->bo->handle,
3510 src_offset,
3511 src_slice->tiling,
3512 src_slice->tiling == V3D_TILING_RASTER ?
3513 src_slice->stride : src_slice->padded_height,
3514 src->planes[0].cpp,
3515 dst_width, dst_height, &format->planes[0]);
3516 }
3517
3518 return true;
3519 }
3520
3521 static bool
3522 format_needs_software_int_clamp(VkFormat format)
3523 {
3524 switch (format) {
3525 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3526 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
3527 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3528 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
3529 return true;
3530 default:
3531 return false;
3532 };
3533 }
3534
3535 static void
3536 get_blit_pipeline_cache_key(VkFormat dst_format,
3537 VkFormat src_format,
3538 VkColorComponentFlags cmask,
3539 VkSampleCountFlagBits dst_samples,
3540 VkSampleCountFlagBits src_samples,
3541 uint8_t *key)
3542 {
3543 memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
3544
3545 uint32_t *p = (uint32_t *) key;
3546
3547 *p = dst_format;
3548 p++;
3549
3550 /* Generally, when blitting from a larger format to a smaller format
3551 * the hardware takes care of clamping the source to the RT range.
3552 * Specifically, for integer formats, this is done by using
3553 * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
3554 * clamps to the bit-size of the render type, and some formats, such as
3555 * rgb10a2_uint, have a 16-bit render type, so it won't do what we need and
3556 * we have to clamp in software. In these cases, we need to amend the blit
3557 * shader with clamp code that depends on both the src and dst formats, so
3558 * we need the src format to be part of the key.
3559 */
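/* Resulting key layout (V3DV_META_BLIT_CACHE_KEY_SIZE = 16 bytes):
 *   [ 0.. 3] dst_format
 *   [ 4.. 7] src_format if software clamping is needed, 0 otherwise
 *   [ 8..11] cmask
 *   [12..15] (dst_samples << 8) | src_samples
 */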
3560 *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
3561 p++;
3562
3563 *p = cmask;
3564 p++;
3565
3566 *p = (dst_samples << 8) | src_samples;
3567 p++;
3568
3569 assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
3570 }
3571
3572 static bool
3573 create_blit_render_pass(struct v3dv_device *device,
3574 VkFormat dst_format,
3575 VkFormat src_format,
3576 VkRenderPass *pass_load,
3577 VkRenderPass *pass_no_load)
3578 {
3579 const bool is_color_blit = vk_format_is_color(dst_format);
3580
3581 /* Attachment load operation is specified below */
3582 VkAttachmentDescription2 att = {
3583 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
3584 .format = dst_format,
3585 .samples = VK_SAMPLE_COUNT_1_BIT,
3586 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3587 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3588 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3589 };
3590
3591 VkAttachmentReference2 att_ref = {
3592 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
3593 .attachment = 0,
3594 .layout = VK_IMAGE_LAYOUT_GENERAL,
3595 };
3596
3597 VkSubpassDescription2 subpass = {
3598 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
3599 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3600 .inputAttachmentCount = 0,
3601 .colorAttachmentCount = is_color_blit ? 1 : 0,
3602 .pColorAttachments = is_color_blit ? &att_ref : NULL,
3603 .pResolveAttachments = NULL,
3604 .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3605 .preserveAttachmentCount = 0,
3606 .pPreserveAttachments = NULL,
3607 };
3608
3609 VkRenderPassCreateInfo2 info = {
3610 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
3611 .attachmentCount = 1,
3612 .pAttachments = &att,
3613 .subpassCount = 1,
3614 .pSubpasses = &subpass,
3615 .dependencyCount = 0,
3616 .pDependencies = NULL,
3617 };
3618
3619 VkResult result;
3620 att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3621 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3622 &info, &device->vk.alloc, pass_load);
3623 if (result != VK_SUCCESS)
3624 return false;
3625
3626 att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3627 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3628 &info, &device->vk.alloc, pass_no_load);
3629 return result == VK_SUCCESS;
3630 }
3631
3632 static nir_def *
3633 gen_tex_coords(nir_builder *b)
3634 {
3635 nir_def *tex_box =
3636 nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3637
3638 nir_def *tex_z =
3639 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3640
3641 nir_def *vertex_id = nir_load_vertex_id(b);
3642
3643 /* vertex 0: src0_x, src0_y
3644 * vertex 1: src0_x, src1_y
3645 * vertex 2: src1_x, src0_y
3646 * vertex 3: src1_x, src1_y
3647 *
3648 * So:
3649 *
3650 * channel 0 is vertex_id < 2 ? src0_x : src1_x
3651 * channel 1 is vertex id & 1 ? src1_y : src0_y
3652 */
3653
3654 nir_def *one = nir_imm_int(b, 1);
3655 nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
3656 nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3657
3658 nir_def *comp[4];
3659 comp[0] = nir_bcsel(b, c0cmp,
3660 nir_channel(b, tex_box, 0),
3661 nir_channel(b, tex_box, 2));
3662
3663 comp[1] = nir_bcsel(b, c1cmp,
3664 nir_channel(b, tex_box, 3),
3665 nir_channel(b, tex_box, 1));
3666 comp[2] = tex_z;
3667 comp[3] = nir_imm_float(b, 1.0f);
3668 return nir_vec(b, comp, 4);
3669 }
3670
3671 static nir_def *
3672 build_nir_tex_op_read(struct nir_builder *b,
3673 nir_def *tex_pos,
3674 enum glsl_base_type tex_type,
3675 enum glsl_sampler_dim dim)
3676 {
3677 assert(dim != GLSL_SAMPLER_DIM_MS);
3678
3679 const struct glsl_type *sampler_type =
3680 glsl_sampler_type(dim, false, false, tex_type);
3681 nir_variable *sampler =
3682 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3683 sampler->data.descriptor_set = 0;
3684 sampler->data.binding = 0;
3685
3686 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3687 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3688 tex->sampler_dim = dim;
3689 tex->op = nir_texop_tex;
3690 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3691 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3692 tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref, tex_deref);
3693 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3694 tex->is_array = glsl_sampler_type_is_array(sampler_type);
3695 tex->coord_components = tex_pos->num_components;
3696
3697 nir_def_init(&tex->instr, &tex->def, 4, 32);
3698 nir_builder_instr_insert(b, &tex->instr);
3699 return &tex->def;
3700 }
3701
3702 static nir_def *
3703 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3704 nir_variable *sampler,
3705 nir_def *tex_deref,
3706 enum glsl_base_type tex_type,
3707 nir_def *tex_pos,
3708 nir_def *sample_idx)
3709 {
3710 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3711 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3712 tex->op = nir_texop_txf_ms;
3713 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, tex_pos);
3714 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_texture_deref, tex_deref);
3715 tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_idx);
3716 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3717 tex->is_array = false;
3718 tex->coord_components = tex_pos->num_components;
3719
3720 nir_def_init(&tex->instr, &tex->def, 4, 32);
3721 nir_builder_instr_insert(b, &tex->instr);
3722 return &tex->def;
3723 }
3724
3725 /* Fetches all samples at the given position and averages them */
3726 static nir_def *
3727 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3728 nir_def *tex_pos,
3729 enum glsl_base_type tex_type,
3730 VkSampleCountFlagBits src_samples)
3731 {
3732 assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3733 const struct glsl_type *sampler_type =
3734 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3735 nir_variable *sampler =
3736 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3737 sampler->data.descriptor_set = 0;
3738 sampler->data.binding = 0;
3739
3740 const bool is_int = glsl_base_type_is_integer(tex_type);
3741
3742 nir_def *tmp = NULL;
3743 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3744 for (uint32_t i = 0; i < src_samples; i++) {
3745 nir_def *s =
3746 build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3747 tex_type, tex_pos,
3748 nir_imm_int(b, i));
3749
3750 /* For integer formats, the multisample resolve operation is expected to
3751 * return just one of the samples, so we return the first one.
3752 */
3753 if (is_int)
3754 return s;
3755
3756 tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3757 }
3758
3759 assert(!is_int);
3760 return nir_fmul_imm(b, tmp, 1.0f / src_samples);
3761 }
3762
3763 /* Fetches the current sample (gl_SampleID) at the given position */
3764 static nir_def *
3765 build_nir_tex_op_ms_read(struct nir_builder *b,
3766 nir_def *tex_pos,
3767 enum glsl_base_type tex_type)
3768 {
3769 const struct glsl_type *sampler_type =
3770 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3771 nir_variable *sampler =
3772 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3773 sampler->data.descriptor_set = 0;
3774 sampler->data.binding = 0;
3775
3776 nir_def *tex_deref = &nir_build_deref_var(b, sampler)->def;
3777
3778 return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3779 tex_type, tex_pos,
3780 nir_load_sample_id(b));
3781 }
3782
3783 static nir_def *
3784 build_nir_tex_op(struct nir_builder *b,
3785 struct v3dv_device *device,
3786 nir_def *tex_pos,
3787 enum glsl_base_type tex_type,
3788 VkSampleCountFlagBits dst_samples,
3789 VkSampleCountFlagBits src_samples,
3790 enum glsl_sampler_dim dim)
3791 {
3792 switch (dim) {
3793 case GLSL_SAMPLER_DIM_MS:
3794 assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3795 /* For multisampled texture sources we need to use fetching instead of
3796 * normalized texture coordinates. We already configured our blit
3797 * coordinates to be in texel units, but here we still need to convert
3798 * them from floating point to integer.
3799 */
3800 tex_pos = nir_f2i32(b, tex_pos);
3801
3802 if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3803 return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3804 else
3805 return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3806 default:
3807 assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3808 return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3809 }
3810 }
3811
3812 static nir_shader *
3813 get_blit_vs(const nir_shader_compiler_options *options)
3814 {
3815 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3816 "meta blit vs");
3817
3818 const struct glsl_type *vec4 = glsl_vec4_type();
3819
3820 nir_variable *vs_out_pos =
3821 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3822 vs_out_pos->data.location = VARYING_SLOT_POS;
3823
3824 nir_variable *vs_out_tex_coord =
3825 nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3826 vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3827 vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3828
3829 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
3830 nir_store_var(&b, vs_out_pos, pos, 0xf);
3831
3832 nir_def *tex_coord = gen_tex_coords(&b);
3833 nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3834
3835 return b.shader;
3836 }
3837
3838 static uint32_t
3839 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3840 {
3841 switch (sampler_dim) {
3842 case GLSL_SAMPLER_DIM_1D: return 0x1;
3843 case GLSL_SAMPLER_DIM_2D: return 0x3;
3844 case GLSL_SAMPLER_DIM_MS: return 0x3;
3845 case GLSL_SAMPLER_DIM_3D: return 0x7;
3846 default:
3847 unreachable("invalid sampler dim");
3848 };
3849 }
3850
3851 static nir_shader *
3852 get_color_blit_fs(const nir_shader_compiler_options *options,
3853 struct v3dv_device *device,
3854 VkFormat dst_format,
3855 VkFormat src_format,
3856 VkSampleCountFlagBits dst_samples,
3857 VkSampleCountFlagBits src_samples,
3858 enum glsl_sampler_dim sampler_dim)
3859 {
3860 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
3861 "meta blit fs");
3862
3863 const struct glsl_type *vec4 = glsl_vec4_type();
3864
3865 nir_variable *fs_in_tex_coord =
3866 nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3867 fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3868
3869 const struct glsl_type *fs_out_type =
3870 vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3871 vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3872 glsl_vec4_type();
3873
3874 enum glsl_base_type src_base_type =
3875 vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3876 vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3877 GLSL_TYPE_FLOAT;
3878
3879 nir_variable *fs_out_color =
3880 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3881 fs_out_color->data.location = FRAG_RESULT_DATA0;
3882
3883 nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3884 const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3885 tex_coord = nir_channels(&b, tex_coord, channel_mask);
3886
3887 nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3888 dst_samples, src_samples, sampler_dim);
3889
3890 /* For integer textures, if the bit-size of the destination is too small to
3891 * hold the source value, Vulkan (CTS) expects the implementation to clamp to the
3892 * maximum value the destination can hold. The hardware can clamp to the
3893 * render target type, which usually matches the component bit-size, but
3894 * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3895 * render target type, so in these cases we need to clamp manually.
3896 */
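/* Worked example (illustrative): blitting R16G16B16A16_UINT to
 * A2B10G10R10_UINT_PACK32. The R/G/B components have 10 destination bits
 * vs. 16 source bits, so they are clamped to 1023 below, and the 2-bit
 * alpha is clamped to 3.
 */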
3897 if (format_needs_software_int_clamp(dst_format)) {
3898 assert(vk_format_is_int(dst_format));
3899 enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3900 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3901
3902 nir_def *c[4];
3903 for (uint32_t i = 0; i < 4; i++) {
3904 c[i] = nir_channel(&b, color, i);
3905
3906 const uint32_t src_bit_size =
3907 util_format_get_component_bits(src_pformat,
3908 UTIL_FORMAT_COLORSPACE_RGB,
3909 i);
3910 const uint32_t dst_bit_size =
3911 util_format_get_component_bits(dst_pformat,
3912 UTIL_FORMAT_COLORSPACE_RGB,
3913 i);
3914
3915 if (dst_bit_size >= src_bit_size)
3916 continue;
3917
3918 assert(dst_bit_size > 0);
3919 if (util_format_is_pure_uint(dst_pformat)) {
3920 nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3921 c[i] = nir_umin(&b, c[i], max);
3922 } else {
3923 nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3924 nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3925 c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3926 }
3927 }
3928
3929 color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3930 }
3931
3932 nir_store_var(&b, fs_out_color, color, 0xf);
3933
3934 return b.shader;
3935 }
3936
3937 static bool
3938 create_pipeline(struct v3dv_device *device,
3939 struct v3dv_render_pass *pass,
3940 struct nir_shader *vs_nir,
3941 struct nir_shader *gs_nir,
3942 struct nir_shader *fs_nir,
3943 const VkPipelineVertexInputStateCreateInfo *vi_state,
3944 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3945 const VkPipelineColorBlendStateCreateInfo *cb_state,
3946 const VkPipelineMultisampleStateCreateInfo *ms_state,
3947 const VkPipelineLayout layout,
3948 VkPipeline *pipeline)
3949 {
3950 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
3951 struct vk_shader_module fs_m = vk_shader_module_from_nir(fs_nir);
3952 struct vk_shader_module gs_m;
3953
3954 uint32_t num_stages = gs_nir ? 3 : 2;
3955
3956
3957 VkPipelineShaderStageCreateInfo stages[3] = {
3958 {
3959 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3960 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3961 .module = vk_shader_module_to_handle(&vs_m),
3962 .pName = "main",
3963 },
3964 {
3965 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3966 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3967 .module = vk_shader_module_to_handle(&fs_m),
3968 .pName = "main",
3969 },
3970 {
3971 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3972 .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
3973 .module = VK_NULL_HANDLE,
3974 .pName = "main",
3975 },
3976 };
3977
3978 if (gs_nir) {
3979 gs_m = vk_shader_module_from_nir(gs_nir);
3980 stages[2].module = vk_shader_module_to_handle(&gs_m);
3981 }
3982
3983 VkGraphicsPipelineCreateInfo info = {
3984 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3985
3986 .stageCount = num_stages,
3987 .pStages = stages,
3988
3989 .pVertexInputState = vi_state,
3990
3991 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3992 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3993 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3994 .primitiveRestartEnable = false,
3995 },
3996
3997 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3998 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3999 .viewportCount = 1,
4000 .scissorCount = 1,
4001 },
4002
4003 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
4004 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
4005 .rasterizerDiscardEnable = false,
4006 .polygonMode = VK_POLYGON_MODE_FILL,
4007 .cullMode = VK_CULL_MODE_NONE,
4008 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
4009 .depthBiasEnable = false,
4010 },
4011
4012 .pMultisampleState = ms_state,
4013
4014 .pDepthStencilState = ds_state,
4015
4016 .pColorBlendState = cb_state,
4017
4018 /* The meta blit pipeline declares all state as dynamic.
4019 * As a consequence, vkCmdBindPipeline writes no dynamic state
4020 * to the cmd buffer. Therefore, at the end of the meta blit,
4021 * we need only restore dynamic state that was vkCmdSet.
4022 */
4023 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
4024 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
4025 .dynamicStateCount = 6,
4026 .pDynamicStates = (VkDynamicState[]) {
4027 VK_DYNAMIC_STATE_VIEWPORT,
4028 VK_DYNAMIC_STATE_SCISSOR,
4029 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
4030 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
4031 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
4032 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
4033 VK_DYNAMIC_STATE_DEPTH_BIAS,
4034 VK_DYNAMIC_STATE_LINE_WIDTH,
4035 },
4036 },
4037
4038 .flags = 0,
4039 .layout = layout,
4040 .renderPass = v3dv_render_pass_to_handle(pass),
4041 .subpass = 0,
4042 };
4043
4044 VkResult result =
4045 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
4046 VK_NULL_HANDLE,
4047 1, &info,
4048 &device->vk.alloc,
4049 pipeline);
4050
4051 ralloc_free(vs_nir);
4052 ralloc_free(gs_nir);
4053 ralloc_free(fs_nir);
4054
4055 return result == VK_SUCCESS;
4056 }
4057
4058 static enum glsl_sampler_dim
4059 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
4060 {
4061 /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
4062 *
4063 * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
4064 * VK_IMAGE_TYPE_2D, ..."
4065 */
4066 assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
4067
4068 switch (type) {
4069 case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
4070 case VK_IMAGE_TYPE_2D:
4071 return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
4072 GLSL_SAMPLER_DIM_MS;
4073 case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
4074 default:
4075 unreachable("Invalid image type");
4076 }
4077 }
4078
4079 static bool
4080 create_blit_pipeline(struct v3dv_device *device,
4081 VkFormat dst_format,
4082 VkFormat src_format,
4083 VkColorComponentFlags cmask,
4084 VkImageType src_type,
4085 VkSampleCountFlagBits dst_samples,
4086 VkSampleCountFlagBits src_samples,
4087 VkRenderPass _pass,
4088 VkPipelineLayout pipeline_layout,
4089 VkPipeline *pipeline)
4090 {
4091 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
4092
4093 /* We always rewrite depth/stencil blits to compatible color blits */
4094 assert(vk_format_is_color(dst_format));
4095 assert(vk_format_is_color(src_format));
4096
4097 const nir_shader_compiler_options *options =
4098 v3dv_pipeline_get_nir_options(&device->devinfo);
4099
4100 const enum glsl_sampler_dim sampler_dim =
4101 get_sampler_dim(src_type, src_samples);
4102
4103 nir_shader *vs_nir = get_blit_vs(options);
4104 nir_shader *fs_nir =
4105 get_color_blit_fs(options, device, dst_format, src_format,
4106 dst_samples, src_samples, sampler_dim);
4107
4108 const VkPipelineVertexInputStateCreateInfo vi_state = {
4109 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
4110 .vertexBindingDescriptionCount = 0,
4111 .vertexAttributeDescriptionCount = 0,
4112 };
4113
4114 VkPipelineDepthStencilStateCreateInfo ds_state = {
4115 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
4116 };
4117
4118 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
4119 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
4120 .blendEnable = false,
4121 .colorWriteMask = cmask,
4122 };
4123
4124 const VkPipelineColorBlendStateCreateInfo cb_state = {
4125 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
4126 .logicOpEnable = false,
4127 .attachmentCount = 1,
4128 .pAttachments = blend_att_state
4129 };
4130
4131 const VkPipelineMultisampleStateCreateInfo ms_state = {
4132 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
4133 .rasterizationSamples = dst_samples,
4134 .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
4135 .pSampleMask = NULL,
4136 .alphaToCoverageEnable = false,
4137 .alphaToOneEnable = false,
4138 };
4139
4140 return create_pipeline(device,
4141 pass,
4142 vs_nir, NULL, fs_nir,
4143 &vi_state,
4144 &ds_state,
4145 &cb_state,
4146 &ms_state,
4147 pipeline_layout,
4148 pipeline);
4149 }
4150
4151 /**
4152 * Return a pipeline suitable for blitting the requested aspect given the
4153 * destination and source formats.
4154 */
4155 static bool
4156 get_blit_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
4157 VkFormat dst_format,
4158 VkFormat src_format,
4159 VkColorComponentFlags cmask,
4160 VkImageType src_type,
4161 VkSampleCountFlagBits dst_samples,
4162 VkSampleCountFlagBits src_samples,
4163 struct v3dv_meta_blit_pipeline **pipeline)
4164 {
4165 bool ok = true;
4166 struct v3dv_device *device = cmd_buffer->device;
4167
4168 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
4169 if (device->instance->meta_cache_enabled) {
4170 get_blit_pipeline_cache_key(dst_format, src_format, cmask,
4171 dst_samples, src_samples, key);
4172 mtx_lock(&device->meta.mtx);
4173 struct hash_entry *entry =
4174 _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
4175 if (entry) {
4176 mtx_unlock(&device->meta.mtx);
4177 *pipeline = entry->data;
4178 return true;
4179 }
4180 }
4181
4182 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
4183 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
4184
4185 if (*pipeline == NULL)
4186 goto fail;
4187
4188 ok = create_blit_render_pass(device, dst_format, src_format,
4189 &(*pipeline)->pass,
4190 &(*pipeline)->pass_no_load);
4191 if (!ok)
4192 goto fail;
4193
4194 /* Create the pipeline using one of the render passes, they are both
4195 * compatible, so we don't care which one we use here.
4196 */
4197 ok = create_blit_pipeline(device,
4198 dst_format,
4199 src_format,
4200 cmask,
4201 src_type,
4202 dst_samples,
4203 src_samples,
4204 (*pipeline)->pass,
4205 device->meta.blit.p_layout,
4206 &(*pipeline)->pipeline);
4207 if (!ok)
4208 goto fail;
4209
4210 if (device->instance->meta_cache_enabled) {
4211 memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
4212 _mesa_hash_table_insert(device->meta.blit.cache[src_type],
4213 &(*pipeline)->key, *pipeline);
4214 mtx_unlock(&device->meta.mtx);
4215 } else {
4216 v3dv_cmd_buffer_add_private_obj(
4217 cmd_buffer, (uintptr_t)*pipeline,
4218 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_meta_blit_pipeline);
4219 }
4220
4221 return true;
4222
4223 fail:
4224 if (device->instance->meta_cache_enabled)
4225 mtx_unlock(&device->meta.mtx);
4226
4227 VkDevice _device = v3dv_device_to_handle(device);
4228 if (*pipeline) {
4229 if ((*pipeline)->pass)
4230 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
4231 if ((*pipeline)->pass_no_load)
4232 v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
4233 if ((*pipeline)->pipeline)
4234 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
4235 vk_free(&device->vk.alloc, *pipeline);
4236 *pipeline = NULL;
4237 }
4238
4239 return false;
4240 }
4241
4242 static void
4243 compute_blit_box(const VkOffset3D *offsets,
4244 uint32_t image_w, uint32_t image_h,
4245 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
4246 bool *mirror_x, bool *mirror_y)
4247 {
4248 if (offsets[1].x >= offsets[0].x) {
4249 *mirror_x = false;
4250 *x = MIN2(offsets[0].x, image_w - 1);
4251 *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
4252 } else {
4253 *mirror_x = true;
4254 *x = MIN2(offsets[1].x, image_w - 1);
4255 *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
4256 }
4257 if (offsets[1].y >= offsets[0].y) {
4258 *mirror_y = false;
4259 *y = MIN2(offsets[0].y, image_h - 1);
4260 *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
4261 } else {
4262 *mirror_y = true;
4263 *y = MIN2(offsets[1].y, image_h - 1);
4264 *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
4265 }
4266 }
4267
4268 static void
4269 compute_blit_3d_layers(const VkOffset3D *offsets,
4270 uint32_t *min_layer, uint32_t *max_layer,
4271 bool *mirror_z)
4272 {
4273 if (offsets[1].z >= offsets[0].z) {
4274 *mirror_z = false;
4275 *min_layer = offsets[0].z;
4276 *max_layer = offsets[1].z;
4277 } else {
4278 *mirror_z = true;
4279 *min_layer = offsets[1].z;
4280 *max_layer = offsets[0].z;
4281 }
4282 }
4283
4284 static VkResult
4285 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
4286 {
4287 /* If this is not the first pool we create for this command buffer,
4288 * size it based on the size of the currently exhausted pool.
4289 */
4290 uint32_t descriptor_count = 64;
4291 if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
4292 struct v3dv_descriptor_pool *exhausted_pool =
4293 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
4294 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
4295 }
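/* Growth is geometric: each replacement pool is sized to twice the entry
 * count of the pool it replaces, capped at 1024 by the MIN2 above.
 */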
4296
4297 /* Create the descriptor pool */
4298 cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
4299 VkDescriptorPoolSize pool_size = {
4300 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4301 .descriptorCount = descriptor_count,
4302 };
4303 VkDescriptorPoolCreateInfo info = {
4304 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
4305 .maxSets = descriptor_count,
4306 .poolSizeCount = 1,
4307 .pPoolSizes = &pool_size,
4308 .flags = 0,
4309 };
4310 VkResult result =
4311 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
4312 &info,
4313 &cmd_buffer->device->vk.alloc,
4314 &cmd_buffer->meta.blit.dspool);
4315
4316 if (result == VK_SUCCESS) {
4317 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4318 const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
4319
4320 v3dv_cmd_buffer_add_private_obj(
4321 cmd_buffer, (uintptr_t) _pool,
4322 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
4323
4324 struct v3dv_descriptor_pool *pool =
4325 v3dv_descriptor_pool_from_handle(_pool);
4326 pool->is_driver_internal = true;
4327 }
4328
4329 return result;
4330 }
4331
4332 static VkResult
4333 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
4334 VkDescriptorSet *set)
4335 {
4336 /* Make sure we have a descriptor pool */
4337 VkResult result;
4338 if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
4339 result = create_blit_descriptor_pool(cmd_buffer);
4340 if (result != VK_SUCCESS)
4341 return result;
4342 }
4343 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4344
4345 /* Allocate descriptor set */
4346 struct v3dv_device *device = cmd_buffer->device;
4347 VkDevice _device = v3dv_device_to_handle(device);
4348 VkDescriptorSetAllocateInfo info = {
4349 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
4350 .descriptorPool = cmd_buffer->meta.blit.dspool,
4351 .descriptorSetCount = 1,
4352 .pSetLayouts = &device->meta.blit.ds_layout,
4353 };
4354 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4355
4356 /* If we ran out of pool space, grow the pool and try again */
4357 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
4358 result = create_blit_descriptor_pool(cmd_buffer);
4359 if (result == VK_SUCCESS) {
4360 info.descriptorPool = cmd_buffer->meta.blit.dspool;
4361 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4362 }
4363 }
4364
4365 return result;
4366 }
4367
4368 /**
4369 * Returns true if the implementation supports the requested operation (even if
4370 * it failed to process it, for example, due to an out-of-memory error).
4371 *
4372 * The caller can specify the channels on the destination to be written via the
4373 * cmask parameter (which can be 0 to default to all channels), as well as a
4374 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
4375 * to use the default identity swizzle).
4376 *
4377 * Supports multi-plane formats too.
4378 */
4379 static bool
4380 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
4381 struct v3dv_image *dst,
4382 VkFormat dst_format,
4383 struct v3dv_image *src,
4384 VkFormat src_format,
4385 VkColorComponentFlags cmask,
4386 VkComponentMapping *cswizzle,
4387 const VkImageBlit2 *region,
4388 VkFilter filter,
4389 bool dst_is_padded_image)
4390 {
4391 bool handled = true;
4392 VkResult result;
4393
4394 /* Can't sample from linear images */
4395 if (!src->tiled && src->vk.image_type != VK_IMAGE_TYPE_1D) {
4396 return false;
4397 }
4398
4399 /* Rewrite combined D/S blits to compatible color blits */
4400 if (vk_format_is_depth_or_stencil(dst_format)) {
4401 assert(src_format == dst_format);
4402 assert(cmask == 0);
4403 switch(dst_format) {
4404 case VK_FORMAT_D16_UNORM:
4405 dst_format = VK_FORMAT_R16_UINT;
4406 break;
4407 case VK_FORMAT_D32_SFLOAT:
4408 dst_format = VK_FORMAT_R32_UINT;
4409 break;
4410 case VK_FORMAT_X8_D24_UNORM_PACK32:
4411 case VK_FORMAT_D24_UNORM_S8_UINT:
4412 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
4413 cmask |= VK_COLOR_COMPONENT_G_BIT |
4414 VK_COLOR_COMPONENT_B_BIT |
4415 VK_COLOR_COMPONENT_A_BIT;
4416 }
4417 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
4418 assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
4419 cmask |= VK_COLOR_COMPONENT_R_BIT;
4420 }
4421 dst_format = VK_FORMAT_R8G8B8A8_UINT;
4422 break;
4423 default:
4424 unreachable("Unsupported depth/stencil format");
4425 };
4426 src_format = dst_format;
4427 }
4428
4429 uint8_t src_plane =
4430 v3dv_plane_from_aspect(region->srcSubresource.aspectMask);
4431 assert(src_plane < src->plane_count);
4432 uint8_t dst_plane =
4433 v3dv_plane_from_aspect(region->dstSubresource.aspectMask);
4434 assert(dst_plane < dst->plane_count);
4435
4436 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
4437 VK_COLOR_COMPONENT_G_BIT |
4438 VK_COLOR_COMPONENT_B_BIT |
4439 VK_COLOR_COMPONENT_A_BIT;
4440 if (cmask == 0)
4441 cmask = full_cmask;
4442
4443 VkComponentMapping ident_swizzle = {
4444 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
4445 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
4446 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
4447 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
4448 };
4449 if (!cswizzle)
4450 cswizzle = &ident_swizzle;
4451
4452 /* When we get here from a copy between compressed / uncompressed images
4453 * we choose to specify the destination blit region based on the size
4454 * semantics of the source image of the copy (see copy_image_blit), so we
4455 * need to apply those same semantics here when we compute the size of the
4456 * destination image level.
4457 */
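/* E.g. (illustrative) for a copy from an ETC2 source (4x4 blocks) to an
 * uncompressed destination: src_block_w/h = 4 and dst_block_w/h = 1, so a
 * destination level of WxH texels is treated below as (W*4)x(H*4), i.e. it
 * is measured in the source image's texel units.
 */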
4458 const uint32_t dst_block_w =
4459 vk_format_get_blockwidth(dst->planes[dst_plane].vk_format);
4460 const uint32_t dst_block_h =
4461 vk_format_get_blockheight(dst->planes[dst_plane].vk_format);
4462 const uint32_t src_block_w =
4463 vk_format_get_blockwidth(src->planes[src_plane].vk_format);
4464 const uint32_t src_block_h =
4465 vk_format_get_blockheight(src->planes[src_plane].vk_format);
4466 const uint32_t dst_level_w =
4467 u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
4468 region->dstSubresource.mipLevel);
4469 const uint32_t dst_level_h =
4470 u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
4471 region->dstSubresource.mipLevel);
4472
4473 const uint32_t src_level_w =
4474 u_minify(src->planes[src_plane].width, region->srcSubresource.mipLevel);
4475 const uint32_t src_level_h =
4476 u_minify(src->planes[src_plane].height, region->srcSubresource.mipLevel);
4477
4478 assert(src->plane_count == 1 || src->vk.image_type != VK_IMAGE_TYPE_3D);
4479 const uint32_t src_level_d =
4480 u_minify(src->vk.extent.depth, region->srcSubresource.mipLevel);
4481
4482 uint32_t dst_x, dst_y, dst_w, dst_h;
4483 bool dst_mirror_x, dst_mirror_y;
4484 compute_blit_box(region->dstOffsets,
4485 dst_level_w, dst_level_h,
4486 &dst_x, &dst_y, &dst_w, &dst_h,
4487 &dst_mirror_x, &dst_mirror_y);
4488
4489 uint32_t src_x, src_y, src_w, src_h;
4490 bool src_mirror_x, src_mirror_y;
4491 compute_blit_box(region->srcOffsets,
4492 src_level_w, src_level_h,
4493 &src_x, &src_y, &src_w, &src_h,
4494 &src_mirror_x, &src_mirror_y);
4495
4496 uint32_t min_dst_layer;
4497 uint32_t max_dst_layer;
4498 bool dst_mirror_z = false;
4499 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
4500 min_dst_layer = region->dstSubresource.baseArrayLayer;
4501 max_dst_layer = min_dst_layer +
4502 vk_image_subresource_layer_count(&dst->vk,
4503 &region->dstSubresource);
4504 } else {
4505 compute_blit_3d_layers(region->dstOffsets,
4506 &min_dst_layer, &max_dst_layer,
4507 &dst_mirror_z);
4508 }
4509
4510 uint32_t min_src_layer;
4511 uint32_t max_src_layer;
4512 bool src_mirror_z = false;
4513 if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
4514 min_src_layer = region->srcSubresource.baseArrayLayer;
4515 max_src_layer = min_src_layer +
4516 vk_image_subresource_layer_count(&src->vk,
4517 &region->srcSubresource);
4518 } else {
4519 compute_blit_3d_layers(region->srcOffsets,
4520 &min_src_layer, &max_src_layer,
4521 &src_mirror_z);
4522 }
4523
4524 uint32_t layer_count = max_dst_layer - min_dst_layer;
4525
4526 /* Translate source blit coordinates to normalized texture coordinates for
4527 * single sampled textures. For multisampled textures we require
4528 * unnormalized coordinates, since we can only do texelFetch on them.
4529 */
4530 float coords[4] = {
4531 (float)src_x,
4532 (float)src_y,
4533 (float)(src_x + src_w),
4534 (float)(src_y + src_h),
4535 };
4536
4537 if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
4538 coords[0] /= (float)src_level_w;
4539 coords[1] /= (float)src_level_h;
4540 coords[2] /= (float)src_level_w;
4541 coords[3] /= (float)src_level_h;
4542 }
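/* E.g. (illustrative) a 32x32 source rect at offset (8, 8) in a 64x64
 * level yields normalized coordinates (0.125, 0.125) .. (0.625, 0.625).
 */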
4543
4544 /* Handle mirroring */
4545 const bool mirror_x = dst_mirror_x != src_mirror_x;
4546 const bool mirror_y = dst_mirror_y != src_mirror_y;
4547 const bool mirror_z = dst_mirror_z != src_mirror_z;
4548 float tex_coords[5] = {
4549 !mirror_x ? coords[0] : coords[2],
4550 !mirror_y ? coords[1] : coords[3],
4551 !mirror_x ? coords[2] : coords[0],
4552 !mirror_y ? coords[3] : coords[1],
4553 /* Z coordinate for 3D blit sources, to be filled for each
4554 * destination layer
4555 */
4556 0.0f
4557 };
4558
4559 /* For blits from 3D images we also need to compute the slice coordinate to
4560 * sample from, which will change for each layer in the destination.
4561 * Compute the step we should increase for each iteration.
4562 */
4563 const float src_z_step =
4564 (float)(max_src_layer - min_src_layer) / (float)layer_count;
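/* E.g. (illustrative) blitting a 16-slice range of a 3D source onto 8
 * destination layers gives src_z_step = 2.0, so each destination layer
 * advances the sampled source Z by two slices.
 */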
4565
4566 /* Get the blit pipeline */
4567 struct v3dv_meta_blit_pipeline *pipeline = NULL;
4568 bool ok = get_blit_pipeline(cmd_buffer,
4569 dst_format, src_format, cmask, src->vk.image_type,
4570 dst->vk.samples, src->vk.samples,
4571 &pipeline);
4572 if (!ok)
4573 return handled;
4574 assert(pipeline && pipeline->pipeline &&
4575 pipeline->pass && pipeline->pass_no_load);
4576
4577 struct v3dv_device *device = cmd_buffer->device;
4578 assert(device->meta.blit.ds_layout);
4579
4580 VkDevice _device = v3dv_device_to_handle(device);
4581 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
4582
4583 /* Create sampler for blit source image */
4584 VkSamplerCreateInfo sampler_info = {
4585 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
4586 .magFilter = filter,
4587 .minFilter = filter,
4588 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4589 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4590 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4591 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
4592 };
4593 VkSampler sampler;
4594 result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
4595 &sampler);
4596 if (result != VK_SUCCESS)
4597 goto fail;
4598
4599 v3dv_cmd_buffer_add_private_obj(
4600 cmd_buffer, (uintptr_t)sampler,
4601 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
4602
4603 /* Push command buffer state before starting meta operation */
4604 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
4605
4606 /* Push state that is common for all layers */
4607 v3dv_CmdBindPipeline(_cmd_buffer,
4608 VK_PIPELINE_BIND_POINT_GRAPHICS,
4609 pipeline->pipeline);
4610
4611 const VkViewport viewport = {
4612 .x = dst_x,
4613 .y = dst_y,
4614 .width = dst_w,
4615 .height = dst_h,
4616 .minDepth = 0.0f,
4617 .maxDepth = 1.0f
4618 };
4619 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
4620
4621 const VkRect2D scissor = {
4622 .offset = { dst_x, dst_y },
4623 .extent = { dst_w, dst_h }
4624 };
4625 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4626
4627 bool can_skip_tlb_load = false;
4628 const VkRect2D render_area = {
4629 .offset = { dst_x, dst_y },
4630 .extent = { dst_w, dst_h },
4631 };
4632
4633 /* Record per-layer commands */
4634 for (uint32_t i = 0; i < layer_count; i++) {
4635 /* Set up the framebuffer */
4636 VkImageViewCreateInfo dst_image_view_info = {
4637 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4638 .image = v3dv_image_to_handle(dst),
4639 .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
4640 .format = dst_format,
4641 .subresourceRange = {
4642 .aspectMask = region->dstSubresource.aspectMask,
4643 .baseMipLevel = region->dstSubresource.mipLevel,
4644 .levelCount = 1,
4645 .baseArrayLayer = min_dst_layer + i,
4646 .layerCount = 1
4647 },
4648 };
4649 VkImageView dst_image_view;
4650 result = v3dv_create_image_view(device, &dst_image_view_info,
4651 &dst_image_view);
4652 if (result != VK_SUCCESS)
4653 goto fail;
4654
4655 v3dv_cmd_buffer_add_private_obj(
4656 cmd_buffer, (uintptr_t)dst_image_view,
4657 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4658
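/* The framebuffer only needs to be large enough to contain the render
 * area, which is offset from the image origin, hence dst_x + dst_w by
 * dst_y + dst_h rather than dst_w by dst_h.
 */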
4659 VkFramebufferCreateInfo fb_info = {
4660 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4661 .renderPass = pipeline->pass,
4662 .attachmentCount = 1,
4663 .pAttachments = &dst_image_view,
4664 .width = dst_x + dst_w,
4665 .height = dst_y + dst_h,
4666 .layers = 1,
4667 };
4668
4669 VkFramebuffer fb;
4670 result = v3dv_CreateFramebuffer(_device, &fb_info,
4671 &cmd_buffer->device->vk.alloc, &fb);
4672 if (result != VK_SUCCESS)
4673 goto fail;
4674
4675 struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4676 framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4677 fb_info.height == dst_level_h &&
4678 dst_is_padded_image;
4679
4680 v3dv_cmd_buffer_add_private_obj(
4681 cmd_buffer, (uintptr_t)fb,
4682 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4683
4684 /* Set up the descriptor set for the blit source texture. We don't have to
4685 * register the descriptor as a private command buffer object since
4686 * all descriptors will be freed automatically with the descriptor
4687 * pool.
4688 */
4689 VkDescriptorSet set;
4690 result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4691 if (result != VK_SUCCESS)
4692 goto fail;
4693
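/* For 3D sources we always create the view at layer 0 and select the
 * source slice through the Z texture coordinate computed further below.
 */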
4694 VkImageViewCreateInfo src_image_view_info = {
4695 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4696 .image = v3dv_image_to_handle(src),
4697 .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
4698 .format = src_format,
4699 .components = *cswizzle,
4700 .subresourceRange = {
4701 .aspectMask = region->srcSubresource.aspectMask,
4702 .baseMipLevel = region->srcSubresource.mipLevel,
4703 .levelCount = 1,
4704 .baseArrayLayer =
4705 src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4706 .layerCount = 1
4707 },
4708 };
4709 VkImageView src_image_view;
4710 result = v3dv_create_image_view(device, &src_image_view_info,
4711 &src_image_view);
4712 if (result != VK_SUCCESS)
4713 goto fail;
4714
4715 v3dv_cmd_buffer_add_private_obj(
4716 cmd_buffer, (uintptr_t)src_image_view,
4717 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4718
4719 VkDescriptorImageInfo image_info = {
4720 .sampler = sampler,
4721 .imageView = src_image_view,
4722 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4723 };
4724 VkWriteDescriptorSet write = {
4725 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4726 .dstSet = set,
4727 .dstBinding = 0,
4728 .dstArrayElement = 0,
4729 .descriptorCount = 1,
4730 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4731 .pImageInfo = &image_info,
4732 };
4733 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4734
4735 v3dv_CmdBindDescriptorSets(_cmd_buffer,
4736 VK_PIPELINE_BIND_POINT_GRAPHICS,
4737 device->meta.blit.p_layout,
4738 0, 1, &set,
4739 0, NULL);
4740
4741 /* If the region we are about to blit is tile-aligned, then we can
4742 * use the render pass version that won't pre-load the tile buffer
4743 * with the dst image contents before the blit. The exception is when we
4744 * don't have a full color mask, since in that case we need to preserve
4745 * the original value of some of the color components.
4746 *
4747 * Since all layers have the same area, we only need to compute this for
4748 * the first.
4749 */
4750 if (i == 0) {
4751 struct v3dv_render_pass *pipeline_pass =
4752 v3dv_render_pass_from_handle(pipeline->pass);
4753 can_skip_tlb_load =
4754 cmask == full_cmask &&
4755 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
4756 framebuffer, pipeline_pass, 0);
4757 }
4758
4759 /* Record blit */
4760 VkRenderPassBeginInfo rp_info = {
4761 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4762 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4763 pipeline->pass,
4764 .framebuffer = fb,
4765 .renderArea = render_area,
4766 .clearValueCount = 0,
4767 };
4768
4769 VkSubpassBeginInfo sp_info = {
4770 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
4771 .contents = VK_SUBPASS_CONTENTS_INLINE,
4772 };
4773
4774 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
4775 struct v3dv_job *job = cmd_buffer->state.job;
4776 if (!job)
4777 goto fail;
4778
4779 /* For 3D blits we need to compute the source slice to blit from (the Z
4780 * coordinate of the source sample operation). We want to choose this
4781 * based on the ratio of the depth of the source and the destination
4782 * images, picking the coordinate in the middle of each step.
4783 */
4784 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
4785 tex_coords[4] =
4786 !mirror_z ?
4787 (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
4788 (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
4789 }
4790
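/* The 20-byte push constant range carries the five floats in tex_coords:
 * the four blit rectangle coordinates plus the Z slice for 3D sources.
 */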
4791 v3dv_CmdPushConstants(_cmd_buffer,
4792 device->meta.blit.p_layout,
4793 VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
4794 &tex_coords);
4795
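/* Single 4-vertex draw; the blit vertex shader is expected to expand this
 * into a quad covering the viewport set above.
 */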
4796 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
4797
4798 VkSubpassEndInfo sp_end_info = {
4799 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
4800 };
4801
4802 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
4803 }
4804
4805 fail:
4806 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
4807
4808 return handled;
4809 }
4810
4811 VKAPI_ATTR void VKAPI_CALL
4812 v3dv_CmdBlitImage2(VkCommandBuffer commandBuffer,
4813 const VkBlitImageInfo2 *pBlitImageInfo)
4814 {
4815 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4816 V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
4817 V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
4818
4819 /* From vkCmdBlitImage:
4820 * "srcImage must not use a format that requires a sampler YCBCR
4821 * conversion"
4822 * "dstImage must not use a format that requires a sampler YCBCR
4823 * conversion"
4824 */
4825 assert(src->plane_count == 1);
4826 assert(dst->plane_count == 1);
4827
4828 /* This command can only happen outside a render pass */
4829 assert(cmd_buffer->state.pass == NULL);
4830 assert(cmd_buffer->state.job == NULL);
4831
4832 /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
4833 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
4834 src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4835
4836 /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
4837 assert(!vk_format_is_compressed(dst->vk.format));
4838
4839 cmd_buffer->state.is_transfer = true;
4840
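/* Try the fixed-function TFU path first and fall back to the shader-based
 * blit for regions the TFU cannot handle.
 */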
4841 for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
4842 const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];
4843
4844 if (blit_tfu(cmd_buffer, dst, src, region))
4845 continue;
4846 if (blit_shader(cmd_buffer,
4847 dst, dst->vk.format,
4848 src, src->vk.format,
4849 0, NULL,
4850 region,
4851 pBlitImageInfo->filter, true)) {
4852 continue;
4853 }
4854 unreachable("Unsupported blit operation");
4855 }
4856
4857 cmd_buffer->state.is_transfer = false;
4858 }
4859
4860 static bool
4861 resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
4862 struct v3dv_image *dst,
4863 struct v3dv_image *src,
4864 const VkImageResolve2 *region)
4865 {
4866 /* Resolving multi-planar images is not supported, so we always use plane 0 */
4867 assert(dst->plane_count == 1);
4868 assert(src->plane_count == 1);
4869
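/* The TLB path emits the resolve directly in the RCL, so it is only usable
 * when the region layout is compatible with the tile buffer (see
 * v3dv_meta_can_use_tlb) and the format supports hardware resolve;
 * otherwise we report the region as not handled and the caller falls back
 * to the blit path.
 */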
4870 if (!v3dv_meta_can_use_tlb(src, 0, region->srcSubresource.mipLevel,
4871 &region->srcOffset, NULL, NULL) ||
4872 !v3dv_meta_can_use_tlb(dst, 0, region->dstSubresource.mipLevel,
4873 &region->dstOffset, &region->extent, NULL)) {
4874 return false;
4875 }
4876
4877 if (!v3d_X((&cmd_buffer->device->devinfo), format_supports_tlb_resolve)(src->format))
4878 return false;
4879
4880 const VkFormat fb_format = src->vk.format;
4881
4882 uint32_t num_layers;
4883 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
4884 num_layers = vk_image_subresource_layer_count(&dst->vk,
4885 &region->dstSubresource);
4886 } else {
4887 num_layers = region->extent.depth;
4888 }
4889 assert(num_layers > 0);
4890
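/* If job creation fails it is most likely an out-of-memory condition that
 * has already been recorded on the command buffer, so we still report the
 * region as handled rather than attempting a fallback.
 */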
4891 struct v3dv_job *job =
4892 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
4893 if (!job)
4894 return true;
4895
4896 const uint32_t block_w =
4897 vk_format_get_blockwidth(dst->planes[0].vk_format);
4898 const uint32_t block_h =
4899 vk_format_get_blockheight(dst->planes[0].vk_format);
4900 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
4901 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
4902
4903 uint32_t internal_type, internal_bpp;
4904 v3d_X((&cmd_buffer->device->devinfo), get_internal_type_bpp_for_image_aspects)
4905 (fb_format, region->srcSubresource.aspectMask,
4906 &internal_type, &internal_bpp);
4907
4908 v3dv_job_start_frame(job, width, height, num_layers, false, true, 1,
4909 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
4910 true);
4911
4912 struct v3dv_meta_framebuffer framebuffer;
4913 v3d_X((&job->device->devinfo), meta_framebuffer_init)(&framebuffer, fb_format,
4914 internal_type, &job->frame_tiling);
4915
4916 v3d_X((&job->device->devinfo), job_emit_binning_flush)(job);
4917 v3d_X((&job->device->devinfo), meta_emit_resolve_image_rcl)(job, dst, src,
4918 &framebuffer, region);
4919
4920 v3dv_cmd_buffer_finish_job(cmd_buffer);
4921 return true;
4922 }
4923
4924 static bool
4925 resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
4926 struct v3dv_image *dst,
4927 struct v3dv_image *src,
4928 const VkImageResolve2 *region)
4929 {
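/* Express the resolve region as a 1:1 blit so the shader-based blit path
 * can handle the multisampled source.
 */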
4930 const VkImageBlit2 blit_region = {
4931 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4932 .srcSubresource = region->srcSubresource,
4933 .srcOffsets = {
4934 region->srcOffset,
4935 {
4936 region->srcOffset.x + region->extent.width,
4937 region->srcOffset.y + region->extent.height,
4938 }
4939 },
4940 .dstSubresource = region->dstSubresource,
4941 .dstOffsets = {
4942 region->dstOffset,
4943 {
4944 region->dstOffset.x + region->extent.width,
4945 region->dstOffset.y + region->extent.height,
4946 }
4947 },
4948 };
4949 return blit_shader(cmd_buffer,
4950 dst, dst->vk.format,
4951 src, src->vk.format,
4952 0, NULL,
4953 &blit_region, VK_FILTER_NEAREST, true);
4954 }
4955
4956 VKAPI_ATTR void VKAPI_CALL
4957 v3dv_CmdResolveImage2(VkCommandBuffer commandBuffer,
4958 const VkResolveImageInfo2 *info)
4960 {
4961 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4962 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
4963 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
4964
4965 /* This command can only happen outside a render pass */
4966 assert(cmd_buffer->state.pass == NULL);
4967 assert(cmd_buffer->state.job == NULL);
4968
4969 assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
4970 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4971
4972 /* We don't support multi-sampled multi-plane images */
4973 assert(src->plane_count == 1);
4974 assert(dst->plane_count == 1);
4975
4976 cmd_buffer->state.is_transfer = true;
4977
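/* Prefer the TLB resolve path and fall back to a shader blit when the TLB
 * cannot handle the region or format.
 */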
4978 for (uint32_t i = 0; i < info->regionCount; i++) {
4979 if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
4980 continue;
4981 if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
4982 continue;
4983 unreachable("Unsupported multismaple resolve operation");
4984 }
4985
4986 cmd_buffer->state.is_transfer = false;
4987 }
4988