1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vulkan/runtime/vk_common_entrypoints.h"
30
31 static uint32_t
32 meta_blit_key_hash(const void *key)
33 {
34 return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
35 }
36
37 static bool
38 meta_blit_key_compare(const void *key1, const void *key2)
39 {
40 return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
41 }
42
43 static bool
44 create_blit_pipeline_layout(struct v3dv_device *device,
45 VkDescriptorSetLayout *descriptor_set_layout,
46 VkPipelineLayout *pipeline_layout)
47 {
48 VkResult result;
49
50 if (*descriptor_set_layout == 0) {
51 VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
52 .binding = 0,
53 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
54 .descriptorCount = 1,
55 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
56 };
57 VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
58 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
59 .bindingCount = 1,
60 .pBindings = &descriptor_set_layout_binding,
61 };
62 result =
63 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
64 &descriptor_set_layout_info,
65 &device->vk.alloc,
66 descriptor_set_layout);
67 if (result != VK_SUCCESS)
68 return false;
69 }
70
71 assert(*pipeline_layout == 0);
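/* Note: the 20-byte vertex-stage push constant range declared below is
 * assumed to carry the source coordinate box (x0, y0, x1, y1) plus a
 * Z/layer coordinate, i.e. five 32-bit values consumed by the blit
 * vertex shader; the shader side defines the actual layout.
 */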
72 VkPipelineLayoutCreateInfo pipeline_layout_info = {
73 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
74 .setLayoutCount = 1,
75 .pSetLayouts = descriptor_set_layout,
76 .pushConstantRangeCount = 1,
77 .pPushConstantRanges =
78 &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
79 };
80
81 result =
82 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
83 &pipeline_layout_info,
84 &device->vk.alloc,
85 pipeline_layout);
86 return result == VK_SUCCESS;
87 }
88
89 void
90 v3dv_meta_blit_init(struct v3dv_device *device)
91 {
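/* One pipeline cache per image dimensionality; the three slots are
 * assumed to be indexed by image type (1D, 2D, 3D) when blit pipelines
 * are looked up.
 */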
92 for (uint32_t i = 0; i < 3; i++) {
93 device->meta.blit.cache[i] =
94 _mesa_hash_table_create(NULL,
95 meta_blit_key_hash,
96 meta_blit_key_compare);
97 }
98
99 create_blit_pipeline_layout(device,
100 &device->meta.blit.ds_layout,
101 &device->meta.blit.p_layout);
102 }
103
104 void
105 v3dv_meta_blit_finish(struct v3dv_device *device)
106 {
107 VkDevice _device = v3dv_device_to_handle(device);
108
109 for (uint32_t i = 0; i < 3; i++) {
110 hash_table_foreach(device->meta.blit.cache[i], entry) {
111 struct v3dv_meta_blit_pipeline *item = entry->data;
112 v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
113 v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
114 v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
115 vk_free(&device->vk.alloc, item);
116 }
117 _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
118 }
119
120 if (device->meta.blit.p_layout) {
121 v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
122 &device->vk.alloc);
123 }
124
125 if (device->meta.blit.ds_layout) {
126 v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
127 &device->vk.alloc);
128 }
129 }
130
131 static uint32_t
132 meta_texel_buffer_copy_key_hash(const void *key)
133 {
134 return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
135 }
136
137 static bool
138 meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
139 {
140 return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
141 }
142
143 static bool
144 create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
145 VkDescriptorSetLayout *ds_layout,
146 VkPipelineLayout *p_layout)
147 {
148 VkResult result;
149
150 if (*ds_layout == 0) {
151 VkDescriptorSetLayoutBinding ds_layout_binding = {
152 .binding = 0,
153 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
154 .descriptorCount = 1,
155 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
156 };
157 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
158 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
159 .bindingCount = 1,
160 .pBindings = &ds_layout_binding,
161 };
162 result =
163 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
164 &ds_layout_info,
165 &device->vk.alloc,
166 ds_layout);
167 if (result != VK_SUCCESS)
168 return false;
169 }
170
171 assert(*p_layout == 0);
172 /* FIXME: this is abusing the API a bit, since not all of our copy
173 * pipelines have a geometry shader. We could create 2 different pipeline
174 * layouts, but this works for us for now.
175 */
176 #define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
177 #define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
178 #define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
179 #define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
180 VkPushConstantRange ranges[2] = {
181 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
182 { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
183 };
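/* Resulting push constant layout: the fragment stage reads bytes 0-23
 * (copy box at offset 0, buffer stride at 16, buffer offset at 20) and
 * the geometry stage reads the layer index at byte 24, matching the
 * offsets #defined above.
 */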
184
185 VkPipelineLayoutCreateInfo p_layout_info = {
186 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
187 .setLayoutCount = 1,
188 .pSetLayouts = ds_layout,
189 .pushConstantRangeCount = 2,
190 .pPushConstantRanges = ranges,
191 };
192
193 result =
194 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
195 &p_layout_info,
196 &device->vk.alloc,
197 p_layout);
198 return result == VK_SUCCESS;
199 }
200
201 void
202 v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
203 {
204 for (uint32_t i = 0; i < 3; i++) {
205 device->meta.texel_buffer_copy.cache[i] =
206 _mesa_hash_table_create(NULL,
207 meta_texel_buffer_copy_key_hash,
208 meta_texel_buffer_copy_key_compare);
209 }
210
211 create_texel_buffer_copy_pipeline_layout(
212 device,
213 &device->meta.texel_buffer_copy.ds_layout,
214 &device->meta.texel_buffer_copy.p_layout);
215 }
216
217 void
218 v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
219 {
220 VkDevice _device = v3dv_device_to_handle(device);
221
222 for (uint32_t i = 0; i < 3; i++) {
223 hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
224 struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
225 v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
226 v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
227 v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
228 vk_free(&device->vk.alloc, item);
229 }
230 _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
231 }
232
233 if (device->meta.texel_buffer_copy.p_layout) {
234 v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
235 &device->vk.alloc);
236 }
237
238 if (device->meta.texel_buffer_copy.ds_layout) {
239 v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
240 &device->vk.alloc);
241 }
242 }
243
244 static VkFormat
245 get_compatible_tlb_format(VkFormat format)
246 {
247 switch (format) {
248 case VK_FORMAT_R8G8B8A8_SNORM:
249 return VK_FORMAT_R8G8B8A8_UINT;
250
251 case VK_FORMAT_R8G8_SNORM:
252 return VK_FORMAT_R8G8_UINT;
253
254 case VK_FORMAT_R8_SNORM:
255 return VK_FORMAT_R8_UINT;
256
257 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
258 return VK_FORMAT_A8B8G8R8_UINT_PACK32;
259
260 case VK_FORMAT_R16_UNORM:
261 case VK_FORMAT_R16_SNORM:
262 return VK_FORMAT_R16_UINT;
263
264 case VK_FORMAT_R16G16_UNORM:
265 case VK_FORMAT_R16G16_SNORM:
266 return VK_FORMAT_R16G16_UINT;
267
268 case VK_FORMAT_R16G16B16A16_UNORM:
269 case VK_FORMAT_R16G16B16A16_SNORM:
270 return VK_FORMAT_R16G16B16A16_UINT;
271
272 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
273 return VK_FORMAT_R32_SFLOAT;
274
275 /* We can't render to compressed formats using the TLB so instead we use
276 * a compatible format with the same bpp as the compressed format. Because
277 * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
278 * case of ETC), when we implement copies with the compatible format we
279 * will have to divide offsets and dimensions on the compressed image by
280 * the compressed block size.
281 */
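/* For example, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK uses 128-bit 4x4
 * blocks, so a 64x64 compressed image is handled as a 16x16
 * VK_FORMAT_R32G32B32A32_UINT image where each uint4 texel holds one
 * compressed block.
 */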
282 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
283 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
284 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
285 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
286 case VK_FORMAT_BC2_UNORM_BLOCK:
287 case VK_FORMAT_BC2_SRGB_BLOCK:
288 case VK_FORMAT_BC3_SRGB_BLOCK:
289 case VK_FORMAT_BC3_UNORM_BLOCK:
290 case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
291 case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
292 case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
293 case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
294 case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
295 case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
296 case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
297 case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
298 case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
299 case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
300 case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
301 case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
302 case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
303 case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
304 case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
305 case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
306 case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
307 case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
308 case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
309 case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
310 case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
311 case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
312 case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
313 case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
314 case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
315 case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
316 case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
317 case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
318 return VK_FORMAT_R32G32B32A32_UINT;
319
320 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
321 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
322 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
323 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
324 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
325 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
326 case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
327 case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
328 case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
329 case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
330 return VK_FORMAT_R16G16B16A16_UINT;
331
332 default:
333 return VK_FORMAT_UNDEFINED;
334 }
335 }
336
337 /**
338 * Checks if we can implement an image copy or clear operation using the TLB
339 * hardware.
340 */
341 bool
342 v3dv_meta_can_use_tlb(struct v3dv_image *image,
343 const VkOffset3D *offset,
344 VkFormat *compat_format)
345 {
346 if (offset->x != 0 || offset->y != 0)
347 return false;
348
349 if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
350 if (compat_format)
351 *compat_format = image->vk.format;
352 return true;
353 }
354
355 /* If the image format is not TLB-supported, then check if we can use
356 * a compatible format instead.
357 */
358 if (compat_format) {
359 *compat_format = get_compatible_tlb_format(image->vk.format);
360 if (*compat_format != VK_FORMAT_UNDEFINED)
361 return true;
362 }
363
364 return false;
365 }
366
367 /* Implements a copy using the TLB.
368 *
369 * This only works if we are copying from offset (0,0), since a TLB store for
370 * tile (x,y) will be written at the same tile offset into the destination.
371 * When this requirement is not met, we need to use a blit instead.
372 *
373 * Returns true if the implementation supports the requested operation (even if
374 * it failed to process it, for example, due to an out-of-memory error).
375 *
376 */
377 static bool
378 copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
379 struct v3dv_buffer *buffer,
380 struct v3dv_image *image,
381 const VkBufferImageCopy2 *region)
382 {
383 VkFormat fb_format;
384 if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
385 return false;
386
387 uint32_t internal_type, internal_bpp;
388 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
389 (fb_format, region->imageSubresource.aspectMask,
390 &internal_type, &internal_bpp);
391
392 uint32_t num_layers;
393 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
394 num_layers = region->imageSubresource.layerCount;
395 else
396 num_layers = region->imageExtent.depth;
397 assert(num_layers > 0);
398
399 struct v3dv_job *job =
400 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
401 if (!job)
402 return true;
403
404 /* Handle copy from compressed format using a compatible format */
405 const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
406 const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
407 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
408 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
409
410 v3dv_job_start_frame(job, width, height, num_layers, false,
411 1, internal_bpp, false);
412
413 struct v3dv_meta_framebuffer framebuffer;
414 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
415 internal_type, &job->frame_tiling);
416
417 v3dv_X(job->device, job_emit_binning_flush)(job);
418 v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
419 (job, buffer, image, &framebuffer, region);
420
421 v3dv_cmd_buffer_finish_job(cmd_buffer);
422
423 return true;
424 }
425
426 static bool
427 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
428 struct v3dv_image *dst,
429 VkFormat dst_format,
430 struct v3dv_image *src,
431 VkFormat src_format,
432 VkColorComponentFlags cmask,
433 VkComponentMapping *cswizzle,
434 const VkImageBlit2 *region,
435 VkFilter filter,
436 bool dst_is_padded_image);
437
438 /**
439 * Returns true if the implementation supports the requested operation (even if
440 * it failed to process it, for example, due to an out-of-memory error).
441 */
442 static bool
443 copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
444 struct v3dv_buffer *buffer,
445 struct v3dv_image *image,
446 const VkBufferImageCopy2 *region)
447 {
448 bool handled = false;
449
450 /* This path uses a shader blit which doesn't support linear images. Return
451 * early to avoid all the heavy lifting in preparation for the blit_shader()
452 * call that is bound to fail in that scenario.
453 */
454 if (image->vk.tiling == VK_IMAGE_TILING_LINEAR &&
455 image->vk.image_type != VK_IMAGE_TYPE_1D) {
456 return handled;
457 }
458
459 /* Generally, the bpp of the data in the buffer matches that of the
460 * source image. The exception is copying the stencil aspect (8 bpp)
461 * out of a combined D24S8 image (32 bpp).
462 */
463 uint32_t buffer_bpp = image->cpp;
464
465 VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;
466
467 /* Because we are going to implement the copy as a blit, we need to create
468 * a linear image from the destination buffer and we also want our blit
469 * source and destination formats to be the same (to avoid any format
470 * conversions), so we choose a canonical format that matches the
471 * source image bpp.
472 *
473 * The exception to the above is copying from combined depth/stencil images
474 * because we are copying only one aspect of the image, so we need to setup
475 * our formats, color write mask and source swizzle mask to match that.
476 */
477 VkFormat dst_format;
478 VkFormat src_format;
479 VkColorComponentFlags cmask = 0; /* All components */
480 VkComponentMapping cswizzle = {
481 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
482 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
483 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
484 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
485 };
486 switch (buffer_bpp) {
487 case 16:
488 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
489 dst_format = VK_FORMAT_R32G32B32A32_UINT;
490 src_format = dst_format;
491 break;
492 case 8:
493 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
494 dst_format = VK_FORMAT_R16G16B16A16_UINT;
495 src_format = dst_format;
496 break;
497 case 4:
498 switch (copy_aspect) {
499 case VK_IMAGE_ASPECT_COLOR_BIT:
500 src_format = VK_FORMAT_R8G8B8A8_UINT;
501 dst_format = VK_FORMAT_R8G8B8A8_UINT;
502 break;
503 case VK_IMAGE_ASPECT_DEPTH_BIT:
504 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
505 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
506 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
507 if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
508 src_format = VK_FORMAT_R32_UINT;
509 dst_format = VK_FORMAT_R32_UINT;
510 } else {
511 /* We want to write depth in the buffer in the first 24-bits,
512 * however, the hardware has depth in bits 8-31, so swizzle the
513 * source components to match what we want. Also, we don't
514 * want to write bits 24-31 in the destination.
515 */
516 src_format = VK_FORMAT_R8G8B8A8_UINT;
517 dst_format = VK_FORMAT_R8G8B8A8_UINT;
518 cmask = VK_COLOR_COMPONENT_R_BIT |
519 VK_COLOR_COMPONENT_G_BIT |
520 VK_COLOR_COMPONENT_B_BIT;
521 cswizzle.r = VK_COMPONENT_SWIZZLE_G;
522 cswizzle.g = VK_COMPONENT_SWIZZLE_B;
523 cswizzle.b = VK_COMPONENT_SWIZZLE_A;
524 cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
525 }
526 break;
527 case VK_IMAGE_ASPECT_STENCIL_BIT:
528 assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
529 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
530 /* Copying from S8D24. We want to write 8-bit stencil values only,
531 * so adjust the buffer bpp for that. Since the hardware stores stencil
532 * in the LSB, we can just do an RGBA8UI to R8UI blit.
533 */
534 src_format = VK_FORMAT_R8G8B8A8_UINT;
535 dst_format = VK_FORMAT_R8_UINT;
536 buffer_bpp = 1;
537 break;
538 default:
539 unreachable("unsupported aspect");
540 return handled;
541 };
542 break;
543 case 2:
544 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
545 copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
546 dst_format = VK_FORMAT_R16_UINT;
547 src_format = dst_format;
548 break;
549 case 1:
550 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
551 dst_format = VK_FORMAT_R8_UINT;
552 src_format = dst_format;
553 break;
554 default:
555 unreachable("unsupported bit-size");
556 return handled;
557 };
558
559 /* The hardware doesn't support linear depth/stencil stores, so we
560 * implement copies of depth/stencil aspect as color copies using a
561 * compatible color format.
562 */
563 assert(vk_format_is_color(src_format));
564 assert(vk_format_is_color(dst_format));
565 copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
566
567 /* We should be able to handle the blit if we got this far */
568 handled = true;
569
570 /* Obtain the 2D buffer region spec */
571 uint32_t buf_width, buf_height;
572 if (region->bufferRowLength == 0)
573 buf_width = region->imageExtent.width;
574 else
575 buf_width = region->bufferRowLength;
576
577 if (region->bufferImageHeight == 0)
578 buf_height = region->imageExtent.height;
579 else
580 buf_height = region->bufferImageHeight;
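/* Per the Vulkan spec, a value of 0 means tightly packed; e.g. with
 * bufferRowLength = 256 and a 100-texel-wide copy, consecutive image
 * rows start 256 texels apart in the buffer.
 */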
581
582 /* If the image is compressed, the bpp refers to blocks, not pixels */
583 uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
584 uint32_t block_height = vk_format_get_blockheight(image->vk.format);
585 buf_width = buf_width / block_width;
586 buf_height = buf_height / block_height;
587
588 /* Compute layers to copy */
589 uint32_t num_layers;
590 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
591 num_layers = region->imageSubresource.layerCount;
592 else
593 num_layers = region->imageExtent.depth;
594 assert(num_layers > 0);
595
596 /* Our blit interface can see the real format of the images to detect
597 * copies between compressed and uncompressed images and adapt the
598 * blit region accordingly. Here we are just doing a raw copy of
599 * compressed data, but we are passing an uncompressed view of the
600 * buffer for the blit destination image (since compressed formats are
601 * not renderable), so we also want to provide an uncompressed view of
602 * the source image.
603 */
604 VkResult result;
605 struct v3dv_device *device = cmd_buffer->device;
606 VkDevice _device = v3dv_device_to_handle(device);
607 if (vk_format_is_compressed(image->vk.format)) {
608 VkImage uiview;
609 VkImageCreateInfo uiview_info = {
610 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
611 .imageType = VK_IMAGE_TYPE_3D,
612 .format = dst_format,
613 .extent = { buf_width, buf_height, image->vk.extent.depth },
614 .mipLevels = image->vk.mip_levels,
615 .arrayLayers = image->vk.array_layers,
616 .samples = image->vk.samples,
617 .tiling = image->vk.tiling,
618 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
619 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
620 .queueFamilyIndexCount = 0,
621 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
622 };
623 result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
624 if (result != VK_SUCCESS)
625 return handled;
626
627 v3dv_cmd_buffer_add_private_obj(
628 cmd_buffer, (uintptr_t)uiview,
629 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
630
631 result =
632 vk_common_BindImageMemory(_device, uiview,
633 v3dv_device_memory_to_handle(image->mem),
634 image->mem_offset);
635 if (result != VK_SUCCESS)
636 return handled;
637
638 image = v3dv_image_from_handle(uiview);
639 }
640
641 /* Copy requested layers */
642 for (uint32_t i = 0; i < num_layers; i++) {
643 /* Create the destination blit image from the destination buffer */
644 VkImageCreateInfo image_info = {
645 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
646 .imageType = VK_IMAGE_TYPE_2D,
647 .format = dst_format,
648 .extent = { buf_width, buf_height, 1 },
649 .mipLevels = 1,
650 .arrayLayers = 1,
651 .samples = VK_SAMPLE_COUNT_1_BIT,
652 .tiling = VK_IMAGE_TILING_LINEAR,
653 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
654 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
655 .queueFamilyIndexCount = 0,
656 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
657 };
658
659 VkImage buffer_image;
660 result =
661 v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
662 if (result != VK_SUCCESS)
663 return handled;
664
665 v3dv_cmd_buffer_add_private_obj(
666 cmd_buffer, (uintptr_t)buffer_image,
667 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
668
669 /* Bind the buffer memory to the image */
670 VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
671 i * buf_width * buf_height * buffer_bpp;
672 result =
673 vk_common_BindImageMemory(_device, buffer_image,
674 v3dv_device_memory_to_handle(buffer->mem),
675 buffer_offset);
676 if (result != VK_SUCCESS)
677 return handled;
678
679 /* Blit-copy the requested image extent.
680 *
681 * Since we are copying, the blit must use the same format on the
682 * destination and source images to avoid format conversions. The
683 * only exception is copying stencil, which we upload to a R8UI source
684 * image, but that we need to blit to a S8D24 destination (the only
685 * stencil format we support).
686 */
687 const VkImageBlit2 blit_region = {
688 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
689 .srcSubresource = {
690 .aspectMask = copy_aspect,
691 .mipLevel = region->imageSubresource.mipLevel,
692 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
693 .layerCount = 1,
694 },
695 .srcOffsets = {
696 {
697 DIV_ROUND_UP(region->imageOffset.x, block_width),
698 DIV_ROUND_UP(region->imageOffset.y, block_height),
699 region->imageOffset.z + i,
700 },
701 {
702 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
703 block_width),
704 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
705 block_height),
706 region->imageOffset.z + i + 1,
707 },
708 },
709 .dstSubresource = {
710 .aspectMask = copy_aspect,
711 .mipLevel = 0,
712 .baseArrayLayer = 0,
713 .layerCount = 1,
714 },
715 .dstOffsets = {
716 { 0, 0, 0 },
717 {
718 DIV_ROUND_UP(region->imageExtent.width, block_width),
719 DIV_ROUND_UP(region->imageExtent.height, block_height),
720 1
721 },
722 },
723 };
724
725 handled = blit_shader(cmd_buffer,
726 v3dv_image_from_handle(buffer_image), dst_format,
727 image, src_format,
728 cmask, &cswizzle,
729 &blit_region, VK_FILTER_NEAREST, false);
730 if (!handled) {
731 /* This is unexpected, we should have a supported blit spec */
732 unreachable("Unable to blit buffer to destination image");
733 return false;
734 }
735 }
736
737 assert(handled);
738 return true;
739 }
740
741 VKAPI_ATTR void VKAPI_CALL
742 v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
743 const VkCopyImageToBufferInfo2 *info)
744
745 {
746 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
747 V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
748 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
749
750 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
751
752 cmd_buffer->state.is_transfer = true;
753
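/* Try the TLB path first and fall back to a shader blit; as noted in the
 * helpers' comments, each returns true if it supports the operation even
 * if it failed to process it (e.g. out-of-memory).
 */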
754 for (uint32_t i = 0; i < info->regionCount; i++) {
755 if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
756 continue;
757 if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
758 continue;
759 unreachable("Unsupported image to buffer copy.");
760 }
761
762 cmd_buffer->state.is_transfer = false;
763 }
764
765 /**
766 * Returns true if the implementation supports the requested operation (even if
767 * it failed to process it, for example, due to an out-of-memory error).
768 */
769 static bool
770 copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
771 struct v3dv_image *dst,
772 struct v3dv_image *src,
773 const VkImageCopy2 *region)
774 {
775 /* Destination can't be raster format */
776 if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
777 return false;
778
779 /* We can only do full copies, so if the format is D24S8 both aspects need
780 * to be copied. We only need to check the dst format because the spec
781 * states that depth/stencil formats must match exactly.
782 */
783 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
784 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
785 VK_IMAGE_ASPECT_STENCIL_BIT;
786 if (region->dstSubresource.aspectMask != ds_aspects)
787 return false;
788 }
789
790 /* Don't handle copies between uncompressed and compressed formats for now.
791 *
792 * FIXME: we should be able to handle these easily but there is no coverage
793 * in CTS at the moment that makes such copies with full images (which we
794 * require here), only partial copies. Also, in that case the code below that
795 * checks for "dst image complete" requires some changes, since it is
796 * checking against the region dimensions, which are in units of the source
797 * image format.
798 */
799 if (vk_format_is_compressed(dst->vk.format) !=
800 vk_format_is_compressed(src->vk.format)) {
801 return false;
802 }
803
804 /* Source region must start at (0,0) */
805 if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
806 return false;
807
808 /* Destination image must be complete */
809 if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
810 return false;
811
812 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
813 uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
814 uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
815 if (region->extent.width != dst_width || region->extent.height != dst_height)
816 return false;
817
818 /* From vkCmdCopyImage:
819 *
820 * "When copying between compressed and uncompressed formats the extent
821 * members represent the texel dimensions of the source image and not
822 * the destination."
823 */
824 const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
825 const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
826 uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
827 uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
828
829 /* Account for sample count */
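/* 4x multisampled surfaces are laid out as a 2x2 grid of samples per
 * pixel, so the raw copy below doubles both dimensions to cover every
 * sample.
 */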
830 assert(dst->vk.samples == src->vk.samples);
831 if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
832 assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
833 width *= 2;
834 height *= 2;
835 }
836
837 /* The TFU unit doesn't handle format conversions so we need the formats to
838 * match. On the other hand, vkCmdCopyImage allows different color formats
839 * on the source and destination images, but only if they are texel
840 * compatible. For us, this means that we can effectively ignore different
841 * formats and just make the copy using either of them, since we are just
842 * moving raw data and not making any conversions.
843 *
844 * Also, the formats supported by the TFU unit are limited, but again, since
845 * we are only doing raw copies here without interpreting or converting
846 * the underlying pixel data according to its format, we can always choose
847 * to use compatible formats that are supported with the TFU unit.
848 */
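/* For example, a copy between VK_FORMAT_R8G8B8A8_UNORM and
 * VK_FORMAT_R8G8B8A8_UINT images (both 4 bytes per texel) can use the
 * same 32-bit TFU format, since the data is moved verbatim.
 */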
849 assert(dst->cpp == src->cpp);
850 const struct v3dv_format *format =
851 v3dv_get_compatible_tfu_format(cmd_buffer->device,
852 dst->cpp, NULL);
853
854 /* Emit a TFU job for each layer to blit */
855 const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
856 region->dstSubresource.layerCount :
857 region->extent.depth;
858 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
859
860 const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
861 region->srcSubresource.baseArrayLayer : region->srcOffset.z;
862 const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
863 region->dstSubresource.baseArrayLayer : region->dstOffset.z;
864 for (uint32_t i = 0; i < layer_count; i++) {
865 const uint32_t dst_offset =
866 dst->mem->bo->offset +
867 v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i);
868 const uint32_t src_offset =
869 src->mem->bo->offset +
870 v3dv_layer_offset(src, src_mip_level, base_src_layer + i);
871
872 const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
873 const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
874
875 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
876 cmd_buffer,
877 dst->mem->bo->handle,
878 dst_offset,
879 dst_slice->tiling,
880 dst_slice->padded_height,
881 dst->cpp,
882 src->mem->bo->handle,
883 src_offset,
884 src_slice->tiling,
885 src_slice->tiling == V3D_TILING_RASTER ?
886 src_slice->stride : src_slice->padded_height,
887 src->cpp,
888 width, height, format);
889 }
890
891 return true;
892 }
893
894 /**
895 * Returns true if the implementation supports the requested operation (even if
896 * it failed to process it, for example, due to an out-of-memory error).
897 */
898 static bool
899 copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
900 struct v3dv_image *dst,
901 struct v3dv_image *src,
902 const VkImageCopy2 *region)
903 {
904 VkFormat fb_format;
905 if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
906 !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
907 return false;
908 }
909
910 /* From the Vulkan spec, VkImageCopy valid usage:
911 *
912 * "If neither the calling command’s srcImage nor the calling command’s
913 * dstImage has a multi-planar image format then the aspectMask member
914 * of srcSubresource and dstSubresource must match."
915 */
916 assert(region->dstSubresource.aspectMask ==
917 region->srcSubresource.aspectMask);
918 uint32_t internal_type, internal_bpp;
919 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
920 (fb_format, region->dstSubresource.aspectMask,
921 &internal_type, &internal_bpp);
922
923 /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
924 *
925 * "The number of slices of the extent (for 3D) or layers of the
926 * srcSubresource (for non-3D) must match the number of slices of the
927 * extent (for 3D) or layers of the dstSubresource (for non-3D)."
928 */
929 assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
930 region->srcSubresource.layerCount : region->extent.depth) ==
931 (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
932 region->dstSubresource.layerCount : region->extent.depth));
933 uint32_t num_layers;
934 if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
935 num_layers = region->dstSubresource.layerCount;
936 else
937 num_layers = region->extent.depth;
938 assert(num_layers > 0);
939
940 struct v3dv_job *job =
941 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
942 if (!job)
943 return true;
944
945 /* Handle copy to compressed image using compatible format */
946 const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
947 const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
948 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
949 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
950
951 v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp,
952 src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
953
954 struct v3dv_meta_framebuffer framebuffer;
955 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
956 internal_type, &job->frame_tiling);
957
958 v3dv_X(job->device, job_emit_binning_flush)(job);
959 v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
960
961 v3dv_cmd_buffer_finish_job(cmd_buffer);
962
963 return true;
964 }
965
966 /**
967 * Takes the image provided as argument and creates a new image that has
968 * the same specification and aliases the same memory storage, except that:
969 *
970 * - It has the uncompressed format passed in.
971 * - Its original width/height are scaled by the factors passed in.
972 *
973 * This is useful to implement copies from compressed images using the blit
974 * path. The idea is that we create uncompressed "image views" of both the
975 * source and destination images using the uncompressed format and then we
976 * define the copy blit in terms of that format.
977 */
978 static struct v3dv_image *
979 create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
980 struct v3dv_image *src,
981 float width_scale,
982 float height_scale,
983 VkFormat format)
984 {
985 assert(!vk_format_is_compressed(format));
986
987 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
988
989 VkImageCreateInfo info = {
990 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
991 .imageType = src->vk.image_type,
992 .format = format,
993 .extent = {
994 .width = src->vk.extent.width * width_scale,
995 .height = src->vk.extent.height * height_scale,
996 .depth = src->vk.extent.depth,
997 },
998 .mipLevels = src->vk.mip_levels,
999 .arrayLayers = src->vk.array_layers,
1000 .samples = src->vk.samples,
1001 .tiling = src->vk.tiling,
1002 .usage = src->vk.usage,
1003 };
1004
1005 VkImage _image;
1006 VkResult result =
1007 v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
1008 if (result != VK_SUCCESS) {
1009 v3dv_flag_oom(cmd_buffer, NULL);
1010 return NULL;
1011 }
1012
1013 struct v3dv_image *image = v3dv_image_from_handle(_image);
1014 image->mem = src->mem;
1015 image->mem_offset = src->mem_offset;
1016 return image;
1017 }
1018
1019 /**
1020 * Returns true if the implementation supports the requested operation (even if
1021 * it failed to process it, for example, due to an out-of-memory error).
1022 */
1023 static bool
1024 copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
1025 struct v3dv_image *dst,
1026 struct v3dv_image *src,
1027 const VkImageCopy2 *region)
1028 {
1029 const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
1030 const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
1031 const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
1032 const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
1033 const float block_scale_w = (float)src_block_w / (float)dst_block_w;
1034 const float block_scale_h = (float)src_block_h / (float)dst_block_h;
1035
1036 /* We need to choose a single format for the blit to ensure that this is
1037 * really a copy and there are no format conversions going on. Since we are
1038 * going to blit, we need to make sure that the selected format can be
1039 * both rendered to and textured from.
1040 */
1041 VkFormat format;
1042 float src_scale_w = 1.0f;
1043 float src_scale_h = 1.0f;
1044 float dst_scale_w = block_scale_w;
1045 float dst_scale_h = block_scale_h;
1046 if (vk_format_is_compressed(src->vk.format)) {
1047 /* If we are copying from a compressed format we should be aware that we
1048 * are going to texture from the source image, and the texture setup
1049 * knows the actual size of the image, so we need to choose a format
1050 * that has a per-texel (not per-block) bpp that is compatible for that
1051 * image size. For example, for a source image with size Bw*WxBh*H
1052 * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
1053 * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
1054 * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
1055 * so we could specify a blit with size Bw*WxBh*H and a format with
1056 * a bpp of 8-bit per texel (R8_UINT).
1057 *
1058 * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
1059 * which is 64-bit per texel, then we would need a 4-bit format, which
1060 * we don't have, so instead we still choose an 8-bit format, but we
1061 * apply a divisor to the row dimensions of the blit, since we are
1062 * copying two texels per item.
1063 *
1064 * Generally, we can choose any format so long as we compute appropriate
1065 * divisors for the width and height depending on the source image's
1066 * bpp.
1067 */
1068 assert(src->cpp == dst->cpp);
1069
1070 format = VK_FORMAT_R32G32_UINT;
1071 switch (src->cpp) {
1072 case 16:
1073 format = VK_FORMAT_R32G32B32A32_UINT;
1074 break;
1075 case 8:
1076 format = VK_FORMAT_R16G16B16A16_UINT;
1077 break;
1078 default:
1079 unreachable("Unsupported compressed format");
1080 }
1081
1082 /* Create image views of the src/dst images that we can interpret in
1083 * terms of the canonical format.
1084 */
1085 src_scale_w /= src_block_w;
1086 src_scale_h /= src_block_h;
1087 dst_scale_w /= src_block_w;
1088 dst_scale_h /= src_block_h;
1089
1090 src = create_image_alias(cmd_buffer, src,
1091 src_scale_w, src_scale_h, format);
1092
1093 dst = create_image_alias(cmd_buffer, dst,
1094 dst_scale_w, dst_scale_h, format);
1095 } else {
1096 format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
1097 src->vk.format : get_compatible_tlb_format(src->vk.format);
1098 if (format == VK_FORMAT_UNDEFINED)
1099 return false;
1100
1101 const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
1102 if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
1103 return false;
1104 }
1105
1106 /* Given an uncompressed image with size WxH, if we copy it to a compressed
1107 * image, it will result in an image with size W*bWxH*bH, where bW and bH
1108 * are the compressed format's block width and height. This means that
1109 * copies between compressed and uncompressed images involve different
1110 * image sizes, and therefore, we need to take that into account when
1111 * setting up the source and destination blit regions below, so they are
1112 * consistent from the point of view of the single compatible format
1113 * selected for the copy.
1114 *
1115 * We should take into account that the dimensions of the region provided
1116 * to the copy command are specified in terms of the source image. With that
1117 * in mind, below we adjust the blit destination region to be consistent with
1118 * the source region for the compatible format, so basically, we apply
1119 * the block scale factor to the destination offset provided by the copy
1120 * command (because it is specified in terms of the destination image, not
1121 * the source), and then we just add the region copy dimensions to that
1122 * (since the region dimensions are already specified in terms of the source
1123 * image).
1124 */
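/* Worked example (assuming an ETC2_R8G8B8A8 source and an
 * R32G32B32A32_UINT destination, both 16 bytes per block/texel): both
 * images are aliased as RGBA32UI, a 16x16 texel source region becomes
 * 4x4 block-sized texels at srcOffset / 4, and the destination region
 * becomes 4x4 texels at dstOffset, so both sides describe the same
 * amount of data.
 */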
1125 const VkOffset3D src_start = {
1126 region->srcOffset.x * src_scale_w,
1127 region->srcOffset.y * src_scale_h,
1128 region->srcOffset.z,
1129 };
1130 const VkOffset3D src_end = {
1131 src_start.x + region->extent.width * src_scale_w,
1132 src_start.y + region->extent.height * src_scale_h,
1133 src_start.z + region->extent.depth,
1134 };
1135
1136 const VkOffset3D dst_start = {
1137 region->dstOffset.x * dst_scale_w,
1138 region->dstOffset.y * dst_scale_h,
1139 region->dstOffset.z,
1140 };
1141 const VkOffset3D dst_end = {
1142 dst_start.x + region->extent.width * src_scale_w,
1143 dst_start.y + region->extent.height * src_scale_h,
1144 dst_start.z + region->extent.depth,
1145 };
1146
1147 const VkImageBlit2 blit_region = {
1148 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
1149 .srcSubresource = region->srcSubresource,
1150 .srcOffsets = { src_start, src_end },
1151 .dstSubresource = region->dstSubresource,
1152 .dstOffsets = { dst_start, dst_end },
1153 };
1154 bool handled = blit_shader(cmd_buffer,
1155 dst, format,
1156 src, format,
1157 0, NULL,
1158 &blit_region, VK_FILTER_NEAREST, true);
1159
1160 /* We should have selected formats that we can blit */
1161 assert(handled);
1162 return handled;
1163 }
1164
1165 VKAPI_ATTR void VKAPI_CALL
1166 v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
1167 const VkCopyImageInfo2 *info)
1168
1169 {
1170 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1171 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
1172 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
1173
1174 assert(src->vk.samples == dst->vk.samples);
1175
1176 cmd_buffer->state.is_transfer = true;
1177
1178 for (uint32_t i = 0; i < info->regionCount; i++) {
1179 if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
1180 continue;
1181 if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
1182 continue;
1183 if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
1184 continue;
1185 unreachable("Image copy not supported");
1186 }
1187
1188 cmd_buffer->state.is_transfer = false;
1189 }
1190
1191 VKAPI_ATTR void VKAPI_CALL
1192 v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
1193 const VkCopyBufferInfo2 *pCopyBufferInfo)
1194 {
1195 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1196 V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
1197 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
1198
1199 cmd_buffer->state.is_transfer = true;
1200
1201 for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
1202 v3dv_X(cmd_buffer->device, meta_copy_buffer)
1203 (cmd_buffer,
1204 dst_buffer->mem->bo, dst_buffer->mem_offset,
1205 src_buffer->mem->bo, src_buffer->mem_offset,
1206 &pCopyBufferInfo->pRegions[i]);
1207 }
1208
1209 cmd_buffer->state.is_transfer = false;
1210 }
1211
1212 static void
1213 destroy_update_buffer_cb(VkDevice _device,
1214 uint64_t pobj,
1215 VkAllocationCallbacks *alloc)
1216 {
1217 V3DV_FROM_HANDLE(v3dv_device, device, _device);
1218 struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
1219 v3dv_bo_free(device, bo);
1220 }
1221
1222 VKAPI_ATTR void VKAPI_CALL
1223 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
1224 VkBuffer dstBuffer,
1225 VkDeviceSize dstOffset,
1226 VkDeviceSize dataSize,
1227 const void *pData)
1228 {
1229 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1230 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1231
1232 struct v3dv_bo *src_bo =
1233 v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
1234 if (!src_bo) {
1235 fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
1236 return;
1237 }
1238
1239 bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
1240 if (!ok) {
1241 fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
1242 return;
1243 }
1244
1245 cmd_buffer->state.is_transfer = true;
1246
1247 memcpy(src_bo->map, pData, dataSize);
1248
1249 v3dv_bo_unmap(cmd_buffer->device, src_bo);
1250
1251 VkBufferCopy2 region = {
1252 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
1253 .srcOffset = 0,
1254 .dstOffset = dstOffset,
1255 .size = dataSize,
1256 };
1257 struct v3dv_job *copy_job =
1258 v3dv_X(cmd_buffer->device, meta_copy_buffer)
1259 (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
1260 src_bo, 0, &region);
1261
1262 if (copy_job) {
1263 v3dv_cmd_buffer_add_private_obj(
1264 cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
1265 }
1266
1267 cmd_buffer->state.is_transfer = false;
1268 }
1269
1270 VKAPI_ATTR void VKAPI_CALL
1271 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
1272 VkBuffer dstBuffer,
1273 VkDeviceSize dstOffset,
1274 VkDeviceSize size,
1275 uint32_t data)
1276 {
1277 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1278 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1279
1280 cmd_buffer->state.is_transfer = true;
1281
1282 struct v3dv_bo *bo = dst_buffer->mem->bo;
1283
1284 /* From the Vulkan spec:
1285 *
1286 * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
1287 * a multiple of 4, then the nearest smaller multiple is used."
1288 */
1289 if (size == VK_WHOLE_SIZE) {
1290 size = dst_buffer->size - dstOffset;
1291 size -= size % 4;
1292 }
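/* For example, filling a 4094-byte buffer from dstOffset 0 with
 * VK_WHOLE_SIZE writes only the first 4092 bytes.
 */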
1293
1294 v3dv_X(cmd_buffer->device, meta_fill_buffer)
1295 (cmd_buffer, bo, dstOffset, size, data);
1296
1297 cmd_buffer->state.is_transfer = false;
1298 }
1299
1300 /**
1301 * Returns true if the implementation supports the requested operation (even if
1302 * it failed to process it, for example, due to an out-of-memory error).
1303 */
1304 static bool
1305 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
1306 struct v3dv_image *image,
1307 struct v3dv_buffer *buffer,
1308 const VkBufferImageCopy2 *region)
1309 {
1310 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
1311
1312 /* Destination can't be raster format */
1313 if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
1314 return false;
1315
1316 /* We can't copy D24S8 because buffer to image copies only copy one aspect
1317 * at a time, and the TFU copies full images. Also, V3D stores the depth
1318 * bits for both D24S8 and D24X8 in the 24 MSBs of each 32-bit word, but
1319 * the Vulkan spec specifies the buffer data the other way around, so it is
1320 * not a straight copy: we would have to swizzle the channels, which the
1321 * TFU can't do.
1322 */
1323 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
1324 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
1325 return false;
1326 }
1327
1328 /* Region must include full slice */
1329 const uint32_t offset_x = region->imageOffset.x;
1330 const uint32_t offset_y = region->imageOffset.y;
1331 if (offset_x != 0 || offset_y != 0)
1332 return false;
1333
1334 uint32_t width, height;
1335 if (region->bufferRowLength == 0)
1336 width = region->imageExtent.width;
1337 else
1338 width = region->bufferRowLength;
1339
1340 if (region->bufferImageHeight == 0)
1341 height = region->imageExtent.height;
1342 else
1343 height = region->bufferImageHeight;
1344
1345 if (width != image->vk.extent.width || height != image->vk.extent.height)
1346 return false;
1347
1348 /* Handle region semantics for compressed images */
1349 const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
1350 const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
1351 width = DIV_ROUND_UP(width, block_w);
1352 height = DIV_ROUND_UP(height, block_h);
1353
1354 /* Format must be supported for texturing via the TFU. Since we are just
1355 * copying raw data and not converting between pixel formats, we can ignore
1356 * the image's format and choose a compatible TFU format for the image
1357 * texel size instead, which expands the list of formats we can handle here.
1358 */
1359 const struct v3dv_format *format =
1360 v3dv_get_compatible_tfu_format(cmd_buffer->device,
1361 image->cpp, NULL);
1362
1363 const uint32_t mip_level = region->imageSubresource.mipLevel;
1364 const struct v3d_resource_slice *slice = &image->slices[mip_level];
1365
1366 uint32_t num_layers;
1367 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1368 num_layers = region->imageSubresource.layerCount;
1369 else
1370 num_layers = region->imageExtent.depth;
1371 assert(num_layers > 0);
1372
1373 assert(image->mem && image->mem->bo);
1374 const struct v3dv_bo *dst_bo = image->mem->bo;
1375
1376 assert(buffer->mem && buffer->mem->bo);
1377 const struct v3dv_bo *src_bo = buffer->mem->bo;
1378
1379 /* Emit a TFU job per layer to copy */
1380 const uint32_t buffer_stride = width * image->cpp;
1381 for (int i = 0; i < num_layers; i++) {
1382 uint32_t layer;
1383 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1384 layer = region->imageSubresource.baseArrayLayer + i;
1385 else
1386 layer = region->imageOffset.z + i;
1387
1388 const uint32_t buffer_offset =
1389 buffer->mem_offset + region->bufferOffset +
1390 height * buffer_stride * i;
1391 const uint32_t src_offset = src_bo->offset + buffer_offset;
1392
1393 const uint32_t dst_offset =
1394 dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
1395
1396 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
1397 cmd_buffer,
1398 dst_bo->handle,
1399 dst_offset,
1400 slice->tiling,
1401 slice->padded_height,
1402 image->cpp,
1403 src_bo->handle,
1404 src_offset,
1405 V3D_TILING_RASTER,
1406 width,
1407 1,
1408 width, height, format);
1409 }
1410
1411 return true;
1412 }
1413
1414 /**
1415 * Returns true if the implementation supports the requested operation (even if
1416 * it failed to process it, for example, due to an out-of-memory error).
1417 */
1418 static bool
1419 copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1420 struct v3dv_image *image,
1421 struct v3dv_buffer *buffer,
1422 const VkBufferImageCopy2 *region)
1423 {
1424 VkFormat fb_format;
1425 if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
1426 return false;
1427
1428 uint32_t internal_type, internal_bpp;
1429 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
1430 (fb_format, region->imageSubresource.aspectMask,
1431 &internal_type, &internal_bpp);
1432
1433 uint32_t num_layers;
1434 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
1435 num_layers = region->imageSubresource.layerCount;
1436 else
1437 num_layers = region->imageExtent.depth;
1438 assert(num_layers > 0);
1439
1440 struct v3dv_job *job =
1441 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1442 if (!job)
1443 return true;
1444
1445 /* Handle copy to compressed format using a compatible format */
1446 const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
1447 const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
1448 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
1449 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
1450
1451 v3dv_job_start_frame(job, width, height, num_layers, false,
1452 1, internal_bpp, false);
1453
1454 struct v3dv_meta_framebuffer framebuffer;
1455 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
1456 internal_type, &job->frame_tiling);
1457
1458 v3dv_X(job->device, job_emit_binning_flush)(job);
1459 v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
1460 (job, image, buffer, &framebuffer, region);
1461
1462 v3dv_cmd_buffer_finish_job(cmd_buffer);
1463
1464 return true;
1465 }
1466
1467 static bool
1468 create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1469 struct v3dv_image *image,
1470 struct v3dv_buffer *buffer,
1471 const VkBufferImageCopy2 *region)
1472 {
1473 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
1474 return true;
1475 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
1476 return true;
1477 return false;
1478 }
1479
1480 static VkResult
1481 create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
1482 {
1483 /* If this is not the first pool we create for this command buffer,
1484 * size it based on the size of the currently exhausted pool.
1485 */
1486 uint32_t descriptor_count = 64;
1487 if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
1488 struct v3dv_descriptor_pool *exhausted_pool =
1489 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
1490 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
1491 }
1492
1493 /* Create the descriptor pool */
1494 cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
1495 VkDescriptorPoolSize pool_size = {
1496 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1497 .descriptorCount = descriptor_count,
1498 };
1499 VkDescriptorPoolCreateInfo info = {
1500 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
1501 .maxSets = descriptor_count,
1502 .poolSizeCount = 1,
1503 .pPoolSizes = &pool_size,
1504 .flags = 0,
1505 };
1506 VkResult result =
1507 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
1508 &info,
1509 &cmd_buffer->device->vk.alloc,
1510 &cmd_buffer->meta.texel_buffer_copy.dspool);
1511
1512 if (result == VK_SUCCESS) {
1513 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
1514 const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
1515
1516 v3dv_cmd_buffer_add_private_obj(
1517 cmd_buffer, (uintptr_t) _pool,
1518 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
1519
1520 struct v3dv_descriptor_pool *pool =
1521 v3dv_descriptor_pool_from_handle(_pool);
1522 pool->is_driver_internal = true;
1523 }
1524
1525 return result;
1526 }
1527
1528 static VkResult
1529 allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
1530 VkDescriptorSet *set)
1531 {
1532 /* Make sure we have a descriptor pool */
1533 VkResult result;
1534 if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
1535 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
1536 if (result != VK_SUCCESS)
1537 return result;
1538 }
1539 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
1540
1541 /* Allocate descriptor set */
1542 struct v3dv_device *device = cmd_buffer->device;
1543 VkDevice _device = v3dv_device_to_handle(device);
1544 VkDescriptorSetAllocateInfo info = {
1545 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
1546 .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
1547 .descriptorSetCount = 1,
1548 .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
1549 };
1550 result = v3dv_AllocateDescriptorSets(_device, &info, set);
1551
1552 /* If we ran out of pool space, grow the pool and try again */
1553 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
1554 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
1555 if (result == VK_SUCCESS) {
1556 info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
1557 result = v3dv_AllocateDescriptorSets(_device, &info, set);
1558 }
1559 }
1560
1561 return result;
1562 }
1563
1564 static void
1565 get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
1566 VkColorComponentFlags cmask,
1567 VkComponentMapping *cswizzle,
1568 bool is_layered,
1569 uint8_t *key)
1570 {
1571 memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1572
1573 uint32_t *p = (uint32_t *) key;
1574
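/* Key layout sketch (matching the writes below):
 *
 *   word 0     : format
 *   word 1     : cmask
 *   word 2     : is_layered (0 or 1)
 *   words 3..6 : cswizzle (assuming VkComponentMapping is four 32-bit enums)
 *
 * The final assert checks that this adds up to
 * V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE.
 */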
1575 *p = format;
1576 p++;
1577
1578 *p = cmask;
1579 p++;
1580
1581 /* Note that we are using a single byte for this, so we could pack
1582 * more data into this 32-bit slot in the future.
1583 */
1584 *p = is_layered ? 1 : 0;
1585 p++;
1586
1587 memcpy(p, cswizzle, sizeof(VkComponentMapping));
1588 p += sizeof(VkComponentMapping) / sizeof(uint32_t);
1589
1590 assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1591 }
1592
1593 static bool
1594 create_blit_render_pass(struct v3dv_device *device,
1595 VkFormat dst_format,
1596 VkFormat src_format,
1597 VkRenderPass *pass_load,
1598 VkRenderPass *pass_no_load);
1599
1600 static bool
1601 create_pipeline(struct v3dv_device *device,
1602 struct v3dv_render_pass *pass,
1603 struct nir_shader *vs_nir,
1604 struct nir_shader *gs_nir,
1605 struct nir_shader *fs_nir,
1606 const VkPipelineVertexInputStateCreateInfo *vi_state,
1607 const VkPipelineDepthStencilStateCreateInfo *ds_state,
1608 const VkPipelineColorBlendStateCreateInfo *cb_state,
1609 const VkPipelineMultisampleStateCreateInfo *ms_state,
1610 const VkPipelineLayout layout,
1611 VkPipeline *pipeline);
1612
1613 static nir_shader *
1614 get_texel_buffer_copy_vs()
1615 {
1616 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1617 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
1618 "meta texel buffer copy vs");
1619 nir_variable *vs_out_pos =
1620 nir_variable_create(b.shader, nir_var_shader_out,
1621 glsl_vec4_type(), "gl_Position");
1622 vs_out_pos->data.location = VARYING_SLOT_POS;
1623
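/* nir_gen_rect_vertices() derives the rect's clip-space corners from the
 * vertex index; the per-region viewport/scissor set by the caller then
 * restricts rasterization to the destination area of the copy.
 */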
1624 nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
1625 nir_store_var(&b, vs_out_pos, pos, 0xf);
1626
1627 return b.shader;
1628 }
1629
1630 static nir_shader *
1631 get_texel_buffer_copy_gs()
1632 {
1633 /* FIXME: this creates a geometry shader that takes the index of a single
1634 * layer to process from push constants, so we need to emit a draw call for
1635 * each layer that we want to copy. We could do better and have it take a
1636 * range of layers; however, if we were to do this, we would need to be
1637 * careful not to exceed the maximum number of output vertices allowed in
1638 * a geometry shader.
1639 */
1640 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1641 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
1642 "meta texel buffer copy gs");
1643 nir_shader *nir = b.shader;
1644 nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
1645 nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
1646 (1ull << VARYING_SLOT_LAYER);
1647 nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
1648 nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
1649 nir->info.gs.vertices_in = 3;
1650 nir->info.gs.vertices_out = 3;
1651 nir->info.gs.invocations = 1;
1652 nir->info.gs.active_stream_mask = 0x1;
1653
1654 /* in vec4 gl_Position[3] */
1655 nir_variable *gs_in_pos =
1656 nir_variable_create(b.shader, nir_var_shader_in,
1657 glsl_array_type(glsl_vec4_type(), 3, 0),
1658 "in_gl_Position");
1659 gs_in_pos->data.location = VARYING_SLOT_POS;
1660
1661 /* out vec4 gl_Position */
1662 nir_variable *gs_out_pos =
1663 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
1664 "out_gl_Position");
1665 gs_out_pos->data.location = VARYING_SLOT_POS;
1666
1667 /* out float gl_Layer */
1668 nir_variable *gs_out_layer =
1669 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
1670 "out_gl_Layer");
1671 gs_out_layer->data.location = VARYING_SLOT_LAYER;
1672
1673 /* Emit output triangle */
1674 for (uint32_t i = 0; i < 3; i++) {
1675 /* gl_Position from shader input */
1676 nir_deref_instr *in_pos_i =
1677 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
1678 nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
1679
1680 /* gl_Layer from push constants */
1681 nir_ssa_def *layer =
1682 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1683 .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
1684 .range = 4);
1685 nir_store_var(&b, gs_out_layer, layer, 0x1);
1686
1687 nir_emit_vertex(&b, 0);
1688 }
1689
1690 nir_end_primitive(&b, 0);
1691
1692 return nir;
1693 }
1694
1695 static nir_ssa_def *
1696 load_frag_coord(nir_builder *b)
1697 {
1698 nir_foreach_shader_in_variable(var, b->shader) {
1699 if (var->data.location == VARYING_SLOT_POS)
1700 return nir_load_var(b, var);
1701 }
1702 nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
1703 glsl_vec4_type(), NULL);
1704 pos->data.location = VARYING_SLOT_POS;
1705 return nir_load_var(b, pos);
1706 }
1707
1708 static uint32_t
1709 component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
1710 {
1711 if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
1712 swz = comp;
1713
1714 switch (swz) {
1715 case VK_COMPONENT_SWIZZLE_R:
1716 return 0;
1717 case VK_COMPONENT_SWIZZLE_G:
1718 return 1;
1719 case VK_COMPONENT_SWIZZLE_B:
1720 return 2;
1721 case VK_COMPONENT_SWIZZLE_A:
1722 return 3;
1723 default:
1724 unreachable("Invalid swizzle");
1725 };
1726 }
1727
1728 static nir_shader *
1729 get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
1730 VkComponentMapping *cswizzle)
1731 {
1732 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1733 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
1734 "meta texel buffer copy fs");
1735
1736 /* We only use the copy from texel buffer shader to implement
1737 * copy_buffer_to_image_shader, which always selects a compatible integer
1738 * format for the copy.
1739 */
1740 assert(vk_format_is_int(format));
1741
1742 /* Fragment shader output color */
1743 nir_variable *fs_out_color =
1744 nir_variable_create(b.shader, nir_var_shader_out,
1745 glsl_uvec4_type(), "out_color");
1746 fs_out_color->data.location = FRAG_RESULT_DATA0;
1747
1748 /* Texel buffer input */
1749 const struct glsl_type *sampler_type =
1750 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
1751 nir_variable *sampler =
1752 nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
1753 sampler->data.descriptor_set = 0;
1754 sampler->data.binding = 0;
1755
1756 /* Load the box describing the pixel region we want to copy from the
1757 * texel buffer.
1758 */
1759 nir_ssa_def *box =
1760 nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
1761 .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
1762 .range = 16);
1763
1764 /* Load the buffer stride (this comes in texel units) */
1765 nir_ssa_def *stride =
1766 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1767 .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
1768 .range = 4);
1769
1770 /* Load the buffer offset (this comes in texel units) */
1771 nir_ssa_def *offset =
1772 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1773 .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
1774 .range = 4);
1775
1776 nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
1777
1778 /* Load pixel data from texel buffer based on the x,y offset of the pixel
1779 * within the box. Texel buffers are 1D arrays of texels.
1780 *
1781 * Notice that we already make sure that we only generate fragments that are
1782 * inside the box through the scissor/viewport state, so our offset into the
1783 * texel buffer should always be within its bounds and we don't need
1784 * to add a check for that here.
1785 */
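/* In other words (sketch):
 *
 *   texel_offset = offset + (frag.x - box.x) + (frag.y - box.y) * stride
 */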
1786 nir_ssa_def *x_offset =
1787 nir_isub(&b, nir_channel(&b, coord, 0),
1788 nir_channel(&b, box, 0));
1789 nir_ssa_def *y_offset =
1790 nir_isub(&b, nir_channel(&b, coord, 1),
1791 nir_channel(&b, box, 1));
1792 nir_ssa_def *texel_offset =
1793 nir_iadd(&b, nir_iadd(&b, offset, x_offset),
1794 nir_imul(&b, y_offset, stride));
1795
1796 nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
1797 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
1798 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
1799 tex->op = nir_texop_txf;
1800 tex->src[0].src_type = nir_tex_src_coord;
1801 tex->src[0].src = nir_src_for_ssa(texel_offset);
1802 tex->src[1].src_type = nir_tex_src_texture_deref;
1803 tex->src[1].src = nir_src_for_ssa(tex_deref);
1804 tex->dest_type = nir_type_uint32;
1805 tex->is_array = false;
1806 tex->coord_components = 1;
1807 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
1808 nir_builder_instr_insert(&b, &tex->instr);
1809
1810 uint32_t swiz[4];
1811 swiz[0] =
1812 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
1813 swiz[1] =
1814 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
1815 swiz[2] =
1816 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
1817 swiz[3] =
1818 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
1819 nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
1820 nir_store_var(&b, fs_out_color, s, 0xf);
1821
1822 return b.shader;
1823 }
1824
1825 static bool
1826 create_texel_buffer_copy_pipeline(struct v3dv_device *device,
1827 VkFormat format,
1828 VkColorComponentFlags cmask,
1829 VkComponentMapping *cswizzle,
1830 bool is_layered,
1831 VkRenderPass _pass,
1832 VkPipelineLayout pipeline_layout,
1833 VkPipeline *pipeline)
1834 {
1835 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
1836
1837 assert(vk_format_is_color(format));
1838
1839 nir_shader *vs_nir = get_texel_buffer_copy_vs();
1840 nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
1841 nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
1842
1843 const VkPipelineVertexInputStateCreateInfo vi_state = {
1844 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
1845 .vertexBindingDescriptionCount = 0,
1846 .vertexAttributeDescriptionCount = 0,
1847 };
1848
1849 VkPipelineDepthStencilStateCreateInfo ds_state = {
1850 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
1851 };
1852
1853 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
1854 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
1855 .blendEnable = false,
1856 .colorWriteMask = cmask,
1857 };
1858
1859 const VkPipelineColorBlendStateCreateInfo cb_state = {
1860 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
1861 .logicOpEnable = false,
1862 .attachmentCount = 1,
1863 .pAttachments = blend_att_state
1864 };
1865
1866 const VkPipelineMultisampleStateCreateInfo ms_state = {
1867 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
1868 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
1869 .sampleShadingEnable = false,
1870 .pSampleMask = NULL,
1871 .alphaToCoverageEnable = false,
1872 .alphaToOneEnable = false,
1873 };
1874
1875 return create_pipeline(device,
1876 pass,
1877 vs_nir, gs_nir, fs_nir,
1878 &vi_state,
1879 &ds_state,
1880 &cb_state,
1881 &ms_state,
1882 pipeline_layout,
1883 pipeline);
1884 }
1885
1886 static bool
1887 get_copy_texel_buffer_pipeline(
1888 struct v3dv_device *device,
1889 VkFormat format,
1890 VkColorComponentFlags cmask,
1891 VkComponentMapping *cswizzle,
1892 VkImageType image_type,
1893 bool is_layered,
1894 struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
1895 {
1896 bool ok = true;
1897
1898 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
1899 get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
1900 key);
1901
1902 mtx_lock(&device->meta.mtx);
1903 struct hash_entry *entry =
1904 _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
1905 key);
1906 if (entry) {
1907 mtx_unlock(&device->meta.mtx);
1908 *pipeline = entry->data;
1909 return true;
1910 }
1911
1912 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
1913 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1914
1915 if (*pipeline == NULL)
1916 goto fail;
1917
1918 /* The blit render pass is compatible */
1919 ok = create_blit_render_pass(device, format, format,
1920 &(*pipeline)->pass,
1921 &(*pipeline)->pass_no_load);
1922 if (!ok)
1923 goto fail;
1924
1925 ok =
1926 create_texel_buffer_copy_pipeline(device,
1927 format, cmask, cswizzle, is_layered,
1928 (*pipeline)->pass,
1929 device->meta.texel_buffer_copy.p_layout,
1930 &(*pipeline)->pipeline);
1931 if (!ok)
1932 goto fail;
1933
1934 uint8_t *dupkey = malloc(V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1935 memcpy(dupkey, key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1936 _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
1937 dupkey, *pipeline);
1938
1939 mtx_unlock(&device->meta.mtx);
1940 return true;
1941
1942 fail:
1943 mtx_unlock(&device->meta.mtx);
1944
1945 VkDevice _device = v3dv_device_to_handle(device);
1946 if (*pipeline) {
1947 if ((*pipeline)->pass)
1948 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
1949 if ((*pipeline)->pipeline)
1950 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
1951 vk_free(&device->vk.alloc, *pipeline);
1952 *pipeline = NULL;
1953 }
1954
1955 return false;
1956 }
1957
1958 static bool
1959 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
1960 VkImageAspectFlags aspect,
1961 struct v3dv_image *image,
1962 VkFormat dst_format,
1963 VkFormat src_format,
1964 struct v3dv_buffer *buffer,
1965 uint32_t buffer_bpp,
1966 VkColorComponentFlags cmask,
1967 VkComponentMapping *cswizzle,
1968 uint32_t region_count,
1969 const VkBufferImageCopy2 *regions)
1970 {
1971 VkResult result;
1972 bool handled = false;
1973
1974 assert(cswizzle);
1975
1976 /* This is a copy path, so we don't handle format conversions. The only
1977 * exception are stencil to D24S8 copies, which are handled as a color
1978 * masked R8->RGBA8 copy.
1979 */
1980 assert(src_format == dst_format ||
1981 (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
1982 src_format == VK_FORMAT_R8_UINT &&
1983 cmask == VK_COLOR_COMPONENT_R_BIT));
1984
1985 /* We only handle color copies. Callers can copy D/S aspects by using
1986 * a compatible color format and maybe a cmask/cswizzle for D24 formats.
1987 */
1988 if (!vk_format_is_color(dst_format) || !vk_format_is_color(src_format))
1989 return handled;
1990
1991 /* FIXME: we only handle uncompressed images for now. */
1992 if (vk_format_is_compressed(image->vk.format))
1993 return handled;
1994
1995 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
1996 VK_COLOR_COMPONENT_G_BIT |
1997 VK_COLOR_COMPONENT_B_BIT |
1998 VK_COLOR_COMPONENT_A_BIT;
1999 if (cmask == 0)
2000 cmask = full_cmask;
2001
2002 /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
2003 * so we can bind it as a texel buffer. Otherwise, the buffer view
2004 * we create below won't set up the texture state that we need for this.
2005 */
2006 if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
2007 if (v3dv_buffer_format_supports_features(
2008 cmd_buffer->device, src_format,
2009 VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT)) {
2010 buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
2011 } else {
2012 return handled;
2013 }
2014 }
2015
2016 /* At this point we should be able to handle the copy unless an unexpected
2017 * error occurs, such as an OOM.
2018 */
2019 handled = true;
2020
2021
2022 /* Compute the number of layers to copy.
2023 *
2024 * If we are batching (region_count > 1) all our regions have the same
2025 * image subresource so we can take this from the first region. For 3D
2026 * images we require the same depth extent.
2027 */
2028 const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2029 uint32_t num_layers;
2030 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2031 num_layers = resource->layerCount;
2032 } else {
2033 assert(region_count == 1);
2034 num_layers = regions[0].imageExtent.depth;
2035 }
2036 assert(num_layers > 0);
2037
2038 /* Get the texel buffer copy pipeline */
2039 struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2040 bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
2041 dst_format, cmask, cswizzle,
2042 image->vk.image_type, num_layers > 1,
2043 &pipeline);
2044 if (!ok)
2045 return handled;
2046 assert(pipeline && pipeline->pipeline && pipeline->pass);
2047
2048 /* Setup descriptor set for the source texel buffer. We don't have to
2049 * register the descriptor as a private command buffer object since
2050 * all descriptors will be freed automatically with the descriptor
2051 * pool.
2052 */
2053 VkDescriptorSet set;
2054 result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2055 if (result != VK_SUCCESS)
2056 return handled;
2057
2058 /* We can't pass region->bufferOffset here for the offset field because
2059 * the texture base pointer in the texture shader state must be a 64-byte
2060 * aligned value. Instead, we use 0 here and we pass the offset in texels
2061 * as a push constant to the shader.
2062 */
2063 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2064 VkBufferViewCreateInfo buffer_view_info = {
2065 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2066 .buffer = v3dv_buffer_to_handle(buffer),
2067 .format = src_format,
2068 .offset = 0,
2069 .range = VK_WHOLE_SIZE,
2070 };
2071
2072 VkBufferView texel_buffer_view;
2073 result = v3dv_CreateBufferView(_device, &buffer_view_info,
2074 &cmd_buffer->device->vk.alloc,
2075 &texel_buffer_view);
2076 if (result != VK_SUCCESS)
2077 return handled;
2078
2079 v3dv_cmd_buffer_add_private_obj(
2080 cmd_buffer, (uintptr_t)texel_buffer_view,
2081 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2082
2083 VkWriteDescriptorSet write = {
2084 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2085 .dstSet = set,
2086 .dstBinding = 0,
2087 .dstArrayElement = 0,
2088 .descriptorCount = 1,
2089 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2090 .pTexelBufferView = &texel_buffer_view,
2091 };
2092 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2093
2094 /* Push command buffer state before starting meta operation */
2095 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2096 uint32_t dirty_dynamic_state = 0;
2097
2098 /* Bind common state for all layers and regions */
2099 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2100 v3dv_CmdBindPipeline(_cmd_buffer,
2101 VK_PIPELINE_BIND_POINT_GRAPHICS,
2102 pipeline->pipeline);
2103
2104 v3dv_CmdBindDescriptorSets(_cmd_buffer,
2105 VK_PIPELINE_BIND_POINT_GRAPHICS,
2106 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2107 0, 1, &set,
2108 0, NULL);
2109
2110 /* Setup framebuffer.
2111 *
2112 * For 3D images, this creates a layered framebuffer with a number of
2113 * layers matching the depth extent of the 3D image.
2114 */
2115 uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
2116 uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
2117 VkImageViewCreateInfo image_view_info = {
2118 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2119 .image = v3dv_image_to_handle(image),
2120 .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
2121 .format = dst_format,
2122 .subresourceRange = {
2123 .aspectMask = aspect,
2124 .baseMipLevel = resource->mipLevel,
2125 .levelCount = 1,
2126 .baseArrayLayer = resource->baseArrayLayer,
2127 .layerCount = num_layers,
2128 },
2129 };
2130 VkImageView image_view;
2131 result = v3dv_create_image_view(cmd_buffer->device,
2132 &image_view_info, &image_view);
2133 if (result != VK_SUCCESS)
2134 goto fail;
2135
2136 v3dv_cmd_buffer_add_private_obj(
2137 cmd_buffer, (uintptr_t)image_view,
2138 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2139
2140 VkFramebufferCreateInfo fb_info = {
2141 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2142 .renderPass = pipeline->pass,
2143 .attachmentCount = 1,
2144 .pAttachments = &image_view,
2145 .width = fb_width,
2146 .height = fb_height,
2147 .layers = num_layers,
2148 };
2149
2150 VkFramebuffer fb;
2151 result = v3dv_CreateFramebuffer(_device, &fb_info,
2152 &cmd_buffer->device->vk.alloc, &fb);
2153 if (result != VK_SUCCESS)
2154 goto fail;
2155
2156 v3dv_cmd_buffer_add_private_obj(
2157 cmd_buffer, (uintptr_t)fb,
2158 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2159
2160 /* For each layer */
2161 for (uint32_t l = 0; l < num_layers; l++) {
2162 /* Start render pass for this layer.
2163 *
2164 * If we only have one region to copy, then we might be able to
2165 * skip the TLB load if it is aligned to tile boundaries. All layers
2166 * copy the same area, so we only need to check this once.
2167 */
2168 bool can_skip_tlb_load = false;
2169 VkRect2D render_area;
2170 if (region_count == 1) {
2171 render_area.offset.x = regions[0].imageOffset.x;
2172 render_area.offset.y = regions[0].imageOffset.y;
2173 render_area.extent.width = regions[0].imageExtent.width;
2174 render_area.extent.height = regions[0].imageExtent.height;
2175
2176 if (l == 0) {
2177 struct v3dv_render_pass *pipeline_pass =
2178 v3dv_render_pass_from_handle(pipeline->pass);
2179 can_skip_tlb_load =
2180 cmask == full_cmask &&
2181 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2182 v3dv_framebuffer_from_handle(fb),
2183 pipeline_pass, 0);
2184 }
2185 } else {
2186 render_area.offset.x = 0;
2187 render_area.offset.y = 0;
2188 render_area.extent.width = fb_width;
2189 render_area.extent.height = fb_height;
2190 }
2191
2192 VkRenderPassBeginInfo rp_info = {
2193 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2194 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2195 pipeline->pass,
2196 .framebuffer = fb,
2197 .renderArea = render_area,
2198 .clearValueCount = 0,
2199 };
2200
2201 VkSubpassBeginInfo sp_info = {
2202 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
2203 .contents = VK_SUBPASS_CONTENTS_INLINE,
2204 };
2205
2206 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
2207 struct v3dv_job *job = cmd_buffer->state.job;
2208 if (!job)
2209 goto fail;
2210
2211 /* If we are using a layered copy we need to specify the layer for the
2212 * Geometry Shader.
2213 */
2214 if (num_layers > 1) {
2215 uint32_t layer = resource->baseArrayLayer + l;
2216 v3dv_CmdPushConstants(_cmd_buffer,
2217 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2218 VK_SHADER_STAGE_GEOMETRY_BIT,
2219 24, 4, &layer);
2220 }
2221
2222 /* For each region */
2223 dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
2224 for (uint32_t r = 0; r < region_count; r++) {
2225 const VkBufferImageCopy2 *region = &regions[r];
2226
2227 /* Obtain the 2D buffer region spec */
2228 uint32_t buf_width, buf_height;
2229 if (region->bufferRowLength == 0)
2230 buf_width = region->imageExtent.width;
2231 else
2232 buf_width = region->bufferRowLength;
2233
2234 if (region->bufferImageHeight == 0)
2235 buf_height = region->imageExtent.height;
2236 else
2237 buf_height = region->bufferImageHeight;
2238
2239 const VkViewport viewport = {
2240 .x = region->imageOffset.x,
2241 .y = region->imageOffset.y,
2242 .width = region->imageExtent.width,
2243 .height = region->imageExtent.height,
2244 .minDepth = 0.0f,
2245 .maxDepth = 1.0f
2246 };
2247 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2248 const VkRect2D scissor = {
2249 .offset = { region->imageOffset.x, region->imageOffset.y },
2250 .extent = { region->imageExtent.width, region->imageExtent.height }
2251 };
2252 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2253
2254 const VkDeviceSize buf_offset =
2255 region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2256 uint32_t push_data[6] = {
2257 region->imageOffset.x,
2258 region->imageOffset.y,
2259 region->imageOffset.x + region->imageExtent.width - 1,
2260 region->imageOffset.y + region->imageExtent.height - 1,
2261 buf_width,
2262 buf_offset,
2263 };
2264
2265 v3dv_CmdPushConstants(_cmd_buffer,
2266 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2267 VK_SHADER_STAGE_FRAGMENT_BIT,
2268 0, sizeof(push_data), &push_data);
2269
2270 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2271 } /* For each region */
2272
2273 VkSubpassEndInfo sp_end_info = {
2274 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
2275 };
2276
2277 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
2278 } /* For each layer */
2279
2280 fail:
2281 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
2282 return handled;
2283 }
2284
2285 /**
2286 * Returns true if the implementation supports the requested operation (even if
2287 * it failed to process it, for example, due to an out-of-memory error).
2288 */
2289 static bool
2290 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2291 VkImageAspectFlags aspect,
2292 struct v3dv_image *image,
2293 VkFormat dst_format,
2294 VkFormat src_format,
2295 struct v3dv_buffer *buffer,
2296 uint32_t buffer_bpp,
2297 VkColorComponentFlags cmask,
2298 VkComponentMapping *cswizzle,
2299 uint32_t region_count,
2300 const VkBufferImageCopy2 *regions)
2301 {
2302 /* Since we can't sample linear images we need to upload the linear
2303 * buffer to a tiled image that we can use as a blit source, which
2304 * is slow.
2305 */
2306 perf_debug("Falling back to blit path for buffer to image copy.\n");
2307
2308 struct v3dv_device *device = cmd_buffer->device;
2309 VkDevice _device = v3dv_device_to_handle(device);
2310 bool handled = true;
2311
2312 /* Allocate memory for the tiled image. Since we copy layer by layer
2313 * we allocate memory to hold a full layer, which is the worst case.
2314 * For that we create a dummy image with that spec, get memory requirements
2315 * for it and use that information to create the memory allocation.
2316 * We will then reuse this memory store for all the regions we want to
2317 * copy.
2318 */
2319 VkImage dummy_image;
2320 VkImageCreateInfo dummy_info = {
2321 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2322 .imageType = VK_IMAGE_TYPE_2D,
2323 .format = src_format,
2324 .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
2325 .mipLevels = 1,
2326 .arrayLayers = 1,
2327 .samples = VK_SAMPLE_COUNT_1_BIT,
2328 .tiling = VK_IMAGE_TILING_OPTIMAL,
2329 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2330 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2331 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2332 .queueFamilyIndexCount = 0,
2333 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2334 };
2335 VkResult result =
2336 v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2337 if (result != VK_SUCCESS)
2338 return handled;
2339
2340 VkMemoryRequirements reqs;
2341 vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2342 v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2343
2344 VkDeviceMemory mem;
2345 VkMemoryAllocateInfo alloc_info = {
2346 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2347 .allocationSize = reqs.size,
2348 .memoryTypeIndex = 0,
2349 };
2350 result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2351 if (result != VK_SUCCESS)
2352 return handled;
2353
2354 v3dv_cmd_buffer_add_private_obj(
2355 cmd_buffer, (uintptr_t)mem,
2356 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2357
2358 /* Obtain the layer count.
2359 *
2360 * If we are batching (region_count > 1) all our regions have the same
2361 * image subresource so we can take this from the first region.
2362 */
2363 uint32_t num_layers;
2364 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
2365 num_layers = regions[0].imageSubresource.layerCount;
2366 else
2367 num_layers = regions[0].imageExtent.depth;
2368 assert(num_layers > 0);
2369
2370 /* Sanity check: we can only batch multiple regions together if they have
2371 * the same framebuffer (so the same layer).
2372 */
2373 assert(num_layers == 1 || region_count == 1);
2374
2375 const uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
2376 const uint32_t block_height = vk_format_get_blockheight(image->vk.format);
2377
2378 /* Copy regions by uploading each region to a temporary tiled image using
2379 * the memory we have just allocated as storage.
2380 */
2381 for (uint32_t r = 0; r < region_count; r++) {
2382 const VkBufferImageCopy2 *region = &regions[r];
2383
2384 /* Obtain the 2D buffer region spec */
2385 uint32_t buf_width, buf_height;
2386 if (region->bufferRowLength == 0)
2387 buf_width = region->imageExtent.width;
2388 else
2389 buf_width = region->bufferRowLength;
2390
2391 if (region->bufferImageHeight == 0)
2392 buf_height = region->imageExtent.height;
2393 else
2394 buf_height = region->bufferImageHeight;
2395
2396 /* If the image is compressed, the bpp refers to blocks, not pixels */
2397 buf_width = buf_width / block_width;
2398 buf_height = buf_height / block_height;
2399
2400 for (uint32_t i = 0; i < num_layers; i++) {
2401 /* Create the tiled image */
2402 VkImageCreateInfo image_info = {
2403 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2404 .imageType = VK_IMAGE_TYPE_2D,
2405 .format = src_format,
2406 .extent = { buf_width, buf_height, 1 },
2407 .mipLevels = 1,
2408 .arrayLayers = 1,
2409 .samples = VK_SAMPLE_COUNT_1_BIT,
2410 .tiling = VK_IMAGE_TILING_OPTIMAL,
2411 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2412 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2413 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2414 .queueFamilyIndexCount = 0,
2415 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2416 };
2417
2418 VkImage buffer_image;
2419 VkResult result =
2420 v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
2421 &buffer_image);
2422 if (result != VK_SUCCESS)
2423 return handled;
2424
2425 v3dv_cmd_buffer_add_private_obj(
2426 cmd_buffer, (uintptr_t)buffer_image,
2427 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
2428
2429 result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
2430 if (result != VK_SUCCESS)
2431 return handled;
2432
2433 /* Upload buffer contents for the selected layer */
2434 const VkDeviceSize buf_offset_bytes =
2435 region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
2436 const VkBufferImageCopy2 buffer_image_copy = {
2437 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
2438 .bufferOffset = buf_offset_bytes,
2439 .bufferRowLength = region->bufferRowLength / block_width,
2440 .bufferImageHeight = region->bufferImageHeight / block_height,
2441 .imageSubresource = {
2442 .aspectMask = aspect,
2443 .mipLevel = 0,
2444 .baseArrayLayer = 0,
2445 .layerCount = 1,
2446 },
2447 .imageOffset = { 0, 0, 0 },
2448 .imageExtent = { buf_width, buf_height, 1 }
2449 };
2450 handled =
2451 create_tiled_image_from_buffer(cmd_buffer,
2452 v3dv_image_from_handle(buffer_image),
2453 buffer, &buffer_image_copy);
2454 if (!handled) {
2455 /* This is unexpected, we should have set up the upload to be
2456 * compatible with a TFU or TLB copy.
2457 */
2458 unreachable("Unable to copy buffer to image through TLB");
2459 return false;
2460 }
2461
2462 /* Blit-copy the requested image extent from the buffer image to the
2463 * destination image.
2464 *
2465 * Since we are copying, the blit must use the same format on the
2466 * destination and source images to avoid format conversions. The
2467 * only exception is copying stencil, which we upload to a R8UI source
2468 * image, but that we need to blit to a S8D24 destination (the only
2469 * stencil format we support).
2470 */
2471 const VkImageBlit2 blit_region = {
2472 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
2473 .srcSubresource = {
2474 .aspectMask = aspect,
2475 .mipLevel = 0,
2476 .baseArrayLayer = 0,
2477 .layerCount = 1,
2478 },
2479 .srcOffsets = {
2480 { 0, 0, 0 },
2481 { region->imageExtent.width, region->imageExtent.height, 1 },
2482 },
2483 .dstSubresource = {
2484 .aspectMask = aspect,
2485 .mipLevel = region->imageSubresource.mipLevel,
2486 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
2487 .layerCount = 1,
2488 },
2489 .dstOffsets = {
2490 {
2491 DIV_ROUND_UP(region->imageOffset.x, block_width),
2492 DIV_ROUND_UP(region->imageOffset.y, block_height),
2493 region->imageOffset.z + i,
2494 },
2495 {
2496 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
2497 block_width),
2498 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
2499 block_height),
2500 region->imageOffset.z + i + 1,
2501 },
2502 },
2503 };
2504
2505 handled = blit_shader(cmd_buffer,
2506 image, dst_format,
2507 v3dv_image_from_handle(buffer_image), src_format,
2508 cmask, cswizzle,
2509 &blit_region, VK_FILTER_NEAREST, true);
2510 if (!handled) {
2511 /* This is unexpected, we should have a supported blit spec */
2512 unreachable("Unable to blit buffer to destination image");
2513 return false;
2514 }
2515 }
2516 }
2517
2518 return handled;
2519 }
2520
2521 /**
2522 * Returns true if the implementation supports the requested operation (even if
2523 * it failed to process it, for example, due to an out-of-memory error).
2524 */
2525 static bool
2526 copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
2527 struct v3dv_image *image,
2528 struct v3dv_buffer *buffer,
2529 uint32_t region_count,
2530 const VkBufferImageCopy2 *regions,
2531 bool use_texel_buffer)
2532 {
2533 /* We can only call this with region_count > 1 if we can batch the regions
2534 * together, in which case they share the same image subresource, and so
2535 * the same aspect.
2536 */
2537 VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
2538
2539 /* Generally, the bpp of the data in the buffer matches that of the
2540 * destination image. The exception is the case where we are uploading
2541 * stencil (8bpp) to a combined d24s8 image (32bpp).
2542 */
2543 uint32_t buf_bpp = image->cpp;
2544
2545 /* We are about to upload the buffer data to an image so we can then
2546 * blit that to our destination region. Because we are going to implement
2547 * the copy as a blit, we want our blit source and destination formats to be
2548 * the same (to avoid any format conversions), so we choose a canonical
2549 * format that matches the destination image bpp.
2550 */
2551 VkComponentMapping ident_swizzle = {
2552 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
2553 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
2554 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
2555 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
2556 };
2557
2558 VkComponentMapping cswizzle = ident_swizzle;
2559 VkColorComponentFlags cmask = 0; /* Write all components */
2560 VkFormat src_format;
2561 VkFormat dst_format;
2562 switch (buf_bpp) {
2563 case 16:
2564 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2565 src_format = VK_FORMAT_R32G32B32A32_UINT;
2566 dst_format = src_format;
2567 break;
2568 case 8:
2569 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2570 src_format = VK_FORMAT_R16G16B16A16_UINT;
2571 dst_format = src_format;
2572 break;
2573 case 4:
2574 switch (aspect) {
2575 case VK_IMAGE_ASPECT_COLOR_BIT:
2576 src_format = VK_FORMAT_R8G8B8A8_UINT;
2577 dst_format = src_format;
2578 break;
2579 case VK_IMAGE_ASPECT_DEPTH_BIT:
2580 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
2581 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
2582 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
2583 src_format = VK_FORMAT_R8G8B8A8_UINT;
2584 dst_format = src_format;
2585
2586 /* For D24 formats, the Vulkan spec states that the depth component
2587 * in the buffer is stored in the 24-LSB, but V3D wants it in the
2588 * 24-MSB.
2589 */
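/* Worked example: a little-endian D24 buffer texel is [d0 d1 d2 x] with
 * d0 the LSB. With the swizzle below the blit writes G=d0, B=d1, A=d2 and
 * the cmask skips R, so the depth value lands in the 24 MSB of the
 * destination texel while the R byte (the stencil of a packed d24s8
 * texel) is left untouched.
 */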
2590 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
2591 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2592 cmask = VK_COLOR_COMPONENT_G_BIT |
2593 VK_COLOR_COMPONENT_B_BIT |
2594 VK_COLOR_COMPONENT_A_BIT;
2595 cswizzle.r = VK_COMPONENT_SWIZZLE_R;
2596 cswizzle.g = VK_COMPONENT_SWIZZLE_R;
2597 cswizzle.b = VK_COMPONENT_SWIZZLE_G;
2598 cswizzle.a = VK_COMPONENT_SWIZZLE_B;
2599 }
2600 break;
2601 case VK_IMAGE_ASPECT_STENCIL_BIT:
2602 /* Since we don't support separate stencil this is always a stencil
2603 * copy to a combined depth/stencil image. Because we don't support
2604 * separate stencil images, we interpret the buffer data as a
2605 * color R8UI image, and implement the blit as a compatible color
2606 * blit to an RGBA8UI destination masking out writes to components
2607 * GBA (which map to the D24 component of a S8D24 image).
2608 */
2609 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
2610 buf_bpp = 1;
2611 src_format = VK_FORMAT_R8_UINT;
2612 dst_format = VK_FORMAT_R8G8B8A8_UINT;
2613 cmask = VK_COLOR_COMPONENT_R_BIT;
2614 break;
2615 default:
2616 unreachable("unsupported aspect");
2617 return false;
2618 };
2619 break;
2620 case 2:
2621 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
2622 aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
2623 src_format = VK_FORMAT_R16_UINT;
2624 dst_format = src_format;
2625 break;
2626 case 1:
2627 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2628 src_format = VK_FORMAT_R8_UINT;
2629 dst_format = src_format;
2630 break;
2631 default:
2632 unreachable("unsupported bit-size");
2633 return false;
2634 }
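/* Example of the mapping above: an 8bpp stencil upload to a
 * VK_FORMAT_D24_UNORM_S8_UINT image is implemented as an R8_UINT source
 * blitted to an R8G8B8A8_UINT view of the destination with cmask = R, so
 * only the stencil byte of each packed d24s8 texel is written.
 */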
2635
2636 if (use_texel_buffer) {
2637 return texel_buffer_shader_copy(cmd_buffer, aspect, image,
2638 dst_format, src_format,
2639 buffer, buf_bpp,
2640 cmask, &cswizzle,
2641 region_count, regions);
2642 } else {
2643 return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
2644 dst_format, src_format,
2645 buffer, buf_bpp,
2646 cmask, &cswizzle,
2647 region_count, regions);
2648 }
2649 }
2650
2651 /**
2652 * Returns true if the implementation supports the requested operation (even if
2653 * it failed to process it, for example, due to an out-of-memory error).
2654 */
2655 static bool
2656 copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
2657 struct v3dv_image *image,
2658 struct v3dv_buffer *buffer,
2659 const VkBufferImageCopy2 *region)
2660 {
2661 /* FIXME */
2662 if (vk_format_is_depth_or_stencil(image->vk.format))
2663 return false;
2664
2665 if (vk_format_is_compressed(image->vk.format))
2666 return false;
2667
2668 if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
2669 return false;
2670
2671 uint32_t buffer_width, buffer_height;
2672 if (region->bufferRowLength == 0)
2673 buffer_width = region->imageExtent.width;
2674 else
2675 buffer_width = region->bufferRowLength;
2676
2677 if (region->bufferImageHeight == 0)
2678 buffer_height = region->imageExtent.height;
2679 else
2680 buffer_height = region->bufferImageHeight;
2681
2682 uint32_t buffer_stride = buffer_width * image->cpp;
2683 uint32_t buffer_layer_stride = buffer_stride * buffer_height;
2684
2685 uint32_t num_layers;
2686 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
2687 num_layers = region->imageSubresource.layerCount;
2688 else
2689 num_layers = region->imageExtent.depth;
2690 assert(num_layers > 0);
2691
2692 struct v3dv_job *job =
2693 v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
2694 V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
2695 cmd_buffer, -1);
2696 if (!job)
2697 return true;
2698
2699 job->cpu.copy_buffer_to_image.image = image;
2700 job->cpu.copy_buffer_to_image.buffer = buffer;
2701 job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
2702 job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
2703 job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
2704 job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
2705 job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
2706 job->cpu.copy_buffer_to_image.mip_level =
2707 region->imageSubresource.mipLevel;
2708 job->cpu.copy_buffer_to_image.base_layer =
2709 region->imageSubresource.baseArrayLayer;
2710 job->cpu.copy_buffer_to_image.layer_count = num_layers;
2711
2712 list_addtail(&job->list_link, &cmd_buffer->jobs);
2713
2714 return true;
2715 }
2716
2717 VKAPI_ATTR void VKAPI_CALL
2718 v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
2719 const VkCopyBufferToImageInfo2 *info)
2720 {
2721 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
2722 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
2723 V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
2724
2725 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
2726
2727 cmd_buffer->state.is_transfer = true;
2728
2729 uint32_t r = 0;
2730 while (r < info->regionCount) {
2731 /* The TFU and TLB paths can only copy one region at a time and the region
2732 * needs to start at the origin. We try these first for the common case
2733 * where we are copying full images, since they should be the fastest.
2734 */
2735 uint32_t batch_size = 1;
2736 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
2737 goto handled;
2738
2739 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
2740 goto handled;
2741
2742 /* Otherwise, we are copying subrects, so we fall back to copying
2743 * via shader and texel buffers and we try to batch the regions
2744 * if possible. We can only batch copies if they have the same
2745 * framebuffer spec, which is mostly determined by the image
2746 * subresource of the region.
2747 */
2748 const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
2749 for (uint32_t s = r + 1; s < info->regionCount; s++) {
2750 const VkImageSubresourceLayers *rsc_s =
2751 &info->pRegions[s].imageSubresource;
2752
2753 if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
2754 break;
2755
2756 /* For 3D images we also need to check the depth extent */
2757 if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
2758 info->pRegions[s].imageExtent.depth !=
2759 info->pRegions[r].imageExtent.depth) {
2760 break;
2761 }
2762
2763 batch_size++;
2764 }
2765
2766 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
2767 batch_size, &info->pRegions[r], true)) {
2768 goto handled;
2769 }
2770
2771 /* If we still could not copy, fall back to slower paths.
2772 *
2773 * FIXME: we could try to batch these too, but since they are bound to be
2774 * slow it might not be worth it and we should instead put more effort
2775 * in handling more cases with the other paths.
2776 */
2777 if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
2778 &info->pRegions[r])) {
2779 batch_size = 1;
2780 goto handled;
2781 }
2782
2783 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
2784 batch_size, &info->pRegions[r], false)) {
2785 goto handled;
2786 }
2787
2788 unreachable("Unsupported buffer to image copy.");
2789
2790 handled:
2791 r += batch_size;
2792 }
2793
2794 cmd_buffer->state.is_transfer = false;
2795 }
2796
2797 static void
2798 compute_blit_3d_layers(const VkOffset3D *offsets,
2799 uint32_t *min_layer, uint32_t *max_layer,
2800 bool *mirror_z);
2801
2802 /**
2803 * Returns true if the implementation supports the requested operation (even if
2804 * it failed to process it, for example, due to an out-of-memory error).
2805 *
2806 * The TFU blit path doesn't handle scaling so the blit filter parameter can
2807 * be ignored.
2808 */
2809 static bool
2810 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
2811 struct v3dv_image *dst,
2812 struct v3dv_image *src,
2813 const VkImageBlit2 *region)
2814 {
2815 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
2816 assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
2817
2818 /* Format must match */
2819 if (src->vk.format != dst->vk.format)
2820 return false;
2821
2822 /* Destination can't be raster format */
2823 if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
2824 return false;
2825
2826 /* Source region must start at (0,0) */
2827 if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
2828 return false;
2829
2830 /* Destination region must start at (0,0) and cover the full mip level */
2831 if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
2832 return false;
2833
2834 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
2835 const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
2836 const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
2837 if (region->dstOffsets[1].x < dst_width - 1 ||
2838 region->dstOffsets[1].y < dst_height - 1) {
2839 return false;
2840 }
2841
2842 /* No XY scaling */
2843 if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
2844 region->srcOffsets[1].y != region->dstOffsets[1].y) {
2845 return false;
2846 }
2847
2848 /* If the format is D24S8 both aspects need to be copied, since the TFU
2849 * can't be programmed to copy only one aspect of the image.
2850 */
2851 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
2852 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
2853 VK_IMAGE_ASPECT_STENCIL_BIT;
2854 if (region->dstSubresource.aspectMask != ds_aspects)
2855 return false;
2856 }
2857
2858 /* Our TFU blits only handle exact copies (they require the same format
2859 * on input and output, no scaling, etc.), so there are no pixel format
2860 * conversions and we can rewrite the format to use one that is TFU
2861 * compatible based on its texel size.
2862 */
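/* For example, a 4-cpp image can be copied as if it used a generic
 * TFU-compatible 32bpp format, since texels are moved verbatim. This is
 * just the intent; the actual choice is made by
 * v3dv_get_compatible_tfu_format() below.
 */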
2863 const struct v3dv_format *format =
2864 v3dv_get_compatible_tfu_format(cmd_buffer->device,
2865 dst->cpp, NULL);
2866
2867 /* Emit a TFU job for each layer to blit */
2868 assert(region->dstSubresource.layerCount ==
2869 region->srcSubresource.layerCount);
2870
2871 uint32_t min_dst_layer;
2872 uint32_t max_dst_layer;
2873 bool dst_mirror_z = false;
2874 if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
2875 compute_blit_3d_layers(region->dstOffsets,
2876 &min_dst_layer, &max_dst_layer,
2877 &dst_mirror_z);
2878 } else {
2879 min_dst_layer = region->dstSubresource.baseArrayLayer;
2880 max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
2881 }
2882
2883 uint32_t min_src_layer;
2884 uint32_t max_src_layer;
2885 bool src_mirror_z = false;
2886 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
2887 compute_blit_3d_layers(region->srcOffsets,
2888 &min_src_layer, &max_src_layer,
2889 &src_mirror_z);
2890 } else {
2891 min_src_layer = region->srcSubresource.baseArrayLayer;
2892 max_src_layer = min_src_layer + region->srcSubresource.layerCount;
2893 }
2894
2895 /* No Z scaling for 3D images (for non-3D images both src and dst must
2896 * have the same layerCount).
2897 */
2898 if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
2899 return false;
2900
2901 const uint32_t layer_count = max_dst_layer - min_dst_layer;
2902 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
2903 for (uint32_t i = 0; i < layer_count; i++) {
2904 /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
2905 * only involves reversing the order of the slices.
2906 */
2907 const uint32_t dst_layer =
2908 dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
2909 const uint32_t src_layer =
2910 src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
2911
2912 const uint32_t dst_offset =
2913 dst->mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
2914 const uint32_t src_offset =
2915 src->mem->bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
2916
2917 const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
2918 const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
2919
2920 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
2921 cmd_buffer,
2922 dst->mem->bo->handle,
2923 dst_offset,
2924 dst_slice->tiling,
2925 dst_slice->padded_height,
2926 dst->cpp,
2927 src->mem->bo->handle,
2928 src_offset,
2929 src_slice->tiling,
2930 src_slice->tiling == V3D_TILING_RASTER ?
2931 src_slice->stride : src_slice->padded_height,
2932 src->cpp,
2933 dst_width, dst_height, format);
2934 }
2935
2936 return true;
2937 }
2938
2939 static bool
2940 format_needs_software_int_clamp(VkFormat format)
2941 {
2942 switch (format) {
2943 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
2944 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
2945 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
2946 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
2947 return true;
2948 default:
2949 return false;
2950 };
2951 }
2952
2953 static void
2954 get_blit_pipeline_cache_key(VkFormat dst_format,
2955 VkFormat src_format,
2956 VkColorComponentFlags cmask,
2957 VkSampleCountFlagBits dst_samples,
2958 VkSampleCountFlagBits src_samples,
2959 uint8_t *key)
2960 {
2961 memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
2962
2963 uint32_t *p = (uint32_t *) key;
2964
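/* Key layout (four 32-bit words, checked by the assert at the end):
 *
 *   word 0 : dst_format
 *   word 1 : src_format when software int clamping is needed, 0 otherwise
 *   word 2 : cmask
 *   word 3 : (dst_samples << 8) | src_samples
 */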
2965 *p = dst_format;
2966 p++;
2967
2968 /* Generally, when blitting from a larger format to a smaller format
2969 * the hardware takes care of clamping the source to the RT range.
2970 * Specifically, for integer formats, this is done by using
2971 * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
2972 * clamps to the bit-size of the render type, and some formats, such as
2973 * rgb10a2_uint, have a 16-bit render type, so it won't do what we need and
2974 * we have to clamp in software. In these cases, we need to amend the blit
2975 * shader with clamp code that depends on both the src and dst formats, so
2976 * we need the src format to be part of the key.
2977 */
2978 *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
2979 p++;
2980
2981 *p = cmask;
2982 p++;
2983
2984 *p = (dst_samples << 8) | src_samples;
2985 p++;
2986
2987 assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
2988 }
2989
2990 static bool
2991 create_blit_render_pass(struct v3dv_device *device,
2992 VkFormat dst_format,
2993 VkFormat src_format,
2994 VkRenderPass *pass_load,
2995 VkRenderPass *pass_no_load)
2996 {
2997 const bool is_color_blit = vk_format_is_color(dst_format);
2998
2999 /* Attachment load operation is specified below */
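/* Two variants are created: pass_load preserves the existing destination
 * contents (LOAD_OP_LOAD), while pass_no_load uses LOAD_OP_DONT_CARE so
 * callers can skip the TLB load when the render area is tile-aligned and
 * fully written (see texel_buffer_shader_copy()).
 */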
3000 VkAttachmentDescription2 att = {
3001 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
3002 .format = dst_format,
3003 .samples = VK_SAMPLE_COUNT_1_BIT,
3004 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3005 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3006 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3007 };
3008
3009 VkAttachmentReference2 att_ref = {
3010 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
3011 .attachment = 0,
3012 .layout = VK_IMAGE_LAYOUT_GENERAL,
3013 };
3014
3015 VkSubpassDescription2 subpass = {
3016 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
3017 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3018 .inputAttachmentCount = 0,
3019 .colorAttachmentCount = is_color_blit ? 1 : 0,
3020 .pColorAttachments = is_color_blit ? &att_ref : NULL,
3021 .pResolveAttachments = NULL,
3022 .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3023 .preserveAttachmentCount = 0,
3024 .pPreserveAttachments = NULL,
3025 };
3026
3027 VkRenderPassCreateInfo2 info = {
3028 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
3029 .attachmentCount = 1,
3030 .pAttachments = &att,
3031 .subpassCount = 1,
3032 .pSubpasses = &subpass,
3033 .dependencyCount = 0,
3034 .pDependencies = NULL,
3035 };
3036
3037 VkResult result;
3038 att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3039 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3040 &info, &device->vk.alloc, pass_load);
3041 if (result != VK_SUCCESS)
3042 return false;
3043
3044 att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3045 result = v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
3046 &info, &device->vk.alloc, pass_no_load);
3047 return result == VK_SUCCESS;
3048 }
3049
3050 static nir_ssa_def *
3051 gen_tex_coords(nir_builder *b)
3052 {
3053 nir_ssa_def *tex_box =
3054 nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3055
3056 nir_ssa_def *tex_z =
3057 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3058
3059 nir_ssa_def *vertex_id = nir_load_vertex_id(b);
3060
3061 /* vertex 0: src0_x, src0_y
3062 * vertex 1: src0_x, src1_y
3063 * vertex 2: src1_x, src0_y
3064 * vertex 3: src1_x, src1_y
3065 *
3066 * So:
3067 *
3068 * channel 0 is vertex_id < 2 ? src0_x : src1_x
3069 * channel 1 is vertex_id & 1 ? src1_y : src0_y
3070 */
3071
3072 nir_ssa_def *one = nir_imm_int(b, 1);
3073 nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
3074 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3075
3076 nir_ssa_def *comp[4];
3077 comp[0] = nir_bcsel(b, c0cmp,
3078 nir_channel(b, tex_box, 0),
3079 nir_channel(b, tex_box, 2));
3080
3081 comp[1] = nir_bcsel(b, c1cmp,
3082 nir_channel(b, tex_box, 3),
3083 nir_channel(b, tex_box, 1));
3084 comp[2] = tex_z;
3085 comp[3] = nir_imm_float(b, 1.0f);
3086 return nir_vec(b, comp, 4);
3087 }
3088
3089 static nir_ssa_def *
3090 build_nir_tex_op_read(struct nir_builder *b,
3091 nir_ssa_def *tex_pos,
3092 enum glsl_base_type tex_type,
3093 enum glsl_sampler_dim dim)
3094 {
3095 assert(dim != GLSL_SAMPLER_DIM_MS);
3096
3097 const struct glsl_type *sampler_type =
3098 glsl_sampler_type(dim, false, false, tex_type);
3099 nir_variable *sampler =
3100 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3101 sampler->data.descriptor_set = 0;
3102 sampler->data.binding = 0;
3103
3104 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3105 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3106 tex->sampler_dim = dim;
3107 tex->op = nir_texop_tex;
3108 tex->src[0].src_type = nir_tex_src_coord;
3109 tex->src[0].src = nir_src_for_ssa(tex_pos);
3110 tex->src[1].src_type = nir_tex_src_texture_deref;
3111 tex->src[1].src = nir_src_for_ssa(tex_deref);
3112 tex->src[2].src_type = nir_tex_src_sampler_deref;
3113 tex->src[2].src = nir_src_for_ssa(tex_deref);
3114 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3115 tex->is_array = glsl_sampler_type_is_array(sampler_type);
3116 tex->coord_components = tex_pos->num_components;
3117
3118 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
3119 nir_builder_instr_insert(b, &tex->instr);
3120 return &tex->dest.ssa;
3121 }
3122
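/* Emits a txf_ms instruction fetching a single sample (sample_idx) from a
 * multisampled blit source at the given unnormalized texel position.
 */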
3123 static nir_ssa_def *
3124 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3125 nir_variable *sampler,
3126 nir_ssa_def *tex_deref,
3127 enum glsl_base_type tex_type,
3128 nir_ssa_def *tex_pos,
3129 nir_ssa_def *sample_idx)
3130 {
3131 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
3132 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3133 tex->op = nir_texop_txf_ms;
3134 tex->src[0].src_type = nir_tex_src_coord;
3135 tex->src[0].src = nir_src_for_ssa(tex_pos);
3136 tex->src[1].src_type = nir_tex_src_texture_deref;
3137 tex->src[1].src = nir_src_for_ssa(tex_deref);
3138 tex->src[2].src_type = nir_tex_src_sampler_deref;
3139 tex->src[2].src = nir_src_for_ssa(tex_deref);
3140 tex->src[3].src_type = nir_tex_src_ms_index;
3141 tex->src[3].src = nir_src_for_ssa(sample_idx);
3142 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3143 tex->is_array = false;
3144 tex->coord_components = tex_pos->num_components;
3145
3146 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
3147 nir_builder_instr_insert(b, &tex->instr);
3148 return &tex->dest.ssa;
3149 }
3150
3151 /* Fetches all samples at the given position and averages them */
3152 static nir_ssa_def *
3153 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3154 nir_ssa_def *tex_pos,
3155 enum glsl_base_type tex_type,
3156 VkSampleCountFlagBits src_samples)
3157 {
3158 assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3159 const struct glsl_type *sampler_type =
3160 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3161 nir_variable *sampler =
3162 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3163 sampler->data.descriptor_set = 0;
3164 sampler->data.binding = 0;
3165
3166 const bool is_int = glsl_base_type_is_integer(tex_type);
3167
3168 nir_ssa_def *tmp = NULL;
3169 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3170 for (uint32_t i = 0; i < src_samples; i++) {
3171 nir_ssa_def *s =
3172 build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3173 tex_type, tex_pos,
3174 nir_imm_int(b, i));
3175
3176 /* For integer formats, the multisample resolve operation is expected to
3177 * return just one of the samples, so we return the first one here.
3178 */
3179 if (is_int)
3180 return s;
3181
3182 tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3183 }
3184
3185 assert(!is_int);
3186 return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
3187 }
3188
3189 /* Fetches the current sample (gl_SampleID) at the given position */
3190 static nir_ssa_def *
3191 build_nir_tex_op_ms_read(struct nir_builder *b,
3192 nir_ssa_def *tex_pos,
3193 enum glsl_base_type tex_type)
3194 {
3195 const struct glsl_type *sampler_type =
3196 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3197 nir_variable *sampler =
3198 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3199 sampler->data.descriptor_set = 0;
3200 sampler->data.binding = 0;
3201
3202 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3203
3204 return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3205 tex_type, tex_pos,
3206 nir_load_sample_id(b));
3207 }
3208
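/* Emits the texture read for the blit fragment shader: an averaging resolve
 * when the source is multisampled and the destination is single-sampled, a
 * per-sample fetch when both are multisampled, or a regular texture sample
 * for single-sampled sources.
 */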
3209 static nir_ssa_def *
3210 build_nir_tex_op(struct nir_builder *b,
3211 struct v3dv_device *device,
3212 nir_ssa_def *tex_pos,
3213 enum glsl_base_type tex_type,
3214 VkSampleCountFlagBits dst_samples,
3215 VkSampleCountFlagBits src_samples,
3216 enum glsl_sampler_dim dim)
3217 {
3218 switch (dim) {
3219 case GLSL_SAMPLER_DIM_MS:
3220 assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3221 /* For multisampled texture sources we need to use fetching instead of
3222 * normalized texture coordinates. We already configured our blit
3223 * coordinates to be in texel units, but here we still need to convert
3224 * them from floating point to integer.
3225 */
3226 tex_pos = nir_f2i32(b, tex_pos);
3227
3228 if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3229 return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3230 else
3231 return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3232 default:
3233 assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3234 return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3235 }
3236 }
3237
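/* Builds the blit vertex shader: it emits the 4 vertices of a triangle strip
 * covering the destination rectangle (via nir_gen_rect_vertices) and computes
 * the matching source texture coordinates from the push constant data.
 */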
3238 static nir_shader *
3239 get_blit_vs()
3240 {
3241 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3242 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3243 "meta blit vs");
3244
3245 const struct glsl_type *vec4 = glsl_vec4_type();
3246
3247 nir_variable *vs_out_pos =
3248 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3249 vs_out_pos->data.location = VARYING_SLOT_POS;
3250
3251 nir_variable *vs_out_tex_coord =
3252 nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3253 vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3254 vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3255
3256 nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
3257 nir_store_var(&b, vs_out_pos, pos, 0xf);
3258
3259 nir_ssa_def *tex_coord = gen_tex_coords(&b);
3260 nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3261
3262 return b.shader;
3263 }
3264
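/* Returns the writemask selecting the texture coordinate components that are
 * relevant for the given sampler dimensionality: x for 1D, xy for 2D/MS,
 * xyz for 3D.
 */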
3265 static uint32_t
3266 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3267 {
3268 switch (sampler_dim) {
3269 case GLSL_SAMPLER_DIM_1D: return 0x1;
3270 case GLSL_SAMPLER_DIM_2D: return 0x3;
3271 case GLSL_SAMPLER_DIM_MS: return 0x3;
3272 case GLSL_SAMPLER_DIM_3D: return 0x7;
3273 default:
3274 unreachable("invalid sampler dim");
3275 };
3276 }
3277
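/* Builds the blit fragment shader: it samples the source texture at the
 * interpolated coordinates, clamps integer results in software when the
 * destination format requires it, and writes the result to the single color
 * output.
 */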
3278 static nir_shader *
3279 get_color_blit_fs(struct v3dv_device *device,
3280 VkFormat dst_format,
3281 VkFormat src_format,
3282 VkSampleCountFlagBits dst_samples,
3283 VkSampleCountFlagBits src_samples,
3284 enum glsl_sampler_dim sampler_dim)
3285 {
3286 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3287 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
3288 "meta blit fs");
3289
3290 const struct glsl_type *vec4 = glsl_vec4_type();
3291
3292 nir_variable *fs_in_tex_coord =
3293 nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3294 fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3295
3296 const struct glsl_type *fs_out_type =
3297 vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3298 vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3299 glsl_vec4_type();
3300
3301 enum glsl_base_type src_base_type =
3302 vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3303 vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3304 GLSL_TYPE_FLOAT;
3305
3306 nir_variable *fs_out_color =
3307 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3308 fs_out_color->data.location = FRAG_RESULT_DATA0;
3309
3310 nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3311 const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3312 tex_coord = nir_channels(&b, tex_coord, channel_mask);
3313
3314 nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3315 dst_samples, src_samples, sampler_dim);
3316
3317 /* For integer textures, if the bit-size of the destination is too small to
3318 * hold the source value, Vulkan (CTS) expects the implementation to clamp to the
3319 * maximum value the destination can hold. The hardware can clamp to the
3320 * render target type, which usually matches the component bit-size, but
3321 * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3322 * render target type, so in these cases we need to clamp manually.
3323 */
3324 if (format_needs_software_int_clamp(dst_format)) {
3325 assert(vk_format_is_int(dst_format));
3326 enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3327 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3328
3329 nir_ssa_def *c[4];
3330 for (uint32_t i = 0; i < 4; i++) {
3331 c[i] = nir_channel(&b, color, i);
3332
3333 const uint32_t src_bit_size =
3334 util_format_get_component_bits(src_pformat,
3335 UTIL_FORMAT_COLORSPACE_RGB,
3336 i);
3337 const uint32_t dst_bit_size =
3338 util_format_get_component_bits(dst_pformat,
3339 UTIL_FORMAT_COLORSPACE_RGB,
3340 i);
3341
3342 if (dst_bit_size >= src_bit_size)
3343 continue;
3344
3345 assert(dst_bit_size > 0);
3346 if (util_format_is_pure_uint(dst_pformat)) {
3347 nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3348 c[i] = nir_umin(&b, c[i], max);
3349 } else {
3350 nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3351 nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3352 c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3353 }
3354 }
3355
3356 color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3357 }
3358
3359 nir_store_var(&b, fs_out_color, color, 0xf);
3360
3361 return b.shader;
3362 }
3363
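/* Creates a graphics pipeline for a meta operation from the given NIR shaders
 * and fixed-function state. The NIR shaders are owned by this function and
 * freed before returning.
 */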
3364 static bool
3365 create_pipeline(struct v3dv_device *device,
3366 struct v3dv_render_pass *pass,
3367 struct nir_shader *vs_nir,
3368 struct nir_shader *gs_nir,
3369 struct nir_shader *fs_nir,
3370 const VkPipelineVertexInputStateCreateInfo *vi_state,
3371 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3372 const VkPipelineColorBlendStateCreateInfo *cb_state,
3373 const VkPipelineMultisampleStateCreateInfo *ms_state,
3374 const VkPipelineLayout layout,
3375 VkPipeline *pipeline)
3376 {
3377 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
3378 struct vk_shader_module fs_m = vk_shader_module_from_nir(fs_nir);
3379 struct vk_shader_module gs_m;
3380
3381 uint32_t num_stages = gs_nir ? 3 : 2;
3382
3383
3384 VkPipelineShaderStageCreateInfo stages[3] = {
3385 {
3386 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3387 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3388 .module = vk_shader_module_to_handle(&vs_m),
3389 .pName = "main",
3390 },
3391 {
3392 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3393 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3394 .module = vk_shader_module_to_handle(&fs_m),
3395 .pName = "main",
3396 },
3397 {
3398 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3399 .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
3400 .module = VK_NULL_HANDLE,
3401 .pName = "main",
3402 },
3403 };
3404
3405 if (gs_nir) {
3406 gs_m = vk_shader_module_from_nir(gs_nir);
3407 stages[2].module = vk_shader_module_to_handle(&gs_m);
3408 }
3409
3410 VkGraphicsPipelineCreateInfo info = {
3411 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3412
3413 .stageCount = num_stages,
3414 .pStages = stages,
3415
3416 .pVertexInputState = vi_state,
3417
3418 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3419 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3420 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3421 .primitiveRestartEnable = false,
3422 },
3423
3424 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3425 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3426 .viewportCount = 1,
3427 .scissorCount = 1,
3428 },
3429
3430 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
3431 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
3432 .rasterizerDiscardEnable = false,
3433 .polygonMode = VK_POLYGON_MODE_FILL,
3434 .cullMode = VK_CULL_MODE_NONE,
3435 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
3436 .depthBiasEnable = false,
3437 },
3438
3439 .pMultisampleState = ms_state,
3440
3441 .pDepthStencilState = ds_state,
3442
3443 .pColorBlendState = cb_state,
3444
3445 /* This meta pipeline declares all of its state as dynamic.
3446 * As a consequence, vkCmdBindPipeline writes no dynamic state
3447 * to the cmd buffer. Therefore, at the end of the meta operation,
3448 * we only need to restore the dynamic state that was set via vkCmdSet.
3449 */
3450 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
3451 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
3452 .dynamicStateCount = 6,
3453 .pDynamicStates = (VkDynamicState[]) {
3454 VK_DYNAMIC_STATE_VIEWPORT,
3455 VK_DYNAMIC_STATE_SCISSOR,
3456 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
3457 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
3458 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
3459 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
3460 VK_DYNAMIC_STATE_DEPTH_BIAS,
3461 VK_DYNAMIC_STATE_LINE_WIDTH,
3462 },
3463 },
3464
3465 .flags = 0,
3466 .layout = layout,
3467 .renderPass = v3dv_render_pass_to_handle(pass),
3468 .subpass = 0,
3469 };
3470
3471 VkResult result =
3472 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
3473 VK_NULL_HANDLE,
3474 1, &info,
3475 &device->vk.alloc,
3476 pipeline);
3477
3478 ralloc_free(vs_nir);
3479 ralloc_free(gs_nir);
3480 ralloc_free(fs_nir);
3481
3482 return result == VK_SUCCESS;
3483 }
3484
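/* Translates the source image type and sample count into the GLSL sampler
 * dimensionality used by the blit shaders.
 */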
3485 static enum glsl_sampler_dim
3486 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
3487 {
3488 /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
3489 *
3490 * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
3491 * VK_IMAGE_TYPE_2D, ..."
3492 */
3493 assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
3494
3495 switch (type) {
3496 case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
3497 case VK_IMAGE_TYPE_2D:
3498 return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
3499 GLSL_SAMPLER_DIM_MS;
3500 case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
3501 default:
3502 unreachable("Invalid image type");
3503 }
3504 }
3505
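/* Creates the blit pipeline for the given destination/source format, color
 * mask, source image type and sample counts, using the blit VS/FS built
 * above.
 */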
3506 static bool
3507 create_blit_pipeline(struct v3dv_device *device,
3508 VkFormat dst_format,
3509 VkFormat src_format,
3510 VkColorComponentFlags cmask,
3511 VkImageType src_type,
3512 VkSampleCountFlagBits dst_samples,
3513 VkSampleCountFlagBits src_samples,
3514 VkRenderPass _pass,
3515 VkPipelineLayout pipeline_layout,
3516 VkPipeline *pipeline)
3517 {
3518 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
3519
3520 /* We always rewrite depth/stencil blits to compatible color blits */
3521 assert(vk_format_is_color(dst_format));
3522 assert(vk_format_is_color(src_format));
3523
3524 const enum glsl_sampler_dim sampler_dim =
3525 get_sampler_dim(src_type, src_samples);
3526
3527 nir_shader *vs_nir = get_blit_vs();
3528 nir_shader *fs_nir =
3529 get_color_blit_fs(device, dst_format, src_format,
3530 dst_samples, src_samples, sampler_dim);
3531
3532 const VkPipelineVertexInputStateCreateInfo vi_state = {
3533 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
3534 .vertexBindingDescriptionCount = 0,
3535 .vertexAttributeDescriptionCount = 0,
3536 };
3537
3538 VkPipelineDepthStencilStateCreateInfo ds_state = {
3539 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
3540 };
3541
3542 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
3543 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
3544 .blendEnable = false,
3545 .colorWriteMask = cmask,
3546 };
3547
3548 const VkPipelineColorBlendStateCreateInfo cb_state = {
3549 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
3550 .logicOpEnable = false,
3551 .attachmentCount = 1,
3552 .pAttachments = blend_att_state
3553 };
3554
3555 const VkPipelineMultisampleStateCreateInfo ms_state = {
3556 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
3557 .rasterizationSamples = dst_samples,
3558 .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
3559 .pSampleMask = NULL,
3560 .alphaToCoverageEnable = false,
3561 .alphaToOneEnable = false,
3562 };
3563
3564 return create_pipeline(device,
3565 pass,
3566 vs_nir, NULL, fs_nir,
3567 &vi_state,
3568 &ds_state,
3569 &cb_state,
3570 &ms_state,
3571 pipeline_layout,
3572 pipeline);
3573 }
3574
3575 /**
3576 * Return a pipeline suitable for blitting the requested aspect given the
3577 * destination and source formats.
3578 */
3579 static bool
3580 get_blit_pipeline(struct v3dv_device *device,
3581 VkFormat dst_format,
3582 VkFormat src_format,
3583 VkColorComponentFlags cmask,
3584 VkImageType src_type,
3585 VkSampleCountFlagBits dst_samples,
3586 VkSampleCountFlagBits src_samples,
3587 struct v3dv_meta_blit_pipeline **pipeline)
3588 {
3589 bool ok = true;
3590
3591 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
3592 get_blit_pipeline_cache_key(dst_format, src_format, cmask,
3593 dst_samples, src_samples, key);
3594 mtx_lock(&device->meta.mtx);
3595 struct hash_entry *entry =
3596 _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
3597 if (entry) {
3598 mtx_unlock(&device->meta.mtx);
3599 *pipeline = entry->data;
3600 return true;
3601 }
3602
3603 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
3604 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3605
3606 if (*pipeline == NULL)
3607 goto fail;
3608
3609 ok = create_blit_render_pass(device, dst_format, src_format,
3610 &(*pipeline)->pass,
3611 &(*pipeline)->pass_no_load);
3612 if (!ok)
3613 goto fail;
3614
3615 /* Create the pipeline using either of the render passes: they are
3616 * compatible, so it doesn't matter which one we use here.
3617 */
3618 ok = create_blit_pipeline(device,
3619 dst_format,
3620 src_format,
3621 cmask,
3622 src_type,
3623 dst_samples,
3624 src_samples,
3625 (*pipeline)->pass,
3626 device->meta.blit.p_layout,
3627 &(*pipeline)->pipeline);
3628 if (!ok)
3629 goto fail;
3630
3631 memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
3632 _mesa_hash_table_insert(device->meta.blit.cache[src_type],
3633 &(*pipeline)->key, *pipeline);
3634
3635 mtx_unlock(&device->meta.mtx);
3636 return true;
3637
3638 fail:
3639 mtx_unlock(&device->meta.mtx);
3640
3641 VkDevice _device = v3dv_device_to_handle(device);
3642 if (*pipeline) {
3643 if ((*pipeline)->pass)
3644 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
3645 if ((*pipeline)->pass_no_load)
3646 v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
3647 if ((*pipeline)->pipeline)
3648 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
3649 vk_free(&device->vk.alloc, *pipeline);
3650 *pipeline = NULL;
3651 }
3652
3653 return false;
3654 }
3655
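/* Turns a pair of blit offsets into an x/y origin plus width/height, clamped
 * to the image dimensions, and reports whether the region is mirrored along
 * each axis.
 */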
3656 static void
3657 compute_blit_box(const VkOffset3D *offsets,
3658 uint32_t image_w, uint32_t image_h,
3659 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
3660 bool *mirror_x, bool *mirror_y)
3661 {
3662 if (offsets[1].x >= offsets[0].x) {
3663 *mirror_x = false;
3664 *x = MIN2(offsets[0].x, image_w - 1);
3665 *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
3666 } else {
3667 *mirror_x = true;
3668 *x = MIN2(offsets[1].x, image_w - 1);
3669 *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
3670 }
3671 if (offsets[1].y >= offsets[0].y) {
3672 *mirror_y = false;
3673 *y = MIN2(offsets[0].y, image_h - 1);
3674 *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
3675 } else {
3676 *mirror_y = true;
3677 *y = MIN2(offsets[1].y, image_h - 1);
3678 *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
3679 }
3680 }
3681
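/* Computes the range of slices covered by a 3D blit region and whether the
 * blit is mirrored along the Z axis.
 */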
3682 static void
3683 compute_blit_3d_layers(const VkOffset3D *offsets,
3684 uint32_t *min_layer, uint32_t *max_layer,
3685 bool *mirror_z)
3686 {
3687 if (offsets[1].z >= offsets[0].z) {
3688 *mirror_z = false;
3689 *min_layer = offsets[0].z;
3690 *max_layer = offsets[1].z;
3691 } else {
3692 *mirror_z = true;
3693 *min_layer = offsets[1].z;
3694 *max_layer = offsets[0].z;
3695 }
3696 }
3697
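/* Creates the descriptor pool from which blit source descriptor sets are
 * allocated for this command buffer. The pool is registered as a private
 * object so it is destroyed together with the command buffer.
 */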
3698 static VkResult
3699 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
3700 {
3701 /* If this is not the first pool we create for this command buffer,
3702 * size it based on the size of the currently exhausted pool.
3703 */
3704 uint32_t descriptor_count = 64;
3705 if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
3706 struct v3dv_descriptor_pool *exhausted_pool =
3707 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
3708 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
3709 }
3710
3711 /* Create the descriptor pool */
3712 cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
3713 VkDescriptorPoolSize pool_size = {
3714 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
3715 .descriptorCount = descriptor_count,
3716 };
3717 VkDescriptorPoolCreateInfo info = {
3718 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
3719 .maxSets = descriptor_count,
3720 .poolSizeCount = 1,
3721 .pPoolSizes = &pool_size,
3722 .flags = 0,
3723 };
3724 VkResult result =
3725 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
3726 &info,
3727 &cmd_buffer->device->vk.alloc,
3728 &cmd_buffer->meta.blit.dspool);
3729
3730 if (result == VK_SUCCESS) {
3731 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
3732 const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
3733
3734 v3dv_cmd_buffer_add_private_obj(
3735 cmd_buffer, (uintptr_t) _pool,
3736 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
3737
3738 struct v3dv_descriptor_pool *pool =
3739 v3dv_descriptor_pool_from_handle(_pool);
3740 pool->is_driver_internal = true;
3741 }
3742
3743 return result;
3744 }
3745
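/* Allocates a descriptor set for the blit source texture, growing the
 * descriptor pool first if the current one has been exhausted.
 */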
3746 static VkResult
3747 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
3748 VkDescriptorSet *set)
3749 {
3750 /* Make sure we have a descriptor pool */
3751 VkResult result;
3752 if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
3753 result = create_blit_descriptor_pool(cmd_buffer);
3754 if (result != VK_SUCCESS)
3755 return result;
3756 }
3757 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
3758
3759 /* Allocate descriptor set */
3760 struct v3dv_device *device = cmd_buffer->device;
3761 VkDevice _device = v3dv_device_to_handle(device);
3762 VkDescriptorSetAllocateInfo info = {
3763 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
3764 .descriptorPool = cmd_buffer->meta.blit.dspool,
3765 .descriptorSetCount = 1,
3766 .pSetLayouts = &device->meta.blit.ds_layout,
3767 };
3768 result = v3dv_AllocateDescriptorSets(_device, &info, set);
3769
3770 /* If we ran out of pool space, grow the pool and try again */
3771 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
3772 result = create_blit_descriptor_pool(cmd_buffer);
3773 if (result == VK_SUCCESS) {
3774 info.descriptorPool = cmd_buffer->meta.blit.dspool;
3775 result = v3dv_AllocateDescriptorSets(_device, &info, set);
3776 }
3777 }
3778
3779 return result;
3780 }
3781
3782 /**
3783 * Returns true if the implementation supports the requested operation (even if
3784 * it failed to process it, for example, due to an out-of-memory error).
3785 *
3786 * The caller can specify the channels on the destination to be written via the
3787 * cmask parameter (which can be 0 to default to all channels), as well as a
3788 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
3789 * to use the default identity swizzle).
3790 */
3791 static bool
3792 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
3793 struct v3dv_image *dst,
3794 VkFormat dst_format,
3795 struct v3dv_image *src,
3796 VkFormat src_format,
3797 VkColorComponentFlags cmask,
3798 VkComponentMapping *cswizzle,
3799 const VkImageBlit2 *region,
3800 VkFilter filter,
3801 bool dst_is_padded_image)
3802 {
3803 bool handled = true;
3804 VkResult result;
3805 uint32_t dirty_dynamic_state = 0;
3806
3807 /* We don't support rendering to linear depth/stencil; this should have
3808 * been rewritten to a compatible color blit by the caller.
3809 */
3810 assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
3811 !vk_format_is_depth_or_stencil(dst_format));
3812
3813 /* Can't sample from linear images */
3814 if (src->vk.tiling == VK_IMAGE_TILING_LINEAR &&
3815 src->vk.image_type != VK_IMAGE_TYPE_1D) {
3816 return false;
3817 }
3818
3819 /* Rewrite combined D/S blits to compatible color blits */
3820 if (vk_format_is_depth_or_stencil(dst_format)) {
3821 assert(src_format == dst_format);
3822 assert(cmask == 0);
3823 switch(dst_format) {
3824 case VK_FORMAT_D16_UNORM:
3825 dst_format = VK_FORMAT_R16_UINT;
3826 break;
3827 case VK_FORMAT_D32_SFLOAT:
3828 dst_format = VK_FORMAT_R32_UINT;
3829 break;
3830 case VK_FORMAT_X8_D24_UNORM_PACK32:
3831 case VK_FORMAT_D24_UNORM_S8_UINT:
3832 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
3833 cmask |= VK_COLOR_COMPONENT_G_BIT |
3834 VK_COLOR_COMPONENT_B_BIT |
3835 VK_COLOR_COMPONENT_A_BIT;
3836 }
3837 if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
3838 assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
3839 cmask |= VK_COLOR_COMPONENT_R_BIT;
3840 }
3841 dst_format = VK_FORMAT_R8G8B8A8_UINT;
3842 break;
3843 default:
3844 unreachable("Unsupported depth/stencil format");
3845 };
3846 src_format = dst_format;
3847 }
3848
3849 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
3850 VK_COLOR_COMPONENT_G_BIT |
3851 VK_COLOR_COMPONENT_B_BIT |
3852 VK_COLOR_COMPONENT_A_BIT;
3853 if (cmask == 0)
3854 cmask = full_cmask;
3855
3856 VkComponentMapping ident_swizzle = {
3857 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
3858 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
3859 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
3860 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
3861 };
3862 if (!cswizzle)
3863 cswizzle = &ident_swizzle;
3864
3865 /* When we get here from a copy between compressed / uncompressed images
3866 * we choose to specify the destination blit region based on the size
3867 * semantics of the source image of the copy (see copy_image_blit), so we
3868 * need to apply those same semantics here when we compute the size of the
3869 * destination image level.
3870 */
3871 const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
3872 const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
3873 const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
3874 const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
3875 const uint32_t dst_level_w =
3876 u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
3877 region->dstSubresource.mipLevel);
3878 const uint32_t dst_level_h =
3879 u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
3880 region->dstSubresource.mipLevel);
3881
3882 const uint32_t src_level_w =
3883 u_minify(src->vk.extent.width, region->srcSubresource.mipLevel);
3884 const uint32_t src_level_h =
3885 u_minify(src->vk.extent.height, region->srcSubresource.mipLevel);
3886 const uint32_t src_level_d =
3887 u_minify(src->vk.extent.depth, region->srcSubresource.mipLevel);
3888
3889 uint32_t dst_x, dst_y, dst_w, dst_h;
3890 bool dst_mirror_x, dst_mirror_y;
3891 compute_blit_box(region->dstOffsets,
3892 dst_level_w, dst_level_h,
3893 &dst_x, &dst_y, &dst_w, &dst_h,
3894 &dst_mirror_x, &dst_mirror_y);
3895
3896 uint32_t src_x, src_y, src_w, src_h;
3897 bool src_mirror_x, src_mirror_y;
3898 compute_blit_box(region->srcOffsets,
3899 src_level_w, src_level_h,
3900 &src_x, &src_y, &src_w, &src_h,
3901 &src_mirror_x, &src_mirror_y);
3902
3903 uint32_t min_dst_layer;
3904 uint32_t max_dst_layer;
3905 bool dst_mirror_z = false;
3906 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
3907 min_dst_layer = region->dstSubresource.baseArrayLayer;
3908 max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
3909 } else {
3910 compute_blit_3d_layers(region->dstOffsets,
3911 &min_dst_layer, &max_dst_layer,
3912 &dst_mirror_z);
3913 }
3914
3915 uint32_t min_src_layer;
3916 uint32_t max_src_layer;
3917 bool src_mirror_z = false;
3918 if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
3919 min_src_layer = region->srcSubresource.baseArrayLayer;
3920 max_src_layer = min_src_layer + region->srcSubresource.layerCount;
3921 } else {
3922 compute_blit_3d_layers(region->srcOffsets,
3923 &min_src_layer, &max_src_layer,
3924 &src_mirror_z);
3925 }
3926
3927 uint32_t layer_count = max_dst_layer - min_dst_layer;
3928
3929 /* Translate source blit coordinates to normalized texture coordinates for
3930 * single sampled textures. For multisampled textures we require
3931 * unnormalized coordinates, since we can only do texelFetch on them.
3932 */
3933 float coords[4] = {
3934 (float)src_x,
3935 (float)src_y,
3936 (float)(src_x + src_w),
3937 (float)(src_y + src_h),
3938 };
3939
3940 if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
3941 coords[0] /= (float)src_level_w;
3942 coords[1] /= (float)src_level_h;
3943 coords[2] /= (float)src_level_w;
3944 coords[3] /= (float)src_level_h;
3945 }
3946
3947 /* Handle mirroring */
3948 const bool mirror_x = dst_mirror_x != src_mirror_x;
3949 const bool mirror_y = dst_mirror_y != src_mirror_y;
3950 const bool mirror_z = dst_mirror_z != src_mirror_z;
3951 float tex_coords[5] = {
3952 !mirror_x ? coords[0] : coords[2],
3953 !mirror_y ? coords[1] : coords[3],
3954 !mirror_x ? coords[2] : coords[0],
3955 !mirror_y ? coords[3] : coords[1],
3956 /* Z coordinate for 3D blit sources, to be filled for each
3957 * destination layer
3958 */
3959 0.0f
3960 };
3961
3962 /* For blits from 3D images we also need to compute the slice coordinate to
3963 * sample from, which will change for each layer in the destination.
3964 * Compute the step by which to advance it on each iteration.
3965 */
3966 const float src_z_step =
3967 (float)(max_src_layer - min_src_layer) / (float)layer_count;
3968
3969 /* Get the blit pipeline */
3970 struct v3dv_meta_blit_pipeline *pipeline = NULL;
3971 bool ok = get_blit_pipeline(cmd_buffer->device,
3972 dst_format, src_format, cmask, src->vk.image_type,
3973 dst->vk.samples, src->vk.samples,
3974 &pipeline);
3975 if (!ok)
3976 return handled;
3977 assert(pipeline && pipeline->pipeline &&
3978 pipeline->pass && pipeline->pass_no_load);
3979
3980 struct v3dv_device *device = cmd_buffer->device;
3981 assert(device->meta.blit.ds_layout);
3982
3983 VkDevice _device = v3dv_device_to_handle(device);
3984 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
3985
3986 /* Create sampler for blit source image */
3987 VkSamplerCreateInfo sampler_info = {
3988 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
3989 .magFilter = filter,
3990 .minFilter = filter,
3991 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
3992 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
3993 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
3994 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
3995 };
3996 VkSampler sampler;
3997 result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
3998 &sampler);
3999 if (result != VK_SUCCESS)
4000 goto fail;
4001
4002 v3dv_cmd_buffer_add_private_obj(
4003 cmd_buffer, (uintptr_t)sampler,
4004 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
4005
4006 /* Push command buffer state before starting meta operation */
4007 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
4008
4009 /* Push state that is common for all layers */
4010 v3dv_CmdBindPipeline(_cmd_buffer,
4011 VK_PIPELINE_BIND_POINT_GRAPHICS,
4012 pipeline->pipeline);
4013
4014 const VkViewport viewport = {
4015 .x = dst_x,
4016 .y = dst_y,
4017 .width = dst_w,
4018 .height = dst_h,
4019 .minDepth = 0.0f,
4020 .maxDepth = 1.0f
4021 };
4022 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
4023
4024 const VkRect2D scissor = {
4025 .offset = { dst_x, dst_y },
4026 .extent = { dst_w, dst_h }
4027 };
4028 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4029
4030 bool can_skip_tlb_load = false;
4031 const VkRect2D render_area = {
4032 .offset = { dst_x, dst_y },
4033 .extent = { dst_w, dst_h },
4034 };
4035
4036 /* Record per-layer commands */
4037 for (uint32_t i = 0; i < layer_count; i++) {
4038 /* Setup framebuffer */
4039 VkImageViewCreateInfo dst_image_view_info = {
4040 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4041 .image = v3dv_image_to_handle(dst),
4042 .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
4043 .format = dst_format,
4044 .subresourceRange = {
4045 .aspectMask = region->dstSubresource.aspectMask,
4046 .baseMipLevel = region->dstSubresource.mipLevel,
4047 .levelCount = 1,
4048 .baseArrayLayer = min_dst_layer + i,
4049 .layerCount = 1
4050 },
4051 };
4052 VkImageView dst_image_view;
4053 result = v3dv_create_image_view(device, &dst_image_view_info,
4054 &dst_image_view);
4055 if (result != VK_SUCCESS)
4056 goto fail;
4057
4058 v3dv_cmd_buffer_add_private_obj(
4059 cmd_buffer, (uintptr_t)dst_image_view,
4060 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4061
4062 VkFramebufferCreateInfo fb_info = {
4063 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4064 .renderPass = pipeline->pass,
4065 .attachmentCount = 1,
4066 .pAttachments = &dst_image_view,
4067 .width = dst_x + dst_w,
4068 .height = dst_y + dst_h,
4069 .layers = 1,
4070 };
4071
4072 VkFramebuffer fb;
4073 result = v3dv_CreateFramebuffer(_device, &fb_info,
4074 &cmd_buffer->device->vk.alloc, &fb);
4075 if (result != VK_SUCCESS)
4076 goto fail;
4077
4078 struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4079 framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4080 fb_info.height == dst_level_h &&
4081 dst_is_padded_image;
4082
4083 v3dv_cmd_buffer_add_private_obj(
4084 cmd_buffer, (uintptr_t)fb,
4085 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4086
4087 /* Setup descriptor set for blit source texture. We don't have to
4088 * register the descriptor as a private command buffer object since
4089 * all descriptors will be freed automatically with the descriptor
4090 * pool.
4091 */
4092 VkDescriptorSet set;
4093 result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4094 if (result != VK_SUCCESS)
4095 goto fail;
4096
4097 VkImageViewCreateInfo src_image_view_info = {
4098 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4099 .image = v3dv_image_to_handle(src),
4100 .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
4101 .format = src_format,
4102 .components = *cswizzle,
4103 .subresourceRange = {
4104 .aspectMask = region->srcSubresource.aspectMask,
4105 .baseMipLevel = region->srcSubresource.mipLevel,
4106 .levelCount = 1,
4107 .baseArrayLayer =
4108 src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4109 .layerCount = 1
4110 },
4111 };
4112 VkImageView src_image_view;
4113 result = v3dv_create_image_view(device, &src_image_view_info,
4114 &src_image_view);
4115 if (result != VK_SUCCESS)
4116 goto fail;
4117
4118 v3dv_cmd_buffer_add_private_obj(
4119 cmd_buffer, (uintptr_t)src_image_view,
4120 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4121
4122 VkDescriptorImageInfo image_info = {
4123 .sampler = sampler,
4124 .imageView = src_image_view,
4125 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4126 };
4127 VkWriteDescriptorSet write = {
4128 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4129 .dstSet = set,
4130 .dstBinding = 0,
4131 .dstArrayElement = 0,
4132 .descriptorCount = 1,
4133 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4134 .pImageInfo = &image_info,
4135 };
4136 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4137
4138 v3dv_CmdBindDescriptorSets(_cmd_buffer,
4139 VK_PIPELINE_BIND_POINT_GRAPHICS,
4140 device->meta.blit.p_layout,
4141 0, 1, &set,
4142 0, NULL);
4143
4144 /* If the region we are about to blit is tile-aligned, then we can
4145 * use the render pass version that won't pre-load the tile buffer
4146 * with the dst image contents before the blit. The exception is when we
4147 * don't have a full color mask, since in that case we need to preserve
4148 * the original value of some of the color components.
4149 *
4150 * Since all layers have the same area, we only need to compute this for
4151 * the first.
4152 */
4153 if (i == 0) {
4154 struct v3dv_render_pass *pipeline_pass =
4155 v3dv_render_pass_from_handle(pipeline->pass);
4156 can_skip_tlb_load =
4157 cmask == full_cmask &&
4158 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
4159 framebuffer, pipeline_pass, 0);
4160 }
4161
4162 /* Record blit */
4163 VkRenderPassBeginInfo rp_info = {
4164 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4165 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4166 pipeline->pass,
4167 .framebuffer = fb,
4168 .renderArea = render_area,
4169 .clearValueCount = 0,
4170 };
4171
4172 VkSubpassBeginInfo sp_info = {
4173 .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
4174 .contents = VK_SUBPASS_CONTENTS_INLINE,
4175 };
4176
4177 v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
4178 struct v3dv_job *job = cmd_buffer->state.job;
4179 if (!job)
4180 goto fail;
4181
4182 /* For 3D blits we need to compute the source slice to blit from (the Z
4183 * coordinate of the source sample operation). We want to choose this
4184 * based on the ratio of the depth of the source and the destination
4185 * images, picking the coordinate in the middle of each step.
4186 */
4187 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
4188 tex_coords[4] =
4189 !mirror_z ?
4190 (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
4191 (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
4192 }
4193
4194 v3dv_CmdPushConstants(_cmd_buffer,
4195 device->meta.blit.p_layout,
4196 VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
4197 &tex_coords);
4198
4199 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
4200
4201 VkSubpassEndInfo sp_end_info = {
4202 .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
4203 };
4204
4205 v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
4206 dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
4207 }
4208
4209 fail:
4210 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
4211
4212 return handled;
4213 }
4214
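/* vkCmdBlitImage2 entry point: tries the TFU path for each region first and
 * falls back to the shader-based blit for anything the TFU cannot handle.
 */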
4215 VKAPI_ATTR void VKAPI_CALL
4216 v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
4217 const VkBlitImageInfo2 *pBlitImageInfo)
4218 {
4219 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4220 V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
4221 V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
4222
4223 /* This command can only happen outside a render pass */
4224 assert(cmd_buffer->state.pass == NULL);
4225 assert(cmd_buffer->state.job == NULL);
4226
4227 /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
4228 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
4229 src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4230
4231 /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
4232 assert(!vk_format_is_compressed(dst->vk.format));
4233
4234 cmd_buffer->state.is_transfer = true;
4235
4236 for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
4237 if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
4238 continue;
4239 if (blit_shader(cmd_buffer,
4240 dst, dst->vk.format,
4241 src, src->vk.format,
4242 0, NULL,
4243 &pBlitImageInfo->pRegions[i],
4244 pBlitImageInfo->filter, true)) {
4245 continue;
4246 }
4247 unreachable("Unsupported blit operation");
4248 }
4249
4250 cmd_buffer->state.is_transfer = false;
4251 }
4252
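/* Fast path for multisample resolves using the TLB. Returns false when the
 * region or format cannot be handled this way, in which case the caller
 * falls back to the shader-based resolve.
 */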
4253 static bool
4254 resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
4255 struct v3dv_image *dst,
4256 struct v3dv_image *src,
4257 const VkImageResolve2 *region)
4258 {
4259 if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) ||
4260 !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) {
4261 return false;
4262 }
4263
4264 if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
4265 return false;
4266
4267 const VkFormat fb_format = src->vk.format;
4268
4269 uint32_t num_layers;
4270 if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
4271 num_layers = region->dstSubresource.layerCount;
4272 else
4273 num_layers = region->extent.depth;
4274 assert(num_layers > 0);
4275
4276 struct v3dv_job *job =
4277 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
4278 if (!job)
4279 return true;
4280
4281 const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
4282 const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
4283 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
4284 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
4285
4286 uint32_t internal_type, internal_bpp;
4287 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
4288 (fb_format, region->srcSubresource.aspectMask,
4289 &internal_type, &internal_bpp);
4290
4291 v3dv_job_start_frame(job, width, height, num_layers, false,
4292 1, internal_bpp, true);
4293
4294 struct v3dv_meta_framebuffer framebuffer;
4295 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
4296 internal_type, &job->frame_tiling);
4297
4298 v3dv_X(job->device, job_emit_binning_flush)(job);
4299 v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
4300 &framebuffer, region);
4301
4302 v3dv_cmd_buffer_finish_job(cmd_buffer);
4303 return true;
4304 }
4305
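/* Fallback resolve path: implements the resolve as a shader blit of the
 * source region into the destination (the blit fragment shader averages the
 * samples for non-integer formats).
 */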
4306 static bool
4307 resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
4308 struct v3dv_image *dst,
4309 struct v3dv_image *src,
4310 const VkImageResolve2 *region)
4311 {
4312 const VkImageBlit2 blit_region = {
4313 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
4314 .srcSubresource = region->srcSubresource,
4315 .srcOffsets = {
4316 region->srcOffset,
4317 {
4318 region->srcOffset.x + region->extent.width,
4319 region->srcOffset.y + region->extent.height,
4320 }
4321 },
4322 .dstSubresource = region->dstSubresource,
4323 .dstOffsets = {
4324 region->dstOffset,
4325 {
4326 region->dstOffset.x + region->extent.width,
4327 region->dstOffset.y + region->extent.height,
4328 }
4329 },
4330 };
4331 return blit_shader(cmd_buffer,
4332 dst, dst->vk.format,
4333 src, src->vk.format,
4334 0, NULL,
4335 &blit_region, VK_FILTER_NEAREST, true);
4336 }
4337
4338 VKAPI_ATTR void VKAPI_CALL
4339 v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
4340 const VkResolveImageInfo2 *info)
4341
4342 {
4343 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4344 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
4345 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
4346
4347 /* This command can only happen outside a render pass */
4348 assert(cmd_buffer->state.pass == NULL);
4349 assert(cmd_buffer->state.job == NULL);
4350
4351 assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
4352 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4353
4354 cmd_buffer->state.is_transfer = true;
4355
4356 for (uint32_t i = 0; i < info->regionCount; i++) {
4357 if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
4358 continue;
4359 if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
4360 continue;
4361 unreachable("Unsupported multismaple resolve operation");
4362 }
4363
4364 cmd_buffer->state.is_transfer = false;
4365 }
4366