1 /*
2 * Copyright © 2019 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25
26 #include "compiler/nir/nir_builder.h"
27 #include "broadcom/cle/v3dx_pack.h"
28 #include "vk_format_info.h"
29 #include "util/u_pack_color.h"
30
31 static uint32_t
32 meta_blit_key_hash(const void *key)
33 {
34 return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
35 }
36
37 static bool
38 meta_blit_key_compare(const void *key1, const void *key2)
39 {
40 return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
41 }
42
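/* The meta blit pipeline caches are keyed by a fixed-size blob of
 * V3DV_META_BLIT_CACHE_KEY_SIZE bytes, hashed and compared with the helpers
 * above. Entries are destroyed in v3dv_meta_blit_finish().
 */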
43 void
44 v3dv_meta_blit_init(struct v3dv_device *device)
45 {
46 for (uint32_t i = 0; i < 3; i++) {
47 device->meta.blit.cache[i] =
48 _mesa_hash_table_create(NULL,
49 meta_blit_key_hash,
50 meta_blit_key_compare);
51 }
52 }
53
54 void
55 v3dv_meta_blit_finish(struct v3dv_device *device)
56 {
57 VkDevice _device = v3dv_device_to_handle(device);
58
59 for (uint32_t i = 0; i < 3; i++) {
60 hash_table_foreach(device->meta.blit.cache[i], entry) {
61 struct v3dv_meta_blit_pipeline *item = entry->data;
62 v3dv_DestroyPipeline(_device, item->pipeline, &device->alloc);
63 v3dv_DestroyRenderPass(_device, item->pass, &device->alloc);
64 v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->alloc);
65 vk_free(&device->alloc, item);
66 }
67 _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
68 }
69
70 if (device->meta.blit.playout) {
71 v3dv_DestroyPipelineLayout(_device, device->meta.blit.playout,
72 &device->alloc);
73 }
74
75 if (device->meta.blit.dslayout) {
76 v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.dslayout,
77 &device->alloc);
78 }
79 }
80
81 static inline bool
82 can_use_tlb(struct v3dv_image *image,
83 const VkOffset3D *offset,
84 VkFormat *compat_format);
85
86 /**
87 * Copy operations implemented in this file don't operate on a framebuffer
88 * object provided by the user; however, since most use the TLB for this,
89 * we still need to have some representation of the framebuffer. For the most
90 * part, the job's frame tiling information is enough for this, however we
91 * still need additional information such as the internal type of our single
92 * render target, so we use this auxiliary struct to pass that information
93 * around.
94 */
95 struct framebuffer_data {
96 /* The internal type of the single render target */
97 uint32_t internal_type;
98
99 /* Supertile coverage */
100 uint32_t min_x_supertile;
101 uint32_t min_y_supertile;
102 uint32_t max_x_supertile;
103 uint32_t max_y_supertile;
104
105 /* Format info */
106 VkFormat vk_format;
107 const struct v3dv_format *format;
108 uint8_t internal_depth_type;
109 };
110
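/* Fills the auxiliary framebuffer_data struct from the job's frame tiling.
 * Supertile coverage always starts at (0,0) and spans the full frame, and
 * the internal depth type defaults to 32F unless the format has a
 * depth/stencil aspect.
 */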
111 static void
112 setup_framebuffer_data(struct framebuffer_data *fb,
113 VkFormat vk_format,
114 uint32_t internal_type,
115 const struct v3dv_frame_tiling *tiling)
116 {
117 fb->internal_type = internal_type;
118
119 /* Supertile coverage always starts at 0,0 */
120 uint32_t supertile_w_in_pixels =
121 tiling->tile_width * tiling->supertile_width;
122 uint32_t supertile_h_in_pixels =
123 tiling->tile_height * tiling->supertile_height;
124
125 fb->min_x_supertile = 0;
126 fb->min_y_supertile = 0;
127 fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
128 fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
129
130 fb->vk_format = vk_format;
131 fb->format = v3dv_get_format(vk_format);
132
133 fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
134 if (vk_format_is_depth_or_stencil(vk_format))
135 fb->internal_depth_type = v3dv_get_internal_depth_type(vk_format);
136 }
137
138 /* This chooses a tile buffer format that is appropriate for the copy operation.
139 * Typically, this is the image render target type; however, if we are copying
140 * depth/stencil to/from a buffer, the hardware can't do raster loads/stores, so
141 * we need to load and store to/from a tile color buffer using a compatible
142 * color format.
143 */
144 static uint32_t
145 choose_tlb_format(struct framebuffer_data *framebuffer,
146 VkImageAspectFlags aspect,
147 bool for_store,
148 bool is_copy_to_buffer,
149 bool is_copy_from_buffer)
150 {
151 if (is_copy_to_buffer || is_copy_from_buffer) {
152 switch (framebuffer->vk_format) {
153 case VK_FORMAT_D16_UNORM:
154 return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
155 case VK_FORMAT_D32_SFLOAT:
156 return V3D_OUTPUT_IMAGE_FORMAT_R32F;
157 case VK_FORMAT_X8_D24_UNORM_PACK32:
158 return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
159 case VK_FORMAT_D24_UNORM_S8_UINT:
160 /* When storing the stencil aspect of a combined depth/stencil image
161 * to a buffer, the Vulkan spec states that the output buffer must
162 * have packed stencil values, so we choose an R8UI format for our
163 * store outputs. For the load input we still want RGBA8UI since the
164 * source image contains 4 channels (including the 3 channels
165 * containing the 24-bit depth value).
166 *
167 * When loading the stencil aspect of a combined depth/stencil image
168 * from a buffer, we read packed 8-bit stencil values from the buffer
169 * that we need to put into the LSB of the 32-bit format (the R
170 * channel), so we use R8UI. For the store, if we used R8UI then we
171 * would write 8-bit stencil values consecutively over depth channels,
172 * so we need to use RGBA8UI. This will write each stencil value in
173 * its correct position, but will overwrite depth values (channels G,
174 * B, A) with undefined values. To fix this, we will have to restore
175 * the depth aspect from the Z tile buffer, which we should pre-load
176 * from the image before the store.
177 */
178 if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
179 return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
180 } else {
181 assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
182 if (is_copy_to_buffer) {
183 return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
184 V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
185 } else {
186 assert(is_copy_from_buffer);
187 return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
188 V3D_OUTPUT_IMAGE_FORMAT_R8UI;
189 }
190 }
191 default: /* Color formats */
192 return framebuffer->format->rt_type;
193 break;
194 }
195 } else {
196 return framebuffer->format->rt_type;
197 }
198 }
199
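/* Returns true if the format's first component maps to blue (a BGR-style
 * layout), in which case TLB loads/stores need the red/blue swap.
 */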
200 static inline bool
201 format_needs_rb_swap(VkFormat format)
202 {
203 const uint8_t *swizzle = v3dv_get_format_swizzle(format);
204 return swizzle[0] == PIPE_SWIZZLE_Z;
205 }
206
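/* Computes the internal type/bpp to use for the single render target given
 * the image format and the aspects being copied. Depth/stencil aspects are
 * mapped to compatible color types, since they can't be stored in raster
 * format directly.
 */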
207 static void
208 get_internal_type_bpp_for_image_aspects(VkFormat vk_format,
209 VkImageAspectFlags aspect_mask,
210 uint32_t *internal_type,
211 uint32_t *internal_bpp)
212 {
213 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
214 VK_IMAGE_ASPECT_STENCIL_BIT;
215
216 /* We can't store depth/stencil pixel formats to a raster format, so
217 * instead we load our depth/stencil aspects to a compatible color
218 * format.
219 */
220 /* FIXME: pre-compute this at image creation time? */
221 if (aspect_mask & ds_aspects) {
222 switch (vk_format) {
223 case VK_FORMAT_D16_UNORM:
224 *internal_type = V3D_INTERNAL_TYPE_16UI;
225 *internal_bpp = V3D_INTERNAL_BPP_64;
226 break;
227 case VK_FORMAT_D32_SFLOAT:
228 *internal_type = V3D_INTERNAL_TYPE_32F;
229 *internal_bpp = V3D_INTERNAL_BPP_128;
230 break;
231 case VK_FORMAT_X8_D24_UNORM_PACK32:
232 case VK_FORMAT_D24_UNORM_S8_UINT:
233 /* Use RGBA8 format so we can relocate the X/S bits in the appropriate
234 * place to match Vulkan expectations. See the comment on the tile
235 * load command for more details.
236 */
237 *internal_type = V3D_INTERNAL_TYPE_8UI;
238 *internal_bpp = V3D_INTERNAL_BPP_32;
239 break;
240 default:
241 assert(!"unsupported format");
242 break;
243 }
244 } else {
245 const struct v3dv_format *format = v3dv_get_format(vk_format);
246 v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
247 internal_type,
248 internal_bpp);
249 }
250 }
251
252 struct rcl_clear_info {
253 const union v3dv_clear_value *clear_value;
254 struct v3dv_image *image;
255 VkImageAspectFlags aspects;
256 uint32_t layer;
257 uint32_t level;
258 };
259
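/* Emits the common RCL prologue for the copy/clear jobs in this file:
 * tile rendering mode config, optional clear color records (including the
 * padded-height setup for UIF-tiled images), render target config, Z/S
 * clear values and the initial tile list block size. Returns NULL on OOM.
 */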
260 static struct v3dv_cl *
261 emit_rcl_prologue(struct v3dv_job *job,
262 struct framebuffer_data *fb,
263 const struct rcl_clear_info *clear_info)
264 {
265 const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
266
267 struct v3dv_cl *rcl = &job->rcl;
268 v3dv_cl_ensure_space_with_branch(rcl, 200 +
269 tiling->layers * 256 *
270 cl_packet_length(SUPERTILE_COORDINATES));
271 if (job->cmd_buffer->state.oom)
272 return NULL;
273
274 cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
275 config.early_z_disable = true;
276 config.image_width_pixels = tiling->width;
277 config.image_height_pixels = tiling->height;
278 config.number_of_render_targets = 1;
279 config.multisample_mode_4x = tiling->msaa;
280 config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
281 config.internal_depth_type = fb->internal_depth_type;
282 }
283
284 if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
285 uint32_t clear_pad = 0;
286 if (clear_info->image) {
287 const struct v3dv_image *image = clear_info->image;
288 const struct v3d_resource_slice *slice =
289 &image->slices[clear_info->level];
290 if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
291 slice->tiling == VC5_TILING_UIF_XOR) {
292 int uif_block_height = v3d_utile_height(image->cpp) * 2;
293
294 uint32_t implicit_padded_height =
295 align(tiling->height, uif_block_height) / uif_block_height;
296
297 if (slice->padded_height_of_output_image_in_uif_blocks -
298 implicit_padded_height >= 15) {
299 clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
300 }
301 }
302 }
303
304 const uint32_t *color = &clear_info->clear_value->color[0];
305 cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
306 clear.clear_color_low_32_bits = color[0];
307 clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
308 clear.render_target_number = 0;
309 };
310
311 if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
312 cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
313 clear.clear_color_mid_low_32_bits =
314 ((color[1] >> 24) | (color[2] << 8));
315 clear.clear_color_mid_high_24_bits =
316 ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
317 clear.render_target_number = 0;
318 };
319 }
320
321 if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
322 cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
323 clear.uif_padded_height_in_uif_blocks = clear_pad;
324 clear.clear_color_high_16_bits = color[3] >> 16;
325 clear.render_target_number = 0;
326 };
327 }
328 }
329
330 cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
331 rt.render_target_0_internal_bpp = tiling->internal_bpp;
332 rt.render_target_0_internal_type = fb->internal_type;
333 rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
334 }
335
336 cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
337 clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
338 clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
339 };
340
341 cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
342 init.use_auto_chained_tile_lists = true;
343 init.size_of_first_block_in_chained_tile_lists =
344 TILE_ALLOCATION_BLOCK_SIZE_64B;
345 }
346
347 return rcl;
348 }
349
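/* Emits the per-layer frame setup: the tile allocation base address, the
 * supertile configuration and the two dummy tile passes required by the
 * GFXH-1742 workaround, clearing the tile buffers on the first pass when a
 * clear value is provided.
 */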
350 static void
351 emit_frame_setup(struct v3dv_job *job,
352 uint32_t layer,
353 const union v3dv_clear_value *clear_value)
354 {
355 v3dv_return_if_oom(NULL, job);
356
357 const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
358
359 struct v3dv_cl *rcl = &job->rcl;
360
361 const uint32_t tile_alloc_offset =
362 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
363 cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
364 list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
365 }
366
367 cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
368 config.number_of_bin_tile_lists = 1;
369 config.total_frame_width_in_tiles = tiling->draw_tiles_x;
370 config.total_frame_height_in_tiles = tiling->draw_tiles_y;
371
372 config.supertile_width_in_tiles = tiling->supertile_width;
373 config.supertile_height_in_tiles = tiling->supertile_height;
374
375 config.total_frame_width_in_supertiles =
376 tiling->frame_width_in_supertiles;
377 config.total_frame_height_in_supertiles =
378 tiling->frame_height_in_supertiles;
379 }
380
381 /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
382 * it here.
383 */
384 for (int i = 0; i < 2; i++) {
385 cl_emit(rcl, TILE_COORDINATES, coords);
386 cl_emit(rcl, END_OF_LOADS, end);
387 cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
388 store.buffer_to_store = NONE;
389 }
390 if (clear_value && i == 0) {
391 cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
392 clear.clear_z_stencil_buffer = true;
393 clear.clear_all_render_targets = true;
394 }
395 }
396 cl_emit(rcl, END_OF_TILE_MARKER, end);
397 }
398
399 cl_emit(rcl, FLUSH_VCD_CACHE, flush);
400 }
401
402 static void
403 emit_supertile_coordinates(struct v3dv_job *job,
404 struct framebuffer_data *framebuffer)
405 {
406 v3dv_return_if_oom(NULL, job);
407
408 struct v3dv_cl *rcl = &job->rcl;
409
410 const uint32_t min_y = framebuffer->min_y_supertile;
411 const uint32_t max_y = framebuffer->max_y_supertile;
412 const uint32_t min_x = framebuffer->min_x_supertile;
413 const uint32_t max_x = framebuffer->max_x_supertile;
414
415 for (int y = min_y; y <= max_y; y++) {
416 for (int x = min_x; x <= max_x; x++) {
417 cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
418 coords.column_number_in_supertiles = x;
419 coords.row_number_in_supertiles = y;
420 }
421 }
422 }
423 }
424
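/* Emits a TLB load from a linear (raster-order) buffer at the given offset
 * and stride.
 */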
425 static void
426 emit_linear_load(struct v3dv_cl *cl,
427 uint32_t buffer,
428 struct v3dv_bo *bo,
429 uint32_t offset,
430 uint32_t stride,
431 uint32_t format)
432 {
433 cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
434 load.buffer_to_load = buffer;
435 load.address = v3dv_cl_address(bo, offset);
436 load.input_image_format = format;
437 load.memory_format = VC5_TILING_RASTER;
438 load.height_in_ub_or_stride = stride;
439 load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
440 }
441 }
442
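/* Emits a TLB store of render target 0 to a linear (raster-order) buffer,
 * storing all samples when msaa is set and only sample 0 otherwise.
 */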
443 static void
444 emit_linear_store(struct v3dv_cl *cl,
445 uint32_t buffer,
446 struct v3dv_bo *bo,
447 uint32_t offset,
448 uint32_t stride,
449 bool msaa,
450 uint32_t format)
451 {
452 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
453 store.buffer_to_store = RENDER_TARGET_0;
454 store.address = v3dv_cl_address(bo, offset);
455 store.clear_buffer_being_stored = false;
456 store.output_image_format = format;
457 store.memory_format = VC5_TILING_RASTER;
458 store.height_in_ub_or_stride = stride;
459 store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
460 V3D_DECIMATE_MODE_SAMPLE_0;
461 }
462 }
463
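/* Emits a TLB load from an image layer/level. For image to/from buffer
 * copies we always load into the color tile buffer (RT0), even for
 * depth/stencil aspects, and apply the channel reverse / R-B swap
 * workarounds for D24 formats described below.
 */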
464 static void
465 emit_image_load(struct v3dv_cl *cl,
466 struct framebuffer_data *framebuffer,
467 struct v3dv_image *image,
468 VkImageAspectFlags aspect,
469 uint32_t layer,
470 uint32_t mip_level,
471 bool is_copy_to_buffer,
472 bool is_copy_from_buffer)
473 {
474 uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
475
476 /* For image to/from buffer copies we always load to and store from RT0,
477 * even for depth/stencil aspects, because the hardware can't do raster
478 * stores or loads from/to the depth/stencil tile buffers.
479 */
480 bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
481 aspect == VK_IMAGE_ASPECT_COLOR_BIT;
482
483 const struct v3d_resource_slice *slice = &image->slices[mip_level];
484 cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
485 load.buffer_to_load = load_to_color_tlb ?
486 RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
487
488 load.address = v3dv_cl_address(image->mem->bo, layer_offset);
489
490 load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
491 is_copy_to_buffer,
492 is_copy_from_buffer);
493 load.memory_format = slice->tiling;
494
495 /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
496 * expects the depth value in the LSB bits of each 32-bit pixel.
497 * Unfortunately, the hardware seems to put the S8/X8 bits there and the
498 * depth bits on the MSB. To work around that we can reverse the channel
499 * order and then swap the R/B channels to get what we want.
500 *
501 * NOTE: reversing and swapping only gets us the behavior we want if the
502 * operations happen in that exact order, which seems to be the case when
503 * done on the tile buffer load operations. On the store, it seems the
504 * order is not the same. The order on the store is probably reversed so
505 * that reversing and swapping on both the load and the store preserves
506 * the original order of the channels in memory.
507 *
508 * Notice that we only need to do this when copying to a buffer, where
509 * depth and stencil aspects are copied as separate regions and
510 * the spec expects them to be tightly packed.
511 */
512 bool needs_rb_swap = false;
513 bool needs_chan_reverse = false;
514 if (is_copy_to_buffer &&
515 (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
516 (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
517 (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
518 needs_rb_swap = true;
519 needs_chan_reverse = true;
520 } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
521 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
522 /* This is not a raw data copy (i.e. we are clearing the image),
523 * so we need to make sure we respect the format swizzle.
524 */
525 needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
526 }
527
528 load.r_b_swap = needs_rb_swap;
529 load.channel_reverse = needs_chan_reverse;
530
531 if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
532 slice->tiling == VC5_TILING_UIF_XOR) {
533 load.height_in_ub_or_stride =
534 slice->padded_height_of_output_image_in_uif_blocks;
535 } else if (slice->tiling == VC5_TILING_RASTER) {
536 load.height_in_ub_or_stride = slice->stride;
537 }
538
539 if (image->samples > VK_SAMPLE_COUNT_1_BIT)
540 load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
541 else
542 load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
543 }
544 }
545
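/* Emits a TLB store to an image layer/level, mirroring emit_image_load():
 * RT0 is used for image to/from buffer copies, the D24 channel workarounds
 * are applied for copies from a buffer, and the decimate mode selects
 * between per-sample stores and multisample resolves.
 */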
546 static void
547 emit_image_store(struct v3dv_cl *cl,
548 struct framebuffer_data *framebuffer,
549 struct v3dv_image *image,
550 VkImageAspectFlags aspect,
551 uint32_t layer,
552 uint32_t mip_level,
553 bool is_copy_to_buffer,
554 bool is_copy_from_buffer,
555 bool is_multisample_resolve)
556 {
557 uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
558
559 bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
560 aspect == VK_IMAGE_ASPECT_COLOR_BIT;
561
562 const struct v3d_resource_slice *slice = &image->slices[mip_level];
563 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
564 store.buffer_to_store = store_from_color_tlb ?
565 RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
566
567 store.address = v3dv_cl_address(image->mem->bo, layer_offset);
568 store.clear_buffer_being_stored = false;
569
570 /* See rationale in emit_image_load() */
571 bool needs_rb_swap = false;
572 bool needs_chan_reverse = false;
573 if (is_copy_from_buffer &&
574 (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
575 (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
576 (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
577 needs_rb_swap = true;
578 needs_chan_reverse = true;
579 } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
580 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
581 needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
582 }
583
584 store.r_b_swap = needs_rb_swap;
585 store.channel_reverse = needs_chan_reverse;
586
587 store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
588 is_copy_to_buffer,
589 is_copy_from_buffer);
590 store.memory_format = slice->tiling;
591 if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
592 slice->tiling == VC5_TILING_UIF_XOR) {
593 store.height_in_ub_or_stride =
594 slice->padded_height_of_output_image_in_uif_blocks;
595 } else if (slice->tiling == VC5_TILING_RASTER) {
596 store.height_in_ub_or_stride = slice->stride;
597 }
598
599 if (image->samples > VK_SAMPLE_COUNT_1_BIT)
600 store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
601 else if (is_multisample_resolve)
602 store.decimate_mode = V3D_DECIMATE_MODE_4X;
603 else
604 store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
605 }
606 }
607
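/* Emits the generic tile list for copying one layer of an image to a
 * buffer: load the image layer into the TLB, then store RT0 linearly into
 * the buffer using the stride implied by bufferRowLength/bufferImageHeight
 * (falling back to the image extent when they are zero).
 */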
608 static void
609 emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
610 struct framebuffer_data *framebuffer,
611 struct v3dv_buffer *buffer,
612 struct v3dv_image *image,
613 uint32_t layer_offset,
614 const VkBufferImageCopy *region)
615 {
616 struct v3dv_cl *cl = &job->indirect;
617 v3dv_cl_ensure_space(cl, 200, 1);
618 v3dv_return_if_oom(NULL, job);
619
620 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
621
622 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
623
624 /* Load image to TLB */
625 assert((image->type != VK_IMAGE_TYPE_3D &&
626 layer_offset < region->imageSubresource.layerCount) ||
627 layer_offset < image->extent.depth);
628
629 const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ?
630 region->imageSubresource.baseArrayLayer + layer_offset :
631 region->imageOffset.z + layer_offset;
632
633 emit_image_load(cl, framebuffer, image,
634 region->imageSubresource.aspectMask,
635 image_layer,
636 region->imageSubresource.mipLevel,
637 true, false);
638
639 cl_emit(cl, END_OF_LOADS, end);
640
641 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
642
643 /* Store TLB to buffer */
644 uint32_t width, height;
645 if (region->bufferRowLength == 0)
646 width = region->imageExtent.width;
647 else
648 width = region->bufferRowLength;
649
650 if (region->bufferImageHeight == 0)
651 height = region->imageExtent.height;
652 else
653 height = region->bufferImageHeight;
654
655 /* Handle copy from compressed format */
656 width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
657 height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
658
659 /* If we are storing stencil from a combined depth/stencil format the
660 * Vulkan spec states that the output buffer must have packed stencil
661 * values, where each stencil value is 1 byte.
662 */
663 uint32_t cpp =
664 region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
665 1 : image->cpp;
666 uint32_t buffer_stride = width * cpp;
667 uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
668 height * buffer_stride * layer_offset;
669
670 uint32_t format = choose_tlb_format(framebuffer,
671 region->imageSubresource.aspectMask,
672 true, true, false);
673 bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
674
675 emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
676 buffer_offset, buffer_stride, msaa, format);
677
678 cl_emit(cl, END_OF_TILE_MARKER, end);
679
680 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
681
682 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
683 branch.start = tile_list_start;
684 branch.end = v3dv_cl_get_address(cl);
685 }
686 }
687
688 static void
689 emit_copy_layer_to_buffer(struct v3dv_job *job,
690 struct v3dv_buffer *buffer,
691 struct v3dv_image *image,
692 struct framebuffer_data *framebuffer,
693 uint32_t layer,
694 const VkBufferImageCopy *region)
695 {
696 emit_frame_setup(job, layer, NULL);
697 emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
698 image, layer, region);
699 emit_supertile_coordinates(job, framebuffer);
700 }
701
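/* Emits the full RCL for an image-to-buffer copy: the common prologue plus
 * one frame setup, per-tile list and supertile walk per layer.
 */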
702 static void
703 emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
704 struct v3dv_buffer *buffer,
705 struct v3dv_image *image,
706 struct framebuffer_data *framebuffer,
707 const VkBufferImageCopy *region)
708 {
709 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
710 v3dv_return_if_oom(NULL, job);
711
712 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
713 emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
714 cl_emit(rcl, END_OF_RENDERING, end);
715 }
716
717 /* Implements a copy using the TLB.
718 *
719 * This only works if we are copying from offset (0,0), since a TLB store for
720 * tile (x,y) will be written at the same tile offset into the destination.
721 * When this requirement is not met, we need to use a blit instead.
722 *
723 * Returns true if the implementation supports the requested operation (even if
724 * it failed to process it, for example, due to an out-of-memory error).
725 *
726 */
727 static bool
728 copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
729 struct v3dv_buffer *buffer,
730 struct v3dv_image *image,
731 const VkBufferImageCopy *region)
732 {
733 VkFormat fb_format;
734 if (!can_use_tlb(image, &region->imageOffset, &fb_format))
735 return false;
736
737 uint32_t internal_type, internal_bpp;
738 get_internal_type_bpp_for_image_aspects(fb_format,
739 region->imageSubresource.aspectMask,
740 &internal_type, &internal_bpp);
741
742 uint32_t num_layers;
743 if (image->type != VK_IMAGE_TYPE_3D)
744 num_layers = region->imageSubresource.layerCount;
745 else
746 num_layers = region->imageExtent.depth;
747 assert(num_layers > 0);
748
749 struct v3dv_job *job =
750 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
751 if (!job)
752 return true;
753
754 /* Handle copy from compressed format using a compatible format */
755 const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
756 const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
757 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
758 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
759
760 v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
761
762 struct framebuffer_data framebuffer;
763 setup_framebuffer_data(&framebuffer, fb_format, internal_type,
764 &job->frame_tiling);
765
766 v3dv_job_emit_binning_flush(job);
767 emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);
768
769 v3dv_cmd_buffer_finish_job(cmd_buffer);
770
771 return true;
772 }
773
774 static bool
775 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
776 struct v3dv_image *dst,
777 VkFormat dst_format,
778 struct v3dv_image *src,
779 VkFormat src_format,
780 VkColorComponentFlags cmask,
781 VkComponentMapping *cswizzle,
782 const VkImageBlit *region,
783 VkFilter filter,
784 bool dst_is_padded_image);
785
786 /**
787 * Returns true if the implementation supports the requested operation (even if
788 * it failed to process it, for example, due to an out-of-memory error).
789 */
790 static bool
791 copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
792 struct v3dv_buffer *buffer,
793 struct v3dv_image *image,
794 const VkBufferImageCopy *region)
795 {
796 bool handled = false;
797
798 /* Generally, the bpp of the data in the buffer matches that of the
799 * source image. The exception is the case where we are copying
800 * stencil (8bpp) to a combined d24s8 image (32bpp).
801 */
802 uint32_t buffer_bpp = image->cpp;
803
804 VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;
805
806 /* Because we are going to implement the copy as a blit, we need to create
807 * a linear image from the destination buffer and we also want our blit
808 * source and destination formats to be the same (to avoid any format
809 * conversions), so we choose a canonical format that matches the
810 * source image bpp.
811 *
812 * The exception to the above is copying from combined depth/stencil images
813 * because we are copying only one aspect of the image, so we need to setup
814 * our formats, color write mask and source swizzle mask to match that.
815 */
816 VkFormat dst_format;
817 VkFormat src_format;
818 VkColorComponentFlags cmask = 0; /* All components */
819 VkComponentMapping cswizzle = {
820 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
821 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
822 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
823 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
824 };
825 switch (buffer_bpp) {
826 case 16:
827 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
828 dst_format = VK_FORMAT_R32G32B32A32_UINT;
829 src_format = dst_format;
830 break;
831 case 8:
832 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
833 dst_format = VK_FORMAT_R16G16B16A16_UINT;
834 src_format = dst_format;
835 break;
836 case 4:
837 switch (copy_aspect) {
838 case VK_IMAGE_ASPECT_COLOR_BIT:
839 src_format = VK_FORMAT_R8G8B8A8_UINT;
840 dst_format = VK_FORMAT_R8G8B8A8_UINT;
841 break;
842 case VK_IMAGE_ASPECT_DEPTH_BIT:
843 assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
844 image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
845 image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
846 if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
847 src_format = VK_FORMAT_R32_UINT;
848 dst_format = VK_FORMAT_R32_UINT;
849 } else {
850 /* We want to write depth in the buffer in the first 24-bits,
851 * however, the hardware has depth in bits 8-31, so we swizzle the
852 * source components to match what we want. Also, we don't
853 * want to write bits 24-31 in the destination.
854 */
855 src_format = VK_FORMAT_R8G8B8A8_UINT;
856 dst_format = VK_FORMAT_R8G8B8A8_UINT;
857 cmask = VK_COLOR_COMPONENT_R_BIT |
858 VK_COLOR_COMPONENT_G_BIT |
859 VK_COLOR_COMPONENT_B_BIT;
860 cswizzle.r = VK_COMPONENT_SWIZZLE_G;
861 cswizzle.g = VK_COMPONENT_SWIZZLE_B;
862 cswizzle.b = VK_COMPONENT_SWIZZLE_A;
863 cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
864 }
865 break;
866 case VK_IMAGE_ASPECT_STENCIL_BIT:
867 assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
868 assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
869 /* Copying from S8D24. We want to write 8-bit stencil values only,
870 * so adjust the buffer bpp for that. Since the hardware stores stencil
871 * in the LSB, we can just do a RGBA8UI to R8UI blit.
872 */
873 src_format = VK_FORMAT_R8G8B8A8_UINT;
874 dst_format = VK_FORMAT_R8_UINT;
875 buffer_bpp = 1;
876 break;
877 default:
878 unreachable("unsupported aspect");
879 return handled;
880 };
881 break;
882 case 2:
883 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
884 copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
885 dst_format = VK_FORMAT_R16_UINT;
886 src_format = dst_format;
887 break;
888 case 1:
889 assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
890 dst_format = VK_FORMAT_R8_UINT;
891 src_format = dst_format;
892 break;
893 default:
894 unreachable("unsupported bit-size");
895 return handled;
896 };
897
898 /* The hardware doesn't support linear depth/stencil stores, so we
899 * implement copies of depth/stencil aspect as color copies using a
900 * compatible color format.
901 */
902 assert(vk_format_is_color(src_format));
903 assert(vk_format_is_color(dst_format));
904 copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
905
906 /* We should be able to handle the blit if we got this far */
907 handled = true;
908
909 /* Obtain the 2D buffer region spec */
910 uint32_t buf_width, buf_height;
911 if (region->bufferRowLength == 0)
912 buf_width = region->imageExtent.width;
913 else
914 buf_width = region->bufferRowLength;
915
916 if (region->bufferImageHeight == 0)
917 buf_height = region->imageExtent.height;
918 else
919 buf_height = region->bufferImageHeight;
920
921 /* If the image is compressed, the bpp refers to blocks, not pixels */
922 uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
923 uint32_t block_height = vk_format_get_blockheight(image->vk_format);
924 buf_width = buf_width / block_width;
925 buf_height = buf_height / block_height;
926
927 /* Compute layers to copy */
928 uint32_t num_layers;
929 if (image->type != VK_IMAGE_TYPE_3D)
930 num_layers = region->imageSubresource.layerCount;
931 else
932 num_layers = region->imageExtent.depth;
933 assert(num_layers > 0);
934
935 /* Our blit interface can see the real format of the images to detect
936 * copies between compressed and uncompressed images and adapt the
937 * blit region accordingly. Here we are just doing a raw copy of
938 * compressed data, but we are passing an uncompressed view of the
939 * buffer for the blit destination image (since compressed formats are
940 * not renderable), so we also want to provide an uncompressed view of
941 * the source image.
942 */
943 VkResult result;
944 struct v3dv_device *device = cmd_buffer->device;
945 VkDevice _device = v3dv_device_to_handle(device);
946 if (vk_format_is_compressed(image->vk_format)) {
947 VkImage uiview;
948 VkImageCreateInfo uiview_info = {
949 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
950 .imageType = VK_IMAGE_TYPE_3D,
951 .format = dst_format,
952 .extent = { buf_width, buf_height, image->extent.depth },
953 .mipLevels = image->levels,
954 .arrayLayers = image->array_size,
955 .samples = image->samples,
956 .tiling = image->tiling,
957 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
958 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
959 .queueFamilyIndexCount = 0,
960 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
961 };
962 result = v3dv_CreateImage(_device, &uiview_info, &device->alloc, &uiview);
963 if (result != VK_SUCCESS)
964 return handled;
965
966 v3dv_cmd_buffer_add_private_obj(
967 cmd_buffer, (uintptr_t)uiview,
968 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
969
970 result = v3dv_BindImageMemory(_device, uiview,
971 v3dv_device_memory_to_handle(image->mem),
972 image->mem_offset);
973 if (result != VK_SUCCESS)
974 return handled;
975
976 image = v3dv_image_from_handle(uiview);
977 }
978
979 /* Copy requested layers */
980 for (uint32_t i = 0; i < num_layers; i++) {
981 /* Create the destination blit image from the destination buffer */
982 VkImageCreateInfo image_info = {
983 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
984 .imageType = VK_IMAGE_TYPE_2D,
985 .format = dst_format,
986 .extent = { buf_width, buf_height, 1 },
987 .mipLevels = 1,
988 .arrayLayers = 1,
989 .samples = VK_SAMPLE_COUNT_1_BIT,
990 .tiling = VK_IMAGE_TILING_LINEAR,
991 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
992 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
993 .queueFamilyIndexCount = 0,
994 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
995 };
996
997 VkImage buffer_image;
998 result =
999 v3dv_CreateImage(_device, &image_info, &device->alloc, &buffer_image);
1000 if (result != VK_SUCCESS)
1001 return handled;
1002
1003 v3dv_cmd_buffer_add_private_obj(
1004 cmd_buffer, (uintptr_t)buffer_image,
1005 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
1006
1007 /* Bind the buffer memory to the image */
1008 VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
1009 i * buf_width * buf_height * buffer_bpp;
1010 result = v3dv_BindImageMemory(_device, buffer_image,
1011 v3dv_device_memory_to_handle(buffer->mem),
1012 buffer_offset);
1013 if (result != VK_SUCCESS)
1014 return handled;
1015
1016 /* Blit-copy the requested image extent.
1017 *
1018 * Since we are copying, the blit must use the same format on the
1019 * destination and source images to avoid format conversions. The
1020 * only exception is copying stencil, which we upload to a R8UI source
1021 * image, but that we need to blit to a S8D24 destination (the only
1022 * stencil format we support).
1023 */
1024 const VkImageBlit blit_region = {
1025 .srcSubresource = {
1026 .aspectMask = copy_aspect,
1027 .mipLevel = region->imageSubresource.mipLevel,
1028 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
1029 .layerCount = 1,
1030 },
1031 .srcOffsets = {
1032 {
1033 DIV_ROUND_UP(region->imageOffset.x, block_width),
1034 DIV_ROUND_UP(region->imageOffset.y, block_height),
1035 region->imageOffset.z + i,
1036 },
1037 {
1038 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
1039 block_width),
1040 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
1041 block_height),
1042 region->imageOffset.z + i + 1,
1043 },
1044 },
1045 .dstSubresource = {
1046 .aspectMask = copy_aspect,
1047 .mipLevel = 0,
1048 .baseArrayLayer = 0,
1049 .layerCount = 1,
1050 },
1051 .dstOffsets = {
1052 { 0, 0, 0 },
1053 {
1054 DIV_ROUND_UP(region->imageExtent.width, block_width),
1055 DIV_ROUND_UP(region->imageExtent.height, block_height),
1056 1
1057 },
1058 },
1059 };
1060
1061 handled = blit_shader(cmd_buffer,
1062 v3dv_image_from_handle(buffer_image), dst_format,
1063 image, src_format,
1064 cmask, &cswizzle,
1065 &blit_region, VK_FILTER_NEAREST, false);
1066 if (!handled) {
1067 /* This is unexpected, we should have a supported blit spec */
1068 unreachable("Unable to blit buffer to destination image");
1069 return false;
1070 }
1071 }
1072
1073 assert(handled);
1074 return true;
1075 }
1076
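/* Maps formats that can't be rendered via the TLB to a renderable format
 * with the same per-pixel (or per-block) size that can be used for raw
 * copies.
 */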
1077 static VkFormat
1078 get_compatible_tlb_format(VkFormat format)
1079 {
1080 switch (format) {
1081 case VK_FORMAT_R8G8B8A8_SNORM:
1082 return VK_FORMAT_R8G8B8A8_UINT;
1083
1084 case VK_FORMAT_R8G8_SNORM:
1085 return VK_FORMAT_R8G8_UINT;
1086
1087 case VK_FORMAT_R8_SNORM:
1088 return VK_FORMAT_R8_UINT;
1089
1090 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
1091 return VK_FORMAT_A8B8G8R8_UINT_PACK32;
1092
1093 case VK_FORMAT_R16_UNORM:
1094 case VK_FORMAT_R16_SNORM:
1095 return VK_FORMAT_R16_UINT;
1096
1097 case VK_FORMAT_R16G16_UNORM:
1098 case VK_FORMAT_R16G16_SNORM:
1099 return VK_FORMAT_R16G16_UINT;
1100
1101 case VK_FORMAT_R16G16B16A16_UNORM:
1102 case VK_FORMAT_R16G16B16A16_SNORM:
1103 return VK_FORMAT_R16G16B16A16_UINT;
1104
1105 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
1106 return VK_FORMAT_R32_SFLOAT;
1107
1108 /* We can't render to compressed formats using the TLB so instead we use
1109 * a compatible format with the same bpp as the compressed format. Because
1110 * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
1111 * case of ETC), when we implement copies with the compatible format we
1112 * will have to divide offsets and dimensions on the compressed image by
1113 * the compressed block size.
1114 */
1115 case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
1116 case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
1117 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
1118 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
1119 return VK_FORMAT_R32G32B32A32_UINT;
1120
1121 case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
1122 case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
1123 case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
1124 case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
1125 case VK_FORMAT_EAC_R11_UNORM_BLOCK:
1126 case VK_FORMAT_EAC_R11_SNORM_BLOCK:
1127 return VK_FORMAT_R16G16B16A16_UINT;
1128
1129 default:
1130 return VK_FORMAT_UNDEFINED;
1131 }
1132 }
1133
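/* Returns true if a copy can use the TLB path: the copy must start at
 * offset (0,0) and the image format must either be renderable or have a
 * compatible renderable format (returned in compat_format).
 */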
1134 static inline bool
1135 can_use_tlb(struct v3dv_image *image,
1136 const VkOffset3D *offset,
1137 VkFormat *compat_format)
1138 {
1139 if (offset->x != 0 || offset->y != 0)
1140 return false;
1141
1142 if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
1143 if (compat_format)
1144 *compat_format = image->vk_format;
1145 return true;
1146 }
1147
1148 /* If the image format is not TLB-supported, then check if we can use
1149 * a compatible format instead.
1150 */
1151 if (compat_format) {
1152 *compat_format = get_compatible_tlb_format(image->vk_format);
1153 if (*compat_format != VK_FORMAT_UNDEFINED)
1154 return true;
1155 }
1156
1157 return false;
1158 }
1159
1160 void
1161 v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
1162 VkImage srcImage,
1163 VkImageLayout srcImageLayout,
1164 VkBuffer destBuffer,
1165 uint32_t regionCount,
1166 const VkBufferImageCopy *pRegions)
1167 {
1168 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1169 V3DV_FROM_HANDLE(v3dv_image, image, srcImage);
1170 V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer);
1171
1172 assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
1173
1174 for (uint32_t i = 0; i < regionCount; i++) {
1175 if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i]))
1176 continue;
1177 if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i]))
1178 continue;
1179 unreachable("Unsupported image to buffer copy.");
1180 }
1181 }
1182
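/* Emits the generic tile list for copying one layer between images: load
 * the source layer into the TLB and store it out to the destination layer.
 */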
1183 static void
1184 emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
1185 struct framebuffer_data *framebuffer,
1186 struct v3dv_image *dst,
1187 struct v3dv_image *src,
1188 uint32_t layer_offset,
1189 const VkImageCopy *region)
1190 {
1191 struct v3dv_cl *cl = &job->indirect;
1192 v3dv_cl_ensure_space(cl, 200, 1);
1193 v3dv_return_if_oom(NULL, job);
1194
1195 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1196
1197 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1198
1199 assert((src->type != VK_IMAGE_TYPE_3D &&
1200 layer_offset < region->srcSubresource.layerCount) ||
1201 layer_offset < src->extent.depth);
1202
1203 const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
1204 region->srcSubresource.baseArrayLayer + layer_offset :
1205 region->srcOffset.z + layer_offset;
1206
1207 emit_image_load(cl, framebuffer, src,
1208 region->srcSubresource.aspectMask,
1209 src_layer,
1210 region->srcSubresource.mipLevel,
1211 false, false);
1212
1213 cl_emit(cl, END_OF_LOADS, end);
1214
1215 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1216
1217 assert((dst->type != VK_IMAGE_TYPE_3D &&
1218 layer_offset < region->dstSubresource.layerCount) ||
1219 layer_offset < dst->extent.depth);
1220
1221 const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
1222 region->dstSubresource.baseArrayLayer + layer_offset :
1223 region->dstOffset.z + layer_offset;
1224
1225 emit_image_store(cl, framebuffer, dst,
1226 region->dstSubresource.aspectMask,
1227 dst_layer,
1228 region->dstSubresource.mipLevel,
1229 false, false, false);
1230
1231 cl_emit(cl, END_OF_TILE_MARKER, end);
1232
1233 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1234
1235 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1236 branch.start = tile_list_start;
1237 branch.end = v3dv_cl_get_address(cl);
1238 }
1239 }
1240
1241 static void
1242 emit_copy_image_layer(struct v3dv_job *job,
1243 struct v3dv_image *dst,
1244 struct v3dv_image *src,
1245 struct framebuffer_data *framebuffer,
1246 uint32_t layer,
1247 const VkImageCopy *region)
1248 {
1249 emit_frame_setup(job, layer, NULL);
1250 emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
1251 emit_supertile_coordinates(job, framebuffer);
1252 }
1253
1254 static void
1255 emit_copy_image_rcl(struct v3dv_job *job,
1256 struct v3dv_image *dst,
1257 struct v3dv_image *src,
1258 struct framebuffer_data *framebuffer,
1259 const VkImageCopy *region)
1260 {
1261 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
1262 v3dv_return_if_oom(NULL, job);
1263
1264 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
1265 emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
1266 cl_emit(rcl, END_OF_RENDERING, end);
1267 }
1268
1269 /**
1270 * Returns true if the implementation supports the requested operation (even if
1271 * it failed to process it, for example, due to an out-of-memory error).
1272 */
1273 static bool
1274 copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1275 struct v3dv_image *dst,
1276 struct v3dv_image *src,
1277 const VkImageCopy *region)
1278 {
1279 VkFormat fb_format;
1280 if (!can_use_tlb(src, &region->srcOffset, &fb_format) ||
1281 !can_use_tlb(dst, &region->dstOffset, &fb_format)) {
1282 return false;
1283 }
1284
1285 /* From the Vulkan spec, VkImageCopy valid usage:
1286 *
1287 * "If neither the calling command’s srcImage nor the calling command’s
1288 * dstImage has a multi-planar image format then the aspectMask member
1289 * of srcSubresource and dstSubresource must match."
1290 */
1291 assert(region->dstSubresource.aspectMask ==
1292 region->srcSubresource.aspectMask);
1293 uint32_t internal_type, internal_bpp;
1294 get_internal_type_bpp_for_image_aspects(fb_format,
1295 region->dstSubresource.aspectMask,
1296 &internal_type, &internal_bpp);
1297
1298 /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
1299 *
1300 * "The number of slices of the extent (for 3D) or layers of the
1301 * srcSubresource (for non-3D) must match the number of slices of the
1302 * extent (for 3D) or layers of the dstSubresource (for non-3D)."
1303 */
1304 assert((src->type != VK_IMAGE_TYPE_3D ?
1305 region->srcSubresource.layerCount : region->extent.depth) ==
1306 (dst->type != VK_IMAGE_TYPE_3D ?
1307 region->dstSubresource.layerCount : region->extent.depth));
1308 uint32_t num_layers;
1309 if (dst->type != VK_IMAGE_TYPE_3D)
1310 num_layers = region->dstSubresource.layerCount;
1311 else
1312 num_layers = region->extent.depth;
1313 assert(num_layers > 0);
1314
1315 struct v3dv_job *job =
1316 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1317 if (!job)
1318 return true;
1319
1320 /* Handle copy to compressed image using compatible format */
1321 const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
1322 const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
1323 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
1324 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
1325
1326 v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp,
1327 src->samples > VK_SAMPLE_COUNT_1_BIT);
1328
1329 struct framebuffer_data framebuffer;
1330 setup_framebuffer_data(&framebuffer, fb_format, internal_type,
1331 &job->frame_tiling);
1332
1333 v3dv_job_emit_binning_flush(job);
1334 emit_copy_image_rcl(job, dst, src, &framebuffer, region);
1335
1336 v3dv_cmd_buffer_finish_job(cmd_buffer);
1337
1338 return true;
1339 }
1340
1341 /**
1342 * Takes the image provided as argument and creates a new image that has
1343 * the same specification and aliases the same memory storage, except that:
1344 *
1345 * - It has the uncompressed format passed in.
1346 * - Its original width/height are scaled by the factors passed in.
1347 *
1348 * This is useful to implement copies from compressed images using the blit
1349 * path. The idea is that we create uncompressed "image views" of both the
1350 * source and destination images using the uncompressed format and then we
1351 * define the copy blit in terms of that format.
1352 */
1353 static struct v3dv_image *
1354 create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
1355 struct v3dv_image *src,
1356 float width_scale,
1357 float height_scale,
1358 VkFormat format)
1359 {
1360 assert(!vk_format_is_compressed(format));
1361
1362 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
1363
1364 VkImageCreateInfo info = {
1365 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
1366 .imageType = src->type,
1367 .format = format,
1368 .extent = {
1369 .width = src->extent.width * width_scale,
1370 .height = src->extent.height * height_scale,
1371 .depth = src->extent.depth,
1372 },
1373 .mipLevels = src->levels,
1374 .arrayLayers = src->array_size,
1375 .samples = src->samples,
1376 .tiling = src->tiling,
1377 .usage = src->usage,
1378 };
1379
1380 VkImage _image;
1381 VkResult result =
1382 v3dv_CreateImage(_device, &info, &cmd_buffer->device->alloc, &_image);
1383 if (result != VK_SUCCESS) {
1384 v3dv_flag_oom(cmd_buffer, NULL);
1385 return NULL;
1386 }
1387
1388 struct v3dv_image *image = v3dv_image_from_handle(_image);
1389 image->mem = src->mem;
1390 image->mem_offset = src->mem_offset;
1391 return image;
1392 }
1393
1394 /**
1395 * Returns true if the implementation supports the requested operation (even if
1396 * it failed to process it, for example, due to an out-of-memory error).
1397 */
1398 static bool
1399 copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
1400 struct v3dv_image *dst,
1401 struct v3dv_image *src,
1402 const VkImageCopy *region)
1403 {
1404 const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
1405 const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
1406 const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
1407 const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
1408 const float block_scale_w = (float)src_block_w / (float)dst_block_w;
1409 const float block_scale_h = (float)src_block_h / (float)dst_block_h;
1410
1411 /* We need to choose a single format for the blit to ensure that this is
1412 * really a copy and there are no format conversions going on. Since we are
1413 * going to blit, we need to make sure that the selected format can be
1414 * both rendered to and textured from.
1415 */
1416 VkFormat format;
1417 float src_scale_w = 1.0f;
1418 float src_scale_h = 1.0f;
1419 float dst_scale_w = block_scale_w;
1420 float dst_scale_h = block_scale_h;
1421 if (vk_format_is_compressed(src->vk_format)) {
1422 /* If we are copying from a compressed format we should be aware that we
1423 * are going to texture from the source image, and the texture setup
1424 * knows the actual size of the image, so we need to choose a format
1425 * that has a per-texel (not per-block) bpp that is compatible for that
1426 * image size. For example, for a source image with size Bw*WxBh*H
1427 * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
1428 * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
1429 * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
1430 * so we could specify a blit with size Bw*WxBh*H and a format with
1431 * a bpp of 8-bit per texel (R8_UINT).
1432 *
1433 * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
1434 * which is 64-bit per texel, then we would need a 4-bit format, which
1435 * we don't have, so instead we still choose an 8-bit format, but we
1436 * apply a divisor to the row dimensions of the blit, since we are
1437 * copying two texels per item.
1438 *
1439 * Generally, we can choose any format so long as we compute appropriate
1440 * divisors for the width and height depending on the source image's
1441 * bpp.
1442 */
1443 assert(src->cpp == dst->cpp);
1444
1445 uint32_t divisor_w, divisor_h;
1446 format = VK_FORMAT_R32G32_UINT;
1447 switch (src->cpp) {
1448 case 16:
1449 format = VK_FORMAT_R32G32B32A32_UINT;
1450 divisor_w = 4;
1451 divisor_h = 4;
1452 break;
1453 case 8:
1454 format = VK_FORMAT_R16G16B16A16_UINT;
1455 divisor_w = 4;
1456 divisor_h = 4;
1457 break;
1458 default:
1459 unreachable("Unsupported compressed format");
1460 }
1461
1462 /* Create image views of the src/dst images that we can interpret in
1463 * terms of the canonical format.
1464 */
1465 src_scale_w /= divisor_w;
1466 src_scale_h /= divisor_h;
1467 dst_scale_w /= divisor_w;
1468 dst_scale_h /= divisor_h;
1469
1470 src = create_image_alias(cmd_buffer, src,
1471 src_scale_w, src_scale_h, format);
1472
1473 dst = create_image_alias(cmd_buffer, dst,
1474 dst_scale_w, dst_scale_h, format);
1475 } else {
1476 format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
1477 src->vk_format : get_compatible_tlb_format(src->vk_format);
1478 if (format == VK_FORMAT_UNDEFINED)
1479 return false;
1480
1481 const struct v3dv_format *f = v3dv_get_format(format);
1482 if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
1483 return false;
1484 }
1485
1486 /* Given an uncompressed image with size WxH, if we copy it to a compressed
1487 * image, it will result in an image with size W*bWxH*bH, where bW and bH
1488 * are the compressed format's block width and height. This means that
1489 * copies between compressed and uncompressed images involve different
1490 * image sizes, and therefore, we need to take that into account when
1491 * setting up the source and destination blit regions below, so they are
1492 * consistent from the point of view of the single compatible format
1493 * selected for the copy.
1494 *
1495 * We should take into account that the dimensions of the region provided
1496 * to the copy command are specified in terms of the source image. With that
1497 * in mind, below we adjust the blit destination region to be consistent with
1498 * the source region for the compatible format, so basically, we apply
1499 * the block scale factor to the destination offset provided by the copy
1500 * command (because it is specified in terms of the destination image, not
1501 * the source), and then we just add the region copy dimensions to that
1502 * (since the region dimensions are already specified in terms of the source
1503 * image).
1504 */
1505 const VkOffset3D src_start = {
1506 region->srcOffset.x * src_scale_w,
1507 region->srcOffset.y * src_scale_h,
1508 region->srcOffset.z,
1509 };
1510 const VkOffset3D src_end = {
1511 src_start.x + region->extent.width * src_scale_w,
1512 src_start.y + region->extent.height * src_scale_h,
1513 src_start.z + region->extent.depth,
1514 };
1515
1516 const VkOffset3D dst_start = {
1517 region->dstOffset.x * dst_scale_w,
1518 region->dstOffset.y * dst_scale_h,
1519 region->dstOffset.z,
1520 };
1521 const VkOffset3D dst_end = {
1522 dst_start.x + region->extent.width * src_scale_w,
1523 dst_start.y + region->extent.height * src_scale_h,
1524 dst_start.z + region->extent.depth,
1525 };
1526
1527 const VkImageBlit blit_region = {
1528 .srcSubresource = region->srcSubresource,
1529 .srcOffsets = { src_start, src_end },
1530 .dstSubresource = region->dstSubresource,
1531 .dstOffsets = { dst_start, dst_end },
1532 };
1533 bool handled = blit_shader(cmd_buffer,
1534 dst, format,
1535 src, format,
1536 0, NULL,
1537 &blit_region, VK_FILTER_NEAREST, true);
1538
1539 /* We should have selected formats that we can blit */
1540 assert(handled);
1541 return handled;
1542 }
1543
1544 void
1545 v3dv_CmdCopyImage(VkCommandBuffer commandBuffer,
1546 VkImage srcImage,
1547 VkImageLayout srcImageLayout,
1548 VkImage dstImage,
1549 VkImageLayout dstImageLayout,
1550 uint32_t regionCount,
1551 const VkImageCopy *pRegions)
1552 {
1553 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1554 V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
1555 V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
1556
1557 assert(src->samples == dst->samples);
1558
1559 for (uint32_t i = 0; i < regionCount; i++) {
1560 if (copy_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
1561 continue;
1562 if (copy_image_blit(cmd_buffer, dst, src, &pRegions[i]))
1563 continue;
1564 unreachable("Image copy not supported");
1565 }
1566 }
1567
1568 static void
1569 emit_clear_image_per_tile_list(struct v3dv_job *job,
1570 struct framebuffer_data *framebuffer,
1571 struct v3dv_image *image,
1572 VkImageAspectFlags aspects,
1573 uint32_t layer,
1574 uint32_t level)
1575 {
1576 struct v3dv_cl *cl = &job->indirect;
1577 v3dv_cl_ensure_space(cl, 200, 1);
1578 v3dv_return_if_oom(NULL, job);
1579
1580 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1581
1582 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1583
1584 cl_emit(cl, END_OF_LOADS, end);
1585
1586 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1587
1588 emit_image_store(cl, framebuffer, image, aspects, layer, level,
1589 false, false, false);
1590
1591 cl_emit(cl, END_OF_TILE_MARKER, end);
1592
1593 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1594
1595 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1596 branch.start = tile_list_start;
1597 branch.end = v3dv_cl_get_address(cl);
1598 }
1599 }
1600
1601 static void
1602 emit_clear_image(struct v3dv_job *job,
1603 struct v3dv_image *image,
1604 struct framebuffer_data *framebuffer,
1605 VkImageAspectFlags aspects,
1606 uint32_t layer,
1607 uint32_t level)
1608 {
1609 emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level);
1610 emit_supertile_coordinates(job, framebuffer);
1611 }
1612
1613 static void
1614 emit_clear_image_rcl(struct v3dv_job *job,
1615 struct v3dv_image *image,
1616 struct framebuffer_data *framebuffer,
1617 const union v3dv_clear_value *clear_value,
1618 VkImageAspectFlags aspects,
1619 uint32_t layer,
1620 uint32_t level)
1621 {
1622 const struct rcl_clear_info clear_info = {
1623 .clear_value = clear_value,
1624 .image = image,
1625 .aspects = aspects,
1626 .layer = layer,
1627 .level = level,
1628 };
1629
1630 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
1631 v3dv_return_if_oom(NULL, job);
1632
1633 emit_frame_setup(job, 0, clear_value);
1634 emit_clear_image(job, image, framebuffer, aspects, layer, level);
1635 cl_emit(rcl, END_OF_RENDERING, end);
1636 }
1637
1638 static void
1639 get_hw_clear_color(const VkClearColorValue *color,
1640 VkFormat fb_format,
1641 VkFormat image_format,
1642 uint32_t internal_type,
1643 uint32_t internal_bpp,
1644 uint32_t *hw_color)
1645 {
1646 const uint32_t internal_size = 4 << internal_bpp;
1647
1648 /* If the image format doesn't match the framebuffer format, then we are
1649 * trying to clear an unsupported TLB format using a compatible
1650 * format for the framebuffer. In this case, we want to make sure that
1651 * we pack the clear value according to the original format semantics,
1652 * not the compatible format.
1653 */
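/* Hypothetical example of the "else" path below: when an image is only
 * cleared through a compatible framebuffer format, the clear color must
 * still be packed with the original format's packing rules (done here
 * via util_pack_color()), otherwise the raw bits written to memory
 * would follow the compatible format's layout instead.
 */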
1654 if (fb_format == image_format) {
1655 v3dv_get_hw_clear_color(color, internal_type, internal_size, hw_color);
1656 } else {
1657 union util_color uc;
1658 enum pipe_format pipe_image_format =
1659 vk_format_to_pipe_format(image_format);
1660 util_pack_color(color->float32, pipe_image_format, &uc);
1661 memcpy(hw_color, uc.ui, internal_size);
1662 }
1663 }
1664
1665 /* Returns true if the implementation is able to handle the case, false
1666 * otherwise.
1667 */
1668 static bool
1669 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
1670 struct v3dv_image *image,
1671 const VkClearValue *clear_value,
1672 const VkImageSubresourceRange *range)
1673 {
1674 const VkOffset3D origin = { 0, 0, 0 };
1675 VkFormat fb_format;
1676 if (!can_use_tlb(image, &origin, &fb_format))
1677 return false;
1678
1679 uint32_t internal_type, internal_bpp;
1680 get_internal_type_bpp_for_image_aspects(fb_format, range->aspectMask,
1681 &internal_type, &internal_bpp);
1682
1683 union v3dv_clear_value hw_clear_value = { 0 };
1684 if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1685 get_hw_clear_color(&clear_value->color, fb_format, image->vk_format,
1686 internal_type, internal_bpp, &hw_clear_value.color[0]);
1687 } else {
1688 assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
1689 (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
1690 hw_clear_value.z = clear_value->depthStencil.depth;
1691 hw_clear_value.s = clear_value->depthStencil.stencil;
1692 }
1693
1694 uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
1695 image->levels - range->baseMipLevel :
1696 range->levelCount;
1697 uint32_t min_level = range->baseMipLevel;
1698 uint32_t max_level = range->baseMipLevel + level_count;
1699
1700 /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
1701 * Instead, we need to consider the full depth dimension of the image, which
1702 * goes from 0 up to the level's depth extent.
1703 */
1704 uint32_t min_layer;
1705 uint32_t max_layer;
1706 if (image->type != VK_IMAGE_TYPE_3D) {
1707 uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
1708 image->array_size - range->baseArrayLayer :
1709 range->layerCount;
1710 min_layer = range->baseArrayLayer;
1711 max_layer = range->baseArrayLayer + layer_count;
1712 } else {
1713 min_layer = 0;
1714 max_layer = 0;
1715 }
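/* Illustrative example: clearing levels 0..2 of a 3D image with a depth
 * of 8 iterates over 8, 4 and 2 "layers" respectively, since for 3D
 * images max_layer is recomputed per level from the minified depth in
 * the loop below.
 */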
1716
1717 for (uint32_t level = min_level; level < max_level; level++) {
1718 if (image->type == VK_IMAGE_TYPE_3D)
1719 max_layer = u_minify(image->extent.depth, level);
1720 for (uint32_t layer = min_layer; layer < max_layer; layer++) {
1721 uint32_t width = u_minify(image->extent.width, level);
1722 uint32_t height = u_minify(image->extent.height, level);
1723
1724 struct v3dv_job *job =
1725 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1726
1727 if (!job)
1728 return true;
1729
1730 /* We start a new job for each layer so the frame "depth" is 1 */
1731 v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,
1732 image->samples > VK_SAMPLE_COUNT_1_BIT);
1733
1734 struct framebuffer_data framebuffer;
1735 setup_framebuffer_data(&framebuffer, fb_format, internal_type,
1736 &job->frame_tiling);
1737
1738 v3dv_job_emit_binning_flush(job);
1739
1740 /* If this triggers it is an application bug: the spec requires
1741 * that any aspects to clear are present in the image.
1742 */
1743 assert(range->aspectMask & image->aspects);
1744
1745 emit_clear_image_rcl(job, image, &framebuffer, &hw_clear_value,
1746 range->aspectMask, layer, level);
1747
1748 v3dv_cmd_buffer_finish_job(cmd_buffer);
1749 }
1750 }
1751
1752 return true;
1753 }
1754
1755 void
1756 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
1757 VkImage _image,
1758 VkImageLayout imageLayout,
1759 const VkClearColorValue *pColor,
1760 uint32_t rangeCount,
1761 const VkImageSubresourceRange *pRanges)
1762 {
1763 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1764 V3DV_FROM_HANDLE(v3dv_image, image, _image);
1765
1766 const VkClearValue clear_value = {
1767 .color = *pColor,
1768 };
1769
1770 for (uint32_t i = 0; i < rangeCount; i++) {
1771 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
1772 continue;
1773 unreachable("Unsupported color clear.");
1774 }
1775 }
1776
1777 void
1778 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
1779 VkImage _image,
1780 VkImageLayout imageLayout,
1781 const VkClearDepthStencilValue *pDepthStencil,
1782 uint32_t rangeCount,
1783 const VkImageSubresourceRange *pRanges)
1784 {
1785 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1786 V3DV_FROM_HANDLE(v3dv_image, image, _image);
1787
1788 const VkClearValue clear_value = {
1789 .depthStencil = *pDepthStencil,
1790 };
1791
1792 for (uint32_t i = 0; i < rangeCount; i++) {
1793 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
1794 continue;
1795 unreachable("Unsupported depth/stencil clear.");
1796 }
1797 }
1798
1799 static void
1800 emit_copy_buffer_per_tile_list(struct v3dv_job *job,
1801 struct v3dv_bo *dst,
1802 struct v3dv_bo *src,
1803 uint32_t dst_offset,
1804 uint32_t src_offset,
1805 uint32_t stride,
1806 uint32_t format)
1807 {
1808 struct v3dv_cl *cl = &job->indirect;
1809 v3dv_cl_ensure_space(cl, 200, 1);
1810 v3dv_return_if_oom(NULL, job);
1811
1812 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1813
1814 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1815
1816 emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
1817
1818 cl_emit(cl, END_OF_LOADS, end);
1819
1820 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1821
1822 emit_linear_store(cl, RENDER_TARGET_0,
1823 dst, dst_offset, stride, false, format);
1824
1825 cl_emit(cl, END_OF_TILE_MARKER, end);
1826
1827 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1828
1829 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1830 branch.start = tile_list_start;
1831 branch.end = v3dv_cl_get_address(cl);
1832 }
1833 }
1834
1835 static void
1836 emit_copy_buffer(struct v3dv_job *job,
1837 struct v3dv_bo *dst,
1838 struct v3dv_bo *src,
1839 uint32_t dst_offset,
1840 uint32_t src_offset,
1841 struct framebuffer_data *framebuffer,
1842 uint32_t format)
1843 {
1844 const uint32_t stride = job->frame_tiling.width * 4;
1845 emit_copy_buffer_per_tile_list(job, dst, src,
1846 dst_offset, src_offset,
1847 stride, format);
1848 emit_supertile_coordinates(job, framebuffer);
1849 }
1850
1851 static void
1852 emit_copy_buffer_rcl(struct v3dv_job *job,
1853 struct v3dv_bo *dst,
1854 struct v3dv_bo *src,
1855 uint32_t dst_offset,
1856 uint32_t src_offset,
1857 struct framebuffer_data *framebuffer,
1858 uint32_t format)
1859 {
1860 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
1861 v3dv_return_if_oom(NULL, job);
1862
1863 emit_frame_setup(job, 0, NULL);
1864 emit_copy_buffer(job, dst, src, dst_offset, src_offset, framebuffer, format);
1865 cl_emit(rcl, END_OF_RENDERING, end);
1866 }
1867
1868 /* Figure out a TLB size configuration for a number of pixels to process.
1869 * Beware that we can't "render" more than 4096x4096 pixels in a single job;
1870 * if the pixel count is larger than that, the caller might need to split
1871 * the job and call this function multiple times.
1872 */
1873 static void
1874 framebuffer_size_for_pixel_count(uint32_t num_pixels,
1875 uint32_t *width,
1876 uint32_t *height)
1877 {
1878 assert(num_pixels > 0);
1879
1880 const uint32_t max_dim_pixels = 4096;
1881 const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;
1882
1883 uint32_t w, h;
1884 if (num_pixels > max_pixels) {
1885 w = max_dim_pixels;
1886 h = max_dim_pixels;
1887 } else {
1888 w = num_pixels;
1889 h = 1;
1890 while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
1891 w >>= 1;
1892 h <<= 1;
1893 }
1894 }
1895 assert(w <= max_dim_pixels && h <= max_dim_pixels);
1896 assert(w * h <= num_pixels);
1897 assert(w > 0 && h > 0);
1898
1899 *width = w;
1900 *height = h;
1901 }
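/* Illustrative examples: 8192 pixels resolve to a 128x64 framebuffer
 * (the width is halved and the height doubled until the aspect ratio is
 * roughly 2:1), while anything above 4096x4096 = 16,777,216 pixels is
 * clamped to a 4096x4096 job and the caller loops to emit more jobs for
 * the remainder.
 */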
1902
1903 static struct v3dv_job *
1904 copy_buffer(struct v3dv_cmd_buffer *cmd_buffer,
1905 struct v3dv_bo *dst,
1906 uint32_t dst_offset,
1907 struct v3dv_bo *src,
1908 uint32_t src_offset,
1909 const VkBufferCopy *region)
1910 {
1911 const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
1912 const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
1913
1914 /* Select appropriate pixel format for the copy operation based on the
1915 * size to copy and the alignment of the source and destination offsets.
1916 */
1917 src_offset += region->srcOffset;
1918 dst_offset += region->dstOffset;
1919 uint32_t item_size = 4;
1920 while (item_size > 1 &&
1921 (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
1922 item_size /= 2;
1923 }
1924
1925 while (item_size > 1 && region->size % item_size != 0)
1926 item_size /= 2;
1927
1928 assert(region->size % item_size == 0);
1929 uint32_t num_items = region->size / item_size;
1930 assert(num_items > 0);
1931
1932 uint32_t format;
1933 VkFormat vk_format;
1934 switch (item_size) {
1935 case 4:
1936 format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
1937 vk_format = VK_FORMAT_R8G8B8A8_UINT;
1938 break;
1939 case 2:
1940 format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
1941 vk_format = VK_FORMAT_R8G8_UINT;
1942 break;
1943 default:
1944 format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
1945 vk_format = VK_FORMAT_R8_UINT;
1946 break;
1947 }
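/* Illustrative example: srcOffset = 6, dstOffset = 10 and size = 12 are
 * all 2-aligned but not 4-aligned, so item_size becomes 2 and the copy
 * is emitted as 6 RG8UI items.
 */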
1948
1949 struct v3dv_job *job = NULL;
1950 while (num_items > 0) {
1951 job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1952 if (!job)
1953 return NULL;
1954
1955 uint32_t width, height;
1956 framebuffer_size_for_pixel_count(num_items, &width, &height);
1957
1958 v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
1959
1960 struct framebuffer_data framebuffer;
1961 setup_framebuffer_data(&framebuffer, vk_format, internal_type,
1962 &job->frame_tiling);
1963
1964 v3dv_job_emit_binning_flush(job);
1965
1966 emit_copy_buffer_rcl(job, dst, src, dst_offset, src_offset,
1967 &framebuffer, format);
1968
1969 v3dv_cmd_buffer_finish_job(cmd_buffer);
1970
1971 const uint32_t items_copied = width * height;
1972 const uint32_t bytes_copied = items_copied * item_size;
1973 num_items -= items_copied;
1974 src_offset += bytes_copied;
1975 dst_offset += bytes_copied;
1976 }
1977
1978 return job;
1979 }
1980
1981 void
1982 v3dv_CmdCopyBuffer(VkCommandBuffer commandBuffer,
1983 VkBuffer srcBuffer,
1984 VkBuffer dstBuffer,
1985 uint32_t regionCount,
1986 const VkBufferCopy *pRegions)
1987 {
1988 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1989 V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, srcBuffer);
1990 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
1991
1992 for (uint32_t i = 0; i < regionCount; i++) {
1993 copy_buffer(cmd_buffer,
1994 dst_buffer->mem->bo, dst_buffer->mem_offset,
1995 src_buffer->mem->bo, src_buffer->mem_offset,
1996 &pRegions[i]);
1997 }
1998 }
1999
2000 static void
2001 destroy_update_buffer_cb(VkDevice _device,
2002 uint64_t pobj,
2003 VkAllocationCallbacks *alloc)
2004 {
2005 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2006 struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
2007 v3dv_bo_free(device, bo);
2008 }
2009
2010 void
2011 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
2012 VkBuffer dstBuffer,
2013 VkDeviceSize dstOffset,
2014 VkDeviceSize dataSize,
2015 const void *pData)
2016 {
2017 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
2018 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
2019
2020 struct v3dv_bo *src_bo =
2021 v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
2022 if (!src_bo) {
2023 fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
2024 return;
2025 }
2026
2027 bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
2028 if (!ok) {
2029 fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
2030 return;
2031 }
2032
2033 memcpy(src_bo->map, pData, dataSize);
2034
2035 v3dv_bo_unmap(cmd_buffer->device, src_bo);
2036
2037 VkBufferCopy region = {
2038 .srcOffset = 0,
2039 .dstOffset = dstOffset,
2040 .size = dataSize,
2041 };
2042 struct v3dv_job *copy_job =
2043 copy_buffer(cmd_buffer,
2044 dst_buffer->mem->bo, dst_buffer->mem_offset,
2045 src_bo, 0,
2046 &region);
2047 if (!copy_job)
2048 return;
2049
2050 v3dv_cmd_buffer_add_private_obj(
2051 cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
2052 }
2053
2054 static void
2055 emit_fill_buffer_per_tile_list(struct v3dv_job *job,
2056 struct v3dv_bo *bo,
2057 uint32_t offset,
2058 uint32_t stride)
2059 {
2060 struct v3dv_cl *cl = &job->indirect;
2061 v3dv_cl_ensure_space(cl, 200, 1);
2062 v3dv_return_if_oom(NULL, job);
2063
2064 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
2065
2066 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
2067
2068 cl_emit(cl, END_OF_LOADS, end);
2069
2070 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
2071
2072 emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
2073 V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
2074
2075 cl_emit(cl, END_OF_TILE_MARKER, end);
2076
2077 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
2078
2079 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
2080 branch.start = tile_list_start;
2081 branch.end = v3dv_cl_get_address(cl);
2082 }
2083 }
2084
2085 static void
2086 emit_fill_buffer(struct v3dv_job *job,
2087 struct v3dv_bo *bo,
2088 uint32_t offset,
2089 struct framebuffer_data *framebuffer)
2090 {
2091 const uint32_t stride = job->frame_tiling.width * 4;
2092 emit_fill_buffer_per_tile_list(job, bo, offset, stride);
2093 emit_supertile_coordinates(job, framebuffer);
2094 }
2095
2096 static void
2097 emit_fill_buffer_rcl(struct v3dv_job *job,
2098 struct v3dv_bo *bo,
2099 uint32_t offset,
2100 struct framebuffer_data *framebuffer,
2101 uint32_t data)
2102 {
2103 const union v3dv_clear_value clear_value = {
2104 .color = { data, 0, 0, 0 },
2105 };
2106
2107 const struct rcl_clear_info clear_info = {
2108 .clear_value = &clear_value,
2109 .image = NULL,
2110 .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
2111 .layer = 0,
2112 .level = 0,
2113 };
2114
2115 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
2116 v3dv_return_if_oom(NULL, job);
2117
2118 emit_frame_setup(job, 0, &clear_value);
2119 emit_fill_buffer(job, bo, offset, framebuffer);
2120 cl_emit(rcl, END_OF_RENDERING, end);
2121 }
2122
2123 static void
2124 fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
2125 struct v3dv_bo *bo,
2126 uint32_t offset,
2127 uint32_t size,
2128 uint32_t data)
2129 {
2130 assert(size > 0 && size % 4 == 0);
2131 assert(offset + size <= bo->size);
2132
2133 const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
2134 const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
2135 uint32_t num_items = size / 4;
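/* Illustrative example: filling 64 KiB writes 16384 RGBA8UI items, which
 * framebuffer_size_for_pixel_count() below turns into a single 128x128
 * pseudo-framebuffer job.
 */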
2136
2137 while (num_items > 0) {
2138 struct v3dv_job *job =
2139 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
2140 if (!job)
2141 return;
2142
2143 uint32_t width, height;
2144 framebuffer_size_for_pixel_count(num_items, &width, &height);
2145
2146 v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
2147
2148 struct framebuffer_data framebuffer;
2149 setup_framebuffer_data(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
2150 internal_type, &job->frame_tiling);
2151
2152 v3dv_job_emit_binning_flush(job);
2153
2154 emit_fill_buffer_rcl(job, bo, offset, &framebuffer, data);
2155
2156 v3dv_cmd_buffer_finish_job(cmd_buffer);
2157
2158 const uint32_t items_copied = width * height;
2159 const uint32_t bytes_copied = items_copied * 4;
2160 num_items -= items_copied;
2161 offset += bytes_copied;
2162 }
2163 }
2164
2165 void
2166 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
2167 VkBuffer dstBuffer,
2168 VkDeviceSize dstOffset,
2169 VkDeviceSize size,
2170 uint32_t data)
2171 {
2172 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
2173 V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
2174
2175 struct v3dv_bo *bo = dst_buffer->mem->bo;
2176
2177 /* From the Vulkan spec:
2178 *
2179 * "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
2180 * a multiple of 4, then the nearest smaller multiple is used."
2181 */
2182 if (size == VK_WHOLE_SIZE) {
2183 size = dst_buffer->size - dstOffset;
2184 size -= size % 4;
2185 }
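/* Illustrative example: dstOffset = 16 into a 30-byte buffer leaves 14
 * bytes, which is rounded down to 12 as required by the spec.
 */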
2186
2187 fill_buffer(cmd_buffer, bo, dstOffset, size, data);
2188 }
2189
2190 /* Disable level 0 write, just write following mipmaps */
2191 #define V3D_TFU_IOA_DIMTW (1 << 0)
2192 #define V3D_TFU_IOA_FORMAT_SHIFT 3
2193 #define V3D_TFU_IOA_FORMAT_LINEARTILE 3
2194 #define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
2195 #define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
2196 #define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
2197 #define V3D_TFU_IOA_FORMAT_UIF_XOR 7
2198
2199 #define V3D_TFU_ICFG_NUMMM_SHIFT 5
2200 #define V3D_TFU_ICFG_TTYPE_SHIFT 9
2201
2202 #define V3D_TFU_ICFG_OPAD_SHIFT 22
2203
2204 #define V3D_TFU_ICFG_FORMAT_SHIFT 18
2205 #define V3D_TFU_ICFG_FORMAT_RASTER 0
2206 #define V3D_TFU_ICFG_FORMAT_SAND_128 1
2207 #define V3D_TFU_ICFG_FORMAT_SAND_256 2
2208 #define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
2209 #define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
2210 #define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
2211 #define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
2212 #define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
2213
2214 /**
2215 * Returns true if the implementation supports the requested operation (even if
2216 * it failed to process it, for example, due to an out-of-memory error).
2217 */
2218 static bool
2219 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
2220 struct v3dv_image *image,
2221 struct v3dv_buffer *buffer,
2222 const VkBufferImageCopy *region)
2223 {
2224 VkFormat vk_format = image->vk_format;
2225 const struct v3dv_format *format = image->format;
2226
2227 /* Format must be supported for texturing */
2228 if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo,
2229 format->tex_type)) {
2230 return false;
2231 }
2232
2233 /* Only color formats */
2234 if (vk_format_is_depth_or_stencil(vk_format))
2235 return false;
2236
2237 /* Destination can't be raster format */
2238 const uint32_t mip_level = region->imageSubresource.mipLevel;
2239 if (image->slices[mip_level].tiling == VC5_TILING_RASTER)
2240 return false;
2241
2242 /* Region must include full slice */
2243 const uint32_t offset_x = region->imageOffset.x;
2244 const uint32_t offset_y = region->imageOffset.y;
2245 if (offset_x != 0 || offset_y != 0)
2246 return false;
2247
2248 uint32_t width, height;
2249 if (region->bufferRowLength == 0)
2250 width = region->imageExtent.width;
2251 else
2252 width = region->bufferRowLength;
2253
2254 if (region->bufferImageHeight == 0)
2255 height = region->imageExtent.height;
2256 else
2257 height = region->bufferImageHeight;
2258
2259 if (width != image->extent.width || height != image->extent.height)
2260 return false;
2261
2262 const struct v3d_resource_slice *slice = &image->slices[mip_level];
2263
2264 uint32_t num_layers;
2265 if (image->type != VK_IMAGE_TYPE_3D)
2266 num_layers = region->imageSubresource.layerCount;
2267 else
2268 num_layers = region->imageExtent.depth;
2269 assert(num_layers > 0);
2270
2271 assert(image->mem && image->mem->bo);
2272 const struct v3dv_bo *dst_bo = image->mem->bo;
2273
2274 assert(buffer->mem && buffer->mem->bo);
2275 const struct v3dv_bo *src_bo = buffer->mem->bo;
2276
2277 /* Emit a TFU job per layer to copy */
2278 const uint32_t buffer_stride = width * image->cpp;
2279 for (int i = 0; i < num_layers; i++) {
2280 uint32_t layer = region->imageSubresource.baseArrayLayer + i;
2281
2282 struct drm_v3d_submit_tfu tfu = {
2283 .ios = (height << 16) | width,
2284 .bo_handles = {
2285 dst_bo->handle,
2286 src_bo != dst_bo ? src_bo->handle : 0
2287 },
2288 };
2289
2290 const uint32_t buffer_offset =
2291 buffer->mem_offset + region->bufferOffset +
2292 height * buffer_stride * i;
2293
2294 const uint32_t src_offset = src_bo->offset + buffer_offset;
2295 tfu.iia |= src_offset;
2296 tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;
2297 tfu.iis |= width;
2298
2299 const uint32_t dst_offset =
2300 dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
2301 tfu.ioa |= dst_offset;
2302
2303 tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
2304 (slice->tiling - VC5_TILING_LINEARTILE)) <<
2305 V3D_TFU_IOA_FORMAT_SHIFT;
2306 tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
2307
2308 /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
2309 * OPAD field for the destination (how many extra UIF blocks beyond
2310 * those necessary to cover the height).
2311 */
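/* Illustrative example: if uif_block_h works out to 16 rows, a 100-row
 * destination has an implicit padded height of 112, so a slice padded
 * to 128 rows yields an OPAD of one extra UIF block.
 */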
2312 if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
2313 slice->tiling == VC5_TILING_UIF_XOR) {
2314 uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
2315 uint32_t implicit_padded_height = align(height, uif_block_h);
2316 uint32_t icfg =
2317 (slice->padded_height - implicit_padded_height) / uif_block_h;
2318 tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
2319 }
2320
2321 v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
2322 }
2323
2324 return true;
2325 }
2326
2327 static void
2328 emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
2329 struct framebuffer_data *framebuffer,
2330 struct v3dv_image *image,
2331 struct v3dv_buffer *buffer,
2332 uint32_t layer,
2333 const VkBufferImageCopy *region)
2334 {
2335 struct v3dv_cl *cl = &job->indirect;
2336 v3dv_cl_ensure_space(cl, 200, 1);
2337 v3dv_return_if_oom(NULL, job);
2338
2339 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
2340
2341 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
2342
2343 const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
2344 assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
2345 layer < image->extent.depth);
2346
2347 /* Load TLB from buffer */
2348 uint32_t width, height;
2349 if (region->bufferRowLength == 0)
2350 width = region->imageExtent.width;
2351 else
2352 width = region->bufferRowLength;
2353
2354 if (region->bufferImageHeight == 0)
2355 height = region->imageExtent.height;
2356 else
2357 height = region->bufferImageHeight;
2358
2359 /* Handle copy to compressed format using a compatible format */
2360 width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
2361 height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
2362
2363 uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
2364 1 : image->cpp;
2365 uint32_t buffer_stride = width * cpp;
2366 uint32_t buffer_offset =
2367 buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
2368
2369 uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
2370 false, false, true);
2371
2372 emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
2373 buffer_offset, buffer_stride, format);
2374
2375 /* Because we can't do raster loads/stores of Z/S formats we need to
2376 * use a color tile buffer with a compatible RGBA color format instead.
2377 * However, when we are uploading a single aspect to a combined
2378 * depth/stencil image we have the problem that our tile buffer stores don't
2379 * allow us to mask out the other aspect, so we always write all four RGBA
2380 * channels to the image and we end up overwriting that other aspect with
2381 * undefined values. To work around that, we first load the aspect we are
2382 * not copying from the image memory into a proper Z/S tile buffer. Then we
2383 * do our store from the color buffer for the aspect we are copying, and
2384 * after that, we do another store from the Z/S tile buffer to restore the
2385 * other aspect to its original value.
2386 */
2387 if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
2388 if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
2389 emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
2390 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
2391 false, false);
2392 } else {
2393 assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
2394 emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
2395 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
2396 false, false);
2397 }
2398 }
2399
2400 cl_emit(cl, END_OF_LOADS, end);
2401
2402 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
2403
2404 /* Store TLB to image */
2405 emit_image_store(cl, framebuffer, image, imgrsc->aspectMask,
2406 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
2407 false, true, false);
2408
2409 if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
2410 if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
2411 emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
2412 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
2413 false, false, false);
2414 } else {
2415 assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
2416 emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
2417 imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
2418 false, false, false);
2419 }
2420 }
2421
2422 cl_emit(cl, END_OF_TILE_MARKER, end);
2423
2424 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
2425
2426 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
2427 branch.start = tile_list_start;
2428 branch.end = v3dv_cl_get_address(cl);
2429 }
2430 }
2431
2432 static void
2433 emit_copy_buffer_to_layer(struct v3dv_job *job,
2434 struct v3dv_image *image,
2435 struct v3dv_buffer *buffer,
2436 struct framebuffer_data *framebuffer,
2437 uint32_t layer,
2438 const VkBufferImageCopy *region)
2439 {
2440 emit_frame_setup(job, layer, NULL);
2441 emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
2442 layer, region);
2443 emit_supertile_coordinates(job, framebuffer);
2444 }
2445
2446 static void
2447 emit_copy_buffer_to_image_rcl(struct v3dv_job *job,
2448 struct v3dv_image *image,
2449 struct v3dv_buffer *buffer,
2450 struct framebuffer_data *framebuffer,
2451 const VkBufferImageCopy *region)
2452 {
2453 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
2454 v3dv_return_if_oom(NULL, job);
2455
2456 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
2457 emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
2458 cl_emit(rcl, END_OF_RENDERING, end);
2459 }
2460
2461 /**
2462 * Returns true if the implementation supports the requested operation (even if
2463 * it failed to process it, for example, due to an out-of-memory error).
2464 */
2465 static bool
2466 copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
2467 struct v3dv_image *image,
2468 struct v3dv_buffer *buffer,
2469 const VkBufferImageCopy *region)
2470 {
2471 VkFormat fb_format;
2472 if (!can_use_tlb(image, &region->imageOffset, &fb_format))
2473 return false;
2474
2475 uint32_t internal_type, internal_bpp;
2476 get_internal_type_bpp_for_image_aspects(fb_format,
2477 region->imageSubresource.aspectMask,
2478 &internal_type, &internal_bpp);
2479
2480 uint32_t num_layers;
2481 if (image->type != VK_IMAGE_TYPE_3D)
2482 num_layers = region->imageSubresource.layerCount;
2483 else
2484 num_layers = region->imageExtent.depth;
2485 assert(num_layers > 0);
2486
2487 struct v3dv_job *job =
2488 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
2489 if (!job)
2490 return true;
2491
2492 /* Handle copy to compressed format using a compatible format */
2493 const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
2494 const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
2495 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
2496 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
2497
2498 v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
2499
2500 struct framebuffer_data framebuffer;
2501 setup_framebuffer_data(&framebuffer, fb_format, internal_type,
2502 &job->frame_tiling);
2503
2504 v3dv_job_emit_binning_flush(job);
2505 emit_copy_buffer_to_image_rcl(job, image, buffer, &framebuffer, region);
2506
2507 v3dv_cmd_buffer_finish_job(cmd_buffer);
2508
2509 return true;
2510 }
2511
2512 static bool
2513 create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
2514 struct v3dv_image *image,
2515 struct v3dv_buffer *buffer,
2516 const VkBufferImageCopy *region)
2517 {
2518 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
2519 return true;
2520 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
2521 return true;
2522 return false;
2523 }
2524 /**
2525 * Returns true if the implementation supports the requested operation (even if
2526 * it failed to process it, for example, due to an out-of-memory error).
2527 */
2528 static bool
2529 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2530 struct v3dv_image *image,
2531 struct v3dv_buffer *buffer,
2532 const VkBufferImageCopy *region)
2533 {
2534 bool handled = false;
2535
2536 /* Generally, the bpp of the data in the buffer matches that of the
2537 * destination image. The exception is the case where we are uploading
2538 * stencil (8bpp) to a combined d24s8 image (32bpp).
2539 */
2540 uint32_t buffer_bpp = image->cpp;
2541
2542 VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
2543
2544 /* We are about to upload the buffer data to an image so we can then
2545 * blit that to our destination region. Because we are going to implement
2546 * the copy as a blit, we want our blit source and destination formats to be
2547 * the same (to avoid any format conversions), so we choose a canonical
2548 * format that matches the destination image bpp.
2549 */
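/* Illustrative example: uploading to a VK_FORMAT_R32G32B32A32_SFLOAT
 * image (16 bytes per texel) goes through R32G32B32A32_UINT on both the
 * source and destination side, so the texel bits are moved verbatim and
 * no format conversion can occur.
 */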
2550 VkColorComponentFlags cmask = 0; /* Write all components */
2551 VkFormat src_format;
2552 VkFormat dst_format;
2553 switch (buffer_bpp) {
2554 case 16:
2555 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2556 src_format = VK_FORMAT_R32G32B32A32_UINT;
2557 dst_format = src_format;
2558 break;
2559 case 8:
2560 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2561 src_format = VK_FORMAT_R16G16B16A16_UINT;
2562 dst_format = src_format;
2563 break;
2564 case 4:
2565 switch (aspect) {
2566 case VK_IMAGE_ASPECT_COLOR_BIT:
2567 src_format = VK_FORMAT_R8G8B8A8_UINT;
2568 dst_format = src_format;
2569 break;
2570 case VK_IMAGE_ASPECT_DEPTH_BIT:
2571 assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
2572 image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
2573 image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
2574 if (image->tiling != VK_IMAGE_TILING_LINEAR) {
2575 src_format = image->vk_format;
2576 } else {
2577 src_format = VK_FORMAT_R8G8B8A8_UINT;
2578 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2579 if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
2580 cmask = VK_COLOR_COMPONENT_R_BIT |
2581 VK_COLOR_COMPONENT_G_BIT |
2582 VK_COLOR_COMPONENT_B_BIT;
2583 }
2584 }
2585 dst_format = src_format;
2586 break;
2587 case VK_IMAGE_ASPECT_STENCIL_BIT:
2588 /* Since we don't support separate stencil images, this is always a
2589 * stencil copy to a combined depth/stencil image. We upload the buffer
2590 * data to a compatible color R8UI image, and implement the blit as a
2591 * compatible color blit to an RGBA8UI destination, masking out writes
2592 * to the GBA components (which map to the D24 component of a S8D24
2593 * image).
2594 */
2595 assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
2596 buffer_bpp = 1;
2597 src_format = VK_FORMAT_R8_UINT;
2598 dst_format = VK_FORMAT_R8G8B8A8_UINT;
2599 cmask = VK_COLOR_COMPONENT_R_BIT;
2600 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2601 break;
2602 default:
2603 unreachable("unsupported aspect");
2604 return handled;
2605 };
2606 break;
2607 case 2:
2608 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2609 src_format = VK_FORMAT_R16_UINT;
2610 dst_format = src_format;
2611 break;
2612 case 1:
2613 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2614 src_format = VK_FORMAT_R8_UINT;
2615 dst_format = src_format;
2616 break;
2617 default:
2618 unreachable("unsupported bit-size");
2619 return handled;
2620 }
2621
2622 /* We should be able to handle the blit if we reached here */
2623 handled = true;
2624
2625 /* Obtain the 2D buffer region spec */
2626 uint32_t buf_width, buf_height;
2627 if (region->bufferRowLength == 0)
2628 buf_width = region->imageExtent.width;
2629 else
2630 buf_width = region->bufferRowLength;
2631
2632 if (region->bufferImageHeight == 0)
2633 buf_height = region->imageExtent.height;
2634 else
2635 buf_height = region->bufferImageHeight;
2636
2637 /* If the image is compressed, the bpp refers to blocks, not pixels */
2638 uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
2639 uint32_t block_height = vk_format_get_blockheight(image->vk_format);
2640 buf_width = buf_width / block_width;
2641 buf_height = buf_height / block_height;
2642
2643 /* Compute layers to copy */
2644 uint32_t num_layers;
2645 if (image->type != VK_IMAGE_TYPE_3D)
2646 num_layers = region->imageSubresource.layerCount;
2647 else
2648 num_layers = region->imageExtent.depth;
2649 assert(num_layers > 0);
2650
2651 struct v3dv_device *device = cmd_buffer->device;
2652 VkDevice _device = v3dv_device_to_handle(device);
2653 for (uint32_t i = 0; i < num_layers; i++) {
2654 /* Create the source blit image from the source buffer.
2655 *
2656 * We can't texture from a linear image, so we can't just setup a blit
2657 * straight from the buffer contents. Instead, we need to upload the
2658 * buffer to a tiled image, and then copy that image to the selected
2659 * region of the destination.
2660 *
2661 * FIXME: we could do better than this if we used a blit shader that has
2662 * a UBO (for the buffer) as input instead of a texture. Then we would
2663 * have to do some arithmetic in the shader to identify the offset into
2664 * the UBO that we need to load for each pixel in the destination image
2665 * (we would need to support all the possible copy formats we have above).
2666 */
2667 VkImageCreateInfo image_info = {
2668 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2669 .imageType = VK_IMAGE_TYPE_2D,
2670 .format = src_format,
2671 .extent = { buf_width, buf_height, 1 },
2672 .mipLevels = 1,
2673 .arrayLayers = 1,
2674 .samples = VK_SAMPLE_COUNT_1_BIT,
2675 .tiling = VK_IMAGE_TILING_OPTIMAL,
2676 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2677 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2678 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2679 .queueFamilyIndexCount = 0,
2680 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2681 };
2682
2683 VkImage buffer_image;
2684 VkResult result =
2685 v3dv_CreateImage(_device, &image_info, &device->alloc, &buffer_image);
2686 if (result != VK_SUCCESS)
2687 return handled;
2688
2689 v3dv_cmd_buffer_add_private_obj(
2690 cmd_buffer, (uintptr_t)buffer_image,
2691 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
2692
2693 /* Allocate and bind memory for the image */
2694 VkDeviceMemory mem;
2695 VkMemoryRequirements reqs;
2696 v3dv_GetImageMemoryRequirements(_device, buffer_image, &reqs);
2697 VkMemoryAllocateInfo alloc_info = {
2698 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2699 .allocationSize = reqs.size,
2700 .memoryTypeIndex = 0,
2701 };
2702 result = v3dv_AllocateMemory(_device, &alloc_info, &device->alloc, &mem);
2703 if (result != VK_SUCCESS)
2704 return handled;
2705
2706 v3dv_cmd_buffer_add_private_obj(
2707 cmd_buffer, (uintptr_t)mem,
2708 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2709
2710 result = v3dv_BindImageMemory(_device, buffer_image, mem, 0);
2711 if (result != VK_SUCCESS)
2712 return handled;
2713
2714 /* Upload buffer contents for the selected layer */
2715 VkDeviceSize buffer_offset =
2716 region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
2717 const VkBufferImageCopy buffer_image_copy = {
2718 .bufferOffset = buffer_offset,
2719 .bufferRowLength = region->bufferRowLength / block_width,
2720 .bufferImageHeight = region->bufferImageHeight / block_height,
2721 .imageSubresource = {
2722 .aspectMask = aspect,
2723 .mipLevel = 0,
2724 .baseArrayLayer = 0,
2725 .layerCount = 1,
2726 },
2727 .imageOffset = { 0, 0, 0 },
2728 .imageExtent = { buf_width, buf_height, 1 }
2729 };
2730 handled =
2731 create_tiled_image_from_buffer(cmd_buffer,
2732 v3dv_image_from_handle(buffer_image),
2733 buffer, &buffer_image_copy);
2734 if (!handled) {
2735 /* This is unexpected, we should have set up the upload so it can
2736 * be handled by a TFU or TLB copy.
2737 */
2738 unreachable("Unable to copy buffer to image through TLB");
2739 return false;
2740 }
2741
2742 /* Blit-copy the requested image extent from the buffer image to the
2743 * destination image.
2744 *
2745 * Since we are copying, the blit must use the same format on the
2746 * destination and source images to avoid format conversions. The
2747 * only exception is copying stencil, which we upload to a R8UI source
2748 * image, but that we need to blit to a S8D24 destination (the only
2749 * stencil format we support).
2750 */
2751 const VkImageBlit blit_region = {
2752 .srcSubresource = {
2753 .aspectMask = aspect,
2754 .mipLevel = 0,
2755 .baseArrayLayer = 0,
2756 .layerCount = 1,
2757 },
2758 .srcOffsets = {
2759 { 0, 0, 0 },
2760 { region->imageExtent.width, region->imageExtent.height, 1 },
2761 },
2762 .dstSubresource = {
2763 .aspectMask = aspect,
2764 .mipLevel = region->imageSubresource.mipLevel,
2765 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
2766 .layerCount = 1,
2767 },
2768 .dstOffsets = {
2769 {
2770 DIV_ROUND_UP(region->imageOffset.x, block_width),
2771 DIV_ROUND_UP(region->imageOffset.y, block_height),
2772 region->imageOffset.z + i,
2773 },
2774 {
2775 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
2776 block_width),
2777 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
2778 block_height),
2779 region->imageOffset.z + i + 1,
2780 },
2781 },
2782 };
2783
2784 handled = blit_shader(cmd_buffer,
2785 image, dst_format,
2786 v3dv_image_from_handle(buffer_image), src_format,
2787 cmask, NULL,
2788 &blit_region, VK_FILTER_NEAREST, true);
2789 if (!handled) {
2790 /* This is unexpected, we should have a supported blit spec */
2791 unreachable("Unable to blit buffer to destination image");
2792 return false;
2793 }
2794 }
2795
2796 assert(handled);
2797 return true;
2798 }
2799
2800 /**
2801 * Returns true if the implementation supports the requested operation (even if
2802 * it failed to process it, for example, due to an out-of-memory error).
2803 */
2804 static bool
2805 copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
2806 struct v3dv_image *image,
2807 struct v3dv_buffer *buffer,
2808 const VkBufferImageCopy *region)
2809 {
2810 /* FIXME */
2811 if (vk_format_is_depth_or_stencil(image->vk_format))
2812 return false;
2813
2814 if (vk_format_is_compressed(image->vk_format))
2815 return false;
2816
2817 if (image->tiling == VK_IMAGE_TILING_LINEAR)
2818 return false;
2819
2820 uint32_t buffer_width, buffer_height;
2821 if (region->bufferRowLength == 0)
2822 buffer_width = region->imageExtent.width;
2823 else
2824 buffer_width = region->bufferRowLength;
2825
2826 if (region->bufferImageHeight == 0)
2827 buffer_height = region->imageExtent.height;
2828 else
2829 buffer_height = region->bufferImageHeight;
2830
2831 uint32_t buffer_stride = buffer_width * image->cpp;
2832 uint32_t buffer_layer_stride = buffer_stride * buffer_height;
2833
2834 uint32_t num_layers;
2835 if (image->type != VK_IMAGE_TYPE_3D)
2836 num_layers = region->imageSubresource.layerCount;
2837 else
2838 num_layers = region->imageExtent.depth;
2839 assert(num_layers > 0);
2840
2841 struct v3dv_job *job =
2842 v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
2843 V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
2844 cmd_buffer, -1);
2845 if (!job)
2846 return true;
2847
2848 job->cpu.copy_buffer_to_image.image = image;
2849 job->cpu.copy_buffer_to_image.buffer = buffer;
2850 job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
2851 job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
2852 job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
2853 job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
2854 job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
2855 job->cpu.copy_buffer_to_image.mip_level =
2856 region->imageSubresource.mipLevel;
2857 job->cpu.copy_buffer_to_image.base_layer =
2858 region->imageSubresource.baseArrayLayer;
2859 job->cpu.copy_buffer_to_image.layer_count = num_layers;
2860
2861 list_addtail(&job->list_link, &cmd_buffer->jobs);
2862
2863 return true;
2864 }
2865
2866 void
2867 v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
2868 VkBuffer srcBuffer,
2869 VkImage dstImage,
2870 VkImageLayout dstImageLayout,
2871 uint32_t regionCount,
2872 const VkBufferImageCopy *pRegions)
2873 {
2874 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
2875 V3DV_FROM_HANDLE(v3dv_buffer, buffer, srcBuffer);
2876 V3DV_FROM_HANDLE(v3dv_image, image, dstImage);
2877
2878 assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
2879
2880 for (uint32_t i = 0; i < regionCount; i++) {
2881 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &pRegions[i]))
2882 continue;
2883 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &pRegions[i]))
2884 continue;
2885 if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[i]))
2886 continue;
2887 if (copy_buffer_to_image_blit(cmd_buffer, image, buffer, &pRegions[i]))
2888 continue;
2889 unreachable("Unsupported buffer to image copy.");
2890 }
2891 }
2892
2893 static void
2894 emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
2895 struct v3dv_image *dst,
2896 uint32_t dst_mip_level,
2897 uint32_t dst_layer,
2898 struct v3dv_image *src,
2899 uint32_t src_mip_level,
2900 uint32_t src_layer,
2901 uint32_t width,
2902 uint32_t height)
2903 {
2904 const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
2905 const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
2906
2907 assert(dst->mem && dst->mem->bo);
2908 const struct v3dv_bo *dst_bo = dst->mem->bo;
2909
2910 assert(src->mem && src->mem->bo);
2911 const struct v3dv_bo *src_bo = src->mem->bo;
2912
2913 struct drm_v3d_submit_tfu tfu = {
2914 .ios = (height << 16) | width,
2915 .bo_handles = {
2916 dst_bo->handle,
2917 src != dst ? src_bo->handle : 0
2918 },
2919 };
2920
2921 const uint32_t src_offset =
2922 src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
2923 tfu.iia |= src_offset;
2924
2925 uint32_t icfg;
2926 if (src_slice->tiling == VC5_TILING_RASTER) {
2927 icfg = V3D_TFU_ICFG_FORMAT_RASTER;
2928 } else {
2929 icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
2930 (src_slice->tiling - VC5_TILING_LINEARTILE);
2931 }
2932 tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
2933
2934 const uint32_t dst_offset =
2935 dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
2936 tfu.ioa |= dst_offset;
2937
2938 tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
2939 (dst_slice->tiling - VC5_TILING_LINEARTILE)) <<
2940 V3D_TFU_IOA_FORMAT_SHIFT;
2941 tfu.icfg |= dst->format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
2942
2943 switch (src_slice->tiling) {
2944 case VC5_TILING_UIF_NO_XOR:
2945 case VC5_TILING_UIF_XOR:
2946 tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
2947 break;
2948 case VC5_TILING_RASTER:
2949 tfu.iis |= src_slice->stride / src->cpp;
2950 break;
2951 default:
2952 break;
2953 }
2954
2955 /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
2956 * OPAD field for the destination (how many extra UIF blocks beyond
2957 * those necessary to cover the height).
2958 */
2959 if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR ||
2960 dst_slice->tiling == VC5_TILING_UIF_XOR) {
2961 uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
2962 uint32_t implicit_padded_height = align(height, uif_block_h);
2963 uint32_t icfg =
2964 (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
2965 tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
2966 }
2967
2968 v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
2969 }
2970
2971 static void
2972 compute_blit_3d_layers(const VkOffset3D *offsets,
2973 uint32_t *min_layer, uint32_t *max_layer,
2974 bool *mirror_z);
2975
2976 /**
2977 * Returns true if the implementation supports the requested operation (even if
2978 * it failed to process it, for example, due to an out-of-memory error).
2979 */
2980 static bool
2981 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
2982 struct v3dv_image *dst,
2983 struct v3dv_image *src,
2984 const VkImageBlit *region,
2985 VkFilter filter)
2986 {
2987 /* FIXME? The v3d driver seems to ignore filtering completely! */
2988 if (filter != VK_FILTER_NEAREST)
2989 return false;
2990
2991 /* Format must match */
2992 if (src->vk_format != dst->vk_format)
2993 return false;
2994
2995 VkFormat vk_format = dst->vk_format;
2996 const struct v3dv_format *format = dst->format;
2997
2998 /* Format must be supported for texturing */
2999 if (!v3dv_tfu_supports_tex_format(&cmd_buffer->device->devinfo,
3000 format->tex_type)) {
3001 return false;
3002 }
3003
3004 /* Only color formats */
3005 if (vk_format_is_depth_or_stencil(vk_format))
3006 return false;
3007
3008 #if 0
3009 /* FIXME: Only 2D images? */
3010 if (dst->type == VK_IMAGE_TYPE_2D || src->type == VK_IMAGE_TYPE_2D)
3011 return false;
3012 #endif
3013
3014 /* Destination can't be raster format */
3015 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
3016 if (dst->slices[dst_mip_level].tiling == VC5_TILING_RASTER)
3017 return false;
3018
3019 /* Source region must start at (0,0) */
3020 if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
3021 return false;
3022
3023 /* Destination image must be complete */
3024 if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
3025 return false;
3026
3027 const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
3028 const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
3029 if (region->dstOffsets[1].x < dst_width - 1 ||
3030 region->dstOffsets[1].y < dst_height - 1) {
3031 return false;
3032 }
3033
3034 /* No scaling */
3035 if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
3036 region->srcOffsets[1].y != region->dstOffsets[1].y) {
3037 return false;
3038 }
3039
3040 if (dst->type == VK_IMAGE_TYPE_3D &&
3041 region->srcOffsets[1].z != region->dstOffsets[1].z) {
3042 return false;
3043 }
3044
3045 /* Emit a TFU job for each layer to blit */
3046 assert(region->dstSubresource.layerCount ==
3047 region->srcSubresource.layerCount);
3048
3049 uint32_t min_dst_layer;
3050 uint32_t max_dst_layer;
3051 bool dst_mirror_z = false;
3052 if (dst->type == VK_IMAGE_TYPE_3D) {
3053 compute_blit_3d_layers(region->dstOffsets,
3054 &min_dst_layer, &max_dst_layer,
3055 &dst_mirror_z);
3056
3057 /* TFU can only do exact copies, so we can't handle mirroring. This checks
3058 * mirroring in Z for 3D images; XY mirroring is already handled by
3059 * earlier checks.
3060 */
3061 if (dst_mirror_z)
3062 return false;
3063 }
3064
3065 uint32_t min_src_layer;
3066 uint32_t max_src_layer;
3067 bool src_mirror_z = false;
3068 if (src->type == VK_IMAGE_TYPE_3D) {
3069 compute_blit_3d_layers(region->srcOffsets,
3070 &min_src_layer, &max_src_layer,
3071 &src_mirror_z);
3072
3073 if (src_mirror_z)
3074 return false;
3075
3076 if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
3077 return false;
3078 }
3079
3080 const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ?
3081 region->dstSubresource.layerCount :
3082 max_dst_layer - min_dst_layer;
3083 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
3084
3085 for (uint32_t i = 0; i < layer_count; i++) {
3086 emit_tfu_job(cmd_buffer,
3087 dst, dst_mip_level, region->dstSubresource.baseArrayLayer + i,
3088 src, src_mip_level, region->srcSubresource.baseArrayLayer + i,
3089 dst_width, dst_height);
3090 }
3091
3092 return true;
3093 }
3094
3095 static bool
3096 format_needs_software_int_clamp(VkFormat format)
3097 {
3098 switch (format) {
3099 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3100 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
3101 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3102 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
3103 return true;
3104 default:
3105 return false;
3106 };
3107 }
3108
3109 static void
3110 get_blit_pipeline_cache_key(VkFormat dst_format,
3111 VkFormat src_format,
3112 VkColorComponentFlags cmask,
3113 VkSampleCountFlagBits dst_samples,
3114 VkSampleCountFlagBits src_samples,
3115 uint8_t *key)
3116 {
3117 memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
3118
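   /* The cache key is 4 32-bit words: the destination format, the source
    * format (only when a software integer clamp is required, see below), the
    * color write mask, and the packed sample counts
    * (dst_samples << 8) | src_samples.
    */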
3119 uint32_t *p = (uint32_t *) key;
3120
3121 *p = dst_format;
3122 p++;
3123
3124 /* Generally, when blitting from a larger format to a smaller format
3125 * the hardware takes care of clamping the source to the RT range.
3126 * Specifically, for integer formats, this is done by using
3127 * V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
3128 * clamps to the bit-size of the render type, and some formats, such as
3129     * rgb10a2_uint, have a 16-bit render type, so it won't do what we need
3130     * and we have to clamp in software. In these cases, we amend the blit
3131 * shader with clamp code that depends on both the src and dst formats, so
3132 * we need the src format to be part of the key.
3133 */
3134 *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
3135 p++;
3136
3137 *p = cmask;
3138 p++;
3139
3140 *p = (dst_samples << 8) | src_samples;
3141 p++;
3142
3143 assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
3144 }
3145
3146 static bool
3147 create_blit_pipeline_layout(struct v3dv_device *device,
3148 VkDescriptorSetLayout *descriptor_set_layout,
3149 VkPipelineLayout *pipeline_layout)
3150 {
3151 VkResult result;
3152
3153 if (*descriptor_set_layout == 0) {
3154 VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
3155 .binding = 0,
3156 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
3157 .descriptorCount = 1,
3158 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
3159 };
3160 VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
3161 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
3162 .bindingCount = 1,
3163 .pBindings = &descriptor_set_layout_binding,
3164 };
3165 result =
3166 v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
3167 &descriptor_set_layout_info,
3168 &device->alloc,
3169 descriptor_set_layout);
3170 if (result != VK_SUCCESS)
3171 return false;
3172 }
3173
3174 assert(*pipeline_layout == 0);
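   /* The 20-byte vertex push constant range holds the 5 floats pushed by
    * blit_shader(): the source box (x0, y0, x1, y1) plus the Z coordinate
    * used when sampling from 3D sources.
    */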
3175 VkPipelineLayoutCreateInfo pipeline_layout_info = {
3176 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
3177 .setLayoutCount = 1,
3178 .pSetLayouts = descriptor_set_layout,
3179 .pushConstantRangeCount = 1,
3180 .pPushConstantRanges =
3181 &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
3182 };
3183
3184 result =
3185 v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
3186 &pipeline_layout_info,
3187 &device->alloc,
3188 pipeline_layout);
3189 return result == VK_SUCCESS;
3190 }
3191
3192 static bool
3193 create_blit_render_pass(struct v3dv_device *device,
3194 VkFormat dst_format,
3195 VkFormat src_format,
3196 VkRenderPass *pass_load,
3197 VkRenderPass *pass_no_load)
3198 {
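   /* We create two compatible render passes that only differ in the
    * attachment load operation: the "no load" variant is used when the blit
    * region is tile-aligned and writes all channels, in which case we can
    * skip loading the destination contents into the TLB (see blit_shader).
    */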
3199 const bool is_color_blit = vk_format_is_color(dst_format);
3200
3201 /* Attachment load operation is specified below */
3202 VkAttachmentDescription att = {
3203 .format = dst_format,
3204 .samples = VK_SAMPLE_COUNT_1_BIT,
3205 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3206 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3207 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3208 };
3209
3210 VkAttachmentReference att_ref = {
3211 .attachment = 0,
3212 .layout = VK_IMAGE_LAYOUT_GENERAL,
3213 };
3214
3215 VkSubpassDescription subpass = {
3216 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3217 .inputAttachmentCount = 0,
3218 .colorAttachmentCount = is_color_blit ? 1 : 0,
3219 .pColorAttachments = is_color_blit ? &att_ref : NULL,
3220 .pResolveAttachments = NULL,
3221 .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3222 .preserveAttachmentCount = 0,
3223 .pPreserveAttachments = NULL,
3224 };
3225
3226 VkRenderPassCreateInfo info = {
3227 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
3228 .attachmentCount = 1,
3229 .pAttachments = &att,
3230 .subpassCount = 1,
3231 .pSubpasses = &subpass,
3232 .dependencyCount = 0,
3233 .pDependencies = NULL,
3234 };
3235
3236 VkResult result;
3237 att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3238 result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
3239 &info, &device->alloc, pass_load);
3240 if (result != VK_SUCCESS)
3241 return false;
3242
3243 att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3244 result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
3245 &info, &device->alloc, pass_no_load);
3246 return result == VK_SUCCESS;
3247 }
3248
3249 static nir_ssa_def *
3250 gen_rect_vertices(nir_builder *b)
3251 {
3252 nir_intrinsic_instr *vertex_id =
3253 nir_intrinsic_instr_create(b->shader,
3254 nir_intrinsic_load_vertex_id);
3255 nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
3256 nir_builder_instr_insert(b, &vertex_id->instr);
3257
3258
3259 /* vertex 0: -1.0, -1.0
3260 * vertex 1: -1.0, 1.0
3261 * vertex 2: 1.0, -1.0
3262 * vertex 3: 1.0, 1.0
3263 *
3264 * so:
3265 *
3266 * channel 0 is vertex_id < 2 ? -1.0 : 1.0
3267 * channel 1 is vertex id & 1 ? 1.0 : -1.0
3268 */
3269
3270 nir_ssa_def *one = nir_imm_int(b, 1);
3271 nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
3272 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);
3273
3274 nir_ssa_def *comp[4];
3275 comp[0] = nir_bcsel(b, c0cmp,
3276 nir_imm_float(b, -1.0f),
3277 nir_imm_float(b, 1.0f));
3278
3279 comp[1] = nir_bcsel(b, c1cmp,
3280 nir_imm_float(b, 1.0f),
3281 nir_imm_float(b, -1.0f));
3282 comp[2] = nir_imm_float(b, 0.0f);
3283 comp[3] = nir_imm_float(b, 1.0f);
3284 return nir_vec(b, comp, 4);
3285 }
3286
3287 static nir_ssa_def *
3288 gen_tex_coords(nir_builder *b)
3289 {
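   /* Reads the 20 bytes of vertex push constants set up by blit_shader():
    * bytes 0-15 are the source box (x0, y0, x1, y1) and bytes 16-19 are the
    * Z coordinate used for 3D sources.
    */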
3290 nir_intrinsic_instr *tex_box =
3291 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
3292 tex_box->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
3293 nir_intrinsic_set_base(tex_box, 0);
3294 nir_intrinsic_set_range(tex_box, 16);
3295 tex_box->num_components = 4;
3296 nir_ssa_dest_init(&tex_box->instr, &tex_box->dest, 4, 32, "tex_box");
3297 nir_builder_instr_insert(b, &tex_box->instr);
3298
3299 nir_intrinsic_instr *tex_z =
3300 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
3301 tex_z->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
3302 nir_intrinsic_set_base(tex_z, 16);
3303 nir_intrinsic_set_range(tex_z, 4);
3304 tex_z->num_components = 1;
3305 nir_ssa_dest_init(&tex_z->instr, &tex_z->dest, 1, 32, "tex_z");
3306 nir_builder_instr_insert(b, &tex_z->instr);
3307
3308 nir_intrinsic_instr *vertex_id =
3309 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_vertex_id);
3310 nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
3311 nir_builder_instr_insert(b, &vertex_id->instr);
3312
3313 /* vertex 0: src0_x, src0_y
3314 * vertex 1: src0_x, src1_y
3315 * vertex 2: src1_x, src0_y
3316 * vertex 3: src1_x, src1_y
3317 *
3318 * So:
3319 *
3320 * channel 0 is vertex_id < 2 ? src0_x : src1_x
3321 * channel 1 is vertex id & 1 ? src1_y : src0_y
3322 */
3323
3324 nir_ssa_def *one = nir_imm_int(b, 1);
3325 nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
3326 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);
3327
3328 nir_ssa_def *comp[4];
3329 comp[0] = nir_bcsel(b, c0cmp,
3330 nir_channel(b, &tex_box->dest.ssa, 0),
3331 nir_channel(b, &tex_box->dest.ssa, 2));
3332
3333 comp[1] = nir_bcsel(b, c1cmp,
3334 nir_channel(b, &tex_box->dest.ssa, 3),
3335 nir_channel(b, &tex_box->dest.ssa, 1));
3336 comp[2] = &tex_z->dest.ssa;
3337 comp[3] = nir_imm_float(b, 1.0f);
3338 return nir_vec(b, comp, 4);
3339 }
3340
3341 static nir_ssa_def *
3342 build_nir_tex_op_read(struct nir_builder *b,
3343 nir_ssa_def *tex_pos,
3344 enum glsl_base_type tex_type,
3345 enum glsl_sampler_dim dim)
3346 {
3347 assert(dim != GLSL_SAMPLER_DIM_MS);
3348
3349 const struct glsl_type *sampler_type =
3350 glsl_sampler_type(dim, false, false, tex_type);
3351 nir_variable *sampler =
3352 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3353 sampler->data.descriptor_set = 0;
3354 sampler->data.binding = 0;
3355
3356 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3357 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3358 tex->sampler_dim = dim;
3359 tex->op = nir_texop_tex;
3360 tex->src[0].src_type = nir_tex_src_coord;
3361 tex->src[0].src = nir_src_for_ssa(tex_pos);
3362 tex->src[1].src_type = nir_tex_src_texture_deref;
3363 tex->src[1].src = nir_src_for_ssa(tex_deref);
3364 tex->src[2].src_type = nir_tex_src_sampler_deref;
3365 tex->src[2].src = nir_src_for_ssa(tex_deref);
3366 tex->dest_type =
3367 nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(tex_type));
3368 tex->is_array = glsl_sampler_type_is_array(sampler_type);
3369 tex->coord_components = tex_pos->num_components;
3370
3371 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
3372 nir_builder_instr_insert(b, &tex->instr);
3373 return &tex->dest.ssa;
3374 }
3375
3376 static nir_ssa_def *
3377 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3378 nir_variable *sampler,
3379 nir_ssa_def *tex_deref,
3380 enum glsl_base_type tex_type,
3381 nir_ssa_def *tex_pos,
3382 nir_ssa_def *sample_idx)
3383 {
3384 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
3385 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3386 tex->op = nir_texop_txf_ms;
3387 tex->src[0].src_type = nir_tex_src_coord;
3388 tex->src[0].src = nir_src_for_ssa(tex_pos);
3389 tex->src[1].src_type = nir_tex_src_texture_deref;
3390 tex->src[1].src = nir_src_for_ssa(tex_deref);
3391 tex->src[2].src_type = nir_tex_src_sampler_deref;
3392 tex->src[2].src = nir_src_for_ssa(tex_deref);
3393 tex->src[3].src_type = nir_tex_src_ms_index;
3394 tex->src[3].src = nir_src_for_ssa(sample_idx);
3395 tex->dest_type =
3396 nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(tex_type));
3397 tex->is_array = false;
3398 tex->coord_components = tex_pos->num_components;
3399
3400 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
3401 nir_builder_instr_insert(b, &tex->instr);
3402 return &tex->dest.ssa;
3403 }
3404
3405 /* Fetches all samples at the given position and averages them */
3406 static nir_ssa_def *
3407 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3408 nir_ssa_def *tex_pos,
3409 enum glsl_base_type tex_type,
3410 VkSampleCountFlagBits src_samples)
3411 {
3412 assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3413 const struct glsl_type *sampler_type =
3414 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3415 nir_variable *sampler =
3416 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3417 sampler->data.descriptor_set = 0;
3418 sampler->data.binding = 0;
3419
3420 const bool is_int = glsl_base_type_is_integer(tex_type);
3421
3422 nir_ssa_def *tmp;
3423 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3424 for (uint32_t i = 0; i < src_samples; i++) {
3425 nir_ssa_def *s =
3426 build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3427 tex_type, tex_pos,
3428 nir_imm_int(b, i));
3429
3430 /* For integer formats, the multisample resolve operation is expected to
3431        * return one of the samples; we just return the first one.
3432 */
3433 if (is_int)
3434 return s;
3435
3436 tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3437 }
3438
3439 assert(!is_int);
3440 return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
3441 }
3442
3443 /* Fetches the current sample (gl_SampleID) at the given position */
3444 static nir_ssa_def *
3445 build_nir_tex_op_ms_read(struct nir_builder *b,
3446 nir_ssa_def *tex_pos,
3447 enum glsl_base_type tex_type)
3448 {
3449 const struct glsl_type *sampler_type =
3450 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3451 nir_variable *sampler =
3452 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3453 sampler->data.descriptor_set = 0;
3454 sampler->data.binding = 0;
3455
3456 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3457
3458 return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3459 tex_type, tex_pos,
3460 nir_load_sample_id(b));
3461 }
3462
3463 static nir_ssa_def *
3464 build_nir_tex_op(struct nir_builder *b,
3465 struct v3dv_device *device,
3466 nir_ssa_def *tex_pos,
3467 enum glsl_base_type tex_type,
3468 VkSampleCountFlagBits dst_samples,
3469 VkSampleCountFlagBits src_samples,
3470 enum glsl_sampler_dim dim)
3471 {
3472 switch (dim) {
3473 case GLSL_SAMPLER_DIM_MS:
3474 assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3475 /* For multisampled texture sources we need to use fetching instead of
3476 * normalized texture coordinates. We already configured our blit
3477 * coordinates to be in texel units, but here we still need to convert
3478 * them from floating point to integer.
3479 */
3480 tex_pos = nir_f2i32(b, tex_pos);
3481
3482 if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3483 return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3484 else
3485 return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3486 default:
3487 assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3488 return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3489 }
3490 }
3491
3492 static nir_shader *
3493 get_blit_vs()
3494 {
3495 nir_builder b;
3496 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3497 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
3498 b.shader->info.name = ralloc_strdup(b.shader, "meta blit vs");
3499
3500 const struct glsl_type *vec4 = glsl_vec4_type();
3501
3502 nir_variable *vs_out_pos =
3503 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3504 vs_out_pos->data.location = VARYING_SLOT_POS;
3505
3506 nir_variable *vs_out_tex_coord =
3507 nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3508 vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3509 vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3510
3511 nir_ssa_def *pos = gen_rect_vertices(&b);
3512 nir_store_var(&b, vs_out_pos, pos, 0xf);
3513
3514 nir_ssa_def *tex_coord = gen_tex_coords(&b);
3515 nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3516
3517 return b.shader;
3518 }
3519
3520 static uint32_t
3521 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3522 {
3523 switch (sampler_dim) {
3524 case GLSL_SAMPLER_DIM_1D: return 0x1;
3525 case GLSL_SAMPLER_DIM_2D: return 0x3;
3526 case GLSL_SAMPLER_DIM_MS: return 0x3;
3527 case GLSL_SAMPLER_DIM_3D: return 0x7;
3528 default:
3529 unreachable("invalid sampler dim");
3530 };
3531 }
3532
3533 static nir_shader *
3534 get_color_blit_fs(struct v3dv_device *device,
3535 VkFormat dst_format,
3536 VkFormat src_format,
3537 VkSampleCountFlagBits dst_samples,
3538 VkSampleCountFlagBits src_samples,
3539 enum glsl_sampler_dim sampler_dim)
3540 {
3541 nir_builder b;
3542 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3543 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
3544 b.shader->info.name = ralloc_strdup(b.shader, "meta blit fs");
3545
3546 const struct glsl_type *vec4 = glsl_vec4_type();
3547
3548 nir_variable *fs_in_tex_coord =
3549 nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3550 fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3551
3552 const struct glsl_type *fs_out_type =
3553 vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3554 vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3555 glsl_vec4_type();
3556
3557 enum glsl_base_type src_base_type =
3558 vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3559 vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3560 GLSL_TYPE_FLOAT;
3561
3562 nir_variable *fs_out_color =
3563 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3564 fs_out_color->data.location = FRAG_RESULT_DATA0;
3565
3566 nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3567 const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3568 tex_coord = nir_channels(&b, tex_coord, channel_mask);
3569
3570 nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3571 dst_samples, src_samples, sampler_dim);
3572
3573 /* For integer textures, if the bit-size of the destination is too small to
3574     * hold the source value, Vulkan (CTS) expects the implementation to clamp to the
3575 * maximum value the destination can hold. The hardware can clamp to the
3576 * render target type, which usually matches the component bit-size, but
3577 * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3578 * render target type, so in these cases we need to clamp manually.
3579 */
3580 if (format_needs_software_int_clamp(dst_format)) {
3581 assert(vk_format_is_int(dst_format));
3582 enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3583 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3584
3585 nir_ssa_def *c[4];
3586 for (uint32_t i = 0; i < 4; i++) {
3587 c[i] = nir_channel(&b, color, i);
3588
3589 const uint32_t src_bit_size =
3590 util_format_get_component_bits(src_pformat,
3591 UTIL_FORMAT_COLORSPACE_RGB,
3592 i);
3593 const uint32_t dst_bit_size =
3594 util_format_get_component_bits(dst_pformat,
3595 UTIL_FORMAT_COLORSPACE_RGB,
3596 i);
3597
3598 if (dst_bit_size >= src_bit_size)
3599 continue;
3600
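         /* Example: when blitting R16_UINT into A2B10G10R10_UINT_PACK32 the
          * RGB channels have dst_bit_size = 10, so unsigned values clamp to
          * (1 << 10) - 1 = 1023 and the signed variant clamps to [-512, 511].
          */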
3601 if (util_format_is_pure_uint(dst_pformat)) {
3602 nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3603 c[i] = nir_umin(&b, c[i], max);
3604 } else {
3605 nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3606 nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3607 c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3608 }
3609 }
3610
3611 color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3612 }
3613
3614 nir_store_var(&b, fs_out_color, color, 0xf);
3615
3616 return b.shader;
3617 }
3618
3619 static bool
3620 create_pipeline(struct v3dv_device *device,
3621 struct v3dv_render_pass *pass,
3622 struct nir_shader *vs_nir,
3623 struct nir_shader *fs_nir,
3624 const VkPipelineVertexInputStateCreateInfo *vi_state,
3625 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3626 const VkPipelineColorBlendStateCreateInfo *cb_state,
3627 const VkPipelineMultisampleStateCreateInfo *ms_state,
3628 const VkPipelineLayout layout,
3629 VkPipeline *pipeline)
3630 {
3631 struct v3dv_shader_module vs_m;
3632 struct v3dv_shader_module fs_m;
3633
3634 v3dv_shader_module_internal_init(&vs_m, vs_nir);
3635 v3dv_shader_module_internal_init(&fs_m, fs_nir);
3636
3637 VkPipelineShaderStageCreateInfo stages[2] = {
3638 {
3639 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3640 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3641 .module = v3dv_shader_module_to_handle(&vs_m),
3642 .pName = "main",
3643 },
3644 {
3645 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3646 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3647 .module = v3dv_shader_module_to_handle(&fs_m),
3648 .pName = "main",
3649 },
3650 };
3651
3652 VkGraphicsPipelineCreateInfo info = {
3653 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3654
3655 .stageCount = 2,
3656 .pStages = stages,
3657
3658 .pVertexInputState = vi_state,
3659
3660 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3661 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3662 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3663 .primitiveRestartEnable = false,
3664 },
3665
3666 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3667 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3668 .viewportCount = 1,
3669 .scissorCount = 1,
3670 },
3671
3672 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
3673 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
3674 .rasterizerDiscardEnable = false,
3675 .polygonMode = VK_POLYGON_MODE_FILL,
3676 .cullMode = VK_CULL_MODE_NONE,
3677 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
3678 .depthBiasEnable = false,
3679 },
3680
3681 .pMultisampleState = ms_state,
3682
3683 .pDepthStencilState = ds_state,
3684
3685 .pColorBlendState = cb_state,
3686
3687       /* The meta blit pipeline declares all relevant state as dynamic.
3688        * As a consequence, vkCmdBindPipeline writes no dynamic state
3689        * to the cmd buffer. Therefore, at the end of the meta blit,
3690        * we need only restore the dynamic state that was set via vkCmdSet*.
3691 */
3692 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
3693 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
3694 .dynamicStateCount = 6,
3695 .pDynamicStates = (VkDynamicState[]) {
3696 VK_DYNAMIC_STATE_VIEWPORT,
3697 VK_DYNAMIC_STATE_SCISSOR,
3698 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
3699 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
3700 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
3701 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
3702 VK_DYNAMIC_STATE_DEPTH_BIAS,
3703 VK_DYNAMIC_STATE_LINE_WIDTH,
3704 },
3705 },
3706
3707 .flags = 0,
3708 .layout = layout,
3709 .renderPass = v3dv_render_pass_to_handle(pass),
3710 .subpass = 0,
3711 };
3712
3713 VkResult result =
3714 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
3715 VK_NULL_HANDLE,
3716 1, &info,
3717 &device->alloc,
3718 pipeline);
3719
3720 ralloc_free(vs_nir);
3721 ralloc_free(fs_nir);
3722
3723 return result == VK_SUCCESS;
3724 }
3725
3726 static enum glsl_sampler_dim
3727 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
3728 {
3729    /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
3730 *
3731 * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
3732 * VK_IMAGE_TYPE_2D, ..."
3733 */
3734 assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
3735
3736 switch (type) {
3737 case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
3738 case VK_IMAGE_TYPE_2D:
3739 return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
3740 GLSL_SAMPLER_DIM_MS;
3741 case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
3742 default:
3743 unreachable("Invalid image type");
3744 }
3745 }
3746
3747 static bool
3748 create_blit_pipeline(struct v3dv_device *device,
3749 VkFormat dst_format,
3750 VkFormat src_format,
3751 VkColorComponentFlags cmask,
3752 VkImageType src_type,
3753 VkSampleCountFlagBits dst_samples,
3754 VkSampleCountFlagBits src_samples,
3755 VkRenderPass _pass,
3756 VkPipelineLayout pipeline_layout,
3757 VkPipeline *pipeline)
3758 {
3759 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
3760
3761 /* We always rewrite depth/stencil blits to compatible color blits */
3762 assert(vk_format_is_color(dst_format));
3763 assert(vk_format_is_color(src_format));
3764
3765 const enum glsl_sampler_dim sampler_dim =
3766 get_sampler_dim(src_type, src_samples);
3767
3768 nir_shader *vs_nir = get_blit_vs();
3769 nir_shader *fs_nir =
3770 get_color_blit_fs(device, dst_format, src_format,
3771 dst_samples, src_samples, sampler_dim);
3772
3773 const VkPipelineVertexInputStateCreateInfo vi_state = {
3774 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
3775 .vertexBindingDescriptionCount = 0,
3776 .vertexAttributeDescriptionCount = 0,
3777 };
3778
3779 VkPipelineDepthStencilStateCreateInfo ds_state = {
3780 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
3781 };
3782
3783 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
3784 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
3785 .blendEnable = false,
3786 .colorWriteMask = cmask,
3787 };
3788
3789 const VkPipelineColorBlendStateCreateInfo cb_state = {
3790 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
3791 .logicOpEnable = false,
3792 .attachmentCount = 1,
3793 .pAttachments = blend_att_state
3794 };
3795
3796 const VkPipelineMultisampleStateCreateInfo ms_state = {
3797 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
3798 .rasterizationSamples = dst_samples,
3799 .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
3800 .pSampleMask = NULL,
3801 .alphaToCoverageEnable = false,
3802 .alphaToOneEnable = false,
3803 };
3804
3805 return create_pipeline(device,
3806 pass,
3807 vs_nir, fs_nir,
3808 &vi_state,
3809 &ds_state,
3810 &cb_state,
3811 &ms_state,
3812 pipeline_layout,
3813 pipeline);
3814 }
3815
3816 /**
3817 * Return a pipeline suitable for blitting the requested aspect given the
3818 * destination and source formats.
3819 */
3820 static bool
3821 get_blit_pipeline(struct v3dv_device *device,
3822 VkFormat dst_format,
3823 VkFormat src_format,
3824 VkColorComponentFlags cmask,
3825 VkImageType src_type,
3826 VkSampleCountFlagBits dst_samples,
3827 VkSampleCountFlagBits src_samples,
3828 struct v3dv_meta_blit_pipeline **pipeline)
3829 {
3830 bool ok = true;
3831
3832 mtx_lock(&device->meta.mtx);
3833 if (!device->meta.blit.playout) {
3834 ok = create_blit_pipeline_layout(device,
3835 &device->meta.blit.dslayout,
3836 &device->meta.blit.playout);
3837 }
3838 mtx_unlock(&device->meta.mtx);
3839 if (!ok)
3840 return false;
3841
3842 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
3843 get_blit_pipeline_cache_key(dst_format, src_format, cmask,
3844 dst_samples, src_samples, key);
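   /* Pipelines are cached per source image type (1D/2D/3D) because the
    * sampler dimension is baked into the blit fragment shader.
    */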
3845 mtx_lock(&device->meta.mtx);
3846 struct hash_entry *entry =
3847 _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
3848 if (entry) {
3849 mtx_unlock(&device->meta.mtx);
3850 *pipeline = entry->data;
3851 return true;
3852 }
3853
3854 *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
3855 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3856
3857 if (*pipeline == NULL)
3858 goto fail;
3859
3860 ok = create_blit_render_pass(device, dst_format, src_format,
3861 &(*pipeline)->pass,
3862 &(*pipeline)->pass_no_load);
3863 if (!ok)
3864 goto fail;
3865
3866    /* Create the pipeline using one of the render passes; they are both
3867 * compatible, so we don't care which one we use here.
3868 */
3869 ok = create_blit_pipeline(device,
3870 dst_format,
3871 src_format,
3872 cmask,
3873 src_type,
3874 dst_samples,
3875 src_samples,
3876 (*pipeline)->pass,
3877 device->meta.blit.playout,
3878 &(*pipeline)->pipeline);
3879 if (!ok)
3880 goto fail;
3881
3882 memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
3883 _mesa_hash_table_insert(device->meta.blit.cache[src_type],
3884 &(*pipeline)->key, *pipeline);
3885
3886 mtx_unlock(&device->meta.mtx);
3887 return true;
3888
3889 fail:
3890 mtx_unlock(&device->meta.mtx);
3891
3892 VkDevice _device = v3dv_device_to_handle(device);
3893 if (*pipeline) {
3894 if ((*pipeline)->pass)
3895 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc);
3896 if ((*pipeline)->pass_no_load)
3897 v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->alloc);
3898 if ((*pipeline)->pipeline)
3899 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
3900 vk_free(&device->alloc, *pipeline);
3901 *pipeline = NULL;
3902 }
3903
3904 return false;
3905 }
3906
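/* Computes the origin, size and mirroring of the X/Y box of a blit region,
 * clamping the box to the image dimensions. For example, X offsets going
 * from 10 to 2 yield x = 2, w = 8 and *mirror_x = true (assuming the image
 * is at least 10 pixels wide).
 */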
3907 static void
3908 compute_blit_box(const VkOffset3D *offsets,
3909 uint32_t image_w, uint32_t image_h,
3910 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
3911 bool *mirror_x, bool *mirror_y)
3912 {
3913 if (offsets[1].x >= offsets[0].x) {
3914 *mirror_x = false;
3915 *x = MIN2(offsets[0].x, image_w - 1);
3916 *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
3917 } else {
3918 *mirror_x = true;
3919 *x = MIN2(offsets[1].x, image_w - 1);
3920 *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
3921 }
3922 if (offsets[1].y >= offsets[0].y) {
3923 *mirror_y = false;
3924 *y = MIN2(offsets[0].y, image_h - 1);
3925 *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
3926 } else {
3927 *mirror_y = true;
3928 *y = MIN2(offsets[1].y, image_h - 1);
3929 *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
3930 }
3931 }
3932
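/* For 3D images the blit Z offsets select a slice range: returns the ordered
 * [min_layer, max_layer) interval and whether the blit is mirrored in Z.
 */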
3933 static void
3934 compute_blit_3d_layers(const VkOffset3D *offsets,
3935 uint32_t *min_layer, uint32_t *max_layer,
3936 bool *mirror_z)
3937 {
3938 if (offsets[1].z >= offsets[0].z) {
3939 *mirror_z = false;
3940 *min_layer = offsets[0].z;
3941 *max_layer = offsets[1].z;
3942 } else {
3943 *mirror_z = true;
3944 *min_layer = offsets[1].z;
3945 *max_layer = offsets[0].z;
3946 }
3947 }
3948
3949 static VkResult
3950 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
3951 {
3952    /* If this is not the first pool we create for this command buffer,
3953 * size it based on the size of the currently exhausted pool.
3954 */
3955 uint32_t descriptor_count = 64;
3956 if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
3957 struct v3dv_descriptor_pool *exhausted_pool =
3958 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
3959 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
3960 }
3961
3962 /* Create the descriptor pool */
3963 cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
3964 VkDescriptorPoolSize pool_size = {
3965 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
3966 .descriptorCount = descriptor_count,
3967 };
3968 VkDescriptorPoolCreateInfo info = {
3969 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
3970 .maxSets = descriptor_count,
3971 .poolSizeCount = 1,
3972 .pPoolSizes = &pool_size,
3973 .flags = 0,
3974 };
3975 VkResult result =
3976 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
3977 &info,
3978 &cmd_buffer->device->alloc,
3979 &cmd_buffer->meta.blit.dspool);
3980
3981 if (result == VK_SUCCESS) {
3982 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
3983 v3dv_cmd_buffer_add_private_obj(
3984 cmd_buffer, (uintptr_t)cmd_buffer->meta.blit.dspool,
3985 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
3986 }
3987
3988 return result;
3989 }
3990
3991 static VkResult
3992 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
3993 VkDescriptorSet *set)
3994 {
3995 /* Make sure we have a descriptor pool */
3996 VkResult result;
3997 if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
3998 result = create_blit_descriptor_pool(cmd_buffer);
3999 if (result != VK_SUCCESS)
4000 return result;
4001 }
4002 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
4003
4004 /* Allocate descriptor set */
4005 struct v3dv_device *device = cmd_buffer->device;
4006 VkDevice _device = v3dv_device_to_handle(device);
4007 VkDescriptorSetAllocateInfo info = {
4008 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
4009 .descriptorPool = cmd_buffer->meta.blit.dspool,
4010 .descriptorSetCount = 1,
4011 .pSetLayouts = &device->meta.blit.dslayout,
4012 };
4013 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4014
4015 /* If we ran out of pool space, grow the pool and try again */
4016 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
4017 result = create_blit_descriptor_pool(cmd_buffer);
4018 if (result == VK_SUCCESS) {
4019 info.descriptorPool = cmd_buffer->meta.blit.dspool;
4020 result = v3dv_AllocateDescriptorSets(_device, &info, set);
4021 }
4022 }
4023
4024 return result;
4025 }
4026
4027 /**
4028 * Returns true if the implementation supports the requested operation (even if
4029 * it failed to process it, for example, due to an out-of-memory error).
4030 *
4031 * The caller can specify the channels on the destination to be written via the
4032 * cmask parameter (which can be 0 to default to all channels), as well as a
4033 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
4034 * to use the default identity swizzle).
4035 */
4036 static bool
4037 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
4038 struct v3dv_image *dst,
4039 VkFormat dst_format,
4040 struct v3dv_image *src,
4041 VkFormat src_format,
4042 VkColorComponentFlags cmask,
4043 VkComponentMapping *cswizzle,
4044 const VkImageBlit *_region,
4045 VkFilter filter,
4046 bool dst_is_padded_image)
4047 {
4048 bool handled = true;
4049
4050    /* We don't support rendering to linear depth/stencil; this should have
4051 * been rewritten to a compatible color blit by the caller.
4052 */
4053 assert(dst->tiling != VK_IMAGE_TILING_LINEAR ||
4054 !vk_format_is_depth_or_stencil(dst_format));
4055
4056    /* Can't sample from linear images, except for 1D */
4057 if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D)
4058 return false;
4059
4060 VkImageBlit region = *_region;
4061 /* Rewrite combined D/S blits to compatible color blits */
4062 if (vk_format_is_depth_or_stencil(dst_format)) {
4063 assert(src_format == dst_format);
4064 assert(cmask == 0);
4065 switch(dst_format) {
4066 case VK_FORMAT_D16_UNORM:
4067 dst_format = VK_FORMAT_R16_UINT;
4068 break;
4069 case VK_FORMAT_D32_SFLOAT:
4070 dst_format = VK_FORMAT_R32_UINT;
4071 break;
4072 case VK_FORMAT_X8_D24_UNORM_PACK32:
4073 case VK_FORMAT_D24_UNORM_S8_UINT:
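      /* Reinterpreted as RGBA8_UINT, the 24 depth bits land in the G/B/A
       * channels and the 8 stencil bits in R, so we mask in the channels
       * that match the requested aspects.
       */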
4074 if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
4075 cmask |= VK_COLOR_COMPONENT_G_BIT |
4076 VK_COLOR_COMPONENT_B_BIT |
4077 VK_COLOR_COMPONENT_A_BIT;
4078 }
4079 if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
4080 assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
4081 cmask |= VK_COLOR_COMPONENT_R_BIT;
4082 }
4083 dst_format = VK_FORMAT_R8G8B8A8_UINT;
4084 break;
4085 default:
4086 unreachable("Unsupported depth/stencil format");
4087 };
4088 src_format = dst_format;
4089 region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
4090 region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
4091 }
4092
4093 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
4094 VK_COLOR_COMPONENT_G_BIT |
4095 VK_COLOR_COMPONENT_B_BIT |
4096 VK_COLOR_COMPONENT_A_BIT;
4097 if (cmask == 0)
4098 cmask = full_cmask;
4099
4100 VkComponentMapping ident_swizzle = {
4101 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
4102 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
4103 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
4104 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
4105 };
4106 if (!cswizzle)
4107 cswizzle = &ident_swizzle;
4108
4109 /* When we get here from a copy between compressed / uncompressed images
4110 * we choose to specify the destination blit region based on the size
4111 * semantics of the source image of the copy (see copy_image_blit), so we
4112 * need to apply those same semantics here when we compute the size of the
4113 * destination image level.
4114 */
4115 const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
4116 const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
4117 const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
4118 const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
4119 const uint32_t dst_level_w =
4120 u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w),
4121 region.dstSubresource.mipLevel);
4122 const uint32_t dst_level_h =
4123 u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h),
4124 region.dstSubresource.mipLevel);
4125
4126 const uint32_t src_level_w =
4127 u_minify(src->extent.width, region.srcSubresource.mipLevel);
4128 const uint32_t src_level_h =
4129 u_minify(src->extent.height, region.srcSubresource.mipLevel);
4130 const uint32_t src_level_d =
4131 u_minify(src->extent.depth, region.srcSubresource.mipLevel);
4132
4133 uint32_t dst_x, dst_y, dst_w, dst_h;
4134 bool dst_mirror_x, dst_mirror_y;
4135 compute_blit_box(region.dstOffsets,
4136 dst_level_w, dst_level_h,
4137 &dst_x, &dst_y, &dst_w, &dst_h,
4138 &dst_mirror_x, &dst_mirror_y);
4139
4140 uint32_t src_x, src_y, src_w, src_h;
4141 bool src_mirror_x, src_mirror_y;
4142 compute_blit_box(region.srcOffsets,
4143 src_level_w, src_level_h,
4144 &src_x, &src_y, &src_w, &src_h,
4145 &src_mirror_x, &src_mirror_y);
4146
4147 uint32_t min_dst_layer;
4148 uint32_t max_dst_layer;
4149    bool dst_mirror_z = false;
4150 if (dst->type != VK_IMAGE_TYPE_3D) {
4151 min_dst_layer = region.dstSubresource.baseArrayLayer;
4152 max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
4153 } else {
4154 compute_blit_3d_layers(region.dstOffsets,
4155 &min_dst_layer, &max_dst_layer,
4156 &dst_mirror_z);
4157 }
4158
4159 uint32_t min_src_layer;
4160 uint32_t max_src_layer;
4161    bool src_mirror_z = false;
4162 if (src->type != VK_IMAGE_TYPE_3D) {
4163 min_src_layer = region.srcSubresource.baseArrayLayer;
4164 max_src_layer = min_src_layer + region.srcSubresource.layerCount;
4165 } else {
4166 compute_blit_3d_layers(region.srcOffsets,
4167 &min_src_layer, &max_src_layer,
4168 &src_mirror_z);
4169 }
4170
4171 uint32_t layer_count = max_dst_layer - min_dst_layer;
4172
4173 /* Translate source blit coordinates to normalized texture coordinates for
4174 * single sampled textures. For multisampled textures we require
4175 * unnormalized coordinates, since we can only do texelFetch on them.
4176 */
4177 float coords[4] = {
4178 (float)src_x,
4179 (float)src_y,
4180 (float)(src_x + src_w),
4181 (float)(src_y + src_h),
4182 };
4183
4184 if (src->samples == VK_SAMPLE_COUNT_1_BIT) {
4185 coords[0] /= (float)src_level_w;
4186 coords[1] /= (float)src_level_h;
4187 coords[2] /= (float)src_level_w;
4188 coords[3] /= (float)src_level_h;
4189 }
4190
4191 /* Handle mirroring */
4192 const bool mirror_x = dst_mirror_x != src_mirror_x;
4193 const bool mirror_y = dst_mirror_y != src_mirror_y;
4194 const bool mirror_z = dst_mirror_z != src_mirror_z;
4195 float tex_coords[5] = {
4196 !mirror_x ? coords[0] : coords[2],
4197 !mirror_y ? coords[1] : coords[3],
4198 !mirror_x ? coords[2] : coords[0],
4199 !mirror_y ? coords[3] : coords[1],
4200 /* Z coordinate for 3D blit sources, to be filled for each
4201 * destination layer
4202 */
4203 0.0f
4204 };
4205
4206
4207 /* For blits from 3D images we also need to compute the slice coordinate to
4208 * sample from, which will change for each layer in the destination.
4209 * Compute the step we should increase for each iteration.
4210 */
4211 const float src_z_step =
4212 (float)(max_src_layer - min_src_layer) / (float)layer_count;
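   /* Example: an 8-layer destination blitting from a 4-slice source gives
    * src_z_step = 0.5, so the loop below samples source Z slices
    * 0.25, 0.75, 1.25, ... (normalized by src_level_d).
    */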
4213
4214 /* Get the blit pipeline */
4215 struct v3dv_meta_blit_pipeline *pipeline = NULL;
4216 bool ok = get_blit_pipeline(cmd_buffer->device,
4217 dst_format, src_format, cmask, src->type,
4218 dst->samples, src->samples,
4219 &pipeline);
4220 if (!ok)
4221 return handled;
4222 assert(pipeline && pipeline->pipeline &&
4223 pipeline->pass && pipeline->pass_no_load);
4224
4225 struct v3dv_device *device = cmd_buffer->device;
4226 assert(device->meta.blit.dslayout);
4227
4228 /* Push command buffer state before starting meta operation */
4229 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
4230
4231 /* Setup framebuffer */
4232 VkDevice _device = v3dv_device_to_handle(device);
4233 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
4234
4235 VkResult result;
4236 uint32_t dirty_dynamic_state = 0;
4237 VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
4238 for (uint32_t i = 0; i < layer_count; i++) {
4239 VkImageViewCreateInfo dst_image_view_info = {
4240 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4241 .image = v3dv_image_to_handle(dst),
4242 .viewType = v3dv_image_type_to_view_type(dst->type),
4243 .format = dst_format,
4244 .subresourceRange = {
4245 .aspectMask = aspects,
4246 .baseMipLevel = region.dstSubresource.mipLevel,
4247 .levelCount = 1,
4248 .baseArrayLayer = min_dst_layer + i,
4249 .layerCount = 1
4250 },
4251 };
4252 VkImageView dst_image_view;
4253 result = v3dv_CreateImageView(_device, &dst_image_view_info,
4254 &device->alloc, &dst_image_view);
4255 if (result != VK_SUCCESS)
4256 goto fail;
4257
4258 v3dv_cmd_buffer_add_private_obj(
4259 cmd_buffer, (uintptr_t)dst_image_view,
4260 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4261
4262 VkFramebufferCreateInfo fb_info = {
4263 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4264 .renderPass = pipeline->pass,
4265 .attachmentCount = 1,
4266 .pAttachments = &dst_image_view,
4267 .width = dst_x + dst_w,
4268 .height = dst_y + dst_h,
4269 .layers = 1,
4270 };
4271
4272 VkFramebuffer fb;
4273 result = v3dv_CreateFramebuffer(_device, &fb_info,
4274 &cmd_buffer->device->alloc, &fb);
4275 if (result != VK_SUCCESS)
4276 goto fail;
4277
4278 struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4279 framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4280 fb_info.height == dst_level_h &&
4281 dst_is_padded_image;
4282
4283 v3dv_cmd_buffer_add_private_obj(
4284 cmd_buffer, (uintptr_t)fb,
4285 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4286
4287 /* Setup descriptor set for blit source texture. We don't have to
4288 * register the descriptor as a private command buffer object since
4289 * all descriptors will be freed automatically with the descriptor
4290 * pool.
4291 */
4292 VkDescriptorSet set;
4293 result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4294 if (result != VK_SUCCESS)
4295 goto fail;
4296
4297 VkSamplerCreateInfo sampler_info = {
4298 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
4299 .magFilter = filter,
4300 .minFilter = filter,
4301 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4302 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4303 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
4304 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
4305 };
4306 VkSampler sampler;
4307 result = v3dv_CreateSampler(_device, &sampler_info, &device->alloc,
4308 &sampler);
4309 if (result != VK_SUCCESS)
4310 goto fail;
4311
4312 v3dv_cmd_buffer_add_private_obj(
4313 cmd_buffer, (uintptr_t)sampler,
4314 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
4315
4316 VkImageViewCreateInfo src_image_view_info = {
4317 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4318 .image = v3dv_image_to_handle(src),
4319 .viewType = v3dv_image_type_to_view_type(src->type),
4320 .format = src_format,
4321 .components = *cswizzle,
4322 .subresourceRange = {
4323 .aspectMask = aspects,
4324 .baseMipLevel = region.srcSubresource.mipLevel,
4325 .levelCount = 1,
4326 .baseArrayLayer =
4327 src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4328 .layerCount = 1
4329 },
4330 };
4331 VkImageView src_image_view;
4332 result = v3dv_CreateImageView(_device, &src_image_view_info,
4333 &device->alloc, &src_image_view);
4334 if (result != VK_SUCCESS)
4335 goto fail;
4336
4337 v3dv_cmd_buffer_add_private_obj(
4338 cmd_buffer, (uintptr_t)src_image_view,
4339 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4340
4341 VkDescriptorImageInfo image_info = {
4342 .sampler = sampler,
4343 .imageView = src_image_view,
4344 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4345 };
4346 VkWriteDescriptorSet write = {
4347 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4348 .dstSet = set,
4349 .dstBinding = 0,
4350 .dstArrayElement = 0,
4351 .descriptorCount = 1,
4352 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4353 .pImageInfo = &image_info,
4354 };
4355 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4356
4357 /* If the region we are about to blit is tile-aligned, then we can
4358 * use the render pass version that won't pre-load the tile buffer
4359 * with the dst image contents before the blit. The exception is when we
4360 * don't have a full color mask, since in that case we need to preserve
4361 * the original value of some of the color components.
4362 */
4363 const VkRect2D render_area = {
4364 .offset = { dst_x, dst_y },
4365 .extent = { dst_w, dst_h },
4366 };
4367 struct v3dv_render_pass *pipeline_pass =
4368 v3dv_render_pass_from_handle(pipeline->pass);
4369 bool can_skip_tlb_load =
4370 cmask == full_cmask &&
4371 v3dv_subpass_area_is_tile_aligned(&render_area, framebuffer,
4372 pipeline_pass, 0);
4373
4374 /* Record blit */
4375 VkRenderPassBeginInfo rp_info = {
4376 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4377 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4378 pipeline->pass,
4379 .framebuffer = fb,
4380 .renderArea = render_area,
4381 .clearValueCount = 0,
4382 };
4383
4384 v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
4385 struct v3dv_job *job = cmd_buffer->state.job;
4386 if (!job)
4387 goto fail;
4388
4389 /* For 3D blits we need to compute the source slice to blit from (the Z
4390 * coordinate of the source sample operation). We want to choose this
4391 * based on the ratio of the depth of the source and the destination
4392 * images, picking the coordinate in the middle of each step.
4393 */
4394 if (src->type == VK_IMAGE_TYPE_3D) {
4395 tex_coords[4] =
4396 !mirror_z ?
4397 (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
4398 (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
4399 }
4400
4401 v3dv_CmdPushConstants(_cmd_buffer,
4402 device->meta.blit.playout,
4403 VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
4404 &tex_coords);
4405
4406 v3dv_CmdBindPipeline(_cmd_buffer,
4407 VK_PIPELINE_BIND_POINT_GRAPHICS,
4408 pipeline->pipeline);
4409
4410 v3dv_CmdBindDescriptorSets(_cmd_buffer,
4411 VK_PIPELINE_BIND_POINT_GRAPHICS,
4412 device->meta.blit.playout,
4413 0, 1, &set,
4414 0, NULL);
4415
4416 const VkViewport viewport = {
4417 .x = dst_x,
4418 .y = dst_y,
4419 .width = dst_w,
4420 .height = dst_h,
4421 .minDepth = 0.0f,
4422 .maxDepth = 1.0f
4423 };
4424 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
4425 const VkRect2D scissor = {
4426 .offset = { dst_x, dst_y },
4427 .extent = { dst_w, dst_h }
4428 };
4429 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4430
4431 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
4432
4433 v3dv_CmdEndRenderPass(_cmd_buffer);
4434 dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
4435 }
4436
4437 fail:
4438 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
4439
4440 return handled;
4441 }
4442
4443 void
4444 v3dv_CmdBlitImage(VkCommandBuffer commandBuffer,
4445 VkImage srcImage,
4446 VkImageLayout srcImageLayout,
4447 VkImage dstImage,
4448 VkImageLayout dstImageLayout,
4449 uint32_t regionCount,
4450 const VkImageBlit* pRegions,
4451 VkFilter filter)
4452 {
4453 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4454 V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
4455 V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
4456
4457 /* This command can only happen outside a render pass */
4458 assert(cmd_buffer->state.pass == NULL);
4459 assert(cmd_buffer->state.job == NULL);
4460
4461 /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
4462 assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&
4463 src->samples == VK_SAMPLE_COUNT_1_BIT);
4464
4465 for (uint32_t i = 0; i < regionCount; i++) {
4466 if (blit_tfu(cmd_buffer, dst, src, &pRegions[i], filter))
4467 continue;
4468 if (blit_shader(cmd_buffer,
4469 dst, dst->vk_format,
4470 src, src->vk_format,
4471 0, NULL,
4472 &pRegions[i], filter, true)) {
4473 continue;
4474 }
4475 unreachable("Unsupported blit operation");
4476 }
4477 }
4478
4479 static void
4480 emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
4481 struct framebuffer_data *framebuffer,
4482 struct v3dv_image *dst,
4483 struct v3dv_image *src,
4484 uint32_t layer_offset,
4485 const VkImageResolve *region)
4486 {
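   /* Builds the generic tile list for one layer of the resolve: load the
    * multisampled source tile into the TLB and store the resolved
    * single-sample result to the corresponding destination layer.
    */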
4487 struct v3dv_cl *cl = &job->indirect;
4488 v3dv_cl_ensure_space(cl, 200, 1);
4489 v3dv_return_if_oom(NULL, job);
4490
4491 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
4492
4493 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
4494
4495 assert((src->type != VK_IMAGE_TYPE_3D &&
4496 layer_offset < region->srcSubresource.layerCount) ||
4497 layer_offset < src->extent.depth);
4498
4499 const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
4500 region->srcSubresource.baseArrayLayer + layer_offset :
4501 region->srcOffset.z + layer_offset;
4502
4503 emit_image_load(cl, framebuffer, src,
4504 region->srcSubresource.aspectMask,
4505 src_layer,
4506 region->srcSubresource.mipLevel,
4507 false, false);
4508
4509 cl_emit(cl, END_OF_LOADS, end);
4510
4511 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
4512
4513 assert((dst->type != VK_IMAGE_TYPE_3D &&
4514 layer_offset < region->dstSubresource.layerCount) ||
4515 layer_offset < dst->extent.depth);
4516
4517 const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
4518 region->dstSubresource.baseArrayLayer + layer_offset :
4519 region->dstOffset.z + layer_offset;
4520
4521 emit_image_store(cl, framebuffer, dst,
4522 region->dstSubresource.aspectMask,
4523 dst_layer,
4524 region->dstSubresource.mipLevel,
4525 false, false, true);
4526
4527 cl_emit(cl, END_OF_TILE_MARKER, end);
4528
4529 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
4530
4531 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
4532 branch.start = tile_list_start;
4533 branch.end = v3dv_cl_get_address(cl);
4534 }
4535 }
4536
4537 static void
4538 emit_resolve_image_layer(struct v3dv_job *job,
4539 struct v3dv_image *dst,
4540 struct v3dv_image *src,
4541 struct framebuffer_data *framebuffer,
4542 uint32_t layer,
4543 const VkImageResolve *region)
4544 {
4545 emit_frame_setup(job, layer, NULL);
4546 emit_resolve_image_layer_per_tile_list(job, framebuffer,
4547 dst, src, layer, region);
4548 emit_supertile_coordinates(job, framebuffer);
4549 }
4550
4551 static void
4552 emit_resolve_image_rcl(struct v3dv_job *job,
4553 struct v3dv_image *dst,
4554 struct v3dv_image *src,
4555 struct framebuffer_data *framebuffer,
4556 const VkImageResolve *region)
4557 {
4558 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
4559 v3dv_return_if_oom(NULL, job);
4560
4561 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
4562 emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
4563 cl_emit(rcl, END_OF_RENDERING, end);
4564 }
4565
4566 static bool
4567 resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
4568 struct v3dv_image *dst,
4569 struct v3dv_image *src,
4570 const VkImageResolve *region)
4571 {
4572    if (!can_use_tlb(src, &region->srcOffset, NULL) ||
4573        !can_use_tlb(dst, &region->dstOffset, NULL)) {
4574 return false;
4575 }
4576
4577 if (!v3dv_format_supports_tlb_resolve(src->format))
4578 return false;
4579
4580 const VkFormat fb_format = src->vk_format;
4581
4582 uint32_t num_layers;
4583 if (dst->type != VK_IMAGE_TYPE_3D)
4584 num_layers = region->dstSubresource.layerCount;
4585 else
4586 num_layers = region->extent.depth;
4587 assert(num_layers > 0);
4588
4589 struct v3dv_job *job =
4590 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
4591 if (!job)
4592 return true;
4593
4594 const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
4595 const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
4596 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
4597 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
4598
4599 uint32_t internal_type, internal_bpp;
4600 get_internal_type_bpp_for_image_aspects(fb_format,
4601 region->srcSubresource.aspectMask,
4602 &internal_type, &internal_bpp);
4603
4604 v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true);
4605
4606 struct framebuffer_data framebuffer;
4607 setup_framebuffer_data(&framebuffer, fb_format, internal_type,
4608 &job->frame_tiling);
4609
4610 v3dv_job_emit_binning_flush(job);
4611 emit_resolve_image_rcl(job, dst, src, &framebuffer, region);
4612
4613 v3dv_cmd_buffer_finish_job(cmd_buffer);
4614 return true;
4615 }
4616
4617 static bool
4618 resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
4619 struct v3dv_image *dst,
4620 struct v3dv_image *src,
4621 const VkImageResolve *region)
4622 {
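   /* A resolve is a 1:1 blit from a multisampled source, so translate the
    * VkImageResolve region into a VkImageBlit and reuse the shader blit
    * path with nearest filtering.
    */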
4623 const VkImageBlit blit_region = {
4624 .srcSubresource = region->srcSubresource,
4625 .srcOffsets = {
4626 region->srcOffset,
4627 {
4628 region->srcOffset.x + region->extent.width,
4629 region->srcOffset.y + region->extent.height,
4630 }
4631 },
4632 .dstSubresource = region->dstSubresource,
4633 .dstOffsets = {
4634 region->dstOffset,
4635 {
4636 region->dstOffset.x + region->extent.width,
4637 region->dstOffset.y + region->extent.height,
4638 }
4639 },
4640 };
4641 return blit_shader(cmd_buffer,
4642 dst, dst->vk_format,
4643 src, src->vk_format,
4644 0, NULL,
4645 &blit_region, VK_FILTER_NEAREST, true);
4646 }
4647
4648 void
4649 v3dv_CmdResolveImage(VkCommandBuffer commandBuffer,
4650 VkImage srcImage,
4651 VkImageLayout srcImageLayout,
4652 VkImage dstImage,
4653 VkImageLayout dstImageLayout,
4654 uint32_t regionCount,
4655 const VkImageResolve *pRegions)
4656 {
4657 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4658 V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
4659 V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
4660
4661 /* This command can only happen outside a render pass */
4662 assert(cmd_buffer->state.pass == NULL);
4663 assert(cmd_buffer->state.job == NULL);
4664
4665 assert(src->samples == VK_SAMPLE_COUNT_4_BIT);
4666 assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
4667
4668 for (uint32_t i = 0; i < regionCount; i++) {
4669 if (resolve_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
4670 continue;
4671 if (resolve_image_blit(cmd_buffer, dst, src, &pRegions[i]))
4672 continue;
4673 unreachable("Unsupported multismaple resolve operation");
4674 }
4675 }
4676