1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_cmd_buffer.h"
6
7 #include "nvk_buffer.h"
8 #include "nvk_device.h"
9 #include "nvk_device_memory.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_physical_device.h"
14
15 #include "vk_format.h"
16
17 #include "nouveau_bo.h"
18 #include "nouveau_context.h"
19
20 #include "nvtypes.h"
21 #include "nvk_cl902d.h"
22 #include "nvk_cl90b5.h"
23 #include "nvk_clc1b5.h"
24
/* Description of one side (source or destination) of a rectangle copy.
 * Addresses and strides are in bytes; offsets and extents are in elements.
 */
struct nouveau_copy_buffer {
   uint64_t base_addr;            /* GPU VA of the copied mip level's first byte */
   VkImageType image_type;        /* VK_IMAGE_TYPE_2D for plain buffers */
   struct nil_offset4d offset_el; /* x/y/z/array offset, in elements */
   struct nil_extent4d extent_el; /* full level extent, in elements */
   uint32_t bpp;                  /* bytes per element */
   uint32_t row_stride;           /* bytes between rows */
   uint32_t array_stride;         /* bytes between array layers */
   struct nil_tiling tiling;      /* left zeroed (is_tiled = false) for buffers */
};
35
/* A full copy request: source, destination, optional component remap, and
 * the copy extent in elements (shared by both sides).
 */
struct nouveau_copy {
   struct nouveau_copy_buffer src;
   struct nouveau_copy_buffer dst;
   struct nouveau_copy_remap {
      uint8_t comp_size; /* bytes per component; 0 disables remapping */
      uint8_t dst[4];    /* NV90B5_SET_REMAP_COMPONENTS_DST_* selector for
                          * each destination component (X, Y, Z, W) */
   } remap;
   struct nil_extent4d extent_el;
};
45
46 static struct nouveau_copy_buffer
nouveau_copy_rect_buffer(struct nvk_buffer * buf,VkDeviceSize offset,struct vk_image_buffer_layout buffer_layout)47 nouveau_copy_rect_buffer(struct nvk_buffer *buf,
48 VkDeviceSize offset,
49 struct vk_image_buffer_layout buffer_layout)
50 {
51 return (struct nouveau_copy_buffer) {
52 .base_addr = nvk_buffer_address(buf, offset),
53 .image_type = VK_IMAGE_TYPE_2D,
54 .bpp = buffer_layout.element_size_B,
55 .row_stride = buffer_layout.row_stride_B,
56 .array_stride = buffer_layout.image_stride_B,
57 };
58 }
59
60 static struct nil_offset4d
vk_to_nil_offset(VkOffset3D offset,uint32_t base_array_layer)61 vk_to_nil_offset(VkOffset3D offset, uint32_t base_array_layer)
62 {
63 return nil_offset4d(offset.x, offset.y, offset.z, base_array_layer);
64 }
65
66 static struct nil_extent4d
vk_to_nil_extent(VkExtent3D extent,uint32_t array_layers)67 vk_to_nil_extent(VkExtent3D extent, uint32_t array_layers)
68 {
69 return nil_extent4d(extent.width, extent.height, extent.depth, array_layers);
70 }
71
72 static struct nouveau_copy_buffer
nouveau_copy_rect_image(struct nvk_image * img,struct nvk_image_plane * plane,VkOffset3D offset_px,const VkImageSubresourceLayers * sub_res)73 nouveau_copy_rect_image(struct nvk_image *img,
74 struct nvk_image_plane *plane,
75 VkOffset3D offset_px,
76 const VkImageSubresourceLayers *sub_res)
77 {
78 const struct nil_extent4d lvl_extent4d_px =
79 nil_image_level_extent_px(&plane->nil, sub_res->mipLevel);
80
81 offset_px = vk_image_sanitize_offset(&img->vk, offset_px);
82 const struct nil_offset4d offset4d_px =
83 vk_to_nil_offset(offset_px, sub_res->baseArrayLayer);
84
85 struct nouveau_copy_buffer buf = {
86 .base_addr = nvk_image_plane_base_address(plane) +
87 plane->nil.levels[sub_res->mipLevel].offset_B,
88 .image_type = img->vk.image_type,
89 .offset_el = nil_offset4d_px_to_el(offset4d_px, plane->nil.format,
90 plane->nil.sample_layout),
91 .extent_el = nil_extent4d_px_to_el(lvl_extent4d_px, plane->nil.format,
92 plane->nil.sample_layout),
93 .bpp = util_format_get_blocksize(plane->nil.format),
94 .row_stride = plane->nil.levels[sub_res->mipLevel].row_stride_B,
95 .array_stride = plane->nil.array_stride_B,
96 .tiling = plane->nil.levels[sub_res->mipLevel].tiling,
97 };
98
99 return buf;
100 }
101
102 static struct nouveau_copy_remap
nouveau_copy_remap_format(VkFormat format)103 nouveau_copy_remap_format(VkFormat format)
104 {
105 /* Pick an arbitrary component size. It doesn't matter what size we
106 * pick since we're just doing a copy, as long as it's no more than 4B
107 * and divides the format size.
108 */
109 unsigned comp_size = vk_format_get_blocksize(format);
110 if (comp_size % 3 == 0) {
111 comp_size /= 3;
112 assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 4);
113 } else {
114 assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 16);
115 comp_size = MIN2(comp_size, 4);
116 }
117
118 return (struct nouveau_copy_remap) {
119 .comp_size = comp_size,
120 .dst = { 0, 1, 2, 3 },
121 };
122 }
123
124 static uint32_t
to_90b5_remap_comp_size(uint8_t comp_size)125 to_90b5_remap_comp_size(uint8_t comp_size)
126 {
127 static const uint8_t to_90b5[] = {
128 [1] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE,
129 [2] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO,
130 [3] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE,
131 [4] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR,
132 };
133 assert(comp_size > 0 && comp_size < ARRAY_SIZE(to_90b5));
134
135 uint32_t size_90b5 = comp_size - 1;
136 assert(size_90b5 == to_90b5[comp_size]);
137 return size_90b5;
138 }
139
140 static uint32_t
to_90b5_remap_num_comps(uint8_t num_comps)141 to_90b5_remap_num_comps(uint8_t num_comps)
142 {
143 static const uint8_t to_90b5[] = {
144 [1] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE,
145 [2] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO,
146 [3] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE,
147 [4] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR,
148 };
149 assert(num_comps > 0 && num_comps < ARRAY_SIZE(to_90b5));
150
151 uint32_t num_comps_90b5 = num_comps - 1;
152 assert(num_comps_90b5 == to_90b5[num_comps]);
153 return num_comps_90b5;
154 }
155
156 static void
nouveau_copy_rect(struct nvk_cmd_buffer * cmd,struct nouveau_copy * copy)157 nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
158 {
159 uint32_t src_bw, dst_bw;
160 if (copy->remap.comp_size > 0) {
161 struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
162
163 assert(copy->src.bpp % copy->remap.comp_size == 0);
164 assert(copy->dst.bpp % copy->remap.comp_size == 0);
165 uint32_t num_src_comps = copy->src.bpp / copy->remap.comp_size;
166 uint32_t num_dst_comps = copy->dst.bpp / copy->remap.comp_size;
167
168 /* When running with component remapping enabled, most X/Y dimensions
169 * are in units of blocks.
170 */
171 src_bw = dst_bw = 1;
172
173 P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
174 .dst_x = copy->remap.dst[0],
175 .dst_y = copy->remap.dst[1],
176 .dst_z = copy->remap.dst[2],
177 .dst_w = copy->remap.dst[3],
178 .component_size = to_90b5_remap_comp_size(copy->remap.comp_size),
179 .num_src_components = to_90b5_remap_comp_size(num_src_comps),
180 .num_dst_components = to_90b5_remap_comp_size(num_dst_comps),
181 });
182 } else {
183 /* When component remapping is disabled, dimensions are in units of
184 * bytes (an implicit block widht of 1B).
185 */
186 assert(copy->src.bpp == copy->dst.bpp);
187 src_bw = copy->src.bpp;
188 dst_bw = copy->dst.bpp;
189 }
190
191 assert(copy->extent_el.depth == 1 || copy->extent_el.array_len == 1);
192 for (unsigned z = 0; z < MAX2(copy->extent_el.d, copy->extent_el.a); z++) {
193 VkDeviceSize src_addr = copy->src.base_addr;
194 VkDeviceSize dst_addr = copy->dst.base_addr;
195
196 if (copy->src.image_type != VK_IMAGE_TYPE_3D)
197 src_addr += (z + copy->src.offset_el.a) * copy->src.array_stride;
198
199 if (copy->dst.image_type != VK_IMAGE_TYPE_3D)
200 dst_addr += (z + copy->dst.offset_el.a) * copy->dst.array_stride;
201
202 if (!copy->src.tiling.is_tiled) {
203 src_addr += copy->src.offset_el.x * copy->src.bpp +
204 copy->src.offset_el.y * copy->src.row_stride;
205 }
206
207 if (!copy->dst.tiling.is_tiled) {
208 dst_addr += copy->dst.offset_el.x * copy->dst.bpp +
209 copy->dst.offset_el.y * copy->dst.row_stride;
210 }
211
212 struct nv_push *p = nvk_cmd_buffer_push(cmd, 31);
213
214 P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
215 P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
216 P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
217 P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
218 P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
219 P_NV90B5_PITCH_IN(p, copy->src.row_stride);
220 P_NV90B5_PITCH_OUT(p, copy->dst.row_stride);
221 P_NV90B5_LINE_LENGTH_IN(p, copy->extent_el.width * src_bw);
222 P_NV90B5_LINE_COUNT(p, copy->extent_el.height);
223
224 uint32_t src_layout = 0, dst_layout = 0;
225 if (copy->src.tiling.is_tiled) {
226 P_MTHD(p, NV90B5, SET_SRC_BLOCK_SIZE);
227 P_NV90B5_SET_SRC_BLOCK_SIZE(p, {
228 .width = 0, /* Tiles are always 1 GOB wide */
229 .height = copy->src.tiling.y_log2,
230 .depth = copy->src.tiling.z_log2,
231 .gob_height = copy->src.tiling.gob_height_8 ?
232 GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
233 GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
234 });
235 P_NV90B5_SET_SRC_WIDTH(p, copy->src.extent_el.width * src_bw);
236 P_NV90B5_SET_SRC_HEIGHT(p, copy->src.extent_el.height);
237 P_NV90B5_SET_SRC_DEPTH(p, copy->src.extent_el.depth);
238 if (copy->src.image_type == VK_IMAGE_TYPE_3D)
239 P_NV90B5_SET_SRC_LAYER(p, z + copy->src.offset_el.z);
240 else
241 P_NV90B5_SET_SRC_LAYER(p, 0);
242
243 if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_copy >= 0xc1b5) {
244 P_MTHD(p, NVC1B5, SRC_ORIGIN_X);
245 P_NVC1B5_SRC_ORIGIN_X(p, copy->src.offset_el.x * src_bw);
246 P_NVC1B5_SRC_ORIGIN_Y(p, copy->src.offset_el.y);
247 } else {
248 P_MTHD(p, NV90B5, SET_SRC_ORIGIN);
249 P_NV90B5_SET_SRC_ORIGIN(p, {
250 .x = copy->src.offset_el.x * src_bw,
251 .y = copy->src.offset_el.y
252 });
253 }
254
255 src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
256 } else {
257 src_addr += copy->src.array_stride;
258 src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
259 }
260
261 if (copy->dst.tiling.is_tiled) {
262 P_MTHD(p, NV90B5, SET_DST_BLOCK_SIZE);
263 P_NV90B5_SET_DST_BLOCK_SIZE(p, {
264 .width = 0, /* Tiles are always 1 GOB wide */
265 .height = copy->dst.tiling.y_log2,
266 .depth = copy->dst.tiling.z_log2,
267 .gob_height = copy->dst.tiling.gob_height_8 ?
268 GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
269 GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
270 });
271 P_NV90B5_SET_DST_WIDTH(p, copy->dst.extent_el.width * dst_bw);
272 P_NV90B5_SET_DST_HEIGHT(p, copy->dst.extent_el.height);
273 P_NV90B5_SET_DST_DEPTH(p, copy->dst.extent_el.depth);
274 if (copy->dst.image_type == VK_IMAGE_TYPE_3D)
275 P_NV90B5_SET_DST_LAYER(p, z + copy->dst.offset_el.z);
276 else
277 P_NV90B5_SET_DST_LAYER(p, 0);
278
279 if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_copy >= 0xc1b5) {
280 P_MTHD(p, NVC1B5, DST_ORIGIN_X);
281 P_NVC1B5_DST_ORIGIN_X(p, copy->dst.offset_el.x * dst_bw);
282 P_NVC1B5_DST_ORIGIN_Y(p, copy->dst.offset_el.y);
283 } else {
284 P_MTHD(p, NV90B5, SET_DST_ORIGIN);
285 P_NV90B5_SET_DST_ORIGIN(p, {
286 .x = copy->dst.offset_el.x * dst_bw,
287 .y = copy->dst.offset_el.y
288 });
289 }
290
291 dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
292 } else {
293 dst_addr += copy->dst.array_stride;
294 dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
295 }
296
297 P_IMMD(p, NV90B5, LAUNCH_DMA, {
298 .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
299 .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
300 .flush_enable = FLUSH_ENABLE_TRUE,
301 .src_memory_layout = src_layout,
302 .dst_memory_layout = dst_layout,
303 .remap_enable = copy->remap.comp_size > 0,
304 });
305 }
306 }
307
308 VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,const VkCopyBufferInfo2 * pCopyBufferInfo)309 nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
310 const VkCopyBufferInfo2 *pCopyBufferInfo)
311 {
312 VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
313 VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferInfo->srcBuffer);
314 VK_FROM_HANDLE(nvk_buffer, dst, pCopyBufferInfo->dstBuffer);
315
316 for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
317 const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
318
319 uint64_t src_addr = nvk_buffer_address(src, region->srcOffset);
320 uint64_t dst_addr = nvk_buffer_address(dst, region->dstOffset);
321 uint64_t size = region->size;
322
323 while (size) {
324 struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
325
326 P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
327 P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
328 P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
329 P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
330 P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
331
332 unsigned bytes = MIN2(size, 1 << 17);
333
334 P_MTHD(p, NV90B5, LINE_LENGTH_IN);
335 P_NV90B5_LINE_LENGTH_IN(p, bytes);
336 P_NV90B5_LINE_COUNT(p, 1);
337
338 P_IMMD(p, NV90B5, LAUNCH_DMA, {
339 .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
340 .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
341 .flush_enable = FLUSH_ENABLE_TRUE,
342 .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
343 .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
344 });
345
346 src_addr += bytes;
347 dst_addr += bytes;
348 size -= bytes;
349 }
350 }
351 }
352
/* vkCmdCopyBufferToImage2: copy buffer regions into an image using the
 * 90B5 copy engine.
 *
 * Each region is described as a pair of nouveau_copy_buffer rectangles and
 * handed to nouveau_copy_rect().  Depth/stencil destinations get a
 * component remap so that only the requested aspect is written; the D32S8
 * stencil path additionally bounces through the image's stencil_copy_temp
 * plane using a second copy.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
                          const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_image, dst, pCopyBufferToImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&dst->vk, region);

      /* Convert the API extent (pixels + layer count) to elements. */
      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&dst->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&dst->vk, &region->imageSubresource);
      const struct nil_extent4d extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(dst, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_buffer(src, region->bufferOffset,
                                         buffer_layout),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px, dst->planes[plane].nil.format,
                                            dst->planes[plane].nil.sample_layout),
      };
      /* Optional second copy; armed (extent_el.w != 0) only by the D32S8
       * stencil path below.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (dst->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Write only the 4-byte depth component. */
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Bounce through stencil_copy_temp: first copy the packed
             * stencil bytes from the buffer into the temp plane, then copy
             * them from the temp plane into the stencil component of the
             * real image (copy2), leaving depth untouched.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(dst, &dst->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 2;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Write the three depth bytes; the fourth (stencil) byte of the
             * destination is preserved.
             */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Write the single packed source byte into the destination's W
             * component; depth bytes are preserved.
             */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(dst->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.w > 0)
         nouveau_copy_rect(cmd, &copy2);

      /* No extension structs handled yet; just log anything unknown. */
      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            nvk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyBufferToImageInfo->pNext) {
      switch (ext->sType) {
      default:
         nvk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
459
/* vkCmdCopyImageToBuffer2: copy image regions into a buffer using the
 * 90B5 copy engine.
 *
 * Mirror of nvk_CmdCopyBufferToImage2: each region becomes a pair of
 * nouveau_copy_buffer rectangles for nouveau_copy_rect().  Depth/stencil
 * sources get a component remap so only the requested aspect is read; the
 * D32S8 stencil path bounces through the image's stencil_copy_temp plane
 * using a second copy.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                          const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageToBufferInfo->srcImage);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&src->vk, region);

      /* Convert the API extent (pixels + layer count) to elements. */
      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->imageSubresource);
      const struct nil_extent4d extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(src, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset,
                                         buffer_layout),
         .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[plane].nil.format,
                                            src->planes[plane].nil.sample_layout),
      };
      /* Optional second copy; armed (extent_el.w != 0) only by the D32S8
       * stencil path below.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (src->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Read only the 4-byte depth component. */
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Bounce through stencil_copy_temp: first extract the stencil
             * component from the image into the temp plane, then copy the
             * temp plane's packed bytes into the buffer (copy2).
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(src, &src->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 2;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 1;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Read the three depth bytes; the stencil byte is not written
             * to the buffer.
             */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Read only the stencil (W) byte into the packed buffer. */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_W;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.w > 0)
         nouveau_copy_rect(cmd, &copy2);

      /* No extension structs handled yet; just log anything unknown. */
      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            nvk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyImageToBufferInfo->pNext) {
      switch (ext->sType) {
      default:
         nvk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
566
567 VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * pCopyImageInfo)568 nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,
569 const VkCopyImageInfo2 *pCopyImageInfo)
570 {
571 VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
572 VK_FROM_HANDLE(nvk_image, src, pCopyImageInfo->srcImage);
573 VK_FROM_HANDLE(nvk_image, dst, pCopyImageInfo->dstImage);
574
575 for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
576 const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
577
578 /* From the Vulkan 1.3.217 spec:
579 *
580 * "When copying between compressed and uncompressed formats the
581 * extent members represent the texel dimensions of the source image
582 * and not the destination."
583 */
584 const VkExtent3D extent_px =
585 vk_image_sanitize_extent(&src->vk, region->extent);
586 const uint32_t layer_count =
587 vk_image_subresource_layer_count(&src->vk, ®ion->srcSubresource);
588 const struct nil_extent4d extent4d_px =
589 vk_to_nil_extent(extent_px, layer_count);
590
591 const VkImageAspectFlagBits src_aspects =
592 region->srcSubresource.aspectMask;
593 uint8_t src_plane = nvk_image_aspects_to_plane(src, src_aspects);
594
595 const VkImageAspectFlagBits dst_aspects =
596 region->dstSubresource.aspectMask;
597 uint8_t dst_plane = nvk_image_aspects_to_plane(dst, dst_aspects);
598
599 struct nouveau_copy copy = {
600 .src = nouveau_copy_rect_image(src, &src->planes[src_plane],
601 region->srcOffset,
602 ®ion->srcSubresource),
603 .dst = nouveau_copy_rect_image(dst, &dst->planes[dst_plane],
604 region->dstOffset,
605 ®ion->dstSubresource),
606 .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[src_plane].nil.format,
607 src->planes[src_plane].nil.sample_layout),
608 };
609
610 assert(src_aspects == region->srcSubresource.aspectMask);
611 switch (src->vk.format) {
612 case VK_FORMAT_D24_UNORM_S8_UINT:
613 if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
614 copy.remap.comp_size = 1;
615 copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
616 copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
617 copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
618 copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
619 } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
620 copy.remap.comp_size = 1;
621 copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
622 copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
623 copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
624 copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_W;
625 } else {
626 /* If we're copying both, there's nothing special to do */
627 assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
628 VK_IMAGE_ASPECT_STENCIL_BIT));
629 }
630 break;
631 default:
632 copy.remap = nouveau_copy_remap_format(src->vk.format);
633 break;
634 }
635
636 nouveau_copy_rect(cmd, ©);
637 }
638 }
639
640 VKAPI_ATTR void VKAPI_CALL
nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)641 nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
642 VkBuffer dstBuffer,
643 VkDeviceSize dstOffset,
644 VkDeviceSize size,
645 uint32_t data)
646 {
647 VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
648 VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);
649
650 uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
651 size = vk_buffer_range(&dst_buffer->vk, dstOffset, size);
652
653 uint32_t max_dim = 1 << 15;
654
655 struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
656
657 P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data);
658 P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
659 .dst_x = DST_X_CONST_A,
660 .dst_y = DST_Y_CONST_A,
661 .dst_z = DST_Z_CONST_A,
662 .dst_w = DST_W_CONST_A,
663 .component_size = COMPONENT_SIZE_FOUR,
664 .num_src_components = NUM_SRC_COMPONENTS_ONE,
665 .num_dst_components = NUM_DST_COMPONENTS_ONE,
666 });
667
668 P_MTHD(p, NV90B5, PITCH_IN);
669 P_NV90B5_PITCH_IN(p, max_dim * 4);
670 P_NV90B5_PITCH_OUT(p, max_dim * 4);
671
672 while (size >= 4) {
673 struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
674
675 P_MTHD(p, NV90B5, OFFSET_OUT_UPPER);
676 P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
677 P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
678
679 uint64_t width, height;
680 if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) {
681 width = height = max_dim;
682 } else if (size >= max_dim * 4) {
683 width = max_dim;
684 height = size / (max_dim * 4);
685 } else {
686 width = size / 4;
687 height = 1;
688 }
689
690 uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4;
691 assert(dma_size <= size);
692
693 P_MTHD(p, NV90B5, LINE_LENGTH_IN);
694 P_NV90B5_LINE_LENGTH_IN(p, width);
695 P_NV90B5_LINE_COUNT(p, height);
696
697 P_IMMD(p, NV90B5, LAUNCH_DMA, {
698 .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
699 .multi_line_enable = height > 1,
700 .flush_enable = FLUSH_ENABLE_TRUE,
701 .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
702 .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
703 .remap_enable = REMAP_ENABLE_TRUE,
704 });
705
706 dst_addr += dma_size;
707 size -= dma_size;
708 }
709 }
710
/* vkCmdUpdateBuffer: stage pData in command-buffer-owned GPU memory, then
 * DMA it to the destination buffer as a single pitch-linear line.
 *
 * NOTE(review): the status of nvk_cmd_buffer_upload_data() is ignored here.
 * If that upload can fail (e.g. out of memory), data_addr may be used
 * without having been written — confirm and propagate the error to the
 * command buffer if so.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                    VkBuffer dstBuffer,
                    VkDeviceSize dstOffset,
                    VkDeviceSize dataSize,
                    const void *pData)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst, dstOffset);

   /* Copy the CPU data into a 64-byte-aligned GPU-visible staging
    * allocation so the DMA below can read it.
    */
   uint64_t data_addr;
   nvk_cmd_buffer_upload_data(cmd, pData, dataSize, 64, &data_addr);

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
   P_NV90B5_OFFSET_IN_UPPER(p, data_addr >> 32);
   P_NV90B5_OFFSET_IN_LOWER(p, data_addr & 0xffffffff);
   P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
   P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

   /* One line of dataSize bytes covers the whole update. */
   P_MTHD(p, NV90B5, LINE_LENGTH_IN);
   P_NV90B5_LINE_LENGTH_IN(p, dataSize);
   P_NV90B5_LINE_COUNT(p, 1);

   P_IMMD(p, NV90B5, LAUNCH_DMA, {
      .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
      .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
      .flush_enable = FLUSH_ENABLE_TRUE,
      .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
      .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
   });
}
746