/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_format.h"
#include "nvk_image.h"
#include "nvk_image_view.h"
#include "nvk_physical_device.h"

#include "vk_format.h"

#include "nvtypes.h"
#include "nv_push_cl902d.h"
#include "nv_push_cl90b5.h"
#include "nv_push_clc1b5.h"

static inline uint16_t
nvk_cmd_buffer_copy_cls(struct nvk_cmd_buffer *cmd)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   return pdev->info.cls_copy;
}

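/* CPU-side description of one side of a DMA copy. The same struct describes
 * both buffers (modeled as pitch-linear 2D images) and real images. For
 * images, base_addr already includes the mip level offset, and offsets and
 * extents are in elements (blocks), not pixels.
 */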
struct nouveau_copy_buffer {
   uint64_t base_addr;
   VkImageType image_type;
   struct nil_Offset4D_Elements offset_el;
   struct nil_Extent4D_Elements extent_el;
   uint32_t bpp;
   uint32_t row_stride;
   uint32_t array_stride;
   struct nil_tiling tiling;
};

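/* A single copy operation. When remap.comp_size is non-zero, the copy
 * engine's remap unit is enabled and each entry of remap.dst[] selects which
 * source component (or NO_WRITE) lands in the corresponding destination
 * component.
 */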
struct nouveau_copy {
   struct nouveau_copy_buffer src;
   struct nouveau_copy_buffer dst;
   struct nouveau_copy_remap {
      uint8_t comp_size;
      uint8_t dst[4];
   } remap;
   struct nil_Extent4D_Elements extent_el;
};

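/* Builds the copy description for a buffer, treating it as a pitch-linear
 * 2D image with the row and image strides from the VkBufferImageCopy2
 * layout.
 */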
static struct nouveau_copy_buffer
nouveau_copy_rect_buffer(struct nvk_buffer *buf,
                         VkDeviceSize offset,
                         struct vk_image_buffer_layout buffer_layout)
{
   return (struct nouveau_copy_buffer) {
      .base_addr = nvk_buffer_address(buf, offset),
      .image_type = VK_IMAGE_TYPE_2D,
      .bpp = buffer_layout.element_size_B,
      .row_stride = buffer_layout.row_stride_B,
      .array_stride = buffer_layout.image_stride_B,
   };
}

static struct nil_Offset4D_Pixels
vk_to_nil_offset(VkOffset3D offset, uint32_t base_array_layer)
{
   return (struct nil_Offset4D_Pixels) {
      .x = offset.x,
      .y = offset.y,
      .z = offset.z,
      .a = base_array_layer
   };
}

static struct nil_Extent4D_Pixels
vk_to_nil_extent(VkExtent3D extent, uint32_t array_layers)
{
   return (struct nil_Extent4D_Pixels) {
      .width = extent.width,
      .height = extent.height,
      .depth = extent.depth,
      .array_len = array_layers,
   };
}

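/* Builds the copy description for one plane of an image at the given mip
 * level, folding the level offset into the base address and converting the
 * pixel offset and extent to elements.
 */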
static struct nouveau_copy_buffer
nouveau_copy_rect_image(const struct nvk_image *img,
                        const struct nvk_image_plane *plane,
                        VkOffset3D offset_px,
                        const VkImageSubresourceLayers *sub_res)
{
   const struct nil_Extent4D_Pixels lvl_extent4d_px =
      nil_image_level_extent_px(&plane->nil, sub_res->mipLevel);

   offset_px = vk_image_sanitize_offset(&img->vk, offset_px);
   const struct nil_Offset4D_Pixels offset4d_px =
      vk_to_nil_offset(offset_px, sub_res->baseArrayLayer);

   struct nouveau_copy_buffer buf = {
      .base_addr = nvk_image_plane_base_address(plane) +
                   plane->nil.levels[sub_res->mipLevel].offset_B,
      .image_type = img->vk.image_type,
      .offset_el = nil_offset4d_px_to_el(offset4d_px, plane->nil.format,
                                         plane->nil.sample_layout),
      .extent_el = nil_extent4d_px_to_el(lvl_extent4d_px, plane->nil.format,
                                         plane->nil.sample_layout),
      .bpp = util_format_get_blocksize(plane->nil.format.p_format),
      .row_stride = plane->nil.levels[sub_res->mipLevel].row_stride_B,
      .array_stride = plane->nil.array_stride_B,
      .tiling = plane->nil.levels[sub_res->mipLevel].tiling,
   };

   return buf;
}

static struct nouveau_copy_remap
nouveau_copy_remap_format(VkFormat format)
{
   /* Pick an arbitrary component size. It doesn't matter what size we
    * pick since we're just doing a copy, as long as it's no more than 4B
    * and divides the format size.
    */
   unsigned comp_size = vk_format_get_blocksize(format);
   if (comp_size % 3 == 0) {
      comp_size /= 3;
      assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 4);
   } else {
      assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 16);
      comp_size = MIN2(comp_size, 4);
   }
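   /* For example, VK_FORMAT_R16G16B16_UNORM (6B) yields comp_size = 2
    * (three 2B components), while VK_FORMAT_R32G32B32A32_SFLOAT (16B) is
    * clamped to comp_size = 4 (four 4B components).
    */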

   return (struct nouveau_copy_remap) {
      .comp_size = comp_size,
      .dst = { 0, 1, 2, 3 },
   };
}

static uint32_t
to_90b5_remap_comp_size(uint8_t comp_size)
{
   static const uint8_t to_90b5[] = {
      [1] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE,
      [2] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO,
      [3] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE,
      [4] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR,
   };
   assert(comp_size > 0 && comp_size < ARRAY_SIZE(to_90b5));

   uint32_t size_90b5 = comp_size - 1;
   assert(size_90b5 == to_90b5[comp_size]);
   return size_90b5;
}

static uint32_t
to_90b5_remap_num_comps(uint8_t num_comps)
{
   static const uint8_t to_90b5[] = {
      [1] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE,
      [2] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO,
      [3] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE,
      [4] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR,
   };
   assert(num_comps > 0 && num_comps < ARRAY_SIZE(to_90b5));

   uint32_t num_comps_90b5 = num_comps - 1;
   assert(num_comps_90b5 == to_90b5[num_comps]);
   return num_comps_90b5;
}

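/* Emits one 90B5 copy per array layer or 3D slice described by *copy.
 * Layers of non-3D images advance by the array stride while 3D slices are
 * selected with SET_SRC/DST_LAYER.
 */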
static void
nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
{
   uint32_t src_bw, dst_bw;
   if (copy->remap.comp_size > 0) {
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);

      assert(copy->src.bpp % copy->remap.comp_size == 0);
      assert(copy->dst.bpp % copy->remap.comp_size == 0);
      uint32_t num_src_comps = copy->src.bpp / copy->remap.comp_size;
      uint32_t num_dst_comps = copy->dst.bpp / copy->remap.comp_size;

      /* When running with component remapping enabled, most X/Y dimensions
       * are in units of blocks.
       */
      src_bw = dst_bw = 1;

      P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
         .dst_x = copy->remap.dst[0],
         .dst_y = copy->remap.dst[1],
         .dst_z = copy->remap.dst[2],
         .dst_w = copy->remap.dst[3],
         .component_size = to_90b5_remap_comp_size(copy->remap.comp_size),
         .num_src_components = to_90b5_remap_num_comps(num_src_comps),
         .num_dst_components = to_90b5_remap_num_comps(num_dst_comps),
      });
   } else {
      /* When component remapping is disabled, dimensions are in units of
       * bytes (an implicit block width of 1B).
       */
      assert(copy->src.bpp == copy->dst.bpp);
      src_bw = copy->src.bpp;
      dst_bw = copy->dst.bpp;
   }
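   /* For example, copying 16B texels with a remap of four 4B components
    * programs LINE_LENGTH_IN as the width in texels, while the same copy
    * without a remap programs it as width * 16 bytes.
    */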

   assert(copy->extent_el.depth == 1 || copy->extent_el.array_len == 1);
   uint32_t layers = MAX2(copy->extent_el.depth, copy->extent_el.array_len);
   for (unsigned z = 0; z < layers; z++) {
      VkDeviceSize src_addr = copy->src.base_addr;
      VkDeviceSize dst_addr = copy->dst.base_addr;

      if (copy->src.image_type != VK_IMAGE_TYPE_3D)
         src_addr += (z + copy->src.offset_el.a) * copy->src.array_stride;

      if (copy->dst.image_type != VK_IMAGE_TYPE_3D)
         dst_addr += (z + copy->dst.offset_el.a) * copy->dst.array_stride;

      if (copy->src.tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
         src_addr += copy->src.offset_el.x * copy->src.bpp +
                     copy->src.offset_el.y * copy->src.row_stride;
      }

      if (copy->dst.tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
         dst_addr += copy->dst.offset_el.x * copy->dst.bpp +
                     copy->dst.offset_el.y * copy->dst.row_stride;
      }

      struct nv_push *p = nvk_cmd_buffer_push(cmd, 31);

      P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(p, copy->src.row_stride);
      P_NV90B5_PITCH_OUT(p, copy->dst.row_stride);
      P_NV90B5_LINE_LENGTH_IN(p, copy->extent_el.width * src_bw);
      P_NV90B5_LINE_COUNT(p, copy->extent_el.height);

      uint32_t src_layout = 0, dst_layout = 0;
      if (copy->src.tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
         P_MTHD(p, NV90B5, SET_SRC_BLOCK_SIZE);
         assert(nil_gob_type_height(copy->src.tiling.gob_type) == 8);
         P_NV90B5_SET_SRC_BLOCK_SIZE(p, {
            .width = 0, /* Tiles are always 1 GOB wide */
            .height = copy->src.tiling.y_log2,
            .depth = copy->src.tiling.z_log2,
            .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
         });
         /* We use the stride for copies because the copy hardware has no
          * concept of a tile width. Instead, we just set the width to the
          * stride divided by bpp.
          */
         uint32_t src_stride_el = copy->src.row_stride / copy->src.bpp;
         P_NV90B5_SET_SRC_WIDTH(p, src_stride_el * src_bw);
         P_NV90B5_SET_SRC_HEIGHT(p, copy->src.extent_el.height);
         P_NV90B5_SET_SRC_DEPTH(p, copy->src.extent_el.depth);
         if (copy->src.image_type == VK_IMAGE_TYPE_3D)
            P_NV90B5_SET_SRC_LAYER(p, z + copy->src.offset_el.z);
         else
            P_NV90B5_SET_SRC_LAYER(p, 0);

         if (nvk_cmd_buffer_copy_cls(cmd) >= PASCAL_DMA_COPY_B) {
            P_MTHD(p, NVC1B5, SRC_ORIGIN_X);
            P_NVC1B5_SRC_ORIGIN_X(p, copy->src.offset_el.x * src_bw);
            P_NVC1B5_SRC_ORIGIN_Y(p, copy->src.offset_el.y);
         } else {
            P_MTHD(p, NV90B5, SET_SRC_ORIGIN);
            P_NV90B5_SET_SRC_ORIGIN(p, {
               .x = copy->src.offset_el.x * src_bw,
               .y = copy->src.offset_el.y
            });
         }

         src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
      } else {
         src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
      }

      if (copy->dst.tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
         P_MTHD(p, NV90B5, SET_DST_BLOCK_SIZE);
         assert(nil_gob_type_height(copy->dst.tiling.gob_type) == 8);
         P_NV90B5_SET_DST_BLOCK_SIZE(p, {
            .width = 0, /* Tiles are always 1 GOB wide */
            .height = copy->dst.tiling.y_log2,
            .depth = copy->dst.tiling.z_log2,
            .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
         });
         /* We use the stride for copies because the copy hardware has no
          * concept of a tile width. Instead, we just set the width to the
          * stride divided by bpp.
          */
         uint32_t dst_stride_el = copy->dst.row_stride / copy->dst.bpp;
         P_NV90B5_SET_DST_WIDTH(p, dst_stride_el * dst_bw);
         P_NV90B5_SET_DST_HEIGHT(p, copy->dst.extent_el.height);
         P_NV90B5_SET_DST_DEPTH(p, copy->dst.extent_el.depth);
         if (copy->dst.image_type == VK_IMAGE_TYPE_3D)
            P_NV90B5_SET_DST_LAYER(p, z + copy->dst.offset_el.z);
         else
            P_NV90B5_SET_DST_LAYER(p, 0);

         if (nvk_cmd_buffer_copy_cls(cmd) >= PASCAL_DMA_COPY_B) {
            P_MTHD(p, NVC1B5, DST_ORIGIN_X);
            P_NVC1B5_DST_ORIGIN_X(p, copy->dst.offset_el.x * dst_bw);
            P_NVC1B5_DST_ORIGIN_Y(p, copy->dst.offset_el.y);
         } else {
            P_MTHD(p, NV90B5, SET_DST_ORIGIN);
            P_NV90B5_SET_DST_ORIGIN(p, {
               .x = copy->dst.offset_el.x * dst_bw,
               .y = copy->dst.offset_el.y
            });
         }

         dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
      } else {
         dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
      }

      P_IMMD(p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = src_layout,
         .dst_memory_layout = dst_layout,
         .remap_enable = copy->remap.comp_size > 0,
      });
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                   const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];

      uint64_t src_addr = nvk_buffer_address(src, region->srcOffset);
      uint64_t dst_addr = nvk_buffer_address(dst, region->dstOffset);
      uint64_t size = region->size;

      while (size) {
         struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
         P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
         P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
         P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
         P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

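         /* Transfer at most 1 << 17 bytes (128 KiB) per single-line copy. */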
         unsigned bytes = MIN2(size, 1 << 17);

         P_MTHD(p, NV90B5, LINE_LENGTH_IN);
         P_NV90B5_LINE_LENGTH_IN(p, bytes);
         P_NV90B5_LINE_COUNT(p, 1);

         P_IMMD(p, NV90B5, LAUNCH_DMA, {
            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
            .flush_enable = FLUSH_ENABLE_TRUE,
            .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
            .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         });

         src_addr += bytes;
         dst_addr += bytes;
         size -= bytes;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
                          const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_image, dst, pCopyBufferToImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&dst->vk, region);

      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&dst->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&dst->vk, &region->imageSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(dst, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_buffer(src, region->bufferOffset,
                                         buffer_layout),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px,
                                            dst->planes[plane].nil.format,
                                            dst->planes[plane].nil.sample_layout),
      };
      struct nouveau_copy copy2 = { 0 };

      switch (dst->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
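            /* Stencil for packed D32_SFLOAT_S8_UINT is staged, as the
             * remaps below encode it: first copy the tightly-packed S8
             * data from the buffer into stencil_copy_temp, then scatter
             * it into its location in the packed depth/stencil layout
             * with a second remapped copy.
             */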
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(dst, &dst->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 2;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(dst->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.width > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            vk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyBufferToImageInfo->pNext) {
      switch (ext->sType) {
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                          const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageToBufferInfo->srcImage);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&src->vk, region);

      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->imageSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(src, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset,
                                         buffer_layout),
         .extent_el = nil_extent4d_px_to_el(extent4d_px,
                                            src->planes[plane].nil.format,
                                            src->planes[plane].nil.sample_layout),
      };
      struct nouveau_copy copy2 = { 0 };

      switch (src->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
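            /* Mirror of the buffer-to-image path: extract the stencil data
             * from the packed depth/stencil layout into stencil_copy_temp,
             * then tightly pack it into the buffer with a second remapped
             * copy.
             */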
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(src, &src->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 2;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 1;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_W;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.width > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            vk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyImageToBufferInfo->pNext) {
      switch (ext->sType) {
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

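/* Copies copy_rect of the view's subresource between the linear image plane
 * and its tiled shadow; copy_to_tiled_shadow selects the direction.
 */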
void
nvk_linear_render_copy(struct nvk_cmd_buffer *cmd,
                       const struct nvk_image_view *iview,
                       VkRect2D copy_rect,
                       bool copy_to_tiled_shadow)
{
   const struct nvk_image *image = (struct nvk_image *)iview->vk.image;

   const uint8_t ip = iview->planes[0].image_plane;
   const struct nvk_image_plane *src_plane = NULL, *dst_plane = NULL;
   if (copy_to_tiled_shadow) {
      src_plane = &image->planes[ip];
      dst_plane = &image->linear_tiled_shadow;
   } else {
      src_plane = &image->linear_tiled_shadow;
      dst_plane = &image->planes[ip];
   }

   const struct VkImageSubresourceLayers subres = {
      .aspectMask = iview->vk.aspects,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
      .mipLevel = iview->vk.base_mip_level,
   };

   const VkOffset3D offset_px = {
      .x = copy_rect.offset.x,
      .y = copy_rect.offset.y,
      .z = 0,
   };
   const struct nil_Extent4D_Pixels extent4d_px = {
      .width = copy_rect.extent.width,
      .height = copy_rect.extent.height,
      .depth = 1,
      .array_len = 1,
   };

   struct nouveau_copy copy = {
      .src = nouveau_copy_rect_image(image, src_plane, offset_px, &subres),
      .dst = nouveau_copy_rect_image(image, dst_plane, offset_px, &subres),
      .extent_el = nil_extent4d_px_to_el(extent4d_px, src_plane->nil.format,
                                         src_plane->nil.sample_layout),
   };

   copy.remap = nouveau_copy_remap_format(image->vk.format);
   nouveau_copy_rect(cmd, &copy);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,
                  const VkCopyImageInfo2 *pCopyImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageInfo->srcImage);
   VK_FROM_HANDLE(nvk_image, dst, pCopyImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];

      /* From the Vulkan 1.3.217 spec:
       *
       *    "When copying between compressed and uncompressed formats the
       *    extent members represent the texel dimensions of the source
       *    image and not the destination."
       */
      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->extent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits src_aspects =
         region->srcSubresource.aspectMask;
      uint8_t src_plane = nvk_image_aspects_to_plane(src, src_aspects);

      const VkImageAspectFlagBits dst_aspects =
         region->dstSubresource.aspectMask;
      uint8_t dst_plane = nvk_image_aspects_to_plane(dst, dst_aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[src_plane],
                                        region->srcOffset,
                                        &region->srcSubresource),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[dst_plane],
                                        region->dstOffset,
                                        &region->dstSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px,
                                            src->planes[src_plane].nil.format,
                                            src->planes[src_plane].nil.sample_layout),
      };

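      /* For the packed depth/stencil formats below, a single-aspect copy
       * uses the remap unit to write only the relevant components: the
       * remaps treat D24_UNORM_S8_UINT as four 1B components (depth in
       * X/Y/Z, stencil in W) and D32_SFLOAT_S8_UINT as 4B components with
       * depth in X.
       */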
      assert(src_aspects == region->srcSubresource.aspectMask);
      switch (src->vk.format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_W;
         } else {
            /* If we're copying both, there's nothing special to do */
            assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT));
         }
         break;
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            /* If we're copying both, there's nothing special to do */
            assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT));
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
                  VkBuffer dstBuffer,
                  VkDeviceSize dstOffset,
                  VkDeviceSize size,
                  uint32_t data)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
   size = vk_buffer_range(&dst_buffer->vk, dstOffset, size);

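   /* The fill value is broadcast by the remap unit: every destination
    * component is sourced from the 32-bit constant A, so no source buffer
    * is read. The range is then covered with pitch-linear rectangles of up
    * to max_dim x max_dim 4-byte texels.
    */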
   uint32_t max_dim = 1 << 15;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);

   P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data);
   P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
      .dst_x = DST_X_CONST_A,
      .dst_y = DST_Y_CONST_A,
      .dst_z = DST_Z_CONST_A,
      .dst_w = DST_W_CONST_A,
      .component_size = COMPONENT_SIZE_FOUR,
      .num_src_components = NUM_SRC_COMPONENTS_ONE,
      .num_dst_components = NUM_DST_COMPONENTS_ONE,
   });

   P_MTHD(p, NV90B5, PITCH_IN);
   P_NV90B5_PITCH_IN(p, max_dim * 4);
   P_NV90B5_PITCH_OUT(p, max_dim * 4);

   while (size >= 4) {
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

      P_MTHD(p, NV90B5, OFFSET_OUT_UPPER);
      P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

      uint64_t width, height;
      if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) {
         width = height = max_dim;
      } else if (size >= max_dim * 4) {
         width = max_dim;
         height = size / (max_dim * 4);
      } else {
         width = size / 4;
         height = 1;
      }

      uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4;
      assert(dma_size <= size);

      P_MTHD(p, NV90B5, LINE_LENGTH_IN);
      P_NV90B5_LINE_LENGTH_IN(p, width);
      P_NV90B5_LINE_COUNT(p, height);

      P_IMMD(p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = height > 1,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         .remap_enable = REMAP_ENABLE_TRUE,
      });

      dst_addr += dma_size;
      size -= dma_size;
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                    VkBuffer dstBuffer,
                    VkDeviceSize dstOffset,
                    VkDeviceSize dataSize,
                    const void *pData)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst, dstOffset);

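   /* Stage the user data in the cmd buffer's upload area, then DMA it to
    * the destination buffer.
    */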
   uint64_t data_addr;
   VkResult result =
      nvk_cmd_buffer_upload_data(cmd, pData, dataSize, 64, &data_addr);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
   P_NV90B5_OFFSET_IN_UPPER(p, data_addr >> 32);
   P_NV90B5_OFFSET_IN_LOWER(p, data_addr & 0xffffffff);
   P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
   P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

   P_MTHD(p, NV90B5, LINE_LENGTH_IN);
   P_NV90B5_LINE_LENGTH_IN(p, dataSize);
   P_NV90B5_LINE_COUNT(p, 1);

   P_IMMD(p, NV90B5, LAUNCH_DMA, {
      .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
      .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
      .flush_enable = FLUSH_ENABLE_TRUE,
      .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
      .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
   });
}