• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_cmd_buffer.h"
6 
7 #include "nvk_buffer.h"
8 #include "nvk_device.h"
9 #include "nvk_device_memory.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_physical_device.h"
14 
15 #include "vk_format.h"
16 
17 #include "nouveau_bo.h"
18 #include "nouveau_context.h"
19 
20 #include "nvtypes.h"
21 #include "nvk_cl902d.h"
22 #include "nvk_cl90b5.h"
23 #include "nvk_clc1b5.h"
24 
/* Description of one side (source or destination) of a rectangular copy.
 *
 * All offsets/extents are in elements (compressed blocks for block formats),
 * not pixels.  For a plain VkBuffer, image_type is VK_IMAGE_TYPE_2D and
 * tiling is zero-initialized (is_tiled == false), i.e. pitch-linear.
 */
struct nouveau_copy_buffer {
   uint64_t base_addr;            /* GPU VA of the copied surface/level */
   VkImageType image_type;
   struct nil_offset4d offset_el; /* x/y/z/array offset, in elements */
   struct nil_extent4d extent_el; /* full surface extent, in elements */
   uint32_t bpp;                  /* bytes per element */
   uint32_t row_stride;           /* bytes between rows */
   uint32_t array_stride;         /* bytes between array layers / images */
   struct nil_tiling tiling;
};
35 
/* One rectangular copy: source, destination, optional component remap,
 * and the copy extent in elements.
 */
struct nouveau_copy {
   struct nouveau_copy_buffer src;
   struct nouveau_copy_buffer dst;
   /* 90B5 SET_REMAP_COMPONENTS state; comp_size == 0 disables remapping */
   struct nouveau_copy_remap {
      uint8_t comp_size; /* bytes per component (1..4); 0 = no remap */
      uint8_t dst[4];    /* per-channel NV90B5 DST_* source selector */
   } remap;
   struct nil_extent4d extent_el;
};
45 
46 static struct nouveau_copy_buffer
nouveau_copy_rect_buffer(struct nvk_buffer * buf,VkDeviceSize offset,struct vk_image_buffer_layout buffer_layout)47 nouveau_copy_rect_buffer(struct nvk_buffer *buf,
48                          VkDeviceSize offset,
49                          struct vk_image_buffer_layout buffer_layout)
50 {
51    return (struct nouveau_copy_buffer) {
52       .base_addr = nvk_buffer_address(buf, offset),
53       .image_type = VK_IMAGE_TYPE_2D,
54       .bpp = buffer_layout.element_size_B,
55       .row_stride = buffer_layout.row_stride_B,
56       .array_stride = buffer_layout.image_stride_B,
57    };
58 }
59 
60 static struct nil_offset4d
vk_to_nil_offset(VkOffset3D offset,uint32_t base_array_layer)61 vk_to_nil_offset(VkOffset3D offset, uint32_t base_array_layer)
62 {
63    return nil_offset4d(offset.x, offset.y, offset.z, base_array_layer);
64 }
65 
66 static struct nil_extent4d
vk_to_nil_extent(VkExtent3D extent,uint32_t array_layers)67 vk_to_nil_extent(VkExtent3D extent, uint32_t array_layers)
68 {
69    return nil_extent4d(extent.width, extent.height, extent.depth, array_layers);
70 }
71 
/* Describe one mip level of an image plane as one side of a copy rectangle.
 *
 * Offsets and extents are converted from pixels to elements via the plane's
 * NIL format/sample layout, so compressed and multisampled surfaces are
 * handled uniformly by the caller.
 */
static struct nouveau_copy_buffer
nouveau_copy_rect_image(struct nvk_image *img,
                        struct nvk_image_plane *plane,
                        VkOffset3D offset_px,
                        const VkImageSubresourceLayers *sub_res)
{
   const struct nil_extent4d lvl_extent4d_px =
      nil_image_level_extent_px(&plane->nil, sub_res->mipLevel);

   /* Normalize the offset for the image's dimensionality before converting
    * units and folding in the base array layer.
    */
   offset_px = vk_image_sanitize_offset(&img->vk, offset_px);
   const struct nil_offset4d offset4d_px =
      vk_to_nil_offset(offset_px, sub_res->baseArrayLayer);

   struct nouveau_copy_buffer buf = {
      /* Base address points at the selected mip level, not the image */
      .base_addr = nvk_image_plane_base_address(plane) +
                   plane->nil.levels[sub_res->mipLevel].offset_B,
      .image_type = img->vk.image_type,
      .offset_el = nil_offset4d_px_to_el(offset4d_px, plane->nil.format,
                                         plane->nil.sample_layout),
      .extent_el = nil_extent4d_px_to_el(lvl_extent4d_px, plane->nil.format,
                                         plane->nil.sample_layout),
      .bpp = util_format_get_blocksize(plane->nil.format),
      .row_stride = plane->nil.levels[sub_res->mipLevel].row_stride_B,
      .array_stride = plane->nil.array_stride_B,
      .tiling = plane->nil.levels[sub_res->mipLevel].tiling,
   };

   return buf;
}
101 
102 static struct nouveau_copy_remap
nouveau_copy_remap_format(VkFormat format)103 nouveau_copy_remap_format(VkFormat format)
104 {
105    /* Pick an arbitrary component size.  It doesn't matter what size we
106     * pick since we're just doing a copy, as long as it's no more than 4B
107     * and divides the format size.
108     */
109    unsigned comp_size = vk_format_get_blocksize(format);
110    if (comp_size % 3 == 0) {
111       comp_size /= 3;
112       assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 4);
113    } else {
114       assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 16);
115       comp_size = MIN2(comp_size, 4);
116    }
117 
118    return (struct nouveau_copy_remap) {
119       .comp_size = comp_size,
120       .dst = { 0, 1, 2, 3 },
121    };
122 }
123 
124 static uint32_t
to_90b5_remap_comp_size(uint8_t comp_size)125 to_90b5_remap_comp_size(uint8_t comp_size)
126 {
127    static const uint8_t to_90b5[] = {
128       [1] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE,
129       [2] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO,
130       [3] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE,
131       [4] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR,
132    };
133    assert(comp_size > 0 && comp_size < ARRAY_SIZE(to_90b5));
134 
135    uint32_t size_90b5 = comp_size - 1;
136    assert(size_90b5 == to_90b5[comp_size]);
137    return size_90b5;
138 }
139 
140 static uint32_t
to_90b5_remap_num_comps(uint8_t num_comps)141 to_90b5_remap_num_comps(uint8_t num_comps)
142 {
143    static const uint8_t to_90b5[] = {
144       [1] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE,
145       [2] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO,
146       [3] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE,
147       [4] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR,
148    };
149    assert(num_comps > 0 && num_comps < ARRAY_SIZE(to_90b5));
150 
151    uint32_t num_comps_90b5 = num_comps - 1;
152    assert(num_comps_90b5 == to_90b5[num_comps]);
153    return num_comps_90b5;
154 }
155 
156 static void
nouveau_copy_rect(struct nvk_cmd_buffer * cmd,struct nouveau_copy * copy)157 nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
158 {
159    uint32_t src_bw, dst_bw;
160    if (copy->remap.comp_size > 0) {
161       struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
162 
163       assert(copy->src.bpp % copy->remap.comp_size == 0);
164       assert(copy->dst.bpp % copy->remap.comp_size == 0);
165       uint32_t num_src_comps = copy->src.bpp / copy->remap.comp_size;
166       uint32_t num_dst_comps = copy->dst.bpp / copy->remap.comp_size;
167 
168       /* When running with component remapping enabled, most X/Y dimensions
169        * are in units of blocks.
170        */
171       src_bw = dst_bw = 1;
172 
173       P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
174          .dst_x = copy->remap.dst[0],
175          .dst_y = copy->remap.dst[1],
176          .dst_z = copy->remap.dst[2],
177          .dst_w = copy->remap.dst[3],
178          .component_size = to_90b5_remap_comp_size(copy->remap.comp_size),
179          .num_src_components = to_90b5_remap_comp_size(num_src_comps),
180          .num_dst_components = to_90b5_remap_comp_size(num_dst_comps),
181       });
182    } else {
183       /* When component remapping is disabled, dimensions are in units of
184        * bytes (an implicit block widht of 1B).
185        */
186       assert(copy->src.bpp == copy->dst.bpp);
187       src_bw = copy->src.bpp;
188       dst_bw = copy->dst.bpp;
189    }
190 
191    assert(copy->extent_el.depth == 1 || copy->extent_el.array_len == 1);
192    for (unsigned z = 0; z < MAX2(copy->extent_el.d, copy->extent_el.a); z++) {
193       VkDeviceSize src_addr = copy->src.base_addr;
194       VkDeviceSize dst_addr = copy->dst.base_addr;
195 
196       if (copy->src.image_type != VK_IMAGE_TYPE_3D)
197          src_addr += (z + copy->src.offset_el.a) * copy->src.array_stride;
198 
199       if (copy->dst.image_type != VK_IMAGE_TYPE_3D)
200          dst_addr += (z + copy->dst.offset_el.a) * copy->dst.array_stride;
201 
202       if (!copy->src.tiling.is_tiled) {
203          src_addr += copy->src.offset_el.x * copy->src.bpp +
204                      copy->src.offset_el.y * copy->src.row_stride;
205       }
206 
207       if (!copy->dst.tiling.is_tiled) {
208          dst_addr += copy->dst.offset_el.x * copy->dst.bpp +
209                      copy->dst.offset_el.y * copy->dst.row_stride;
210       }
211 
212       struct nv_push *p = nvk_cmd_buffer_push(cmd, 31);
213 
214       P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
215       P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
216       P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
217       P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
218       P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
219       P_NV90B5_PITCH_IN(p, copy->src.row_stride);
220       P_NV90B5_PITCH_OUT(p, copy->dst.row_stride);
221       P_NV90B5_LINE_LENGTH_IN(p, copy->extent_el.width * src_bw);
222       P_NV90B5_LINE_COUNT(p, copy->extent_el.height);
223 
224       uint32_t src_layout = 0, dst_layout = 0;
225       if (copy->src.tiling.is_tiled) {
226          P_MTHD(p, NV90B5, SET_SRC_BLOCK_SIZE);
227          P_NV90B5_SET_SRC_BLOCK_SIZE(p, {
228             .width = 0, /* Tiles are always 1 GOB wide */
229             .height = copy->src.tiling.y_log2,
230             .depth = copy->src.tiling.z_log2,
231             .gob_height = copy->src.tiling.gob_height_8 ?
232                           GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
233                           GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
234          });
235          P_NV90B5_SET_SRC_WIDTH(p, copy->src.extent_el.width * src_bw);
236          P_NV90B5_SET_SRC_HEIGHT(p, copy->src.extent_el.height);
237          P_NV90B5_SET_SRC_DEPTH(p, copy->src.extent_el.depth);
238          if (copy->src.image_type == VK_IMAGE_TYPE_3D)
239             P_NV90B5_SET_SRC_LAYER(p, z + copy->src.offset_el.z);
240          else
241             P_NV90B5_SET_SRC_LAYER(p, 0);
242 
243          if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_copy >= 0xc1b5) {
244             P_MTHD(p, NVC1B5, SRC_ORIGIN_X);
245             P_NVC1B5_SRC_ORIGIN_X(p, copy->src.offset_el.x * src_bw);
246             P_NVC1B5_SRC_ORIGIN_Y(p, copy->src.offset_el.y);
247          } else {
248             P_MTHD(p, NV90B5, SET_SRC_ORIGIN);
249             P_NV90B5_SET_SRC_ORIGIN(p, {
250                .x = copy->src.offset_el.x * src_bw,
251                .y = copy->src.offset_el.y
252             });
253          }
254 
255          src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
256       } else {
257          src_addr += copy->src.array_stride;
258          src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
259       }
260 
261       if (copy->dst.tiling.is_tiled) {
262          P_MTHD(p, NV90B5, SET_DST_BLOCK_SIZE);
263          P_NV90B5_SET_DST_BLOCK_SIZE(p, {
264             .width = 0, /* Tiles are always 1 GOB wide */
265             .height = copy->dst.tiling.y_log2,
266             .depth = copy->dst.tiling.z_log2,
267             .gob_height = copy->dst.tiling.gob_height_8 ?
268                           GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
269                           GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
270          });
271          P_NV90B5_SET_DST_WIDTH(p, copy->dst.extent_el.width * dst_bw);
272          P_NV90B5_SET_DST_HEIGHT(p, copy->dst.extent_el.height);
273          P_NV90B5_SET_DST_DEPTH(p, copy->dst.extent_el.depth);
274          if (copy->dst.image_type == VK_IMAGE_TYPE_3D)
275             P_NV90B5_SET_DST_LAYER(p, z + copy->dst.offset_el.z);
276          else
277             P_NV90B5_SET_DST_LAYER(p, 0);
278 
279          if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_copy >= 0xc1b5) {
280             P_MTHD(p, NVC1B5, DST_ORIGIN_X);
281             P_NVC1B5_DST_ORIGIN_X(p, copy->dst.offset_el.x * dst_bw);
282             P_NVC1B5_DST_ORIGIN_Y(p, copy->dst.offset_el.y);
283          } else {
284             P_MTHD(p, NV90B5, SET_DST_ORIGIN);
285             P_NV90B5_SET_DST_ORIGIN(p, {
286                .x = copy->dst.offset_el.x * dst_bw,
287                .y = copy->dst.offset_el.y
288             });
289          }
290 
291          dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
292       } else {
293          dst_addr += copy->dst.array_stride;
294          dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
295       }
296 
297       P_IMMD(p, NV90B5, LAUNCH_DMA, {
298          .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
299          .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
300          .flush_enable = FLUSH_ENABLE_TRUE,
301          .src_memory_layout = src_layout,
302          .dst_memory_layout = dst_layout,
303          .remap_enable = copy->remap.comp_size > 0,
304       });
305    }
306 }
307 
/* vkCmdCopyBuffer2: buffer-to-buffer copy via the 90B5 copy engine. */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                   const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];

      uint64_t src_addr = nvk_buffer_address(src, region->srcOffset);
      uint64_t dst_addr = nvk_buffer_address(dst, region->dstOffset);
      uint64_t size = region->size;

      /* Split the region into chunks of at most 128 KiB (1 << 17), each
       * emitted as its own single-line DMA transfer.
       */
      while (size) {
         struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
         P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
         P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
         P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
         P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

         unsigned bytes = MIN2(size, 1 << 17);

         P_MTHD(p, NV90B5, LINE_LENGTH_IN);
         P_NV90B5_LINE_LENGTH_IN(p, bytes);
         P_NV90B5_LINE_COUNT(p, 1);

         P_IMMD(p, NV90B5, LAUNCH_DMA, {
                .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
                .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
                .flush_enable = FLUSH_ENABLE_TRUE,
                .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
                .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         });

         src_addr += bytes;
         dst_addr += bytes;
         size -= bytes;
      }
   }
}
352 
/* vkCmdCopyBufferToImage2: upload buffer data into an image via 90B5.
 *
 * Combined depth/stencil formats need component remapping; the
 * D32_SFLOAT_S8_UINT stencil path additionally bounces through the image's
 * stencil_copy_temp plane using a second copy (copy2).
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
                          const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_image, dst, pCopyBufferToImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&dst->vk, region);

      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&dst->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&dst->vk, &region->imageSubresource);
      const struct nil_extent4d extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(dst, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_buffer(src, region->bufferOffset,
                                         buffer_layout),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px, dst->planes[plane].nil.format,
                                            dst->planes[plane].nil.sample_layout),
      };
      /* Second copy used only by the D32S8 stencil path below;
       * extent_el.w == 0 (from the zero init) marks it unused.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (dst->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Write only the 4-byte depth component of each texel */
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* First copy the tightly-packed stencil bytes into the
             * stencil_copy_temp plane, then (copy2) scatter them into the
             * combined depth/stencil texels via a 2-byte component remap.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(dst, &dst->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 2;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Copy the three depth bytes; leave the stencil byte alone */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Scatter each stencil byte into the top byte of a texel */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(dst->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.w > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            nvk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyBufferToImageInfo->pNext) {
      switch (ext->sType) {
      default:
         nvk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
459 
/* vkCmdCopyImageToBuffer2: read image data back into a buffer via 90B5.
 *
 * Mirror of nvk_CmdCopyBufferToImage2: combined depth/stencil formats use
 * component remaps, and the D32_SFLOAT_S8_UINT stencil path bounces through
 * the image's stencil_copy_temp plane with a second copy (copy2).
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                          const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageToBufferInfo->srcImage);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&src->vk, region);

      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->imageSubresource);
      const struct nil_extent4d extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(src, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset,
                                         buffer_layout),
         .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[plane].nil.format,
                                            src->planes[plane].nil.sample_layout),
      };
      /* Second copy used only by the D32S8 stencil path below;
       * extent_el.w == 0 (from the zero init) marks it unused.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (src->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Read only the 4-byte depth component of each texel */
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* First gather the stencil component out of the combined
             * texels into stencil_copy_temp (2-byte components, X <- Z),
             * then (copy2) pack those bytes tightly into the buffer.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(src, &src->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 2;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 1;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Extract the three depth bytes of each texel */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Extract the stencil byte (top byte of the packed texel) */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_W;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.w > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            nvk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyImageToBufferInfo->pNext) {
      switch (ext->sType) {
      default:
         nvk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
566 
567 VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * pCopyImageInfo)568 nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,
569                   const VkCopyImageInfo2 *pCopyImageInfo)
570 {
571    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
572    VK_FROM_HANDLE(nvk_image, src, pCopyImageInfo->srcImage);
573    VK_FROM_HANDLE(nvk_image, dst, pCopyImageInfo->dstImage);
574 
575    for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
576       const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
577 
578       /* From the Vulkan 1.3.217 spec:
579        *
580        *    "When copying between compressed and uncompressed formats the
581        *    extent members represent the texel dimensions of the source image
582        *    and not the destination."
583        */
584       const VkExtent3D extent_px =
585          vk_image_sanitize_extent(&src->vk, region->extent);
586       const uint32_t layer_count =
587          vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
588       const struct nil_extent4d extent4d_px =
589          vk_to_nil_extent(extent_px, layer_count);
590 
591       const VkImageAspectFlagBits src_aspects =
592          region->srcSubresource.aspectMask;
593       uint8_t src_plane = nvk_image_aspects_to_plane(src, src_aspects);
594 
595       const VkImageAspectFlagBits dst_aspects =
596          region->dstSubresource.aspectMask;
597       uint8_t dst_plane = nvk_image_aspects_to_plane(dst, dst_aspects);
598 
599       struct nouveau_copy copy = {
600          .src = nouveau_copy_rect_image(src, &src->planes[src_plane],
601                                         region->srcOffset,
602                                         &region->srcSubresource),
603          .dst = nouveau_copy_rect_image(dst, &dst->planes[dst_plane],
604                                         region->dstOffset,
605                                         &region->dstSubresource),
606          .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[src_plane].nil.format,
607                                             src->planes[src_plane].nil.sample_layout),
608       };
609 
610       assert(src_aspects == region->srcSubresource.aspectMask);
611       switch (src->vk.format) {
612       case VK_FORMAT_D24_UNORM_S8_UINT:
613          if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
614             copy.remap.comp_size = 1;
615             copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
616             copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
617             copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
618             copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
619          } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
620             copy.remap.comp_size = 1;
621             copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
622             copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
623             copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
624             copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_W;
625          } else {
626             /* If we're copying both, there's nothing special to do */
627             assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
628                                VK_IMAGE_ASPECT_STENCIL_BIT));
629          }
630          break;
631       default:
632          copy.remap = nouveau_copy_remap_format(src->vk.format);
633          break;
634       }
635 
636       nouveau_copy_rect(cmd, &copy);
637    }
638 }
639 
/* vkCmdFillBuffer: fill a buffer range with a repeated 32-bit value.
 *
 * The copy engine's remap constant A is used as the data source, so no
 * staging memory is needed; the fill is emitted as 2D pitch-linear
 * transfers of up to 32K x 32K dwords each.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
                  VkBuffer dstBuffer,
                  VkDeviceSize dstOffset,
                  VkDeviceSize size,
                  uint32_t data)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
   /* Resolves VK_WHOLE_SIZE against the buffer's actual size */
   size = vk_buffer_range(&dst_buffer->vk, dstOffset, size);

   uint32_t max_dim = 1 << 15;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);

   /* Source every destination dword from the remap constant A */
   P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data);
   P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
      .dst_x = DST_X_CONST_A,
      .dst_y = DST_Y_CONST_A,
      .dst_z = DST_Z_CONST_A,
      .dst_w = DST_W_CONST_A,
      .component_size = COMPONENT_SIZE_FOUR,
      .num_src_components = NUM_SRC_COMPONENTS_ONE,
      .num_dst_components = NUM_DST_COMPONENTS_ONE,
   });

   /* Pitches are constant (max_dim dwords per row); set them once */
   P_MTHD(p, NV90B5, PITCH_IN);
   P_NV90B5_PITCH_IN(p, max_dim * 4);
   P_NV90B5_PITCH_OUT(p, max_dim * 4);

   /* Per Vulkan valid usage, size here is a multiple of 4, so the >= 4
    * condition drops no tail bytes.
    */
   while (size >= 4) {
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

      P_MTHD(p, NV90B5, OFFSET_OUT_UPPER);
      P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

      /* Pick the largest width x height (in dwords) that fits in both
       * the remaining size and the engine's maximum dimensions.
       */
      uint64_t width, height;
      if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) {
         width = height = max_dim;
      } else if (size >= max_dim * 4) {
         width = max_dim;
         height = size / (max_dim * 4);
      } else {
         width = size / 4;
         height = 1;
      }

      uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4;
      assert(dma_size <= size);

      P_MTHD(p, NV90B5, LINE_LENGTH_IN);
      P_NV90B5_LINE_LENGTH_IN(p, width);
      P_NV90B5_LINE_COUNT(p, height);

      P_IMMD(p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = height > 1,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         .remap_enable = REMAP_ENABLE_TRUE,
      });

      dst_addr += dma_size;
      size -= dma_size;
   }
}
710 
/* vkCmdUpdateBuffer: inline-update a buffer region with host data.
 *
 * The data is first staged in the command buffer's upload area (64-byte
 * aligned), then DMA'd to the destination with a single-line transfer.
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                    VkBuffer dstBuffer,
                    VkDeviceSize dstOffset,
                    VkDeviceSize dataSize,
                    const void *pData)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst, dstOffset);

   /* NOTE(review): any failure status from nvk_cmd_buffer_upload_data is
    * ignored here — confirm whether it can fail and needs handling.
    */
   uint64_t data_addr;
   nvk_cmd_buffer_upload_data(cmd, pData, dataSize, 64, &data_addr);

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
   P_NV90B5_OFFSET_IN_UPPER(p, data_addr >> 32);
   P_NV90B5_OFFSET_IN_LOWER(p, data_addr & 0xffffffff);
   P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
   P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

   P_MTHD(p, NV90B5, LINE_LENGTH_IN);
   P_NV90B5_LINE_LENGTH_IN(p, dataSize);
   P_NV90B5_LINE_COUNT(p, 1);

   P_IMMD(p, NV90B5, LAUNCH_DMA, {
      .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
      .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
      .flush_enable = FLUSH_ENABLE_TRUE,
      .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
      .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
   });
}
746