1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_cmd_buffer.h"
6 
7 #include "nvk_buffer.h"
8 #include "nvk_device.h"
9 #include "nvk_device_memory.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_image_view.h"
14 #include "nvk_physical_device.h"
15 
16 #include "vk_format.h"
17 
18 #include "nvtypes.h"
19 #include "nv_push_cl902d.h"
20 #include "nv_push_cl90b5.h"
21 #include "nv_push_clc1b5.h"
22 
23 static inline uint16_t
nvk_cmd_buffer_copy_cls(struct nvk_cmd_buffer * cmd)24 nvk_cmd_buffer_copy_cls(struct nvk_cmd_buffer *cmd)
25 {
26    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
27    struct nvk_physical_device *pdev = nvk_device_physical(dev);
28    return pdev->info.cls_copy;
29 }
30 
/* Describes one side (source or destination) of a rectangle copy as
 * consumed by nouveau_copy_rect().  Both linear buffers and image
 * mip levels are normalized into this form: a base address plus
 * strides and an origin/extent in elements.
 */
struct nouveau_copy_buffer {
   uint64_t base_addr;                      /* GPU VA of the first byte of the level/region */
   VkImageType image_type;                  /* 3D images advance layers via the Z origin */
   struct nil_Offset4D_Elements offset_el;  /* copy origin, in elements */
   struct nil_Extent4D_Elements extent_el;  /* full level extent, in elements */
   uint32_t bpp;                            /* bytes per element */
   uint32_t row_stride;                     /* bytes between successive rows */
   uint32_t array_stride;                   /* bytes between successive array layers */
   struct nil_tiling tiling;                /* tiling info; LINEAR gob_type for buffers */
};
41 
/* One rectangle copy: source, destination, an optional component remap,
 * and the extent shared by both sides (in elements).
 */
struct nouveau_copy {
   struct nouveau_copy_buffer src;
   struct nouveau_copy_buffer dst;
   struct nouveau_copy_remap {
      uint8_t comp_size;  /* bytes per component; 0 disables remapping */
      uint8_t dst[4];     /* NV90B5 SET_REMAP_COMPONENTS DST_{X,Y,Z,W} selectors */
   } remap;
   struct nil_Extent4D_Elements extent_el;
};
51 
52 static struct nouveau_copy_buffer
nouveau_copy_rect_buffer(struct nvk_buffer * buf,VkDeviceSize offset,struct vk_image_buffer_layout buffer_layout)53 nouveau_copy_rect_buffer(struct nvk_buffer *buf,
54                          VkDeviceSize offset,
55                          struct vk_image_buffer_layout buffer_layout)
56 {
57    return (struct nouveau_copy_buffer) {
58       .base_addr = nvk_buffer_address(buf, offset),
59       .image_type = VK_IMAGE_TYPE_2D,
60       .bpp = buffer_layout.element_size_B,
61       .row_stride = buffer_layout.row_stride_B,
62       .array_stride = buffer_layout.image_stride_B,
63    };
64 }
65 
66 static struct nil_Offset4D_Pixels
vk_to_nil_offset(VkOffset3D offset,uint32_t base_array_layer)67 vk_to_nil_offset(VkOffset3D offset, uint32_t base_array_layer)
68 {
69    return (struct nil_Offset4D_Pixels) {
70       .x = offset.x,
71       .y = offset.y,
72       .z = offset.z,
73       .a = base_array_layer
74    };
75 }
76 
77 static struct nil_Extent4D_Pixels
vk_to_nil_extent(VkExtent3D extent,uint32_t array_layers)78 vk_to_nil_extent(VkExtent3D extent, uint32_t array_layers)
79 {
80    return (struct nil_Extent4D_Pixels) {
81       .width      = extent.width,
82       .height     = extent.height,
83       .depth      = extent.depth,
84       .array_len  = array_layers,
85    };
86 }
87 
88 static struct nouveau_copy_buffer
nouveau_copy_rect_image(const struct nvk_image * img,const struct nvk_image_plane * plane,VkOffset3D offset_px,const VkImageSubresourceLayers * sub_res)89 nouveau_copy_rect_image(const struct nvk_image *img,
90                         const struct nvk_image_plane *plane,
91                         VkOffset3D offset_px,
92                         const VkImageSubresourceLayers *sub_res)
93 {
94    const struct nil_Extent4D_Pixels lvl_extent4d_px =
95       nil_image_level_extent_px(&plane->nil, sub_res->mipLevel);
96 
97    offset_px = vk_image_sanitize_offset(&img->vk, offset_px);
98    const struct nil_Offset4D_Pixels offset4d_px =
99       vk_to_nil_offset(offset_px, sub_res->baseArrayLayer);
100 
101    struct nouveau_copy_buffer buf = {
102       .base_addr = nvk_image_plane_base_address(plane) +
103                    plane->nil.levels[sub_res->mipLevel].offset_B,
104       .image_type = img->vk.image_type,
105       .offset_el = nil_offset4d_px_to_el(offset4d_px, plane->nil.format,
106                                          plane->nil.sample_layout),
107       .extent_el = nil_extent4d_px_to_el(lvl_extent4d_px, plane->nil.format,
108                                          plane->nil.sample_layout),
109       .bpp = util_format_get_blocksize(plane->nil.format.p_format),
110       .row_stride = plane->nil.levels[sub_res->mipLevel].row_stride_B,
111       .array_stride = plane->nil.array_stride_B,
112       .tiling = plane->nil.levels[sub_res->mipLevel].tiling,
113    };
114 
115    return buf;
116 }
117 
118 static struct nouveau_copy_remap
nouveau_copy_remap_format(VkFormat format)119 nouveau_copy_remap_format(VkFormat format)
120 {
121    /* Pick an arbitrary component size.  It doesn't matter what size we
122     * pick since we're just doing a copy, as long as it's no more than 4B
123     * and divides the format size.
124     */
125    unsigned comp_size = vk_format_get_blocksize(format);
126    if (comp_size % 3 == 0) {
127       comp_size /= 3;
128       assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 4);
129    } else {
130       assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 16);
131       comp_size = MIN2(comp_size, 4);
132    }
133 
134    return (struct nouveau_copy_remap) {
135       .comp_size = comp_size,
136       .dst = { 0, 1, 2, 3 },
137    };
138 }
139 
140 static uint32_t
to_90b5_remap_comp_size(uint8_t comp_size)141 to_90b5_remap_comp_size(uint8_t comp_size)
142 {
143    static const uint8_t to_90b5[] = {
144       [1] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE,
145       [2] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO,
146       [3] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE,
147       [4] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR,
148    };
149    assert(comp_size > 0 && comp_size < ARRAY_SIZE(to_90b5));
150 
151    uint32_t size_90b5 = comp_size - 1;
152    assert(size_90b5 == to_90b5[comp_size]);
153    return size_90b5;
154 }
155 
156 static uint32_t
to_90b5_remap_num_comps(uint8_t num_comps)157 to_90b5_remap_num_comps(uint8_t num_comps)
158 {
159    static const uint8_t to_90b5[] = {
160       [1] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE,
161       [2] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO,
162       [3] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE,
163       [4] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR,
164    };
165    assert(num_comps > 0 && num_comps < ARRAY_SIZE(to_90b5));
166 
167    uint32_t num_comps_90b5 = num_comps - 1;
168    assert(num_comps_90b5 == to_90b5[num_comps]);
169    return num_comps_90b5;
170 }
171 
172 static void
nouveau_copy_rect(struct nvk_cmd_buffer * cmd,struct nouveau_copy * copy)173 nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
174 {
175    uint32_t src_bw, dst_bw;
176    if (copy->remap.comp_size > 0) {
177       struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
178 
179       assert(copy->src.bpp % copy->remap.comp_size == 0);
180       assert(copy->dst.bpp % copy->remap.comp_size == 0);
181       uint32_t num_src_comps = copy->src.bpp / copy->remap.comp_size;
182       uint32_t num_dst_comps = copy->dst.bpp / copy->remap.comp_size;
183 
184       /* When running with component remapping enabled, most X/Y dimensions
185        * are in units of blocks.
186        */
187       src_bw = dst_bw = 1;
188 
189       P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
190          .dst_x = copy->remap.dst[0],
191          .dst_y = copy->remap.dst[1],
192          .dst_z = copy->remap.dst[2],
193          .dst_w = copy->remap.dst[3],
194          .component_size = to_90b5_remap_comp_size(copy->remap.comp_size),
195          .num_src_components = to_90b5_remap_comp_size(num_src_comps),
196          .num_dst_components = to_90b5_remap_comp_size(num_dst_comps),
197       });
198    } else {
199       /* When component remapping is disabled, dimensions are in units of
200        * bytes (an implicit block width of 1B).
201        */
202       assert(copy->src.bpp == copy->dst.bpp);
203       src_bw = copy->src.bpp;
204       dst_bw = copy->dst.bpp;
205    }
206 
207    assert(copy->extent_el.depth == 1 || copy->extent_el.array_len == 1);
208    uint32_t layers = MAX2(copy->extent_el.depth, copy->extent_el.array_len);
209    for (unsigned z = 0; z < layers; z++) {
210       VkDeviceSize src_addr = copy->src.base_addr;
211       VkDeviceSize dst_addr = copy->dst.base_addr;
212 
213       if (copy->src.image_type != VK_IMAGE_TYPE_3D)
214          src_addr += (z + copy->src.offset_el.a) * copy->src.array_stride;
215 
216       if (copy->dst.image_type != VK_IMAGE_TYPE_3D)
217          dst_addr += (z + copy->dst.offset_el.a) * copy->dst.array_stride;
218 
219       if (copy->src.tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
220          src_addr += copy->src.offset_el.x * copy->src.bpp +
221                      copy->src.offset_el.y * copy->src.row_stride;
222       }
223 
224       if (copy->dst.tiling.gob_type == NIL_GOB_TYPE_LINEAR) {
225          dst_addr += copy->dst.offset_el.x * copy->dst.bpp +
226                      copy->dst.offset_el.y * copy->dst.row_stride;
227       }
228 
229       struct nv_push *p = nvk_cmd_buffer_push(cmd, 31);
230 
231       P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
232       P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
233       P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
234       P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
235       P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
236       P_NV90B5_PITCH_IN(p, copy->src.row_stride);
237       P_NV90B5_PITCH_OUT(p, copy->dst.row_stride);
238       P_NV90B5_LINE_LENGTH_IN(p, copy->extent_el.width * src_bw);
239       P_NV90B5_LINE_COUNT(p, copy->extent_el.height);
240 
241       uint32_t src_layout = 0, dst_layout = 0;
242       if (copy->src.tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
243          P_MTHD(p, NV90B5, SET_SRC_BLOCK_SIZE);
244          assert(nil_gob_type_height(copy->src.tiling.gob_type) == 8);
245          P_NV90B5_SET_SRC_BLOCK_SIZE(p, {
246             .width = 0, /* Tiles are always 1 GOB wide */
247             .height = copy->src.tiling.y_log2,
248             .depth = copy->src.tiling.z_log2,
249             .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
250          });
251          /* We use the stride for copies because the copy hardware has no
252           * concept of a tile width.  Instead, we just set the width to the
253           * stride divided by bpp.
254           */
255          uint32_t src_stride_el = copy->src.row_stride / copy->src.bpp;
256          P_NV90B5_SET_SRC_WIDTH(p, src_stride_el * src_bw);
257          P_NV90B5_SET_SRC_HEIGHT(p, copy->src.extent_el.height);
258          P_NV90B5_SET_SRC_DEPTH(p, copy->src.extent_el.depth);
259          if (copy->src.image_type == VK_IMAGE_TYPE_3D)
260             P_NV90B5_SET_SRC_LAYER(p, z + copy->src.offset_el.z);
261          else
262             P_NV90B5_SET_SRC_LAYER(p, 0);
263 
264          if (nvk_cmd_buffer_copy_cls(cmd) >= PASCAL_DMA_COPY_B) {
265             P_MTHD(p, NVC1B5, SRC_ORIGIN_X);
266             P_NVC1B5_SRC_ORIGIN_X(p, copy->src.offset_el.x * src_bw);
267             P_NVC1B5_SRC_ORIGIN_Y(p, copy->src.offset_el.y);
268          } else {
269             P_MTHD(p, NV90B5, SET_SRC_ORIGIN);
270             P_NV90B5_SET_SRC_ORIGIN(p, {
271                .x = copy->src.offset_el.x * src_bw,
272                .y = copy->src.offset_el.y
273             });
274          }
275 
276          src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
277       } else {
278          src_addr += copy->src.array_stride;
279          src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
280       }
281 
282       if (copy->dst.tiling.gob_type != NIL_GOB_TYPE_LINEAR) {
283          P_MTHD(p, NV90B5, SET_DST_BLOCK_SIZE);
284          assert(nil_gob_type_height(copy->dst.tiling.gob_type) == 8);
285          P_NV90B5_SET_DST_BLOCK_SIZE(p, {
286             .width = 0, /* Tiles are always 1 GOB wide */
287             .height = copy->dst.tiling.y_log2,
288             .depth = copy->dst.tiling.z_log2,
289             .gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
290          });
291          /* We use the stride for copies because the copy hardware has no
292           * concept of a tile width.  Instead, we just set the width to the
293           * stride divided by bpp.
294           */
295          uint32_t dst_stride_el = copy->dst.row_stride / copy->dst.bpp;
296          P_NV90B5_SET_DST_WIDTH(p, dst_stride_el * dst_bw);
297          P_NV90B5_SET_DST_HEIGHT(p, copy->dst.extent_el.height);
298          P_NV90B5_SET_DST_DEPTH(p, copy->dst.extent_el.depth);
299          if (copy->dst.image_type == VK_IMAGE_TYPE_3D)
300             P_NV90B5_SET_DST_LAYER(p, z + copy->dst.offset_el.z);
301          else
302             P_NV90B5_SET_DST_LAYER(p, 0);
303 
304          if (nvk_cmd_buffer_copy_cls(cmd) >= PASCAL_DMA_COPY_B) {
305             P_MTHD(p, NVC1B5, DST_ORIGIN_X);
306             P_NVC1B5_DST_ORIGIN_X(p, copy->dst.offset_el.x * dst_bw);
307             P_NVC1B5_DST_ORIGIN_Y(p, copy->dst.offset_el.y);
308          } else {
309             P_MTHD(p, NV90B5, SET_DST_ORIGIN);
310             P_NV90B5_SET_DST_ORIGIN(p, {
311                .x = copy->dst.offset_el.x * dst_bw,
312                .y = copy->dst.offset_el.y
313             });
314          }
315 
316          dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
317       } else {
318          dst_addr += copy->dst.array_stride;
319          dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
320       }
321 
322       P_IMMD(p, NV90B5, LAUNCH_DMA, {
323          .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
324          .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
325          .flush_enable = FLUSH_ENABLE_TRUE,
326          .src_memory_layout = src_layout,
327          .dst_memory_layout = dst_layout,
328          .remap_enable = copy->remap.comp_size > 0,
329       });
330    }
331 }
332 
333 VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,const VkCopyBufferInfo2 * pCopyBufferInfo)334 nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
335                    const VkCopyBufferInfo2 *pCopyBufferInfo)
336 {
337    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
338    VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferInfo->srcBuffer);
339    VK_FROM_HANDLE(nvk_buffer, dst, pCopyBufferInfo->dstBuffer);
340 
341    for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
342       const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];
343 
344       uint64_t src_addr = nvk_buffer_address(src, region->srcOffset);
345       uint64_t dst_addr = nvk_buffer_address(dst, region->dstOffset);
346       uint64_t size = region->size;
347 
348       while (size) {
349          struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
350 
351          P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
352          P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
353          P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
354          P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
355          P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
356 
357          unsigned bytes = MIN2(size, 1 << 17);
358 
359          P_MTHD(p, NV90B5, LINE_LENGTH_IN);
360          P_NV90B5_LINE_LENGTH_IN(p, bytes);
361          P_NV90B5_LINE_COUNT(p, 1);
362 
363          P_IMMD(p, NV90B5, LAUNCH_DMA, {
364                 .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
365                 .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
366                 .flush_enable = FLUSH_ENABLE_TRUE,
367                 .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
368                 .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
369          });
370 
371          src_addr += bytes;
372          dst_addr += bytes;
373          size -= bytes;
374       }
375    }
376 }
377 
/* vkCmdCopyBufferToImage2: copies buffer data into an image via the DMA
 * engine, with per-format component remaps for depth/stencil aspects.
 *
 * D32_SFLOAT_S8_UINT stencil uploads are done in two passes through the
 * image's stencil_copy_temp plane (copy, then copy2); all other cases
 * are a single nouveau_copy_rect().
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
                          const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_image, dst, pCopyBufferToImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&dst->vk, region);

      /* Image extent in pixels, padded/clamped by the runtime helpers */
      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&dst->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&dst->vk, &region->imageSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(dst, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_buffer(src, region->bufferOffset,
                                         buffer_layout),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px, dst->planes[plane].nil.format,
                                            dst->planes[plane].nil.sample_layout),
      };
      /* Optional second pass; only used (width > 0) by the D32S8 stencil
       * path below.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (dst->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Write only the 4B depth component; leave stencil untouched */
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Two passes: buffer -> stencil_copy_temp, then temp -> image
             * stencil component.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(dst, &dst->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            /* Pass 1: tightly-packed 1B stencil values into the temp */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            /* Pass 2: place the value in the Z component, skipping depth */
            copy2.remap.comp_size = 2;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* 1B components: write the three depth bytes, skip stencil */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Write only the stencil byte (W), skip the depth bytes */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(dst->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.width > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            vk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyBufferToImageInfo->pNext) {
      switch (ext->sType) {
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
484 
/* vkCmdCopyImageToBuffer2: copies image data into a buffer via the DMA
 * engine, with per-format component remaps for depth/stencil aspects.
 *
 * D32_SFLOAT_S8_UINT stencil downloads are done in two passes through
 * the image's stencil_copy_temp plane (copy, then copy2); all other
 * cases are a single nouveau_copy_rect().
 */
VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                          const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageToBufferInfo->srcImage);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&src->vk, region);

      /* Image extent in pixels, padded/clamped by the runtime helpers */
      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->imageSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(src, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset,
                                         buffer_layout),
         .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[plane].nil.format,
                                            src->planes[plane].nil.sample_layout),
      };
      /* Optional second pass; only used (width > 0) by the D32S8 stencil
       * path below.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (src->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Read only the 4B depth component */
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Two passes: image stencil component -> stencil_copy_temp,
             * then temp -> tightly-packed buffer bytes.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(src, &src->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            /* Pass 1: extract the Z component into the temp */
            copy.remap.comp_size = 2;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            /* Pass 2: copy the packed 1B stencil values to the buffer */
            copy2.remap.comp_size = 1;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* 1B components: read the three depth bytes, skip stencil */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Read only the stencil byte (source W component) */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_W;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.width > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            vk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyImageToBufferInfo->pNext) {
      switch (ext->sType) {
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
591 
592 void
nvk_linear_render_copy(struct nvk_cmd_buffer * cmd,const struct nvk_image_view * iview,VkRect2D copy_rect,bool copy_to_tiled_shadow)593 nvk_linear_render_copy(struct nvk_cmd_buffer *cmd,
594                        const struct nvk_image_view *iview,
595                        VkRect2D copy_rect,
596                        bool copy_to_tiled_shadow)
597 {
598    const struct nvk_image *image = (struct nvk_image *)iview->vk.image;
599 
600    const uint8_t ip = iview->planes[0].image_plane;
601    const struct nvk_image_plane *src_plane = NULL, *dst_plane = NULL;
602    if (copy_to_tiled_shadow) {
603       src_plane = &image->planes[ip];
604       dst_plane = &image->linear_tiled_shadow;
605    } else {
606       src_plane = &image->linear_tiled_shadow;
607       dst_plane = &image->planes[ip];
608    }
609 
610    const struct VkImageSubresourceLayers subres = {
611       .aspectMask = iview->vk.aspects,
612       .baseArrayLayer = iview->vk.base_array_layer,
613       .layerCount = iview->vk.layer_count,
614       .mipLevel = iview->vk.base_mip_level,
615    };
616 
617    const VkOffset3D offset_px = {
618       .x = copy_rect.offset.x,
619       .y = copy_rect.offset.y,
620       .z = 0,
621    };
622    const struct nil_Extent4D_Pixels extent4d_px = {
623       .width = copy_rect.extent.width,
624       .height = copy_rect.extent.height,
625       .depth = 1,
626       .array_len = 1,
627    };
628 
629    struct nouveau_copy copy = {
630       .src = nouveau_copy_rect_image(image, src_plane, offset_px, &subres),
631       .dst = nouveau_copy_rect_image(image, dst_plane, offset_px, &subres),
632       .extent_el = nil_extent4d_px_to_el(extent4d_px, src_plane->nil.format,
633                                          src_plane->nil.sample_layout),
634    };
635 
636    copy.remap = nouveau_copy_remap_format(image->vk.format);
637    nouveau_copy_rect(cmd, &copy);
638 }
639 
640 VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,const VkCopyImageInfo2 * pCopyImageInfo)641 nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,
642                   const VkCopyImageInfo2 *pCopyImageInfo)
643 {
644    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
645    VK_FROM_HANDLE(nvk_image, src, pCopyImageInfo->srcImage);
646    VK_FROM_HANDLE(nvk_image, dst, pCopyImageInfo->dstImage);
647 
648    for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
649       const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];
650 
651       /* From the Vulkan 1.3.217 spec:
652        *
653        *    "When copying between compressed and uncompressed formats the
654        *    extent members represent the texel dimensions of the source image
655        *    and not the destination."
656        */
657       const VkExtent3D extent_px =
658          vk_image_sanitize_extent(&src->vk, region->extent);
659       const uint32_t layer_count =
660          vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
661       const struct nil_Extent4D_Pixels extent4d_px =
662          vk_to_nil_extent(extent_px, layer_count);
663 
664       const VkImageAspectFlagBits src_aspects =
665          region->srcSubresource.aspectMask;
666       uint8_t src_plane = nvk_image_aspects_to_plane(src, src_aspects);
667 
668       const VkImageAspectFlagBits dst_aspects =
669          region->dstSubresource.aspectMask;
670       uint8_t dst_plane = nvk_image_aspects_to_plane(dst, dst_aspects);
671 
672       struct nouveau_copy copy = {
673          .src = nouveau_copy_rect_image(src, &src->planes[src_plane],
674                                         region->srcOffset,
675                                         &region->srcSubresource),
676          .dst = nouveau_copy_rect_image(dst, &dst->planes[dst_plane],
677                                         region->dstOffset,
678                                         &region->dstSubresource),
679          .extent_el = nil_extent4d_px_to_el(extent4d_px, src->planes[src_plane].nil.format,
680                                             src->planes[src_plane].nil.sample_layout),
681       };
682 
683       assert(src_aspects == region->srcSubresource.aspectMask);
684       switch (src->vk.format) {
685       case VK_FORMAT_D24_UNORM_S8_UINT:
686          if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
687             copy.remap.comp_size = 1;
688             copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
689             copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
690             copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
691             copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
692          } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
693             copy.remap.comp_size = 1;
694             copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
695             copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
696             copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
697             copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_W;
698          } else {
699             /* If we're copying both, there's nothing special to do */
700             assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
701                                VK_IMAGE_ASPECT_STENCIL_BIT));
702          }
703          break;
704       case VK_FORMAT_D32_SFLOAT_S8_UINT:
705          if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
706             copy.remap.comp_size = 4;
707             copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
708             copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
709             copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
710             copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
711          } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
712             copy.remap.comp_size = 4;
713             copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
714             copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
715             copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
716             copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
717          } else {
718             /* If we're copying both, there's nothing special to do */
719             assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
720                                VK_IMAGE_ASPECT_STENCIL_BIT));
721          }
722          break;
723       default:
724          copy.remap = nouveau_copy_remap_format(src->vk.format);
725          break;
726       }
727 
728       nouveau_copy_rect(cmd, &copy);
729    }
730 }
731 
732 VKAPI_ATTR void VKAPI_CALL
nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize size,uint32_t data)733 nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
734                   VkBuffer dstBuffer,
735                   VkDeviceSize dstOffset,
736                   VkDeviceSize size,
737                   uint32_t data)
738 {
739    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
740    VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);
741 
742    uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
743    size = vk_buffer_range(&dst_buffer->vk, dstOffset, size);
744 
745    uint32_t max_dim = 1 << 15;
746 
747    struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);
748 
749    P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data);
750    P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
751       .dst_x = DST_X_CONST_A,
752       .dst_y = DST_Y_CONST_A,
753       .dst_z = DST_Z_CONST_A,
754       .dst_w = DST_W_CONST_A,
755       .component_size = COMPONENT_SIZE_FOUR,
756       .num_src_components = NUM_SRC_COMPONENTS_ONE,
757       .num_dst_components = NUM_DST_COMPONENTS_ONE,
758    });
759 
760    P_MTHD(p, NV90B5, PITCH_IN);
761    P_NV90B5_PITCH_IN(p, max_dim * 4);
762    P_NV90B5_PITCH_OUT(p, max_dim * 4);
763 
764    while (size >= 4) {
765       struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
766 
767       P_MTHD(p, NV90B5, OFFSET_OUT_UPPER);
768       P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
769       P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
770 
771       uint64_t width, height;
772       if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) {
773          width = height = max_dim;
774       } else if (size >= max_dim * 4) {
775          width = max_dim;
776          height = size / (max_dim * 4);
777       } else {
778          width = size / 4;
779          height = 1;
780       }
781 
782       uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4;
783       assert(dma_size <= size);
784 
785       P_MTHD(p, NV90B5, LINE_LENGTH_IN);
786       P_NV90B5_LINE_LENGTH_IN(p, width);
787       P_NV90B5_LINE_COUNT(p, height);
788 
789       P_IMMD(p, NV90B5, LAUNCH_DMA, {
790          .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
791          .multi_line_enable = height > 1,
792          .flush_enable = FLUSH_ENABLE_TRUE,
793          .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
794          .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
795          .remap_enable = REMAP_ENABLE_TRUE,
796       });
797 
798       dst_addr += dma_size;
799       size -= dma_size;
800    }
801 }
802 
803 VKAPI_ATTR void VKAPI_CALL
nvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,VkBuffer dstBuffer,VkDeviceSize dstOffset,VkDeviceSize dataSize,const void * pData)804 nvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
805                     VkBuffer dstBuffer,
806                     VkDeviceSize dstOffset,
807                     VkDeviceSize dataSize,
808                     const void *pData)
809 {
810    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
811    VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer);
812 
813    uint64_t dst_addr = nvk_buffer_address(dst, dstOffset);
814 
815    uint64_t data_addr;
816    nvk_cmd_buffer_upload_data(cmd, pData, dataSize, 64, &data_addr);
817 
818    struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);
819 
820    P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
821    P_NV90B5_OFFSET_IN_UPPER(p, data_addr >> 32);
822    P_NV90B5_OFFSET_IN_LOWER(p, data_addr & 0xffffffff);
823    P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
824    P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
825 
826    P_MTHD(p, NV90B5, LINE_LENGTH_IN);
827    P_NV90B5_LINE_LENGTH_IN(p, dataSize);
828    P_NV90B5_LINE_COUNT(p, 1);
829 
830    P_IMMD(p, NV90B5, LAUNCH_DMA, {
831       .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
832       .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
833       .flush_enable = FLUSH_ENABLE_TRUE,
834       .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
835       .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
836    });
837 }
838