• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright © 2024 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #include <assert.h>
6 #include <stdbool.h>
7 
8 #include "anv_private.h"
9 #include "util/u_cpu_detect.h"
10 #include "util/u_debug.h"
11 #include "vk_util.h"
12 
13 static inline VkOffset3D
vk_offset3d_to_el(enum isl_format format,VkOffset3D offset)14 vk_offset3d_to_el(enum isl_format format, VkOffset3D offset)
15 {
16    const struct isl_format_layout *fmt_layout =
17       isl_format_get_layout(format);
18    return (VkOffset3D) {
19       .x = offset.x / fmt_layout->bw,
20       .y = offset.y / fmt_layout->bh,
21       .z = offset.z / fmt_layout->bd,
22    };
23 }
24 
25 static inline VkExtent3D
vk_extent3d_to_el(enum isl_format format,VkExtent3D extent)26 vk_extent3d_to_el(enum isl_format format, VkExtent3D extent)
27 {
28    const struct isl_format_layout *fmt_layout =
29       isl_format_get_layout(format);
30    return (VkExtent3D) {
31       .width  = DIV_ROUND_UP(extent.width,  fmt_layout->bw),
32       .height = DIV_ROUND_UP(extent.height, fmt_layout->bh),
33       .depth  = DIV_ROUND_UP(extent.depth,  fmt_layout->bd),
34    };
35 }
36 
37 static void
anv_memcpy_image_memory(struct anv_device * device,const struct isl_surf * surf,const struct anv_image_binding * binding,uint64_t binding_offset,void * mem_ptr,uint32_t level,uint32_t base_img_array_layer,uint32_t base_img_z_offset_px,uint32_t array_layer,uint32_t z_offset_px,bool mem_to_img)38 anv_memcpy_image_memory(struct anv_device *device,
39                         const struct isl_surf *surf,
40                         const struct anv_image_binding *binding,
41                         uint64_t binding_offset,
42                         void *mem_ptr,
43                         uint32_t level,
44                         uint32_t base_img_array_layer,
45                         uint32_t base_img_z_offset_px,
46                         uint32_t array_layer,
47                         uint32_t z_offset_px,
48                         bool mem_to_img)
49 {
50    uint64_t start_tile_B, end_tile_B;
51    isl_surf_get_image_range_B_tile(surf, level,
52                                    base_img_array_layer,
53                                    base_img_z_offset_px,
54                                    &start_tile_B, &end_tile_B);
55    uint32_t array_pitch_B = isl_surf_get_array_pitch(surf);
56 
57    uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
58                                       base_img_z_offset_px + z_offset_px);
59    uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
60 
61    void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
62    if (mem_to_img) {
63       memcpy(img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
64              mem_ptr + mem_depth_or_layer * array_pitch_B,
65              end_tile_B - start_tile_B);
66    } else {
67       memcpy(mem_ptr + mem_depth_or_layer * array_pitch_B,
68              img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
69              end_tile_B - start_tile_B);
70    }
71 }
72 
73 static void
get_image_offset_el(const struct isl_surf * surf,unsigned level,unsigned z,uint32_t * out_x0_el,uint32_t * out_y0_el)74 get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z,
75                     uint32_t *out_x0_el, uint32_t *out_y0_el)
76 {
77    ASSERTED uint32_t z0_el, a0_el;
78    if (surf->dim == ISL_SURF_DIM_3D) {
79       isl_surf_get_image_offset_el(surf, level, 0, z,
80                                    out_x0_el, out_y0_el, &z0_el, &a0_el);
81    } else {
82       isl_surf_get_image_offset_el(surf, level, z, 0,
83                                    out_x0_el, out_y0_el, &z0_el, &a0_el);
84    }
85    assert(z0_el == 0 && a0_el == 0);
86 }
87 
88 /* Compute extent parameters for use with tiled_memcpy functions.
89  * xs are in units of bytes and ys are in units of strides.
90  */
91 static inline void
tile_extents(const struct isl_surf * surf,const VkOffset3D * offset_el,const VkExtent3D * extent_el,unsigned level,int z,uint32_t * x1_B,uint32_t * x2_B,uint32_t * y1_el,uint32_t * y2_el)92 tile_extents(const struct isl_surf *surf,
93              const VkOffset3D *offset_el,
94              const VkExtent3D *extent_el,
95              unsigned level, int z,
96              uint32_t *x1_B, uint32_t *x2_B,
97              uint32_t *y1_el, uint32_t *y2_el)
98 {
99    const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
100    const unsigned cpp = fmtl->bpb / 8;
101 
102    /* z contains offset->z */
103    assert (z >= offset_el->z);
104 
105    unsigned x0_el, y0_el;
106    get_image_offset_el(surf, level, z, &x0_el, &y0_el);
107 
108    *x1_B = (offset_el->x + x0_el) * cpp;
109    *y1_el = offset_el->y + y0_el;
110    *x2_B = (offset_el->x + extent_el->width + x0_el) * cpp;
111    *y2_el = offset_el->y + extent_el->height + y0_el;
112 }
113 
114 static void
anv_copy_image_memory(struct anv_device * device,const struct isl_surf * surf,const struct anv_image_binding * binding,uint64_t binding_offset,void * mem_ptr,uint64_t mem_row_pitch_B,uint64_t mem_height_pitch_B,const VkOffset3D * offset_el,const VkExtent3D * extent_el,uint32_t level,uint32_t base_img_array_layer,uint32_t base_img_z_offset_px,uint32_t array_layer,uint32_t z_offset_px,bool mem_to_img)115 anv_copy_image_memory(struct anv_device *device,
116                       const struct isl_surf *surf,
117                       const struct anv_image_binding *binding,
118                       uint64_t binding_offset,
119                       void *mem_ptr,
120                       uint64_t mem_row_pitch_B,
121                       uint64_t mem_height_pitch_B,
122                       const VkOffset3D *offset_el,
123                       const VkExtent3D *extent_el,
124                       uint32_t level,
125                       uint32_t base_img_array_layer,
126                       uint32_t base_img_z_offset_px,
127                       uint32_t array_layer,
128                       uint32_t z_offset_px,
129                       bool mem_to_img)
130 {
131    const struct isl_format_layout *fmt_layout =
132       isl_format_get_layout(surf->format);
133    const uint32_t bs = fmt_layout->bpb / 8;
134    void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
135 
136    uint64_t start_tile_B, end_tile_B;
137    isl_surf_get_image_range_B_tile(surf, level,
138                                    base_img_array_layer + array_layer,
139                                    base_img_z_offset_px + z_offset_px,
140                                    &start_tile_B, &end_tile_B);
141 
142 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
143    const bool need_invalidate_flush =
144       (binding->address.bo->flags & ANV_BO_ALLOC_HOST_COHERENT) == 0 &&
145       device->physical->memory.need_flush;
146    if (need_invalidate_flush && !mem_to_img)
147       intel_invalidate_range(img_ptr + start_tile_B, end_tile_B - start_tile_B);
148 #endif
149 
150    uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
151                                       base_img_z_offset_px + z_offset_px);
152    uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
153 
154    if (surf->tiling == ISL_TILING_LINEAR) {
155       uint64_t img_col_offset = offset_el->x * bs;
156       uint64_t row_copy_size = extent_el->width * bs;
157       for (uint32_t h_el = 0; h_el < extent_el->height; h_el++) {
158          uint64_t mem_row_offset =
159             mem_height_pitch_B * mem_depth_or_layer +
160             h_el * mem_row_pitch_B;
161          uint64_t img_row = h_el + offset_el->y;
162          uint64_t img_offset =
163             start_tile_B + img_row * surf->row_pitch_B + img_col_offset;
164          assert((img_offset + row_copy_size) <= binding->memory_range.size);
165 
166          if (mem_to_img)
167             memcpy(img_ptr + img_offset, mem_ptr + mem_row_offset, row_copy_size);
168          else
169             memcpy(mem_ptr + mem_row_offset, img_ptr + img_offset, row_copy_size);
170       }
171    } else {
172       uint32_t x1, x2, y1, y2;
173       tile_extents(surf, offset_el, extent_el, level, img_depth_or_layer,
174                    &x1, &x2, &y1, &y2);
175 
176       if (mem_to_img) {
177          isl_memcpy_linear_to_tiled(x1, x2, y1, y2,
178                                     img_ptr,
179                                     mem_ptr + mem_height_pitch_B * mem_depth_or_layer,
180                                     surf->row_pitch_B,
181                                     mem_row_pitch_B,
182                                     false,
183                                     surf->tiling,
184                                     ISL_MEMCPY);
185       } else {
186          isl_memcpy_tiled_to_linear(x1, x2, y1, y2,
187                                     mem_ptr + mem_height_pitch_B * mem_depth_or_layer,
188                                     img_ptr,
189                                     mem_row_pitch_B,
190                                     surf->row_pitch_B,
191                                     false,
192                                     surf->tiling,
193 #if defined(USE_SSE41)
194                                     util_get_cpu_caps()->has_sse4_1 ?
195                                     ISL_MEMCPY_STREAMING_LOAD :
196 #endif
197                                     ISL_MEMCPY);
198       }
199    }
200 
201 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
202    if (need_invalidate_flush && mem_to_img)
203       intel_flush_range(img_ptr + start_tile_B, end_tile_B - start_tile_B);
204 #endif
205 }
206 
207 static uint64_t
calc_mem_row_pitch_B(const struct isl_surf * surf,uint64_t api_row_length_px,const VkExtent3D * extent_px)208 calc_mem_row_pitch_B(const struct isl_surf *surf,
209                      uint64_t api_row_length_px,
210                      const VkExtent3D *extent_px)
211 {
212    const struct isl_format_layout *fmt_layout =
213       isl_format_get_layout(surf->format);
214    const uint32_t bs = fmt_layout->bpb / 8;
215 
216    return api_row_length_px != 0 ?
217       (bs * DIV_ROUND_UP(api_row_length_px, fmt_layout->bw)) :
218       (bs * DIV_ROUND_UP(extent_px->width, fmt_layout->bw));
219 }
220 
221 static uint64_t
calc_mem_height_pitch_B(const struct isl_surf * surf,uint64_t row_pitch_B,uint64_t api_height_px,const VkExtent3D * extent_px)222 calc_mem_height_pitch_B(const struct isl_surf *surf,
223                         uint64_t row_pitch_B,
224                         uint64_t api_height_px,
225                         const VkExtent3D *extent_px)
226 {
227    const struct isl_format_layout *fmt_layout =
228       isl_format_get_layout(surf->format);
229 
230    return api_height_px != 0 ?
231       (row_pitch_B * DIV_ROUND_UP(api_height_px, fmt_layout->bh)) :
232       (row_pitch_B * DIV_ROUND_UP(extent_px->height, fmt_layout->bh));
233 }
234 
235 VkResult
anv_CopyMemoryToImageEXT(VkDevice _device,const VkCopyMemoryToImageInfoEXT * pCopyMemoryToImageInfo)236 anv_CopyMemoryToImageEXT(
237     VkDevice                                    _device,
238     const VkCopyMemoryToImageInfoEXT*           pCopyMemoryToImageInfo)
239 {
240    ANV_FROM_HANDLE(anv_device, device, _device);
241    ANV_FROM_HANDLE(anv_image, image, pCopyMemoryToImageInfo->dstImage);
242 
243    for (uint32_t r = 0; r < pCopyMemoryToImageInfo->regionCount; r++) {
244       const VkMemoryToImageCopyEXT *region =
245          &pCopyMemoryToImageInfo->pRegions[r];
246       const uint32_t plane =
247          anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask);
248       const struct anv_surface *anv_surf =
249          &image->planes[plane].primary_surface;
250       const struct isl_surf *surf = &anv_surf->isl;
251       const struct anv_image_binding *binding =
252          &image->bindings[anv_surf->memory_range.binding];
253 
254       assert(binding->host_map != NULL);
255 
256       /* Memory distance between each row */
257       uint64_t mem_row_pitch_B =
258          calc_mem_row_pitch_B(surf, region->memoryRowLength,
259                               &region->imageExtent);
260       /* Memory distance between each slice (1 3D level or 1 array layer) */
261       uint64_t mem_height_pitch_B =
262          calc_mem_height_pitch_B(surf, mem_row_pitch_B,
263                                  region->memoryImageHeight,
264                                  &region->imageExtent);
265 
266       VkOffset3D offset_el =
267          vk_offset3d_to_el(surf->format, region->imageOffset);
268       VkExtent3D extent_el =
269          vk_extent3d_to_el(surf->format, region->imageExtent);
270 
271       for (uint32_t a = 0; a < region->imageSubresource.layerCount; a++) {
272          for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
273             if ((pCopyMemoryToImageInfo->flags &
274                  VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
275                 anv_image_can_host_memcpy(image)) {
276                anv_memcpy_image_memory(device, surf, binding,
277                                        anv_surf->memory_range.offset,
278                                        (void *)region->pHostPointer,
279                                        region->imageSubresource.mipLevel,
280                                        region->imageSubresource.baseArrayLayer,
281                                        region->imageOffset.z,
282                                        a, z, true /* mem_to_img */);
283             } else {
284                anv_copy_image_memory(device, surf,
285                                      binding, anv_surf->memory_range.offset,
286                                      (void *)region->pHostPointer,
287                                      mem_row_pitch_B,
288                                      mem_height_pitch_B,
289                                      &offset_el,
290                                      &extent_el,
291                                      region->imageSubresource.mipLevel,
292                                      region->imageSubresource.baseArrayLayer,
293                                      region->imageOffset.z,
294                                      a, z, true /* mem_to_img */);
295             }
296          }
297       }
298    }
299 
300    return VK_SUCCESS;
301 }
302 
303 VkResult
anv_CopyImageToMemoryEXT(VkDevice _device,const VkCopyImageToMemoryInfoEXT * pCopyImageToMemoryInfo)304 anv_CopyImageToMemoryEXT(
305     VkDevice                                    _device,
306     const VkCopyImageToMemoryInfoEXT*           pCopyImageToMemoryInfo)
307 {
308    ANV_FROM_HANDLE(anv_device, device, _device);
309    ANV_FROM_HANDLE(anv_image, image, pCopyImageToMemoryInfo->srcImage);
310 
311    for (uint32_t r = 0; r < pCopyImageToMemoryInfo->regionCount; r++) {
312       const VkImageToMemoryCopyEXT *region =
313          &pCopyImageToMemoryInfo->pRegions[r];
314       const uint32_t plane =
315          anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask);
316       const struct anv_surface *anv_surf =
317          &image->planes[plane].primary_surface;
318       const struct isl_surf *surf = &anv_surf->isl;
319       const struct anv_image_binding *binding =
320          &image->bindings[anv_surf->memory_range.binding];
321 
322       assert(binding->host_map != NULL);
323 
324       VkOffset3D offset_el =
325          vk_offset3d_to_el(surf->format, region->imageOffset);
326       VkExtent3D extent_el =
327          vk_extent3d_to_el(surf->format, region->imageExtent);
328 
329       /* Memory distance between each row */
330       uint64_t mem_row_pitch_B =
331          calc_mem_row_pitch_B(surf, region->memoryRowLength,
332                               &region->imageExtent);
333       /* Memory distance between each slice (1 3D level or 1 array layer) */
334       uint64_t mem_height_pitch_B =
335          calc_mem_height_pitch_B(surf, mem_row_pitch_B,
336                                  region->memoryImageHeight,
337                                  &region->imageExtent);
338 
339       for (uint32_t a = 0; a < region->imageSubresource.layerCount; a++) {
340          for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
341             if ((pCopyImageToMemoryInfo->flags &
342                  VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
343                 anv_image_can_host_memcpy(image)) {
344                anv_memcpy_image_memory(device, surf, binding,
345                                        anv_surf->memory_range.offset,
346                                        region->pHostPointer,
347                                        region->imageSubresource.mipLevel,
348                                        region->imageSubresource.baseArrayLayer,
349                                        region->imageOffset.z,
350                                        a, z, false /* mem_to_img */);
351             } else {
352                anv_copy_image_memory(device, surf,
353                                      binding, anv_surf->memory_range.offset,
354                                      region->pHostPointer,
355                                      mem_row_pitch_B,
356                                      mem_height_pitch_B,
357                                      &offset_el,
358                                      &extent_el,
359                                      region->imageSubresource.mipLevel,
360                                      region->imageSubresource.baseArrayLayer,
361                                      region->imageOffset.z,
362                                      a, z, false /* mem_to_img */);
363             }
364          }
365       }
366    }
367 
368    return VK_SUCCESS;
369 }
370 
371 VkResult
anv_CopyImageToImageEXT(VkDevice _device,const VkCopyImageToImageInfoEXT * pCopyImageToImageInfo)372 anv_CopyImageToImageEXT(
373     VkDevice                                    _device,
374     const VkCopyImageToImageInfoEXT*            pCopyImageToImageInfo)
375 {
376    ANV_FROM_HANDLE(anv_device, device, _device);
377    ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToImageInfo->srcImage);
378    ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageToImageInfo->dstImage);
379 
380    /* Work with a tile's worth of data */
381    void *tmp_map = vk_alloc(&device->vk.alloc, 4096, 8,
382                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
383    if (tmp_map == NULL)
384       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
385 
386    for (uint32_t r = 0; r < pCopyImageToImageInfo->regionCount; r++) {
387       const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[r];
388 
389       const uint32_t src_plane =
390          anv_image_aspect_to_plane(src_image,
391                                    region->srcSubresource.aspectMask);
392       const uint32_t dst_plane =
393          anv_image_aspect_to_plane(dst_image,
394                                    region->srcSubresource.aspectMask);
395       const struct anv_surface *src_anv_surf =
396          &src_image->planes[src_plane].primary_surface;
397       const struct anv_surface *dst_anv_surf =
398          &dst_image->planes[dst_plane].primary_surface;
399       const struct isl_surf *src_surf = &src_anv_surf->isl;
400       const struct isl_surf *dst_surf = &dst_anv_surf->isl;
401       const struct anv_image_binding *src_binding =
402          &src_image->bindings[src_anv_surf->memory_range.binding];
403       const struct anv_image_binding *dst_binding =
404          &dst_image->bindings[dst_anv_surf->memory_range.binding];
405 
406       struct isl_tile_info src_tile;
407       struct isl_tile_info dst_tile;
408 
409       isl_surf_get_tile_info(src_surf, &src_tile);
410       isl_surf_get_tile_info(dst_surf, &dst_tile);
411 
412       uint32_t tile_width_B;
413       uint32_t tile_width_el, tile_height_el;
414       if (src_tile.phys_extent_B.w > dst_tile.phys_extent_B.w) {
415          tile_width_B   = src_tile.phys_extent_B.w;
416          tile_width_el  = src_tile.logical_extent_el.w;
417          tile_height_el = src_tile.logical_extent_el.h;
418       } else {
419          tile_width_B   = dst_tile.phys_extent_B.w;
420          tile_width_el  = dst_tile.logical_extent_el.w;
421          tile_height_el = dst_tile.logical_extent_el.h;
422       }
423 
424       /* There is no requirement that the extent be aligned to the texel block
425        * size.
426        */
427       VkOffset3D src_offset_el =
428          vk_offset3d_to_el(src_surf->format, region->srcOffset);
429       VkOffset3D dst_offset_el =
430          vk_offset3d_to_el(src_surf->format, region->dstOffset);
431       VkExtent3D extent_el =
432          vk_extent3d_to_el(src_surf->format, region->extent);
433 
434       /* linear-to-linear case */
435       if (tile_width_el == 1 && tile_height_el == 1) {
436          tile_width_el = MIN2(4096 / (src_tile.format_bpb / 8),
437                               extent_el.width);
438          tile_height_el = 4096 / (tile_width_el * (src_tile.format_bpb / 8));
439          tile_width_B = tile_width_el * src_tile.format_bpb / 8;
440       }
441 
442       for (uint32_t a = 0; a < region->srcSubresource.layerCount; a++) {
443          for (uint32_t z = 0; z < region->extent.depth; z++) {
444             for (uint32_t y_el = 0; y_el < extent_el.height; y_el += tile_height_el) {
445                for (uint32_t x_el = 0; x_el < extent_el.width; x_el += tile_width_el) {
446                   VkOffset3D src_offset = {
447                      .x = src_offset_el.x + x_el,
448                      .y = src_offset_el.y + y_el,
449                   };
450                   VkOffset3D dst_offset = {
451                      .x = dst_offset_el.x + x_el,
452                      .y = dst_offset_el.y + y_el,
453                   };
454                   VkExtent3D extent = {
455                      .width  = MIN2(extent_el.width - x_el, tile_width_el),
456                      .height = MIN2(extent_el.height - y_el, tile_height_el),
457                      .depth  = 1,
458                   };
459 
460                   anv_copy_image_memory(device, src_surf,
461                                         src_binding,
462                                         src_anv_surf->memory_range.offset,
463                                         tmp_map,
464                                         tile_width_B, 0,
465                                         &src_offset, &extent,
466                                         region->srcSubresource.mipLevel,
467                                         region->srcSubresource.baseArrayLayer,
468                                         region->srcOffset.z,
469                                         a, z,
470                                         false /* mem_to_img */);
471                   anv_copy_image_memory(device, dst_surf,
472                                         dst_binding,
473                                         dst_anv_surf->memory_range.offset,
474                                         tmp_map,
475                                         tile_width_B, 0,
476                                         &dst_offset, &extent,
477                                         region->dstSubresource.mipLevel,
478                                         region->dstSubresource.baseArrayLayer,
479                                         region->dstOffset.z,
480                                         a, z,
481                                         true /* mem_to_img */);
482                }
483             }
484          }
485       }
486    }
487 
488    vk_free(&device->vk.alloc, tmp_map);
489 
490    return VK_SUCCESS;
491 }
492 
493 VkResult
anv_TransitionImageLayoutEXT(VkDevice device,uint32_t transitionCount,const VkHostImageLayoutTransitionInfoEXT * pTransitions)494 anv_TransitionImageLayoutEXT(
495     VkDevice                                    device,
496     uint32_t                                    transitionCount,
497     const VkHostImageLayoutTransitionInfoEXT*   pTransitions)
498 {
499    /* Our layout transitions are mostly about resolving the auxiliary surface
500     * into the main surface. Since we disable the auxiliary surface, there is
501     * nothing here for us to do.
502     */
503    return VK_SUCCESS;
504 }
505