1 /* Copyright © 2024 Intel Corporation
2 * SPDX-License-Identifier: MIT
3 */
4
5 #include <assert.h>
6 #include <stdbool.h>
7
8 #include "anv_private.h"
9 #include "util/u_cpu_detect.h"
10 #include "util/u_debug.h"
11 #include "vk_util.h"
12
13 static inline VkOffset3D
vk_offset3d_to_el(enum isl_format format,VkOffset3D offset)14 vk_offset3d_to_el(enum isl_format format, VkOffset3D offset)
15 {
16 const struct isl_format_layout *fmt_layout =
17 isl_format_get_layout(format);
18 return (VkOffset3D) {
19 .x = offset.x / fmt_layout->bw,
20 .y = offset.y / fmt_layout->bh,
21 .z = offset.z / fmt_layout->bd,
22 };
23 }
24
25 static inline VkExtent3D
vk_extent3d_to_el(enum isl_format format,VkExtent3D extent)26 vk_extent3d_to_el(enum isl_format format, VkExtent3D extent)
27 {
28 const struct isl_format_layout *fmt_layout =
29 isl_format_get_layout(format);
30 return (VkExtent3D) {
31 .width = DIV_ROUND_UP(extent.width, fmt_layout->bw),
32 .height = DIV_ROUND_UP(extent.height, fmt_layout->bh),
33 .depth = DIV_ROUND_UP(extent.depth, fmt_layout->bd),
34 };
35 }
36
37 static void
anv_memcpy_image_memory(struct anv_device * device,const struct isl_surf * surf,const struct anv_image_binding * binding,uint64_t binding_offset,void * mem_ptr,uint32_t level,uint32_t base_img_array_layer,uint32_t base_img_z_offset_px,uint32_t array_layer,uint32_t z_offset_px,bool mem_to_img)38 anv_memcpy_image_memory(struct anv_device *device,
39 const struct isl_surf *surf,
40 const struct anv_image_binding *binding,
41 uint64_t binding_offset,
42 void *mem_ptr,
43 uint32_t level,
44 uint32_t base_img_array_layer,
45 uint32_t base_img_z_offset_px,
46 uint32_t array_layer,
47 uint32_t z_offset_px,
48 bool mem_to_img)
49 {
50 uint64_t start_tile_B, end_tile_B;
51 isl_surf_get_image_range_B_tile(surf, level,
52 base_img_array_layer,
53 base_img_z_offset_px,
54 &start_tile_B, &end_tile_B);
55 uint32_t array_pitch_B = isl_surf_get_array_pitch(surf);
56
57 uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
58 base_img_z_offset_px + z_offset_px);
59 uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
60
61 void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
62 if (mem_to_img) {
63 memcpy(img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
64 mem_ptr + mem_depth_or_layer * array_pitch_B,
65 end_tile_B - start_tile_B);
66 } else {
67 memcpy(mem_ptr + mem_depth_or_layer * array_pitch_B,
68 img_ptr + start_tile_B + img_depth_or_layer * array_pitch_B,
69 end_tile_B - start_tile_B);
70 }
71 }
72
73 static void
get_image_offset_el(const struct isl_surf * surf,unsigned level,unsigned z,uint32_t * out_x0_el,uint32_t * out_y0_el)74 get_image_offset_el(const struct isl_surf *surf, unsigned level, unsigned z,
75 uint32_t *out_x0_el, uint32_t *out_y0_el)
76 {
77 ASSERTED uint32_t z0_el, a0_el;
78 if (surf->dim == ISL_SURF_DIM_3D) {
79 isl_surf_get_image_offset_el(surf, level, 0, z,
80 out_x0_el, out_y0_el, &z0_el, &a0_el);
81 } else {
82 isl_surf_get_image_offset_el(surf, level, z, 0,
83 out_x0_el, out_y0_el, &z0_el, &a0_el);
84 }
85 assert(z0_el == 0 && a0_el == 0);
86 }
87
88 /* Compute extent parameters for use with tiled_memcpy functions.
89 * xs are in units of bytes and ys are in units of strides.
90 */
91 static inline void
tile_extents(const struct isl_surf * surf,const VkOffset3D * offset_el,const VkExtent3D * extent_el,unsigned level,int z,uint32_t * x1_B,uint32_t * x2_B,uint32_t * y1_el,uint32_t * y2_el)92 tile_extents(const struct isl_surf *surf,
93 const VkOffset3D *offset_el,
94 const VkExtent3D *extent_el,
95 unsigned level, int z,
96 uint32_t *x1_B, uint32_t *x2_B,
97 uint32_t *y1_el, uint32_t *y2_el)
98 {
99 const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
100 const unsigned cpp = fmtl->bpb / 8;
101
102 /* z contains offset->z */
103 assert (z >= offset_el->z);
104
105 unsigned x0_el, y0_el;
106 get_image_offset_el(surf, level, z, &x0_el, &y0_el);
107
108 *x1_B = (offset_el->x + x0_el) * cpp;
109 *y1_el = offset_el->y + y0_el;
110 *x2_B = (offset_el->x + extent_el->width + x0_el) * cpp;
111 *y2_el = offset_el->y + extent_el->height + y0_el;
112 }
113
114 static void
anv_copy_image_memory(struct anv_device * device,const struct isl_surf * surf,const struct anv_image_binding * binding,uint64_t binding_offset,void * mem_ptr,uint64_t mem_row_pitch_B,uint64_t mem_height_pitch_B,const VkOffset3D * offset_el,const VkExtent3D * extent_el,uint32_t level,uint32_t base_img_array_layer,uint32_t base_img_z_offset_px,uint32_t array_layer,uint32_t z_offset_px,bool mem_to_img)115 anv_copy_image_memory(struct anv_device *device,
116 const struct isl_surf *surf,
117 const struct anv_image_binding *binding,
118 uint64_t binding_offset,
119 void *mem_ptr,
120 uint64_t mem_row_pitch_B,
121 uint64_t mem_height_pitch_B,
122 const VkOffset3D *offset_el,
123 const VkExtent3D *extent_el,
124 uint32_t level,
125 uint32_t base_img_array_layer,
126 uint32_t base_img_z_offset_px,
127 uint32_t array_layer,
128 uint32_t z_offset_px,
129 bool mem_to_img)
130 {
131 const struct isl_format_layout *fmt_layout =
132 isl_format_get_layout(surf->format);
133 const uint32_t bs = fmt_layout->bpb / 8;
134 void *img_ptr = binding->host_map + binding->map_delta + binding_offset;
135
136 uint64_t start_tile_B, end_tile_B;
137 isl_surf_get_image_range_B_tile(surf, level,
138 base_img_array_layer + array_layer,
139 base_img_z_offset_px + z_offset_px,
140 &start_tile_B, &end_tile_B);
141
142 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
143 const bool need_invalidate_flush =
144 (binding->address.bo->flags & ANV_BO_ALLOC_HOST_COHERENT) == 0 &&
145 device->physical->memory.need_flush;
146 if (need_invalidate_flush && !mem_to_img)
147 intel_invalidate_range(img_ptr + start_tile_B, end_tile_B - start_tile_B);
148 #endif
149
150 uint32_t img_depth_or_layer = MAX2(base_img_array_layer + array_layer,
151 base_img_z_offset_px + z_offset_px);
152 uint32_t mem_depth_or_layer = MAX2(z_offset_px, array_layer);
153
154 if (surf->tiling == ISL_TILING_LINEAR) {
155 uint64_t img_col_offset = offset_el->x * bs;
156 uint64_t row_copy_size = extent_el->width * bs;
157 for (uint32_t h_el = 0; h_el < extent_el->height; h_el++) {
158 uint64_t mem_row_offset =
159 mem_height_pitch_B * mem_depth_or_layer +
160 h_el * mem_row_pitch_B;
161 uint64_t img_row = h_el + offset_el->y;
162 uint64_t img_offset =
163 start_tile_B + img_row * surf->row_pitch_B + img_col_offset;
164 assert((img_offset + row_copy_size) <= binding->memory_range.size);
165
166 if (mem_to_img)
167 memcpy(img_ptr + img_offset, mem_ptr + mem_row_offset, row_copy_size);
168 else
169 memcpy(mem_ptr + mem_row_offset, img_ptr + img_offset, row_copy_size);
170 }
171 } else {
172 uint32_t x1, x2, y1, y2;
173 tile_extents(surf, offset_el, extent_el, level, img_depth_or_layer,
174 &x1, &x2, &y1, &y2);
175
176 if (mem_to_img) {
177 isl_memcpy_linear_to_tiled(x1, x2, y1, y2,
178 img_ptr,
179 mem_ptr + mem_height_pitch_B * mem_depth_or_layer,
180 surf->row_pitch_B,
181 mem_row_pitch_B,
182 false,
183 surf->tiling,
184 ISL_MEMCPY);
185 } else {
186 isl_memcpy_tiled_to_linear(x1, x2, y1, y2,
187 mem_ptr + mem_height_pitch_B * mem_depth_or_layer,
188 img_ptr,
189 mem_row_pitch_B,
190 surf->row_pitch_B,
191 false,
192 surf->tiling,
193 #if defined(USE_SSE41)
194 util_get_cpu_caps()->has_sse4_1 ?
195 ISL_MEMCPY_STREAMING_LOAD :
196 #endif
197 ISL_MEMCPY);
198 }
199 }
200
201 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
202 if (need_invalidate_flush && mem_to_img)
203 intel_flush_range(img_ptr + start_tile_B, end_tile_B - start_tile_B);
204 #endif
205 }
206
207 static uint64_t
calc_mem_row_pitch_B(const struct isl_surf * surf,uint64_t api_row_length_px,const VkExtent3D * extent_px)208 calc_mem_row_pitch_B(const struct isl_surf *surf,
209 uint64_t api_row_length_px,
210 const VkExtent3D *extent_px)
211 {
212 const struct isl_format_layout *fmt_layout =
213 isl_format_get_layout(surf->format);
214 const uint32_t bs = fmt_layout->bpb / 8;
215
216 return api_row_length_px != 0 ?
217 (bs * DIV_ROUND_UP(api_row_length_px, fmt_layout->bw)) :
218 (bs * DIV_ROUND_UP(extent_px->width, fmt_layout->bw));
219 }
220
221 static uint64_t
calc_mem_height_pitch_B(const struct isl_surf * surf,uint64_t row_pitch_B,uint64_t api_height_px,const VkExtent3D * extent_px)222 calc_mem_height_pitch_B(const struct isl_surf *surf,
223 uint64_t row_pitch_B,
224 uint64_t api_height_px,
225 const VkExtent3D *extent_px)
226 {
227 const struct isl_format_layout *fmt_layout =
228 isl_format_get_layout(surf->format);
229
230 return api_height_px != 0 ?
231 (row_pitch_B * DIV_ROUND_UP(api_height_px, fmt_layout->bh)) :
232 (row_pitch_B * DIV_ROUND_UP(extent_px->height, fmt_layout->bh));
233 }
234
235 VkResult
anv_CopyMemoryToImageEXT(VkDevice _device,const VkCopyMemoryToImageInfoEXT * pCopyMemoryToImageInfo)236 anv_CopyMemoryToImageEXT(
237 VkDevice _device,
238 const VkCopyMemoryToImageInfoEXT* pCopyMemoryToImageInfo)
239 {
240 ANV_FROM_HANDLE(anv_device, device, _device);
241 ANV_FROM_HANDLE(anv_image, image, pCopyMemoryToImageInfo->dstImage);
242
243 for (uint32_t r = 0; r < pCopyMemoryToImageInfo->regionCount; r++) {
244 const VkMemoryToImageCopyEXT *region =
245 &pCopyMemoryToImageInfo->pRegions[r];
246 const uint32_t plane =
247 anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask);
248 const struct anv_surface *anv_surf =
249 &image->planes[plane].primary_surface;
250 const struct isl_surf *surf = &anv_surf->isl;
251 const struct anv_image_binding *binding =
252 &image->bindings[anv_surf->memory_range.binding];
253
254 assert(binding->host_map != NULL);
255
256 /* Memory distance between each row */
257 uint64_t mem_row_pitch_B =
258 calc_mem_row_pitch_B(surf, region->memoryRowLength,
259 ®ion->imageExtent);
260 /* Memory distance between each slice (1 3D level or 1 array layer) */
261 uint64_t mem_height_pitch_B =
262 calc_mem_height_pitch_B(surf, mem_row_pitch_B,
263 region->memoryImageHeight,
264 ®ion->imageExtent);
265
266 VkOffset3D offset_el =
267 vk_offset3d_to_el(surf->format, region->imageOffset);
268 VkExtent3D extent_el =
269 vk_extent3d_to_el(surf->format, region->imageExtent);
270
271 for (uint32_t a = 0; a < region->imageSubresource.layerCount; a++) {
272 for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
273 if ((pCopyMemoryToImageInfo->flags &
274 VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
275 anv_image_can_host_memcpy(image)) {
276 anv_memcpy_image_memory(device, surf, binding,
277 anv_surf->memory_range.offset,
278 (void *)region->pHostPointer,
279 region->imageSubresource.mipLevel,
280 region->imageSubresource.baseArrayLayer,
281 region->imageOffset.z,
282 a, z, true /* mem_to_img */);
283 } else {
284 anv_copy_image_memory(device, surf,
285 binding, anv_surf->memory_range.offset,
286 (void *)region->pHostPointer,
287 mem_row_pitch_B,
288 mem_height_pitch_B,
289 &offset_el,
290 &extent_el,
291 region->imageSubresource.mipLevel,
292 region->imageSubresource.baseArrayLayer,
293 region->imageOffset.z,
294 a, z, true /* mem_to_img */);
295 }
296 }
297 }
298 }
299
300 return VK_SUCCESS;
301 }
302
303 VkResult
anv_CopyImageToMemoryEXT(VkDevice _device,const VkCopyImageToMemoryInfoEXT * pCopyImageToMemoryInfo)304 anv_CopyImageToMemoryEXT(
305 VkDevice _device,
306 const VkCopyImageToMemoryInfoEXT* pCopyImageToMemoryInfo)
307 {
308 ANV_FROM_HANDLE(anv_device, device, _device);
309 ANV_FROM_HANDLE(anv_image, image, pCopyImageToMemoryInfo->srcImage);
310
311 for (uint32_t r = 0; r < pCopyImageToMemoryInfo->regionCount; r++) {
312 const VkImageToMemoryCopyEXT *region =
313 &pCopyImageToMemoryInfo->pRegions[r];
314 const uint32_t plane =
315 anv_image_aspect_to_plane(image, region->imageSubresource.aspectMask);
316 const struct anv_surface *anv_surf =
317 &image->planes[plane].primary_surface;
318 const struct isl_surf *surf = &anv_surf->isl;
319 const struct anv_image_binding *binding =
320 &image->bindings[anv_surf->memory_range.binding];
321
322 assert(binding->host_map != NULL);
323
324 VkOffset3D offset_el =
325 vk_offset3d_to_el(surf->format, region->imageOffset);
326 VkExtent3D extent_el =
327 vk_extent3d_to_el(surf->format, region->imageExtent);
328
329 /* Memory distance between each row */
330 uint64_t mem_row_pitch_B =
331 calc_mem_row_pitch_B(surf, region->memoryRowLength,
332 ®ion->imageExtent);
333 /* Memory distance between each slice (1 3D level or 1 array layer) */
334 uint64_t mem_height_pitch_B =
335 calc_mem_height_pitch_B(surf, mem_row_pitch_B,
336 region->memoryImageHeight,
337 ®ion->imageExtent);
338
339 for (uint32_t a = 0; a < region->imageSubresource.layerCount; a++) {
340 for (uint32_t z = 0; z < region->imageExtent.depth; z++) {
341 if ((pCopyImageToMemoryInfo->flags &
342 VK_HOST_IMAGE_COPY_MEMCPY_EXT) &&
343 anv_image_can_host_memcpy(image)) {
344 anv_memcpy_image_memory(device, surf, binding,
345 anv_surf->memory_range.offset,
346 region->pHostPointer,
347 region->imageSubresource.mipLevel,
348 region->imageSubresource.baseArrayLayer,
349 region->imageOffset.z,
350 a, z, false /* mem_to_img */);
351 } else {
352 anv_copy_image_memory(device, surf,
353 binding, anv_surf->memory_range.offset,
354 region->pHostPointer,
355 mem_row_pitch_B,
356 mem_height_pitch_B,
357 &offset_el,
358 &extent_el,
359 region->imageSubresource.mipLevel,
360 region->imageSubresource.baseArrayLayer,
361 region->imageOffset.z,
362 a, z, false /* mem_to_img */);
363 }
364 }
365 }
366 }
367
368 return VK_SUCCESS;
369 }
370
371 VkResult
anv_CopyImageToImageEXT(VkDevice _device,const VkCopyImageToImageInfoEXT * pCopyImageToImageInfo)372 anv_CopyImageToImageEXT(
373 VkDevice _device,
374 const VkCopyImageToImageInfoEXT* pCopyImageToImageInfo)
375 {
376 ANV_FROM_HANDLE(anv_device, device, _device);
377 ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToImageInfo->srcImage);
378 ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageToImageInfo->dstImage);
379
380 /* Work with a tile's worth of data */
381 void *tmp_map = vk_alloc(&device->vk.alloc, 4096, 8,
382 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
383 if (tmp_map == NULL)
384 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
385
386 for (uint32_t r = 0; r < pCopyImageToImageInfo->regionCount; r++) {
387 const VkImageCopy2 *region = &pCopyImageToImageInfo->pRegions[r];
388
389 const uint32_t src_plane =
390 anv_image_aspect_to_plane(src_image,
391 region->srcSubresource.aspectMask);
392 const uint32_t dst_plane =
393 anv_image_aspect_to_plane(dst_image,
394 region->srcSubresource.aspectMask);
395 const struct anv_surface *src_anv_surf =
396 &src_image->planes[src_plane].primary_surface;
397 const struct anv_surface *dst_anv_surf =
398 &dst_image->planes[dst_plane].primary_surface;
399 const struct isl_surf *src_surf = &src_anv_surf->isl;
400 const struct isl_surf *dst_surf = &dst_anv_surf->isl;
401 const struct anv_image_binding *src_binding =
402 &src_image->bindings[src_anv_surf->memory_range.binding];
403 const struct anv_image_binding *dst_binding =
404 &dst_image->bindings[dst_anv_surf->memory_range.binding];
405
406 struct isl_tile_info src_tile;
407 struct isl_tile_info dst_tile;
408
409 isl_surf_get_tile_info(src_surf, &src_tile);
410 isl_surf_get_tile_info(dst_surf, &dst_tile);
411
412 uint32_t tile_width_B;
413 uint32_t tile_width_el, tile_height_el;
414 if (src_tile.phys_extent_B.w > dst_tile.phys_extent_B.w) {
415 tile_width_B = src_tile.phys_extent_B.w;
416 tile_width_el = src_tile.logical_extent_el.w;
417 tile_height_el = src_tile.logical_extent_el.h;
418 } else {
419 tile_width_B = dst_tile.phys_extent_B.w;
420 tile_width_el = dst_tile.logical_extent_el.w;
421 tile_height_el = dst_tile.logical_extent_el.h;
422 }
423
424 /* There is no requirement that the extent be aligned to the texel block
425 * size.
426 */
427 VkOffset3D src_offset_el =
428 vk_offset3d_to_el(src_surf->format, region->srcOffset);
429 VkOffset3D dst_offset_el =
430 vk_offset3d_to_el(src_surf->format, region->dstOffset);
431 VkExtent3D extent_el =
432 vk_extent3d_to_el(src_surf->format, region->extent);
433
434 /* linear-to-linear case */
435 if (tile_width_el == 1 && tile_height_el == 1) {
436 tile_width_el = MIN2(4096 / (src_tile.format_bpb / 8),
437 extent_el.width);
438 tile_height_el = 4096 / (tile_width_el * (src_tile.format_bpb / 8));
439 tile_width_B = tile_width_el * src_tile.format_bpb / 8;
440 }
441
442 for (uint32_t a = 0; a < region->srcSubresource.layerCount; a++) {
443 for (uint32_t z = 0; z < region->extent.depth; z++) {
444 for (uint32_t y_el = 0; y_el < extent_el.height; y_el += tile_height_el) {
445 for (uint32_t x_el = 0; x_el < extent_el.width; x_el += tile_width_el) {
446 VkOffset3D src_offset = {
447 .x = src_offset_el.x + x_el,
448 .y = src_offset_el.y + y_el,
449 };
450 VkOffset3D dst_offset = {
451 .x = dst_offset_el.x + x_el,
452 .y = dst_offset_el.y + y_el,
453 };
454 VkExtent3D extent = {
455 .width = MIN2(extent_el.width - x_el, tile_width_el),
456 .height = MIN2(extent_el.height - y_el, tile_height_el),
457 .depth = 1,
458 };
459
460 anv_copy_image_memory(device, src_surf,
461 src_binding,
462 src_anv_surf->memory_range.offset,
463 tmp_map,
464 tile_width_B, 0,
465 &src_offset, &extent,
466 region->srcSubresource.mipLevel,
467 region->srcSubresource.baseArrayLayer,
468 region->srcOffset.z,
469 a, z,
470 false /* mem_to_img */);
471 anv_copy_image_memory(device, dst_surf,
472 dst_binding,
473 dst_anv_surf->memory_range.offset,
474 tmp_map,
475 tile_width_B, 0,
476 &dst_offset, &extent,
477 region->dstSubresource.mipLevel,
478 region->dstSubresource.baseArrayLayer,
479 region->dstOffset.z,
480 a, z,
481 true /* mem_to_img */);
482 }
483 }
484 }
485 }
486 }
487
488 vk_free(&device->vk.alloc, tmp_map);
489
490 return VK_SUCCESS;
491 }
492
493 VkResult
anv_TransitionImageLayoutEXT(VkDevice device,uint32_t transitionCount,const VkHostImageLayoutTransitionInfoEXT * pTransitions)494 anv_TransitionImageLayoutEXT(
495 VkDevice device,
496 uint32_t transitionCount,
497 const VkHostImageLayoutTransitionInfoEXT* pTransitions)
498 {
499 /* Our layout transitions are mostly about resolving the auxiliary surface
500 * into the main surface. Since we disable the auxiliary surface, there is
501 * nothing here for us to do.
502 */
503 return VK_SUCCESS;
504 }
505