1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/drm_fourcc.h"
34
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #else
40 #define VG(x) ((void)0)
41 #endif
42
43 #include "common/intel_aux_map.h"
44 #include "common/intel_bind_timeline.h"
45 #include "common/intel_engine.h"
46 #include "common/intel_gem.h"
47 #include "common/intel_l3_config.h"
48 #include "common/intel_measure.h"
49 #include "common/intel_mem.h"
50 #include "common/intel_sample_positions.h"
51 #include "decoder/intel_decoder.h"
52 #include "dev/intel_device_info.h"
53 #include "blorp/blorp.h"
54 #include "compiler/brw_compiler.h"
55 #include "compiler/brw_kernel.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/detect_os.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/perf/u_trace.h"
65 #include "util/set.h"
66 #include "util/sparse_array.h"
67 #include "util/u_atomic.h"
68 #if DETECT_OS_ANDROID
69 #include "util/u_gralloc/u_gralloc.h"
70 #endif
71 #include "util/u_vector.h"
72 #include "util/u_math.h"
73 #include "util/vma.h"
74 #include "util/xmlconfig.h"
75 #include "vk_acceleration_structure.h"
76 #include "vk_alloc.h"
77 #include "vk_buffer.h"
78 #include "vk_buffer_view.h"
79 #include "vk_command_buffer.h"
80 #include "vk_command_pool.h"
81 #include "vk_debug_report.h"
82 #include "vk_descriptor_update_template.h"
83 #include "vk_device.h"
84 #include "vk_device_memory.h"
85 #include "vk_drm_syncobj.h"
86 #include "vk_enum_defines.h"
87 #include "vk_format.h"
88 #include "vk_framebuffer.h"
89 #include "vk_graphics_state.h"
90 #include "vk_image.h"
91 #include "vk_instance.h"
92 #include "vk_pipeline_cache.h"
93 #include "vk_physical_device.h"
94 #include "vk_sampler.h"
95 #include "vk_shader_module.h"
96 #include "vk_sync.h"
97 #include "vk_sync_timeline.h"
98 #include "vk_texcompress_astc.h"
99 #include "vk_util.h"
100 #include "vk_query_pool.h"
101 #include "vk_queue.h"
102 #include "vk_log.h"
103 #include "vk_ycbcr_conversion.h"
104 #include "vk_video.h"
105
106 #ifdef __cplusplus
107 extern "C" {
108 #endif
109
110 /* Pre-declarations needed for WSI entrypoints */
111 struct wl_surface;
112 struct wl_display;
113 typedef struct xcb_connection_t xcb_connection_t;
114 typedef uint32_t xcb_visualid_t;
115 typedef uint32_t xcb_window_t;
116
117 struct anv_batch;
118 struct anv_buffer;
119 struct anv_buffer_view;
120 struct anv_image_view;
121 struct anv_instance;
122
123 struct intel_aux_map_context;
124 struct intel_perf_config;
125 struct intel_perf_counter_pass;
126 struct intel_perf_query_result;
127
128 #include <vulkan/vulkan.h>
129 #include <vulkan/vk_icd.h>
130
131 #include "anv_android.h"
132 #include "anv_entrypoints.h"
133 #include "anv_kmd_backend.h"
134 #include "anv_rmv.h"
135 #include "isl/isl.h"
136
137 #include "dev/intel_debug.h"
138 #undef MESA_LOG_TAG
139 #define MESA_LOG_TAG "MESA-INTEL"
140 #include "util/log.h"
141 #include "wsi_common.h"
142
143 #define NSEC_PER_SEC 1000000000ull
144
145 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
146
147 /* Allowing different clear colors requires us to perform a depth resolve at
148 * the end of certain render passes. This is because while slow clears store
149 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
150 * See the PRMs for examples describing when additional resolves would be
151 * necessary. To enable fast clears without requiring extra resolves, we set
152 * the clear value to a globally-defined one. We could allow different values
153 * if the user doesn't expect coherent data during or after a render pass
154 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
155 * don't seem to exist yet. In almost all Vulkan applications tested thus far,
156 * 1.0f seems to be the only value used. The only application that doesn't set
157 * this value does so through the usage of a seemingly uninitialized clear
158 * value.
159 */
160 #define ANV_HZ_FC_VAL 1.0f
161
162 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
163 #define MAX_VBS (33 - 2)
164
165 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
166 * only supports the push model of VS inputs, and we only have 128 GRFs,
167 * minus the g0 and g1 payload, which gives us a maximum of 31 VEs. Plus,
168 * we use two of them for SGVs.
169 */
170 #define MAX_VES (31 - 2)
171
172 #define MAX_XFB_BUFFERS 4
173 #define MAX_XFB_STREAMS 4
174 #define MAX_SETS 8
175 #define MAX_RTS 8
176 #define MAX_VIEWPORTS 16
177 #define MAX_SCISSORS 16
178 #define MAX_PUSH_CONSTANTS_SIZE 128
179 #define MAX_DYNAMIC_BUFFERS 16
180 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
181 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
182 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
183 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
184 * use 64 here to avoid cache issues. We could most likely bring it back to
185 * 32 if we had different virtual addresses for the different views on a given
186 * GEM object.
187 */
188 #define ANV_UBO_ALIGNMENT 64
189 #define ANV_SSBO_ALIGNMENT 4
190 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
191 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
192 #define MAX_SAMPLE_LOCATIONS 16
193
194 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
195 * and we can't put anything else there we use 64b.
196 */
197 #define ANV_SURFACE_STATE_SIZE (64)
198
199 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
200 *
201 * "The surface state model is used when a Binding Table Index (specified
202 * in the message descriptor) of less than 240 is specified. In this model,
203 * the Binding Table Index is used to index into the binding table, and the
204 * binding table entry contains a pointer to the SURFACE_STATE."
205 *
206 * Binding table values above 240 are used for various things in the hardware
207 * such as stateless, stateless with incoherent cache, SLM, and bindless.
208 */
209 #define MAX_BINDING_TABLE_SIZE 240
210
211 #define ANV_SVGS_VB_INDEX MAX_VBS
212 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
213
214 /* We reserve this MI ALU register for the purpose of handling predication.
215 * Other code which uses the MI ALU should leave it alone.
216 */
217 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
218
219 /* We reserve this MI ALU register to pass around an offset computed from
220 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex (VK_KHR_performance_query).
221 * Other code which uses the MI ALU should leave it alone.
222 */
223 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
224
225 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
226
227 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
228 * and we can't put anything else there we use 64b.
229 */
230 #define ANV_SURFACE_STATE_SIZE (64)
231 #define ANV_SAMPLER_STATE_SIZE (32)
232
233 /* For gfx12 we set the streamout buffers using 4 separate commands
234 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
235 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
236 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
237 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
238 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
239 * 3DSTATE_SO_BUFFER_INDEX_0.
240 */
241 #define SO_BUFFER_INDEX_0_CMD 0x60
242 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
243
244 /* The TR-TT L1 page table entries may contain these values instead of actual
245 * pointers to indicate the regions are either NULL or invalid. We program
246 * these values to TR-TT registers, so we could change them, but it's super
247 * convenient to have the NULL value be 0 because everything is
248 * zero-initialized when allocated.
249 *
250 * Since we reserve these values for NULL/INVALID, then we can't use them as
251 * destinations for TR-TT address translation. Both values are shifted by 16
252 * bits, which results in graphics addresses 0 and 64k. On Anv the first vma
253 * starts at 2MB, so we already don't use 0 and 64k for anything, so there's
254 * nothing really to reserve. We could instead just reserve random 64kb
255 * ranges from any of the non-TR-TT vmas and use their addresses.
256 */
257 #define ANV_TRTT_L1_NULL_TILE_VAL 0
258 #define ANV_TRTT_L1_INVALID_TILE_VAL 1
259
260 static inline uint32_t
261 align_down_npot_u32(uint32_t v, uint32_t a)
262 {
263 return v - (v % a);
264 }
265
266 /** Alignment must be a power of 2. */
267 static inline bool
268 anv_is_aligned(uintmax_t n, uintmax_t a)
269 {
270 assert(a == (a & -a));
271 return (n & (a - 1)) == 0;
272 }
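/* Illustrative sketch (not driver code) of how the two alignment helpers
 * above behave: align_down_npot_u32() accepts any non-zero alignment, while
 * anv_is_aligned() requires a power of two.
 *
 *    align_down_npot_u32(100, 48);    returns 96 (non-power-of-two alignment)
 *    anv_is_aligned(96, 32);          true:  96 & (32 - 1) == 0
 *    anv_is_aligned(100, 32);         false: 100 & 31 == 4
 */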
273
274 static inline union isl_color_value
275 vk_to_isl_color(VkClearColorValue color)
276 {
277 return (union isl_color_value) {
278 .u32 = {
279 color.uint32[0],
280 color.uint32[1],
281 color.uint32[2],
282 color.uint32[3],
283 },
284 };
285 }
286
287 static inline union isl_color_value
288 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
289 {
290 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
291 union isl_color_value isl_color = { .u32 = {0, } };
292
293 #define COPY_COLOR_CHANNEL(c, i) \
294 if (fmtl->channels.c.bits) \
295 isl_color.u32[i] = color.uint32[i]
296
297 COPY_COLOR_CHANNEL(r, 0);
298 COPY_COLOR_CHANNEL(g, 1);
299 COPY_COLOR_CHANNEL(b, 2);
300 COPY_COLOR_CHANNEL(a, 3);
301
302 #undef COPY_COLOR_CHANNEL
303
304 return isl_color;
305 }
306
307 /**
308 * Warn on ignored extension structs.
309 *
310 * The Vulkan spec requires us to ignore unsupported or unknown structs in
311 * a pNext chain. In debug mode, emitting warnings for ignored structs may
312 * help us discover structs that we should not have ignored.
313 *
314 *
315 * From the Vulkan 1.0.38 spec:
316 *
317 * Any component of the implementation (the loader, any enabled layers,
318 * and drivers) must skip over, without processing (other than reading the
319 * sType and pNext members) any chained structures with sType values not
320 * defined by extensions supported by that component.
321 */
322 #define anv_debug_ignored_stype(sType) \
323 mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
324
325 void __anv_perf_warn(struct anv_device *device,
326 const struct vk_object_base *object,
327 const char *file, int line, const char *format, ...)
328 anv_printflike(5, 6);
329
330 /**
331 * Print a FINISHME message, including its source location.
332 */
333 #define anv_finishme(format, ...) \
334 do { \
335 static bool reported = false; \
336 if (!reported) { \
337 mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
338 ##__VA_ARGS__); \
339 reported = true; \
340 } \
341 } while (0)
342
343 /**
344 * Print a perf warning message. Set INTEL_DEBUG=perf to see these.
345 */
346 #define anv_perf_warn(objects_macro, format, ...) \
347 do { \
348 static bool reported = false; \
349 if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
350 __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \
351 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \
352 objects_macro, __FILE__, __LINE__, \
353 format, ## __VA_ARGS__); \
354 reported = true; \
355 } \
356 } while (0)
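/* Usage sketch with hypothetical call sites: both macros report only once per
 * call site thanks to their static `reported` flag, and anv_perf_warn() is
 * additionally gated on INTEL_DEBUG=perf.
 *
 *    anv_finishme("support multi-planar %s images", "YCbCr");
 *    anv_perf_warn(VK_LOG_OBJS(&image->vk.base),
 *                  "falling back to a slow clear");
 */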
357
358 /* A non-fatal assert. Useful for debugging. */
359 #ifdef DEBUG
360 #define anv_assert(x) ({ \
361 if (unlikely(!(x))) \
362 mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
363 })
364 #else
365 #define anv_assert(x)
366 #endif
367
368 enum anv_bo_alloc_flags {
369 /** Specifies that the BO must have a 32-bit address
370 *
371 * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
372 */
373 ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
374
375 /** Specifies that the BO may be shared externally */
376 ANV_BO_ALLOC_EXTERNAL = (1 << 1),
377
378 /** Specifies that the BO should be mapped */
379 ANV_BO_ALLOC_MAPPED = (1 << 2),
380
381 /** Specifies that the BO should be coherent.
382 *
383 * Note: On platforms with LLC, where HOST_CACHED + HOST_COHERENT is free,
384 * the BO can get upgraded to HOST_CACHED_COHERENT.
385 */
386 ANV_BO_ALLOC_HOST_COHERENT = (1 << 3),
387
388 /** Specifies that the BO should be captured in error states */
389 ANV_BO_ALLOC_CAPTURE = (1 << 4),
390
391 /** Specifies that the BO will have an address assigned by the caller
392 *
393 * Such BOs do not exist in any VMA heap.
394 */
395 ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
396
397 /** Enables implicit synchronization on the BO
398 *
399 * This is the opposite of EXEC_OBJECT_ASYNC.
400 */
401 ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
402
403 /** Marks the BO as written for implicit synchronization
404 *
405 * This is equivalent to EXEC_OBJECT_WRITE.
406 */
407 ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
408
409 /** Has an address which is visible to the client */
410 ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
411
412 /** Align the BO's virtual address to match AUX-TT requirements */
413 ANV_BO_ALLOC_AUX_TT_ALIGNED = (1 << 9),
414
415 /** This buffer is allocated from local memory and should be cpu visible */
416 ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
417
418 /** For non device local allocations */
419 ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11),
420
421 /** This buffer will be scanned out to the display */
422 ANV_BO_ALLOC_SCANOUT = (1 << 12),
423
424 /** For descriptor pools */
425 ANV_BO_ALLOC_DESCRIPTOR_POOL = (1 << 13),
426
427 /** For buffers that will be bound using TR-TT.
428 *
429 * Not for buffers used as the TR-TT page tables.
430 */
431 ANV_BO_ALLOC_TRTT = (1 << 14),
432
433 /** Protected buffer */
434 ANV_BO_ALLOC_PROTECTED = (1 << 15),
435
436 /** Specifies that the BO should be cached and incoherent. */
437 ANV_BO_ALLOC_HOST_CACHED = (1 << 16),
438
439 /** For sampler pools */
440 ANV_BO_ALLOC_SAMPLER_POOL = (1 << 17),
441
442 /** Specifies that the BO is imported.
443 *
444 * Imported BOs must also be marked as ANV_BO_ALLOC_EXTERNAL
445 */
446 ANV_BO_ALLOC_IMPORTED = (1 << 18),
447
448 /** Specifies that this BO is internal to the driver */
449 ANV_BO_ALLOC_INTERNAL = (1 << 19),
450
451 /** Allocate with CCS AUX requirements
452 *
453 * This pads the BO to include CCS data mappable through the AUX-TT and
454 * aligned to the AUX-TT requirements.
455 */
456 ANV_BO_ALLOC_AUX_CCS = (1 << 20),
457 };
458
459 /** Specifies that the BO should be cached and coherent. */
460 #define ANV_BO_ALLOC_HOST_CACHED_COHERENT (ANV_BO_ALLOC_HOST_COHERENT | \
461 ANV_BO_ALLOC_HOST_CACHED)
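/* Illustrative sketch of how these flags are typically combined for an
 * internal, CPU-mapped, coherent allocation. This assumes the
 * anv_device_alloc_bo() helper declared later in this header; the values are
 * hypothetical.
 *
 *    struct anv_bo *bo;
 *    VkResult result =
 *       anv_device_alloc_bo(device, "example", 4096,
 *                           ANV_BO_ALLOC_MAPPED |
 *                           ANV_BO_ALLOC_HOST_CACHED_COHERENT |
 *                           ANV_BO_ALLOC_INTERNAL,
 *                           0, &bo);   explicit_address = 0, let the VMA heap pick
 */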
462
463
464 struct anv_bo {
465 const char *name;
466
467 /* The VMA heap in anv_device from which this BO takes its offset.
468 *
469 * This can only be NULL when has_fixed_address is true.
470 */
471 struct util_vma_heap *vma_heap;
472
473 /* All userptr BOs in the Xe KMD have gem_handle set to workaround_bo->gem_handle */
474 uint32_t gem_handle;
475
476 uint32_t refcount;
477
478 /* Index into the current validation list. This is used by the
479 * validation list building algorithm to track which buffers are already
480 * in the validation list so that we can ensure uniqueness.
481 */
482 uint32_t exec_obj_index;
483
484 /* Index for use with util_sparse_array_free_list */
485 uint32_t free_index;
486
487 /* Last known offset. This value is provided by the kernel when we
488 * execbuf and is used as the presumed offset for the next bunch of
489 * relocations, in canonical address format.
490 */
491 uint64_t offset;
492
493 /** Size of the buffer */
494 uint64_t size;
495
496 /** Offset at which the CCS data is stored */
497 uint64_t ccs_offset;
498
499 /* Map for internally mapped BOs.
500 *
501 * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
502 * BO.
503 */
504 void *map;
505
506 /* The actual size of the BO allocated by the KMD, basically:
507 * align(size, mem_alignment)
508 */
509 uint64_t actual_size;
510
511 /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
512 uint32_t flags;
513
514 enum anv_bo_alloc_flags alloc_flags;
515
516 /** True if this BO wraps a host pointer */
517 bool from_host_ptr:1;
518
519 /** True if this BO is mapped in the GTT (only used for RMV) */
520 bool gtt_mapped:1;
521 };
522
523 static inline bool
524 anv_bo_is_external(const struct anv_bo *bo)
525 {
526 return bo->alloc_flags & ANV_BO_ALLOC_EXTERNAL;
527 }
528
529 static inline bool
530 anv_bo_is_vram_only(const struct anv_bo *bo)
531 {
532 return !(bo->alloc_flags & (ANV_BO_ALLOC_NO_LOCAL_MEM |
533 ANV_BO_ALLOC_MAPPED |
534 ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
535 ANV_BO_ALLOC_IMPORTED));
536 }
537
538 static inline struct anv_bo *
539 anv_bo_ref(struct anv_bo *bo)
540 {
541 p_atomic_inc(&bo->refcount);
542 return bo;
543 }
544
545 enum intel_device_info_mmap_mode
546 anv_bo_get_mmap_mode(struct anv_device *device, struct anv_bo *bo);
547
548 static inline bool
549 anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)
550 {
551 return (alloc_flags & (ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT)) ==
552 ANV_BO_ALLOC_HOST_CACHED;
553 }
554
555 struct anv_address {
556 struct anv_bo *bo;
557 int64_t offset;
558 };
559
560 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
561
562 static inline struct anv_address
563 anv_address_from_u64(uint64_t addr_u64)
564 {
565 assert(addr_u64 == intel_canonical_address(addr_u64));
566 return (struct anv_address) {
567 .bo = NULL,
568 .offset = addr_u64,
569 };
570 }
571
572 static inline bool
573 anv_address_is_null(struct anv_address addr)
574 {
575 return addr.bo == NULL && addr.offset == 0;
576 }
577
578 static inline uint64_t
579 anv_address_physical(struct anv_address addr)
580 {
581 uint64_t address = (addr.bo ? addr.bo->offset : 0ull) + addr.offset;
582 return intel_canonical_address(address);
583 }
584
585 static inline struct anv_address
586 anv_address_add(struct anv_address addr, uint64_t offset)
587 {
588 addr.offset += offset;
589 return addr;
590 }
591
592 static inline void *
593 anv_address_map(struct anv_address addr)
594 {
595 if (addr.bo == NULL)
596 return NULL;
597
598 if (addr.bo->map == NULL)
599 return NULL;
600
601 return addr.bo->map + addr.offset;
602 }
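/* Illustrative sketch of how the address helpers above compose: given a
 * BO-relative address, anv_address_add() offsets it, anv_address_physical()
 * produces the canonical 64-bit GPU address, and anv_address_map() returns a
 * CPU pointer when the BO is mapped (values hypothetical).
 *
 *    struct anv_address addr = { .bo = bo, .offset = 256 };
 *    addr = anv_address_add(addr, 16);
 *    uint64_t gpu_addr = anv_address_physical(addr);   bo->offset + 272
 *    void *cpu_ptr = anv_address_map(addr);            NULL if not mapped
 */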
603
604 /* Represent a virtual address range */
605 struct anv_va_range {
606 uint64_t addr;
607 uint64_t size;
608 };
609
610 /* Represents a lock-free linked list of "free" things. This is used by
611 * both the block pool and the state pools. Unfortunately, in order to
612 * solve the ABA problem, we can't use a single uint32_t head.
613 */
614 union anv_free_list {
615 struct {
616 uint32_t offset;
617
618 /* A simple count that is incremented every time the head changes. */
619 uint32_t count;
620 };
621 /* Make sure it's aligned to 64 bits. This will make atomic operations
622 * faster on 32 bit platforms.
623 */
624 alignas(8) uint64_t u64;
625 };
626
627 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
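/* Minimal sketch of why the free list pairs the offset with a count (an
 * assumption about the pop path in anv_allocator.c): updates use a 64-bit
 * compare-and-swap on u64, and bumping the count on every change makes a head
 * that was popped and re-pushed with the same offset still fail the CAS,
 * avoiding the ABA problem.
 *
 *    union anv_free_list current = { .u64 = list->u64 }, old, new;
 *    do {
 *       old = current;
 *       new.offset = ...next offset read from the state table entry...;
 *       new.count = old.count + 1;
 *       current.u64 = __sync_val_compare_and_swap(&list->u64, old.u64, new.u64);
 *    } while (current.u64 != old.u64);
 */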
628
629 struct anv_block_state {
630 union {
631 struct {
632 uint32_t next;
633 uint32_t end;
634 };
635 /* Make sure it's aligned to 64 bits. This will make atomic operations
636 * faster on 32 bit platforms.
637 */
638 alignas(8) uint64_t u64;
639 };
640 };
641
642 #define anv_block_pool_foreach_bo(bo, pool) \
643 for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
644 _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
645 _pp_bo++)
646
647 #define ANV_MAX_BLOCK_POOL_BOS 20
648
649 struct anv_block_pool {
650 const char *name;
651
652 struct anv_device *device;
653
654 struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
655 struct anv_bo *bo;
656 uint32_t nbos;
657
658 /* Maximum size of the pool */
659 uint64_t max_size;
660
661 /* Current size of the pool */
662 uint64_t size;
663
664 /* The canonical address where the start of the pool is pinned. The various bos that
665 * are created as the pool grows will have addresses in the range
666 * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
667 */
668 uint64_t start_address;
669
670 /* The offset from the start of the bo to the "center" of the block
671 * pool. Pointers to allocated blocks are given by
672 * bo.map + center_bo_offset + offsets.
673 */
674 uint32_t center_bo_offset;
675
676 struct anv_block_state state;
677
678 enum anv_bo_alloc_flags bo_alloc_flags;
679 };
680
681 /* Block pools are backed by a fixed-size 1GB memfd */
682 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
683
684 /* The center of the block pool is also the middle of the memfd. This may
685 * change in the future if we decide differently for some reason.
686 */
687 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
688
689 static inline uint32_t
690 anv_block_pool_size(struct anv_block_pool *pool)
691 {
692 return pool->state.end;
693 }
694
695 struct anv_state {
696 int64_t offset;
697 uint32_t alloc_size;
698 uint32_t idx;
699 void *map;
700 };
701
702 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
703
704 struct anv_fixed_size_state_pool {
705 union anv_free_list free_list;
706 struct anv_block_state block;
707 };
708
709 #define ANV_MIN_STATE_SIZE_LOG2 6
710 #define ANV_MAX_STATE_SIZE_LOG2 22
711
712 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
713
714 struct anv_free_entry {
715 uint32_t next;
716 struct anv_state state;
717 };
718
719 struct anv_state_table {
720 struct anv_device *device;
721 int fd;
722 struct anv_free_entry *map;
723 uint32_t size;
724 uint64_t max_size;
725 struct anv_block_state state;
726 struct u_vector cleanups;
727 };
728
729 struct anv_state_pool {
730 struct anv_block_pool block_pool;
731
732 /* Offset into the relevant state base address where the state pool starts
733 * allocating memory.
734 */
735 int64_t start_offset;
736
737 struct anv_state_table table;
738
739 /* The size of blocks which will be allocated from the block pool */
740 uint32_t block_size;
741
742 struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
743 };
744
745 struct anv_state_reserved_pool {
746 struct anv_state_pool *pool;
747 union anv_free_list reserved_blocks;
748 uint32_t count;
749 };
750
751 struct anv_state_stream {
752 struct anv_state_pool *state_pool;
753
754 /* The size of blocks to allocate from the state pool */
755 uint32_t block_size;
756
757 /* Current block we're allocating from */
758 struct anv_state block;
759
760 /* Offset into the current block at which to allocate the next state */
761 uint32_t next;
762
763 /* Sum of all the blocks in all_blocks */
764 uint32_t total_size;
765
766 /* List of all blocks allocated from this pool */
767 struct util_dynarray all_blocks;
768 };
769
770 struct anv_sparse_submission {
771 struct anv_queue *queue;
772
773 struct anv_vm_bind *binds;
774 int binds_len;
775 int binds_capacity;
776
777 uint32_t wait_count;
778 uint32_t signal_count;
779
780 struct vk_sync_wait *waits;
781 struct vk_sync_signal *signals;
782 };
783
784 struct anv_trtt_bind {
785 uint64_t pte_addr;
786 uint64_t entry_addr;
787 };
788
789 struct anv_trtt_submission {
790 struct anv_sparse_submission *sparse;
791
792 struct anv_trtt_bind *l3l2_binds;
793 struct anv_trtt_bind *l1_binds;
794
795 int l3l2_binds_len;
796 int l1_binds_len;
797 };
798
799 /* The block_pool functions are exported for testing only. The block pool should
800 * only be used via a state pool (see below).
801 */
802 VkResult anv_block_pool_init(struct anv_block_pool *pool,
803 struct anv_device *device,
804 const char *name,
805 uint64_t start_address,
806 uint32_t initial_size,
807 uint32_t max_size);
808 void anv_block_pool_finish(struct anv_block_pool *pool);
809 VkResult anv_block_pool_alloc(struct anv_block_pool *pool,
810 uint32_t block_size,
811 int64_t *offset,
812 uint32_t *padding);
813 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
814 size);
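/* Usage sketch, e.g. from a unit test (hypothetical values; normally the
 * block pool is only driven through a state pool, see below):
 *
 *    struct anv_block_pool pool;
 *    int64_t offset;
 *    anv_block_pool_init(&pool, device, "test pool", 4096, 4096, 1 << 20);
 *    anv_block_pool_alloc(&pool, 4096, &offset, NULL);
 *    void *map = anv_block_pool_map(&pool, offset, 4096);
 *    anv_block_pool_finish(&pool);
 */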
815
816 struct anv_state_pool_params {
817 const char *name;
818 uint64_t base_address;
819 int64_t start_offset;
820 uint32_t block_size;
821 uint32_t max_size;
822 };
823
824 VkResult anv_state_pool_init(struct anv_state_pool *pool,
825 struct anv_device *device,
826 const struct anv_state_pool_params *params);
827 void anv_state_pool_finish(struct anv_state_pool *pool);
828 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
829 uint32_t state_size, uint32_t alignment);
830 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
831
832 static inline struct anv_address
833 anv_state_pool_state_address(struct anv_state_pool *pool, struct anv_state state)
834 {
835 return (struct anv_address) {
836 .bo = pool->block_pool.bo,
837 .offset = state.offset - pool->start_offset,
838 };
839 }
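/* Illustrative sketch: allocate a piece of dynamic state, fill it on the CPU
 * and compute the GPU address a command would reference (names and sizes
 * hypothetical).
 *
 *    struct anv_state state =
 *       anv_state_pool_alloc(&device->dynamic_state_pool, 64, 64);
 *    memcpy(state.map, data, 64);
 *    struct anv_address addr =
 *       anv_state_pool_state_address(&device->dynamic_state_pool, state);
 */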
840
841 void anv_state_stream_init(struct anv_state_stream *stream,
842 struct anv_state_pool *state_pool,
843 uint32_t block_size);
844 void anv_state_stream_finish(struct anv_state_stream *stream);
845 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
846 uint32_t size, uint32_t alignment);
847
848 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
849 struct anv_state_pool *parent,
850 uint32_t count, uint32_t size,
851 uint32_t alignment);
852 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
853 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
854 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
855 struct anv_state state);
856
857 VkResult anv_state_table_init(struct anv_state_table *table,
858 struct anv_device *device,
859 uint32_t initial_entries);
860 void anv_state_table_finish(struct anv_state_table *table);
861 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
862 uint32_t count);
863 void anv_free_list_push(union anv_free_list *list,
864 struct anv_state_table *table,
865 uint32_t idx, uint32_t count);
866 struct anv_state* anv_free_list_pop(union anv_free_list *list,
867 struct anv_state_table *table);
868
869
870 static inline struct anv_state *
871 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
872 {
873 return &table->map[idx].state;
874 }
875 /**
876 * Implements a pool of re-usable BOs. The interface is identical to that
877 * of block_pool except that each block is its own BO.
878 */
879 struct anv_bo_pool {
880 const char *name;
881
882 struct anv_device *device;
883
884 enum anv_bo_alloc_flags bo_alloc_flags;
885
886 struct util_sparse_array_free_list free_list[16];
887 };
888
889 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
890 const char *name, enum anv_bo_alloc_flags alloc_flags);
891 void anv_bo_pool_finish(struct anv_bo_pool *pool);
892 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
893 struct anv_bo **bo_out);
894 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
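/* Usage sketch (hypothetical): this is roughly how batch buffers are obtained
 * from device->batch_bo_pool; freed BOs go back on the pool's free lists for
 * reuse rather than back to the kernel.
 *
 *    struct anv_bo *bo;
 *    if (anv_bo_pool_alloc(&device->batch_bo_pool, 16 * 1024, &bo) == VK_SUCCESS) {
 *       ... record commands into bo->map ...
 *       anv_bo_pool_free(&device->batch_bo_pool, bo);
 *    }
 */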
895
896 struct anv_scratch_pool {
897 /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
898 struct anv_bo *bos[16][MESA_SHADER_STAGES];
899 uint32_t surfs[16];
900 struct anv_state surf_states[16];
901 };
902
903 void anv_scratch_pool_init(struct anv_device *device,
904 struct anv_scratch_pool *pool);
905 void anv_scratch_pool_finish(struct anv_device *device,
906 struct anv_scratch_pool *pool);
907 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
908 struct anv_scratch_pool *pool,
909 gl_shader_stage stage,
910 unsigned per_thread_scratch);
911 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
912 struct anv_scratch_pool *pool,
913 unsigned per_thread_scratch);
914
915 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
916 struct anv_bo_cache {
917 struct util_sparse_array bo_map;
918 pthread_mutex_t mutex;
919 };
920
921 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
922 struct anv_device *device);
923 void anv_bo_cache_finish(struct anv_bo_cache *cache);
924
925 struct anv_queue_family {
926 /* Standard bits passed on to the client */
927 VkQueueFlags queueFlags;
928 uint32_t queueCount;
929
930 enum intel_engine_class engine_class;
931 };
932
933 #define ANV_MAX_QUEUE_FAMILIES 5
934
935 struct anv_memory_type {
936 /* Standard bits passed on to the client */
937 VkMemoryPropertyFlags propertyFlags;
938 uint32_t heapIndex;
939 };
940
941 struct anv_memory_heap {
942 /* Standard bits passed on to the client */
943 VkDeviceSize size;
944 VkMemoryHeapFlags flags;
945
946 /** Driver-internal book-keeping.
947 *
948 * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
949 */
950 alignas(8) VkDeviceSize used;
951
952 bool is_local_mem;
953 };
954
955 struct anv_memregion {
956 const struct intel_memory_class_instance *region;
957 uint64_t size;
958 uint64_t available;
959 };
960
961 enum anv_timestamp_capture_type {
962 ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE,
963 ANV_TIMESTAMP_CAPTURE_END_OF_PIPE,
964 ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
965 ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
966 ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
967 };
968
969 struct anv_physical_device {
970 struct vk_physical_device vk;
971
972 /* Link in anv_instance::physical_devices */
973 struct list_head link;
974
975 struct anv_instance * instance;
976 char path[20];
977 struct intel_device_info info;
978
979 bool video_decode_enabled;
980
981 struct brw_compiler * compiler;
982 struct isl_device isl_dev;
983 struct intel_perf_config * perf;
984 /*
985 * Number of commands required to implement a performance query begin +
986 * end.
987 */
988 uint32_t n_perf_query_commands;
989 bool has_exec_async;
990 bool has_exec_capture;
991 VkQueueGlobalPriorityKHR max_context_priority;
992 uint64_t gtt_size;
993
994 bool always_use_bindless;
995 bool use_call_secondary;
996
997 /** True if we can use timeline semaphores through execbuf */
998 bool has_exec_timeline;
999
1000 /** True if we can read the GPU timestamp register
1001 *
1002 * When running in a virtual context, the timestamp register is unreadable
1003 * on Gfx12+.
1004 */
1005 bool has_reg_timestamp;
1006
1007 /** True if we can create protected contexts. */
1008 bool has_protected_contexts;
1009
1010 /** Whether the i915 driver has the ability to create VM objects */
1011 bool has_vm_control;
1012
1013 /** True if we have the means to do sparse binding (e.g., a kernel driver
1014 * with a vm_bind ioctl).
1015 */
1016 bool has_sparse;
1017 bool sparse_uses_trtt;
1018
1019 /** True if HW supports ASTC LDR */
1020 bool has_astc_ldr;
1021 /** True if denorms in void extents should be flushed to zero */
1022 bool flush_astc_ldr_void_extent_denorms;
1023 /** True if ASTC LDR is supported via emulation */
1024 bool emu_astc_ldr;
1025 /* true if FCV optimization should be disabled. */
1026 bool disable_fcv;
1027 /**/
1028 bool uses_ex_bso;
1029
1030 bool always_flush_cache;
1031
1032 /** True if application memory is allocated with extra AUX memory
1033 *
1034 * Applications quite often pool image allocations together in a single
1035 * VkDeviceMemory object. On platforms like MTL, the alignment of images
1036 * with compression mapped through the AUX translation tables is large:
1037 * 1MB. This can create a lot of wasted space in the application memory
1038 * objects.
1039 *
1040 * To work around this problem, we allocate CCS data at the end of
1041 * VkDeviceMemory objects. This would not work well for TGL-like platforms
1042 * because the AUX translation tables also contain the format of the
1043 * images, but on MTL the HW ignores those values. So we can share the AUX
1044 * TT entries between different images without problems.
1045 *
1046 * This should be only true for platforms with AUX TT.
1047 */
1048 bool alloc_aux_tt_mem;
1049
1050 /**
1051 * True if the descriptor buffers hold one of the following:
1052 * - anv_sampled_image_descriptor
1053 * - anv_storage_image_descriptor
1054 * - anv_address_range_descriptor
1055 *
1056 * Accessing the descriptors in a bindless fashion from the shader
1057 * requires an indirection in the shader: first fetch one of the structures
1058 * listed above from the descriptor buffer, then emit the send message to
1059 * the fixed function (sampler, dataport, etc...) with the handle fetched
1060 * above.
1061 *
1062 * We need to do things this way prior to DG2 because the bindless surface
1063 * state space is limited to 64MB and some applications will allocate more
1064 * than what HW can support. On DG2+ we get 4GB of bindless surface state
1065 * and so we can reference RENDER_SURFACE_STATE/SAMPLER_STATE structures
1066 * directly instead.
1067 */
1068 bool indirect_descriptors;
1069
1070 bool uses_relocs;
1071
1072 /** Can the platform support cooperative matrices and is it enabled? */
1073 bool has_cooperative_matrix;
1074
1075 struct {
1076 uint32_t family_count;
1077 struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
1078 } queue;
1079
1080 struct {
1081 uint32_t type_count;
1082 struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
1083 uint32_t heap_count;
1084 struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
1085 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1086 bool need_flush;
1087 #endif
1088 } memory;
1089
1090 struct {
1091 /**
1092 * General state pool
1093 */
1094 struct anv_va_range general_state_pool;
1095 /**
1096 * Low 32bit heap
1097 */
1098 struct anv_va_range low_heap;
1099 /**
1100 * Binding table pool
1101 */
1102 struct anv_va_range binding_table_pool;
1103 /**
1104 * Internal surface states for blorp & push descriptors.
1105 */
1106 struct anv_va_range internal_surface_state_pool;
1107 /**
1108 * Scratch surfaces (overlaps with internal_surface_state_pool).
1109 */
1110 struct anv_va_range scratch_surface_state_pool;
1111 /**
1112 * Bindless surface states (indirectly referred to by indirect
1113 * descriptors or for direct descriptors)
1114 */
1115 struct anv_va_range bindless_surface_state_pool;
1116 /**
1117 * Dynamic state pool
1118 */
1119 struct anv_va_range dynamic_state_pool;
1120 /**
1121 * Sampler state pool
1122 */
1123 struct anv_va_range sampler_state_pool;
1124 /**
1125 * Indirect descriptor pool
1126 */
1127 struct anv_va_range indirect_descriptor_pool;
1128 /**
1129 * Indirect push descriptor pool
1130 */
1131 struct anv_va_range indirect_push_descriptor_pool;
1132 /**
1133 * Instruction state pool
1134 */
1135 struct anv_va_range instruction_state_pool;
1136 /**
1137 * Client heap
1138 */
1139 struct anv_va_range high_heap;
1140 struct anv_va_range trtt;
1141 } va;
1142
1143 /* Either we have a single vram region and it's all mappable, or we have
1144 * both mappable & non-mappable parts. System memory is always available.
1145 */
1146 struct anv_memregion vram_mappable;
1147 struct anv_memregion vram_non_mappable;
1148 struct anv_memregion sys;
1149 uint8_t driver_build_sha1[20];
1150 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
1151 uint8_t driver_uuid[VK_UUID_SIZE];
1152 uint8_t device_uuid[VK_UUID_SIZE];
1153 uint8_t rt_uuid[VK_UUID_SIZE];
1154
1155 /* Maximum amount of scratch space used by all the GRL kernels */
1156 uint32_t max_grl_scratch_size;
1157
1158 struct vk_sync_type sync_syncobj_type;
1159 struct vk_sync_timeline_type sync_timeline_type;
1160 const struct vk_sync_type * sync_types[4];
1161
1162 struct wsi_device wsi_device;
1163 int local_fd;
1164 bool has_local;
1165 int64_t local_major;
1166 int64_t local_minor;
1167 int master_fd;
1168 bool has_master;
1169 int64_t master_major;
1170 int64_t master_minor;
1171 struct intel_query_engine_info * engine_info;
1172
1173 void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
1174 enum anv_timestamp_capture_type, void *);
1175 struct intel_measure_device measure_device;
1176 };
1177
1178 static inline uint32_t
1179 anv_physical_device_bindless_heap_size(const struct anv_physical_device *device)
1180 {
1181 return device->uses_ex_bso ?
1182 128 * 1024 * 1024 /* 128 MiB */ :
1183 64 * 1024 * 1024 /* 64 MiB */;
1184 }
1185
1186 static inline bool
1187 anv_physical_device_has_vram(const struct anv_physical_device *device)
1188 {
1189 return device->vram_mappable.size > 0;
1190 }
1191
1192 struct anv_instance {
1193 struct vk_instance vk;
1194
1195 struct driOptionCache dri_options;
1196 struct driOptionCache available_dri_options;
1197
1198 int mesh_conv_prim_attrs_to_vert_attrs;
1199 bool enable_tbimr;
1200 bool external_memory_implicit_sync;
1201
1202 /**
1203 * Workarounds for game bugs.
1204 */
1205 uint8_t assume_full_subgroups;
1206 bool limit_trig_input_range;
1207 bool sample_mask_out_opengl_behaviour;
1208 bool force_filter_addr_rounding;
1209 bool fp64_workaround_enabled;
1210 float lower_depth_range_rate;
1211 unsigned generated_indirect_threshold;
1212 unsigned generated_indirect_ring_threshold;
1213 unsigned query_clear_with_blorp_threshold;
1214 unsigned query_copy_with_shader_threshold;
1215 unsigned force_vk_vendor;
1216 bool has_fake_sparse;
1217 bool disable_fcv;
1218
1219 /* HW workarounds */
1220 bool no_16bit;
1221 bool intel_enable_wa_14018912822;
1222 };
1223
1224 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1225 void anv_finish_wsi(struct anv_physical_device *physical_device);
1226
1227 struct anv_queue {
1228 struct vk_queue vk;
1229
1230 struct anv_device * device;
1231
1232 const struct anv_queue_family * family;
1233
1234 struct intel_batch_decode_ctx * decoder;
1235
1236 union {
1237 uint32_t exec_flags; /* i915 */
1238 uint32_t context_id; /* i915 */
1239 uint32_t exec_queue_id; /* Xe */
1240 };
1241
1242 /** Context/Engine id which executes companion RCS command buffer */
1243 uint32_t companion_rcs_id;
1244
1245 /** Synchronization object for debug purposes (DEBUG_SYNC) */
1246 struct vk_sync *sync;
1247
1248 /** Companion synchronization object
1249 *
1250 * Vulkan command buffers can be destroyed as soon as their lifecycle moves
1251 * from the Pending state to the Invalid/Executable state. This transition
1252 * happens when the VkFence/VkSemaphore associated with the completion of
1253 * the command buffer work is signaled.
1254 *
1255 * When we're using a companion command buffer to execute part of another
1256 * command buffer, we need to tie the 2 work submissions together to ensure
1257 * when the associated VkFence/VkSemaphore is signaled, both command
1258 * buffers are actually unused by the HW. To do this, we run an empty batch
1259 * buffer that we use to signal after both submissions:
1260 *
1261 * CCS --> main ---> empty_batch (with wait on companion) --> signal
1262 * RCS --> companion -|
1263 *
1264 * When the companion batch completes, it signals companion_sync and allows
1265 * empty_batch to execute. Since empty_batch is running on the main engine,
1266 * we're guaranteed that upon completion both main & companion command
1267 * buffers are not used by HW anymore.
1268 */
1269 struct vk_sync *companion_sync;
1270
1271 struct intel_ds_queue ds;
1272 };
1273
1274 struct nir_xfb_info;
1275 struct anv_pipeline_bind_map;
1276 struct anv_push_descriptor_info;
1277 enum anv_dynamic_push_bits;
1278
1279 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1280
1281 struct anv_shader_bin *
1282 anv_device_search_for_kernel(struct anv_device *device,
1283 struct vk_pipeline_cache *cache,
1284 const void *key_data, uint32_t key_size,
1285 bool *user_cache_bit);
1286
1287 struct anv_shader_upload_params;
1288
1289 struct anv_shader_bin *
1290 anv_device_upload_kernel(struct anv_device *device,
1291 struct vk_pipeline_cache *cache,
1292 const struct anv_shader_upload_params *params);
1293
1294 struct nir_shader;
1295 struct nir_shader_compiler_options;
1296
1297 struct nir_shader *
1298 anv_device_search_for_nir(struct anv_device *device,
1299 struct vk_pipeline_cache *cache,
1300 const struct nir_shader_compiler_options *nir_options,
1301 unsigned char sha1_key[20],
1302 void *mem_ctx);
1303
1304 void
1305 anv_device_upload_nir(struct anv_device *device,
1306 struct vk_pipeline_cache *cache,
1307 const struct nir_shader *nir,
1308 unsigned char sha1_key[20]);
1309
1310 void
1311 anv_load_fp64_shader(struct anv_device *device);
1312
1313 /**
1314 * This enum tracks the various HW instructions that hold graphics state
1315 * needing to be reprogrammed. Some instructions are grouped together as they
1316 * pretty much need to be emitted together (like 3DSTATE_URB_*).
1317 *
1318 * Not all bits apply to all platforms. We build the set of dirty bits based
1319 * on enabled extensions & hardware generation in anv_device.
1320 */
1321 enum anv_gfx_state_bits {
1322 /* Pipeline states */
1323 ANV_GFX_STATE_URB, /* All legacy stages, including mesh */
1324 ANV_GFX_STATE_VF_STATISTICS,
1325 ANV_GFX_STATE_VF_SGVS,
1326 ANV_GFX_STATE_VF_SGVS_2,
1327 ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
1328 ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
1329 ANV_GFX_STATE_PRIMITIVE_REPLICATION,
1330 ANV_GFX_STATE_MULTISAMPLE,
1331 ANV_GFX_STATE_SBE,
1332 ANV_GFX_STATE_SBE_SWIZ,
1333 ANV_GFX_STATE_SO_DECL_LIST,
1334 ANV_GFX_STATE_VS,
1335 ANV_GFX_STATE_HS,
1336 ANV_GFX_STATE_DS,
1337 ANV_GFX_STATE_GS,
1338 ANV_GFX_STATE_PS,
1339 ANV_GFX_STATE_SBE_MESH,
1340 ANV_GFX_STATE_CLIP_MESH,
1341 ANV_GFX_STATE_MESH_CONTROL,
1342 ANV_GFX_STATE_MESH_SHADER,
1343 ANV_GFX_STATE_MESH_DISTRIB,
1344 ANV_GFX_STATE_TASK_CONTROL,
1345 ANV_GFX_STATE_TASK_SHADER,
1346 ANV_GFX_STATE_TASK_REDISTRIB,
1347 /* Dynamic states */
1348 ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */
1349 ANV_GFX_STATE_BLEND_STATE_POINTERS, /* The pointer to the dynamic state */
1350 ANV_GFX_STATE_CLIP,
1351 ANV_GFX_STATE_CC_STATE,
1352 ANV_GFX_STATE_CPS,
1353 ANV_GFX_STATE_DEPTH_BOUNDS,
1354 ANV_GFX_STATE_INDEX_BUFFER,
1355 ANV_GFX_STATE_LINE_STIPPLE,
1356 ANV_GFX_STATE_PS_BLEND,
1357 ANV_GFX_STATE_RASTER,
1358 ANV_GFX_STATE_SAMPLE_MASK,
1359 ANV_GFX_STATE_SAMPLE_PATTERN,
1360 ANV_GFX_STATE_SCISSOR,
1361 ANV_GFX_STATE_SF,
1362 ANV_GFX_STATE_STREAMOUT,
1363 ANV_GFX_STATE_TE,
1364 ANV_GFX_STATE_VERTEX_INPUT,
1365 ANV_GFX_STATE_VF,
1366 ANV_GFX_STATE_VF_TOPOLOGY,
1367 ANV_GFX_STATE_VFG,
1368 ANV_GFX_STATE_VIEWPORT_CC,
1369 ANV_GFX_STATE_VIEWPORT_SF_CLIP,
1370 ANV_GFX_STATE_WM,
1371 ANV_GFX_STATE_WM_DEPTH_STENCIL,
1372 ANV_GFX_STATE_PS_EXTRA,
1373 ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
1374 ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
1375 ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
1376
1377 ANV_GFX_STATE_MAX,
1378 };
1379
1380 const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state);
1381
1382 /* This structure tracks the values to program in HW instructions
1383 * corresponding to dynamic states of the Vulkan API. Only fields that need to
1384 * be reemitted outside of the VkPipeline object are tracked here.
1385 */
1386 struct anv_gfx_dynamic_state {
1387 /* 3DSTATE_BLEND_STATE_POINTERS */
1388 struct {
1389 bool AlphaToCoverageEnable;
1390 bool AlphaToOneEnable;
1391 bool IndependentAlphaBlendEnable;
1392 struct {
1393 bool WriteDisableAlpha;
1394 bool WriteDisableRed;
1395 bool WriteDisableGreen;
1396 bool WriteDisableBlue;
1397
1398 uint32_t LogicOpFunction;
1399 bool LogicOpEnable;
1400
1401 bool ColorBufferBlendEnable;
1402 uint32_t ColorClampRange;
1403 bool PreBlendColorClampEnable;
1404 bool PostBlendColorClampEnable;
1405 uint32_t SourceBlendFactor;
1406 uint32_t DestinationBlendFactor;
1407 uint32_t ColorBlendFunction;
1408 uint32_t SourceAlphaBlendFactor;
1409 uint32_t DestinationAlphaBlendFactor;
1410 uint32_t AlphaBlendFunction;
1411 } rts[MAX_RTS];
1412 } blend;
1413
1414 /* 3DSTATE_CC_STATE_POINTERS */
1415 struct {
1416 float BlendConstantColorRed;
1417 float BlendConstantColorGreen;
1418 float BlendConstantColorBlue;
1419 float BlendConstantColorAlpha;
1420 } cc;
1421
1422 /* 3DSTATE_CLIP */
1423 struct {
1424 uint32_t APIMode;
1425 uint32_t ViewportXYClipTestEnable;
1426 uint32_t MaximumVPIndex;
1427 uint32_t TriangleStripListProvokingVertexSelect;
1428 uint32_t LineStripListProvokingVertexSelect;
1429 uint32_t TriangleFanProvokingVertexSelect;
1430 } clip;
1431
1432 /* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */
1433 struct {
1434 /* Gfx11 */
1435 uint32_t CoarsePixelShadingMode;
1436 float MinCPSizeX;
1437 float MinCPSizeY;
1438 /* Gfx12+ */
1439 uint32_t CoarsePixelShadingStateArrayPointer;
1440 } cps;
1441
1442 /* 3DSTATE_DEPTH_BOUNDS */
1443 struct {
1444 bool DepthBoundsTestEnable;
1445 float DepthBoundsTestMinValue;
1446 float DepthBoundsTestMaxValue;
1447 } db;
1448
1449 /* 3DSTATE_GS */
1450 struct {
1451 uint32_t ReorderMode;
1452 } gs;
1453
1454 /* 3DSTATE_LINE_STIPPLE */
1455 struct {
1456 uint32_t LineStipplePattern;
1457 float LineStippleInverseRepeatCount;
1458 uint32_t LineStippleRepeatCount;
1459 } ls;
1460
1461 /* 3DSTATE_PS_EXTRA */
1462 struct {
1463 bool PixelShaderKillsPixel;
1464 } ps_extra;
1465
1466 /* 3DSTATE_PS_BLEND */
1467 struct {
1468 bool HasWriteableRT;
1469 bool ColorBufferBlendEnable;
1470 uint32_t SourceAlphaBlendFactor;
1471 uint32_t DestinationAlphaBlendFactor;
1472 uint32_t SourceBlendFactor;
1473 uint32_t DestinationBlendFactor;
1474 bool AlphaTestEnable;
1475 bool IndependentAlphaBlendEnable;
1476 bool AlphaToCoverageEnable;
1477 } ps_blend;
1478
1479 /* 3DSTATE_RASTER */
1480 struct {
1481 uint32_t APIMode;
1482 bool DXMultisampleRasterizationEnable;
1483 bool AntialiasingEnable;
1484 uint32_t CullMode;
1485 uint32_t FrontWinding;
1486 bool GlobalDepthOffsetEnableSolid;
1487 bool GlobalDepthOffsetEnableWireframe;
1488 bool GlobalDepthOffsetEnablePoint;
1489 float GlobalDepthOffsetConstant;
1490 float GlobalDepthOffsetScale;
1491 float GlobalDepthOffsetClamp;
1492 uint32_t FrontFaceFillMode;
1493 uint32_t BackFaceFillMode;
1494 bool ViewportZFarClipTestEnable;
1495 bool ViewportZNearClipTestEnable;
1496 bool ConservativeRasterizationEnable;
1497 } raster;
1498
1499 /* 3DSTATE_SCISSOR_STATE_POINTERS */
1500 struct {
1501 uint32_t count;
1502 struct {
1503 uint32_t ScissorRectangleYMin;
1504 uint32_t ScissorRectangleXMin;
1505 uint32_t ScissorRectangleYMax;
1506 uint32_t ScissorRectangleXMax;
1507 } elem[MAX_SCISSORS];
1508 } scissor;
1509
1510 /* 3DSTATE_SF */
1511 struct {
1512 float LineWidth;
1513 uint32_t TriangleStripListProvokingVertexSelect;
1514 uint32_t LineStripListProvokingVertexSelect;
1515 uint32_t TriangleFanProvokingVertexSelect;
1516 bool LegacyGlobalDepthBiasEnable;
1517 } sf;
1518
1519 /* 3DSTATE_STREAMOUT */
1520 struct {
1521 bool RenderingDisable;
1522 uint32_t RenderStreamSelect;
1523 uint32_t ReorderMode;
1524 uint32_t ForceRendering;
1525 } so;
1526
1527 /* 3DSTATE_SAMPLE_MASK */
1528 struct {
1529 uint32_t SampleMask;
1530 } sm;
1531
1532 /* 3DSTATE_TE */
1533 struct {
1534 uint32_t OutputTopology;
1535 } te;
1536
1537 /* 3DSTATE_VF */
1538 struct {
1539 bool IndexedDrawCutIndexEnable;
1540 uint32_t CutIndex;
1541 } vf;
1542
1543 /* 3DSTATE_VFG */
1544 struct {
1545 uint32_t DistributionMode;
1546 bool ListCutIndexEnable;
1547 } vfg;
1548
1549 /* 3DSTATE_VF_TOPOLOGY */
1550 struct {
1551 uint32_t PrimitiveTopologyType;
1552 } vft;
1553
1554 /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
1555 struct {
1556 uint32_t count;
1557 struct {
1558 float MinimumDepth;
1559 float MaximumDepth;
1560 } elem[MAX_VIEWPORTS];
1561 } vp_cc;
1562
1563 /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
1564 struct {
1565 uint32_t count;
1566 struct {
1567 float ViewportMatrixElementm00;
1568 float ViewportMatrixElementm11;
1569 float ViewportMatrixElementm22;
1570 float ViewportMatrixElementm30;
1571 float ViewportMatrixElementm31;
1572 float ViewportMatrixElementm32;
1573 float XMinClipGuardband;
1574 float XMaxClipGuardband;
1575 float YMinClipGuardband;
1576 float YMaxClipGuardband;
1577 float XMinViewPort;
1578 float XMaxViewPort;
1579 float YMinViewPort;
1580 float YMaxViewPort;
1581 } elem[MAX_VIEWPORTS];
1582 } vp_sf_clip;
1583
1584 /* 3DSTATE_WM */
1585 struct {
1586 uint32_t ForceThreadDispatchEnable;
1587 bool LineStippleEnable;
1588 } wm;
1589
1590 /* 3DSTATE_WM_DEPTH_STENCIL */
1591 struct {
1592 bool DoubleSidedStencilEnable;
1593 uint32_t StencilTestMask;
1594 uint32_t StencilWriteMask;
1595 uint32_t BackfaceStencilTestMask;
1596 uint32_t BackfaceStencilWriteMask;
1597 uint32_t StencilReferenceValue;
1598 uint32_t BackfaceStencilReferenceValue;
1599 bool DepthTestEnable;
1600 bool DepthBufferWriteEnable;
1601 uint32_t DepthTestFunction;
1602 bool StencilTestEnable;
1603 bool StencilBufferWriteEnable;
1604 uint32_t StencilFailOp;
1605 uint32_t StencilPassDepthPassOp;
1606 uint32_t StencilPassDepthFailOp;
1607 uint32_t StencilTestFunction;
1608 uint32_t BackfaceStencilFailOp;
1609 uint32_t BackfaceStencilPassDepthPassOp;
1610 uint32_t BackfaceStencilPassDepthFailOp;
1611 uint32_t BackfaceStencilTestFunction;
1612 } ds;
1613
1614 /* 3DSTATE_TBIMR_TILE_PASS_INFO */
1615 struct {
1616 unsigned TileRectangleHeight;
1617 unsigned TileRectangleWidth;
1618 unsigned VerticalTileCount;
1619 unsigned HorizontalTileCount;
1620 unsigned TBIMRBatchSize;
1621 unsigned TileBoxCheck;
1622 } tbimr;
1623 bool use_tbimr;
1624
1625 bool pma_fix;
1626
1627 BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX);
1628 };
1629
1630 enum anv_internal_kernel_name {
1631 ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
1632 ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
1633 ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
1634 ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
1635
1636 ANV_INTERNAL_KERNEL_COUNT,
1637 };
1638
1639 enum anv_rt_bvh_build_method {
1640 ANV_BVH_BUILD_METHOD_TRIVIAL,
1641 ANV_BVH_BUILD_METHOD_NEW_SAH,
1642 };
1643
1644 struct anv_device_astc_emu {
1645 struct vk_texcompress_astc_state *texcompress;
1646
1647 /* for flush_astc_ldr_void_extent_denorms */
1648 simple_mtx_t mutex;
1649 VkDescriptorSetLayout ds_layout;
1650 VkPipelineLayout pipeline_layout;
1651 VkPipeline pipeline;
1652 };
1653
1654 struct anv_trtt_batch_bo {
1655 struct anv_bo *bo;
1656 uint32_t size;
1657
1658 /* Once device->trtt.timeline_handle signals timeline_val as complete we
1659 * can free this struct and its members.
1660 */
1661 uint64_t timeline_val;
1662
1663 /* Part of device->trtt.in_flight_batches. */
1664 struct list_head link;
1665 };
1666
1667 struct anv_device {
1668 struct vk_device vk;
1669
1670 struct anv_physical_device * physical;
1671 const struct intel_device_info * info;
1672 const struct anv_kmd_backend * kmd_backend;
1673 struct isl_device isl_dev;
1674 union {
1675 uint32_t context_id; /* i915 */
1676 uint32_t vm_id; /* Xe */
1677 };
1678 int fd;
1679
1680 pthread_mutex_t vma_mutex;
1681 struct util_vma_heap vma_lo;
1682 struct util_vma_heap vma_hi;
1683 struct util_vma_heap vma_desc;
1684 struct util_vma_heap vma_samplers;
1685 struct util_vma_heap vma_trtt;
1686
1687 /** List of all anv_device_memory objects */
1688 struct list_head memory_objects;
1689
1690 /** List of anv_image objects with a private binding for implicit CCS */
1691 struct list_head image_private_objects;
1692
1693 /** Memory pool for batch buffers */
1694 struct anv_bo_pool batch_bo_pool;
1695 /** Memory pool for utrace timestamp buffers */
1696 struct anv_bo_pool utrace_bo_pool;
1697 /** Memory pool for BVH build buffers */
1698 struct anv_bo_pool bvh_bo_pool;
1699
1700 struct anv_bo_cache bo_cache;
1701
1702 struct anv_state_pool general_state_pool;
1703 struct anv_state_pool dynamic_state_pool;
1704 struct anv_state_pool instruction_state_pool;
1705 struct anv_state_pool binding_table_pool;
1706 struct anv_state_pool scratch_surface_state_pool;
1707 struct anv_state_pool internal_surface_state_pool;
1708 struct anv_state_pool bindless_surface_state_pool;
1709 struct anv_state_pool indirect_push_descriptor_pool;
1710
1711 struct anv_state_reserved_pool custom_border_colors;
1712
1713 /** BO used for various workarounds
1714 *
1715 * There are a number of workarounds on our hardware which require writing
1716 * data somewhere and it doesn't really matter where. For that, we use
1717 * this BO and just write to the first dword or so.
1718 *
1719 * We also need to be able to handle NULL buffers bound as pushed UBOs.
1720 * For that, we use the high bytes (>= 1024) of the workaround BO.
1721 */
1722 struct anv_bo * workaround_bo;
1723 struct anv_address workaround_address;
1724
1725 /**
1726 * Workarounds for game bugs.
1727 */
1728 struct {
1729 struct set * doom64_images;
1730 } workarounds;
1731
1732 struct anv_bo * trivial_batch_bo;
1733 struct anv_state null_surface_state;
1734
1735 /**
1736 * NULL surface state copy stored in host memory for use as a fast
1737 * memcpy() source.
1738 */
1739 char host_null_surface_state[ANV_SURFACE_STATE_SIZE];
1740
1741 struct vk_pipeline_cache * default_pipeline_cache;
1742 struct vk_pipeline_cache * internal_cache;
1743 struct blorp_context blorp;
1744
1745 struct anv_state border_colors;
1746
1747 struct anv_state slice_hash;
1748
1749 /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1750 *
1751 * We need to emit CPS_STATE structures for each viewport accessible by a
1752 * pipeline. So rather than write many identical CPS_STATE structures
1753 * dynamically, we can enumerate all possible combinations and then just
1754 * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1755 * array.
1756 */
1757 struct anv_state cps_states;
1758
1759 uint32_t queue_count;
1760 struct anv_queue * queues;
1761
1762 struct anv_scratch_pool scratch_pool;
1763 struct anv_bo *rt_scratch_bos[16];
1764 struct anv_bo *btd_fifo_bo;
1765 struct anv_address rt_uuid_addr;
1766
1767 /** A pre-packed VERTEX_ELEMENT_STATE feeding 0s to the VS stage
1768 *
1769 * For use when a pipeline has no VS input
1770 */
1771 uint32_t empty_vs_input[2];
1772
1773 bool robust_buffer_access;
1774
1775 uint32_t protected_session_id;
1776
1777 /** Shadow ray query BO
1778 *
1779 * The ray_query_bo only holds the current ray being traced. When using
1780 * more than 1 ray query per thread, we cannot fit all the queries in
1781 * there, so we need another buffer to hold query data that is not
1782 * currently being used by the HW for tracing, similar to a scratch space.
1783 *
1784 * The size of the shadow buffer depends on the number of queries per
1785 * shader.
1786 */
1787 struct anv_bo *ray_query_shadow_bos[16];
1788 /** Ray query buffer used to communicate with the HW unit.
1789 */
1790 struct anv_bo *ray_query_bo;
1791
1792 struct anv_shader_bin *rt_trampoline;
1793 struct anv_shader_bin *rt_trivial_return;
1794
1795 enum anv_rt_bvh_build_method bvh_build_method;
1796
1797 /** Draw generation shader
1798 *
1799 * Generates direct draw calls out of indirect parameters. Used to
1800 * work around slowness with indirect draw calls.
1801 */
1802 struct anv_shader_bin *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
1803 const struct intel_l3_config *internal_kernels_l3_config;
1804
1805 pthread_mutex_t mutex;
1806 pthread_cond_t queue_submit;
1807
1808 struct intel_batch_decode_ctx decoder[ANV_MAX_QUEUE_FAMILIES];
1809 /*
1810 * When decoding an anv_cmd_buffer, we might need to search for BOs through
1811 * the cmd_buffer's list.
1812 */
1813 struct anv_cmd_buffer *cmd_buffer_being_decoded;
1814
1815 int perf_fd; /* -1 if not opened */
1816 uint64_t perf_metric; /* 0 if unset */
1817
1818 struct intel_aux_map_context *aux_map_ctx;
1819
1820 const struct intel_l3_config *l3_config;
1821
1822 struct intel_debug_block_frame *debug_frame_desc;
1823
1824 struct intel_ds_device ds;
1825
1826 nir_shader *fp64_nir;
1827
1828 uint32_t draw_call_count;
1829 struct anv_state breakpoint;
1830 #if DETECT_OS_ANDROID
1831 struct u_gralloc *u_gralloc;
1832 #endif
1833
1834 /** Precompute all dirty graphics bits
1835 *
1836 * Depending on the platform, some of the dirty bits don't apply (for example
1837 * 3DSTATE_PRIMITIVE_REPLICATION is only Gfx12.0+). Disabling some
1838 * extensions like Mesh shaders also allows us to avoid emitting any
1839 * mesh/task related instructions (we only initialize them once at device
1840 * initialization).
1841 */
1842 BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX);
1843
1844 /*
1845 * Command pool for companion RCS command buffer.
1846 */
1847 VkCommandPool companion_rcs_cmd_pool;
1848
1849 struct anv_trtt {
1850 pthread_mutex_t mutex;
1851
1852 /* Sometimes we need to run batches from places where we don't have a
1853 * queue coming from the API, so we use this.
1854 */
1855 struct anv_queue *queue;
1856
1857 /* There's only one L3 table, so if l3_addr is zero that means we
1858 * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT
1859 * yet in this context).
1860 */
1861 uint64_t l3_addr;
1862
1863 /* We don't want to access the page tables from the CPU, so just
1864 * maintain a mirror that we can use.
1865 */
1866 uint64_t *l3_mirror;
1867 uint64_t *l2_mirror;
1868
1869 /* We keep a dynamic list of page table bos, and each bo can store
1870 * multiple page tables.
1871 */
1872 struct anv_bo **page_table_bos;
1873 int num_page_table_bos;
1874 int page_table_bos_capacity;
1875
1876 /* These are used to keep track of space available for more page tables
1877 * within a bo.
1878 */
1879 struct anv_bo *cur_page_table_bo;
1880 uint64_t next_page_table_bo_offset;
1881
1882 /* Timeline syncobj used to track completion of the TR-TT batch BOs. */
1883 uint32_t timeline_handle;
1884 uint64_t timeline_val;
1885
1886 /* List of struct anv_trtt_batch_bo batches that are in flight and can
1887 * be freed once their timeline gets signaled.
1888 */
1889 struct list_head in_flight_batches;
1890 } trtt;
1891
1892 /* This is true if the user ever bound a sparse resource to memory. This
1893 * is used for a workaround that makes every memoryBarrier flush more
1894 * things than it should. Many applications request the sparse
1895 * features to be enabled but don't use them, and some create sparse
1896 * resources but never use them.
1897 */
1898 bool using_sparse;
1899
1900 struct anv_device_astc_emu astc_emu;
1901
1902 struct intel_bind_timeline bind_timeline; /* Xe only */
1903 };
1904
1905 static inline uint32_t
1906 anv_get_first_render_queue_index(struct anv_physical_device *pdevice)
1907 {
1908 assert(pdevice != NULL);
1909
1910 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
1911 if (pdevice->queue.families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
1912 return i;
1913 }
1914 }
1915
1916 unreachable("Graphics capable queue family not found");
1917 }
1918
1919 static inline struct anv_state
1920 anv_binding_table_pool_alloc(struct anv_device *device)
1921 {
1922 return anv_state_pool_alloc(&device->binding_table_pool,
1923 device->binding_table_pool.block_size, 0);
1924 }
1925
1926 static inline void
1927 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
1928 {
1929 anv_state_pool_free(&device->binding_table_pool, state);
1930 }
1931
1932 static inline struct anv_state
1933 anv_null_surface_state_for_binding_table(struct anv_device *device)
1934 {
1935 struct anv_state state = device->null_surface_state;
1936 if (device->physical->indirect_descriptors) {
1937 state.offset += device->physical->va.bindless_surface_state_pool.addr -
1938 device->physical->va.internal_surface_state_pool.addr;
1939 }
1940 return state;
1941 }
1942
1943 static inline struct anv_state
1944 anv_bindless_state_for_binding_table(struct anv_device *device,
1945 struct anv_state state)
1946 {
1947 state.offset += device->physical->va.bindless_surface_state_pool.addr -
1948 device->physical->va.internal_surface_state_pool.addr;
1949 return state;
1950 }
1951
1952 static inline uint32_t
1953 anv_mocs(const struct anv_device *device,
1954 const struct anv_bo *bo,
1955 isl_surf_usage_flags_t usage)
1956 {
1957 return isl_mocs(&device->isl_dev, usage, bo && anv_bo_is_external(bo));
1958 }
1959
1960 static inline uint32_t
1961 anv_mocs_for_address(const struct anv_device *device,
1962 struct anv_address *addr)
1963 {
1964 return anv_mocs(device, addr->bo, 0);
1965 }
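/* Illustrative usage sketch (not part of the driver; names other than
 * anv_mocs() are assumptions): when emitting state that targets a specific
 * BO, the MOCS value is typically derived from the BO and the intended ISL
 * usage, e.g.:
 *
 *    uint32_t mocs = anv_mocs(device, image_bo, ISL_SURF_USAGE_RENDER_TARGET_BIT);
 *
 * anv_mocs_for_address() is the convenience form for cases where only the
 * BO matters and no particular surface usage applies (usage == 0).
 */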
1966
1967 void anv_device_init_blorp(struct anv_device *device);
1968 void anv_device_finish_blorp(struct anv_device *device);
1969
1970 VkResult anv_device_alloc_bo(struct anv_device *device,
1971 const char *name, uint64_t size,
1972 enum anv_bo_alloc_flags alloc_flags,
1973 uint64_t explicit_address,
1974 struct anv_bo **bo);
1975 VkResult anv_device_map_bo(struct anv_device *device,
1976 struct anv_bo *bo,
1977 uint64_t offset,
1978 size_t size,
1979 void **map_out);
1980 void anv_device_unmap_bo(struct anv_device *device,
1981 struct anv_bo *bo,
1982 void *map, size_t map_size);
1983 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1984 void *host_ptr, uint32_t size,
1985 enum anv_bo_alloc_flags alloc_flags,
1986 uint64_t client_address,
1987 struct anv_bo **bo_out);
1988 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1989 enum anv_bo_alloc_flags alloc_flags,
1990 uint64_t client_address,
1991 struct anv_bo **bo);
1992 VkResult anv_device_export_bo(struct anv_device *device,
1993 struct anv_bo *bo, int *fd_out);
1994 VkResult anv_device_get_bo_tiling(struct anv_device *device,
1995 struct anv_bo *bo,
1996 enum isl_tiling *tiling_out);
1997 VkResult anv_device_set_bo_tiling(struct anv_device *device,
1998 struct anv_bo *bo,
1999 uint32_t row_pitch_B,
2000 enum isl_tiling tiling);
2001 void anv_device_release_bo(struct anv_device *device,
2002 struct anv_bo *bo);
2003
2004 static inline void anv_device_set_physical(struct anv_device *device,
2005 struct anv_physical_device *physical_device)
2006 {
2007 device->physical = physical_device;
2008 device->info = &physical_device->info;
2009 device->isl_dev = physical_device->isl_dev;
2010 }
2011
2012 static inline struct anv_bo *
2013 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
2014 {
2015 return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
2016 }
2017
2018 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2019 int64_t timeout);
2020
2021 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
2022 const VkDeviceQueueCreateInfo *pCreateInfo,
2023 uint32_t index_in_family);
2024 void anv_queue_finish(struct anv_queue *queue);
2025
2026 VkResult anv_queue_submit(struct vk_queue *queue,
2027 struct vk_queue_submit *submit);
2028 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
2029 struct anv_batch *batch,
2030 bool is_companion_rcs_batch);
2031 VkResult anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit,
2032 struct anv_batch *batch);
2033
2034 static inline void
2035 anv_trtt_batch_bo_free(struct anv_device *device,
2036 struct anv_trtt_batch_bo *trtt_bbo)
2037 {
2038 anv_bo_pool_free(&device->batch_bo_pool, trtt_bbo->bo);
2039 list_del(&trtt_bbo->link);
2040 vk_free(&device->vk.alloc, trtt_bbo);
2041 }
2042
2043 void anv_queue_trace(struct anv_queue *queue, const char *label,
2044 bool frame, bool begin);
2045
2046 static inline VkResult
2047 anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result)
2048 {
2049 if (submit_result != VK_SUCCESS)
2050 return submit_result;
2051
2052 VkResult result = VK_SUCCESS;
2053 if (queue->sync) {
2054 result = vk_sync_wait(&queue->device->vk, queue->sync, 0,
2055 VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
2056 if (result != VK_SUCCESS)
2057 result = vk_queue_set_lost(&queue->vk, "sync wait failed");
2058 }
2059
2060 return result;
2061 }
2062
2063 void *
2064 anv_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
2065 uint64_t size);
2066 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
2067 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
2068 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
2069 uint32_t stride, uint32_t tiling);
2070 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
2071 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
2072 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
2073 int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
2074 uint64_t value);
2075 VkResult
2076 anv_gem_import_bo_alloc_flags_to_bo_flags(struct anv_device *device,
2077 struct anv_bo *bo,
2078 enum anv_bo_alloc_flags alloc_flags,
2079 uint32_t *bo_flags);
2080 const struct intel_device_info_pat_entry *
2081 anv_device_get_pat_entry(struct anv_device *device,
2082 enum anv_bo_alloc_flags alloc_flags);
2083
2084 uint64_t anv_vma_alloc(struct anv_device *device,
2085 uint64_t size, uint64_t align,
2086 enum anv_bo_alloc_flags alloc_flags,
2087 uint64_t client_address,
2088 struct util_vma_heap **out_vma_heap);
2089 void anv_vma_free(struct anv_device *device,
2090 struct util_vma_heap *vma_heap,
2091 uint64_t address, uint64_t size);
2092
2093 struct anv_reloc_list {
2094 bool uses_relocs;
2095 uint32_t dep_words;
2096 BITSET_WORD * deps;
2097 const VkAllocationCallbacks *alloc;
2098 };
2099
2100 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
2101 const VkAllocationCallbacks *alloc,
2102 bool uses_relocs);
2103 void anv_reloc_list_finish(struct anv_reloc_list *list);
2104
2105 VkResult
2106 anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
2107
2108 static inline VkResult
2109 anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
2110 {
2111 return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
2112 }
2113
2114 VkResult anv_reloc_list_append(struct anv_reloc_list *list,
2115 struct anv_reloc_list *other);
2116
2117 struct anv_batch_bo {
2118 /* Link in the anv_cmd_buffer.owned_batch_bos list */
2119 struct list_head link;
2120
2121 struct anv_bo * bo;
2122
2123 /* Bytes actually consumed in this batch BO */
2124 uint32_t length;
2125
2126 /* When this batch BO is used as part of a primary batch buffer, this
2127 * tracks whether it is chained to another primary batch buffer.
2128 *
2129 * If this is the case, the relocation list's last entry points to the
2130 * location of the MI_BATCH_BUFFER_START chaining to the next batch.
2131 */
2132 bool chained;
2133
2134 struct anv_reloc_list relocs;
2135 };
2136
2137 struct anv_batch {
2138 const VkAllocationCallbacks * alloc;
2139
2140 /**
2141 * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2142 * Used to increase allocation size for long command buffers.
2143 */
2144 size_t allocated_batch_size;
2145
2146 struct anv_address start_addr;
2147
2148 void * start;
2149 void * end;
2150 void * next;
2151
2152 struct anv_reloc_list * relocs;
2153
2154 /* This callback is called (with the associated user data) in the event
2155 * that the batch runs out of space.
2156 */
2157 VkResult (*extend_cb)(struct anv_batch *, uint32_t, void *);
2158 void * user_data;
2159
2160 /**
2161 * Current error status of the command buffer. Used to track inconsistent
2162 * or incomplete command buffer states that are the consequence of run-time
2163 * errors such as out of memory scenarios. We want to track this in the
2164 * batch because the command buffer object is not visible to some parts
2165 * of the driver.
2166 */
2167 VkResult status;
2168
2169 enum intel_engine_class engine_class;
2170
2171 /**
2172 * Number of 3DPRIMITIVE's emitted for WA 16014538804
2173 */
2174 uint8_t num_3d_primitives_emitted;
2175 };
2176
2177 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
2178 VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
2179 void anv_batch_advance(struct anv_batch *batch, uint32_t size);
2180 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
2181 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
2182
2183 static inline struct anv_address
2184 anv_batch_current_address(struct anv_batch *batch)
2185 {
2186 return anv_batch_address(batch, batch->next);
2187 }
2188
2189 static inline void
2190 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
2191 void *map, size_t size)
2192 {
2193 batch->start_addr = addr;
2194 batch->next = batch->start = map;
2195 batch->end = map + size;
2196 }
2197
2198 static inline VkResult
2199 anv_batch_set_error(struct anv_batch *batch, VkResult error)
2200 {
2201 assert(error != VK_SUCCESS);
2202 if (batch->status == VK_SUCCESS)
2203 batch->status = error;
2204 return batch->status;
2205 }
2206
2207 static inline bool
2208 anv_batch_has_error(struct anv_batch *batch)
2209 {
2210 return batch->status != VK_SUCCESS;
2211 }
2212
2213 static inline uint64_t
2214 _anv_combine_address(struct anv_batch *batch, void *location,
2215 const struct anv_address address, uint32_t delta)
2216 {
2217 if (address.bo == NULL)
2218 return address.offset + delta;
2219
2220 if (batch)
2221 anv_reloc_list_add_bo(batch->relocs, address.bo);
2222
2223 return anv_address_physical(anv_address_add(address, delta));
2224 }
2225
2226 #define __gen_address_type struct anv_address
2227 #define __gen_user_data struct anv_batch
2228 #define __gen_combine_address _anv_combine_address
2229
2230 /* Wrapper macros needed to work around preprocessor argument issues. In
2231 * particular, arguments don't get pre-evaluated if they are concatenated.
2232 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
2233 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
2234 * We can work around this easily enough with these helpers.
2235 */
2236 #define __anv_cmd_length(cmd) cmd ## _length
2237 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
2238 #define __anv_cmd_header(cmd) cmd ## _header
2239 #define __anv_cmd_pack(cmd) cmd ## _pack
2240 #define __anv_reg_num(reg) reg ## _num
2241
2242 #define anv_pack_struct(dst, struc, ...) do { \
2243 struct struc __template = { \
2244 __VA_ARGS__ \
2245 }; \
2246 __anv_cmd_pack(struc)(NULL, dst, &__template); \
2247 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
2248 } while (0)
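/* Illustrative usage sketch (GENX struct/field names depend on the genxml in
 * scope and are assumptions here): anv_pack_struct() packs a template into a
 * caller-provided dword array without touching a batch, e.g.:
 *
 *    uint32_t dwords[GENX(VERTEX_ELEMENT_STATE_length)];
 *    anv_pack_struct(dwords, GENX(VERTEX_ELEMENT_STATE),
 *                    .Valid = true,
 *                    .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT);
 */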
2249
2250 #define anv_batch_emitn(batch, n, cmd, ...) ({ \
2251 void *__dst = anv_batch_emit_dwords(batch, n); \
2252 if (__dst) { \
2253 struct cmd __template = { \
2254 __anv_cmd_header(cmd), \
2255 .DWordLength = n - __anv_cmd_length_bias(cmd), \
2256 __VA_ARGS__ \
2257 }; \
2258 __anv_cmd_pack(cmd)(batch, __dst, &__template); \
2259 } \
2260 __dst; \
2261 })
2262
2263 #define anv_batch_emit_merge(batch, cmd, pipeline, state, name) \
2264 for (struct cmd name = { 0 }, \
2265 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
2266 __builtin_expect(_dst != NULL, 1); \
2267 ({ uint32_t _partial[__anv_cmd_length(cmd)]; \
2268 assert((pipeline)->state.len == __anv_cmd_length(cmd)); \
2269 __anv_cmd_pack(cmd)(batch, _partial, &name); \
2270 for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \
2271 ((uint32_t *)_dst)[i] = _partial[i] | \
2272 (pipeline)->batch_data[(pipeline)->state.offset + i]; \
2273 } \
2274 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2275 _dst = NULL; \
2276 }))
2277
2278 #define anv_batch_emit(batch, cmd, name) \
2279 for (struct cmd name = { __anv_cmd_header(cmd) }, \
2280 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
2281 __builtin_expect(_dst != NULL, 1); \
2282 ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \
2283 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2284 _dst = NULL; \
2285 }))
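/* Illustrative usage sketch: anv_batch_emit() declares a template struct,
 * lets the body initialize its fields, and packs it into the batch when the
 * body's scope closes, e.g. (assuming a GENX PIPE_CONTROL definition is in
 * scope):
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 *
 * If anv_batch_emit_dwords() fails to make room, _dst is NULL and the body
 * is skipped; the failure is tracked through the batch's status field.
 */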
2286
2287 #define anv_batch_write_reg(batch, reg, name) \
2288 for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \
2289 ({ \
2290 uint32_t _dw[__anv_cmd_length(reg)]; \
2291 __anv_cmd_pack(reg)(NULL, _dw, &name); \
2292 for (unsigned i = 0; i < __anv_cmd_length(reg); i++) { \
2293 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
2294 lri.RegisterOffset = __anv_reg_num(reg); \
2295 lri.DataDWord = _dw[i]; \
2296 } \
2297 } \
2298 _cont = NULL; \
2299 }))
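/* Illustrative usage sketch (the register and field below are hypothetical
 * placeholders, not real genxml names): anv_batch_write_reg() packs the
 * register template and emits one MI_LOAD_REGISTER_IMM per dword, e.g.:
 *
 *    anv_batch_write_reg(batch, GENX(SOME_REGISTER), reg) {
 *       reg.SomeField = true;
 *    }
 */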
2300
2301 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
2302 /* #define __gen_get_batch_address anv_batch_address */
2303 /* #define __gen_address_value anv_address_physical */
2304 /* #define __gen_address_offset anv_address_add */
2305
2306 struct anv_device_memory {
2307 struct vk_device_memory vk;
2308
2309 struct list_head link;
2310
2311 struct anv_bo * bo;
2312 const struct anv_memory_type * type;
2313
2314 void * map;
2315 size_t map_size;
2316
2317 /* The map, from the user's PoV, is map + map_delta */
2318 uint64_t map_delta;
2319 };
2320
2321 /**
2322 * Header for Vertex URB Entry (VUE)
2323 */
2324 struct anv_vue_header {
2325 uint32_t Reserved;
2326 uint32_t RTAIndex; /* RenderTargetArrayIndex */
2327 uint32_t ViewportIndex;
2328 float PointWidth;
2329 };
2330
2331 /** Struct representing a sampled image descriptor
2332 *
2333 * This descriptor layout is used for sampled images, bare sampler, and
2334 * combined image/sampler descriptors.
2335 */
2336 struct anv_sampled_image_descriptor {
2337 /** Bindless image handle
2338 *
2339 * This is expected to already be shifted such that the 20-bit
2340 * SURFACE_STATE table index is in the top 20 bits.
2341 */
2342 uint32_t image;
2343
2344 /** Bindless sampler handle
2345 *
2346 * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
2347 * to the dynamic state base address.
2348 */
2349 uint32_t sampler;
2350 };
2351
2352 /** Struct representing a storage image descriptor */
2353 struct anv_storage_image_descriptor {
2354 /** Bindless image handles
2355 *
2356 * These are expected to already be shifted such that the 20-bit
2357 * SURFACE_STATE table index is in the top 20 bits.
2358 */
2359 uint32_t vanilla;
2360
2361 /** Image depth
2362 *
2363 * By default the HW RESINFO message allows us to query the depth of an image:
2364 *
2365 * From the Kaby Lake docs for the RESINFO message:
2366 *
2367 * "Surface Type | ... | Blue
2368 * --------------+-----+----------------
2369 * SURFTYPE_3D | ... | (Depth+1)>>LOD"
2370 *
2371 * With VK_EXT_sliced_view_of_3d, we have to support a slice of a 3D image,
2372 * meaning a view at a depth offset with a new depth value potentially reduced
2373 * from the original image. Unfortunately, if we change the Depth value of
2374 * the image, we then run into issues with Yf/Ys tilings where the HW fetches
2375 * data at incorrect locations.
2376 *
2377 * To solve this, we put the slice depth in the descriptor and recompose
2378 * the vec3 (width, height, depth) using this field for z and xy using the
2379 * RESINFO result.
2380 */
2381 uint32_t image_depth;
2382 };
2383
2384 /** Struct representing an address/range descriptor
2385 *
2386 * The fields of this struct correspond directly to the data layout of
2387 * nir_address_format_64bit_bounded_global addresses. The last field is the
2388 * offset in the NIR address, so it must be zero so that loading the
2389 * descriptor yields a pointer to the start of the range.
2390 */
2391 struct anv_address_range_descriptor {
2392 uint64_t address;
2393 uint32_t range;
2394 uint32_t zero;
2395 };
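/* Illustrative sketch (buffer_addr, bind_range and desc_map are assumed
 * local names, not driver fields): writing such a descriptor for a storage
 * buffer binding would look roughly like:
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = anv_address_physical(buffer_addr),
 *       .range   = bind_range,
 *       .zero    = 0,
 *    };
 *    memcpy(desc_map, &desc, sizeof(desc));
 */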
2396
2397 enum anv_descriptor_data {
2398 /** The descriptor contains a BTI reference to a surface state */
2399 ANV_DESCRIPTOR_BTI_SURFACE_STATE = BITFIELD_BIT(0),
2400 /** The descriptor contains a BTI reference to a sampler state */
2401 ANV_DESCRIPTOR_BTI_SAMPLER_STATE = BITFIELD_BIT(1),
2402 /** The descriptor contains an actual buffer view */
2403 ANV_DESCRIPTOR_BUFFER_VIEW = BITFIELD_BIT(2),
2404 /** The descriptor contains inline uniform data */
2405 ANV_DESCRIPTOR_INLINE_UNIFORM = BITFIELD_BIT(3),
2406 /** anv_address_range_descriptor with a buffer address and range */
2407 ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE = BITFIELD_BIT(4),
2408 /** Bindless surface handle (through anv_sampled_image_descriptor) */
2409 ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE = BITFIELD_BIT(5),
2410 /** Storage image handles (through anv_storage_image_descriptor) */
2411 ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE = BITFIELD_BIT(6),
2412 /** The descriptor contains a single RENDER_SURFACE_STATE */
2413 ANV_DESCRIPTOR_SURFACE = BITFIELD_BIT(7),
2414 /** The descriptor contains a SAMPLER_STATE */
2415 ANV_DESCRIPTOR_SAMPLER = BITFIELD_BIT(8),
2416 /** A tuple of RENDER_SURFACE_STATE & SAMPLER_STATE */
2417 ANV_DESCRIPTOR_SURFACE_SAMPLER = BITFIELD_BIT(9),
2418 };
2419
2420 struct anv_descriptor_set_binding_layout {
2421 /* The type of the descriptors in this binding */
2422 VkDescriptorType type;
2423
2424 /* Flags provided when this binding was created */
2425 VkDescriptorBindingFlags flags;
2426
2427 /* Bitfield representing the type of data this descriptor contains */
2428 enum anv_descriptor_data data;
2429
2430 /* Maximum number of YCbCr texture/sampler planes */
2431 uint8_t max_plane_count;
2432
2433 /* Number of array elements in this binding (or size in bytes for inline
2434 * uniform data)
2435 */
2436 uint32_t array_size;
2437
2438 /* Index into the flattened descriptor set */
2439 uint32_t descriptor_index;
2440
2441 /* Index into the dynamic state array for a dynamic buffer, relative to the
2442 * set.
2443 */
2444 int16_t dynamic_offset_index;
2445
2446 /* Computed surface size from data (for one plane) */
2447 uint16_t descriptor_data_surface_size;
2448
2449 /* Computed sampler size from data (for one plane) */
2450 uint16_t descriptor_data_sampler_size;
2451
2452 /* Index into the descriptor set buffer views */
2453 int32_t buffer_view_index;
2454
2455 /* Offset into the descriptor buffer where the surface descriptor lives */
2456 uint32_t descriptor_surface_offset;
2457
2458 /* Offset into the descriptor buffer where the sampler descriptor lives */
2459 uint16_t descriptor_sampler_offset;
2460
2461 /* Pre-computed surface stride (with multiplane descriptor, the descriptor
2462 * includes all the planes)
2463 */
2464 uint16_t descriptor_surface_stride;
2465
2466 /* Pre-computed sampler stride (with multiplane descriptor, the descriptor
2467 * includes all the planes)
2468 */
2469 uint16_t descriptor_sampler_stride;
2470
2471 /* Immutable samplers (or NULL if no immutable samplers) */
2472 struct anv_sampler **immutable_samplers;
2473 };
2474
2475 enum anv_descriptor_set_layout_type {
2476 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN,
2477 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT,
2478 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT,
2479 ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER,
2480 };
2481
2482 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
2483 const struct anv_descriptor_set_binding_layout *binding,
2484 bool sampler);
2485
2486 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
2487 const struct anv_descriptor_set_binding_layout *binding,
2488 bool sampler);
2489
2490 struct anv_descriptor_set_layout {
2491 struct vk_object_base base;
2492
2493 VkDescriptorSetLayoutCreateFlags flags;
2494
2495 /* Type of descriptor set layout */
2496 enum anv_descriptor_set_layout_type type;
2497
2498 /* Descriptor set layouts can be destroyed at almost any time */
2499 uint32_t ref_cnt;
2500
2501 /* Number of bindings in this descriptor set */
2502 uint32_t binding_count;
2503
2504 /* Total number of descriptors */
2505 uint32_t descriptor_count;
2506
2507 /* Shader stages affected by this descriptor set */
2508 uint16_t shader_stages;
2509
2510 /* Number of buffer views in this descriptor set */
2511 uint32_t buffer_view_count;
2512
2513 /* Number of dynamic offsets used by this descriptor set */
2514 uint16_t dynamic_offset_count;
2515
2516 /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
2517 * this buffer
2518 */
2519 VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
2520
2521 /* Size of the descriptor buffer dedicated to surface states for this
2522 * descriptor set
2523 */
2524 uint32_t descriptor_buffer_surface_size;
2525
2526 /* Size of the descriptor buffer dedicated to sampler states for this
2527 * descriptor set
2528 */
2529 uint32_t descriptor_buffer_sampler_size;
2530
2531 /* Bindings in this descriptor set */
2532 struct anv_descriptor_set_binding_layout binding[0];
2533 };
2534
2535 void anv_descriptor_set_layout_destroy(struct anv_device *device,
2536 struct anv_descriptor_set_layout *layout);
2537
2538 void anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout);
2539
2540 static inline struct anv_descriptor_set_layout *
2541 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2542 {
2543 assert(layout && layout->ref_cnt >= 1);
2544 p_atomic_inc(&layout->ref_cnt);
2545
2546 return layout;
2547 }
2548
2549 static inline void
2550 anv_descriptor_set_layout_unref(struct anv_device *device,
2551 struct anv_descriptor_set_layout *layout)
2552 {
2553 assert(layout && layout->ref_cnt >= 1);
2554 if (p_atomic_dec_zero(&layout->ref_cnt))
2555 anv_descriptor_set_layout_destroy(device, layout);
2556 }
2557
2558 struct anv_descriptor {
2559 VkDescriptorType type;
2560
2561 union {
2562 struct {
2563 VkImageLayout layout;
2564 struct anv_image_view *image_view;
2565 struct anv_sampler *sampler;
2566 };
2567
2568 struct {
2569 struct anv_buffer_view *set_buffer_view;
2570 struct anv_buffer *buffer;
2571 uint64_t offset;
2572 uint64_t range;
2573 uint64_t bind_range;
2574 };
2575
2576 struct anv_buffer_view *buffer_view;
2577
2578 struct vk_acceleration_structure *accel_struct;
2579 };
2580 };
2581
2582 struct anv_descriptor_set {
2583 struct vk_object_base base;
2584
2585 struct anv_descriptor_pool *pool;
2586 struct anv_descriptor_set_layout *layout;
2587
2588 /* Amount of space occupied in the pool by this descriptor set. It can
2589 * be larger than the size of the descriptor set.
2590 */
2591 uint32_t size;
2592
2593 /* Is this descriptor set a push descriptor */
2594 bool is_push;
2595
2596 /* Bitfield of descriptors for which we need to generate surface states.
2597 * Only valid for push descriptors
2598 */
2599 uint32_t generate_surface_states;
2600
2601 /* State relative to anv_descriptor_pool::surface_bo */
2602 struct anv_state desc_surface_mem;
2603 /* State relative to anv_descriptor_pool::sampler_bo */
2604 struct anv_state desc_sampler_mem;
2605 /* Surface state for the descriptor buffer */
2606 struct anv_state desc_surface_state;
2607
2608 /* Descriptor set address pointing to desc_surface_mem (we don't need one
2609 * for samplers because they're never accessed other than by the HW through
2610 * the shader sampler handle).
2611 */
2612 struct anv_address desc_surface_addr;
2613
2614 struct anv_address desc_sampler_addr;
2615
2616 /* Descriptor offset from the
2617 * device->va.internal_surface_state_pool.addr
2618 *
2619 * It just needs to be added to the binding table offset to be put into the
2620 * HW BTI entry.
2621 */
2622 uint32_t desc_offset;
2623
2624 uint32_t buffer_view_count;
2625 struct anv_buffer_view *buffer_views;
2626
2627 /* Link to descriptor pool's desc_sets list. */
2628 struct list_head pool_link;
2629
2630 uint32_t descriptor_count;
2631 struct anv_descriptor descriptors[0];
2632 };
2633
2634 static inline bool
2635 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2636 {
2637 return set->pool == NULL;
2638 }
2639
2640 struct anv_surface_state_data {
2641 uint8_t data[ANV_SURFACE_STATE_SIZE];
2642 };
2643
2644 struct anv_buffer_state {
2645 /** Surface state allocated from the bindless heap
2646 *
2647 * Only valid if anv_physical_device::indirect_descriptors is true
2648 */
2649 struct anv_state state;
2650
2651 /** Surface state after genxml packing
2652 *
2653 * Only valid if anv_physical_device::indirect_descriptors is false
2654 */
2655 struct anv_surface_state_data state_data;
2656 };
2657
2658 struct anv_buffer_view {
2659 struct vk_buffer_view vk;
2660
2661 struct anv_address address;
2662
2663 struct anv_buffer_state general;
2664 struct anv_buffer_state storage;
2665 };
2666
2667 struct anv_push_descriptor_set {
2668 struct anv_descriptor_set set;
2669
2670 /* Put this field right behind anv_descriptor_set so it fills up the
2671 * descriptors[0] field. */
2672 struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2673
2674 /** True if the descriptor set buffer has been referenced by a draw or
2675 * dispatch command.
2676 */
2677 bool set_used_on_gpu;
2678
2679 struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2680 };
2681
2682 static inline struct anv_address
2683 anv_descriptor_set_address(struct anv_descriptor_set *set)
2684 {
2685 if (anv_descriptor_set_is_push(set)) {
2686 /* We have to flag push descriptor set as used on the GPU
2687 * so that the next time we push descriptors, we grab new memory.
2688 */
2689 struct anv_push_descriptor_set *push_set =
2690 (struct anv_push_descriptor_set *)set;
2691 push_set->set_used_on_gpu = true;
2692 }
2693
2694 return set->desc_surface_addr;
2695 }
2696
2697 struct anv_descriptor_pool_heap {
2698 /* BO allocated to back the pool (unused for host pools) */
2699 struct anv_bo *bo;
2700
2701 /* Host memory allocated to back a host pool */
2702 void *host_mem;
2703
2704 /* Heap tracking allocations in bo/host_mem */
2705 struct util_vma_heap heap;
2706
2707 /* Size of the heap */
2708 uint32_t size;
2709 };
2710
2711 struct anv_descriptor_pool {
2712 struct vk_object_base base;
2713
2714 struct anv_descriptor_pool_heap surfaces;
2715 struct anv_descriptor_pool_heap samplers;
2716
2717 struct anv_state_stream surface_state_stream;
2718 void *surface_state_free_list;
2719
2720 /** List of anv_descriptor_set. */
2721 struct list_head desc_sets;
2722
2723 /** Heap over host_mem */
2724 struct util_vma_heap host_heap;
2725
2726 /** Allocated size of host_mem */
2727 uint32_t host_mem_size;
2728
2729 /**
2730 * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
2731 * surface_state_stream is unused.
2732 */
2733 bool host_only;
2734
2735 char host_mem[0];
2736 };
2737
2738 bool
2739 anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
2740 struct anv_push_descriptor_set *push_set,
2741 struct anv_descriptor_set_layout *layout);
2742
2743 void
2744 anv_push_descriptor_set_finish(struct anv_push_descriptor_set *push_set);
2745
2746 void
2747 anv_descriptor_set_write_image_view(struct anv_device *device,
2748 struct anv_descriptor_set *set,
2749 const VkDescriptorImageInfo * const info,
2750 VkDescriptorType type,
2751 uint32_t binding,
2752 uint32_t element);
2753
2754 void
2755 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2756 struct anv_descriptor_set *set,
2757 VkDescriptorType type,
2758 struct anv_buffer_view *buffer_view,
2759 uint32_t binding,
2760 uint32_t element);
2761
2762 void
2763 anv_descriptor_set_write_buffer(struct anv_device *device,
2764 struct anv_descriptor_set *set,
2765 VkDescriptorType type,
2766 struct anv_buffer *buffer,
2767 uint32_t binding,
2768 uint32_t element,
2769 VkDeviceSize offset,
2770 VkDeviceSize range);
2771
2772 void
2773 anv_descriptor_write_surface_state(struct anv_device *device,
2774 struct anv_descriptor *desc,
2775 struct anv_state surface_state);
2776
2777 void
2778 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
2779 struct anv_descriptor_set *set,
2780 struct vk_acceleration_structure *accel,
2781 uint32_t binding,
2782 uint32_t element);
2783
2784 void
2785 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2786 struct anv_descriptor_set *set,
2787 uint32_t binding,
2788 const void *data,
2789 size_t offset,
2790 size_t size);
2791
2792 void
2793 anv_descriptor_set_write(struct anv_device *device,
2794 struct anv_descriptor_set *set_override,
2795 uint32_t write_count,
2796 const VkWriteDescriptorSet *writes);
2797
2798 void
2799 anv_descriptor_set_write_template(struct anv_device *device,
2800 struct anv_descriptor_set *set,
2801 const struct vk_descriptor_update_template *template,
2802 const void *data);
2803
2804 #define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 4)
2805 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 3)
2806 #define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 2)
2807 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 1)
2808 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2809
2810 struct anv_pipeline_binding {
2811 /** Index in the descriptor set
2812 *
2813 * This is a flattened index; the descriptor set layout is already taken
2814 * into account.
2815 */
2816 uint32_t index;
2817
2818 /** Binding in the descriptor set. Not valid for any of the
2819 * ANV_DESCRIPTOR_SET_*
2820 */
2821 uint32_t binding;
2822
2823 /** Offset in the descriptor buffer
2824 *
2825 * Relative to anv_descriptor_set::desc_addr. This is useful for
2826 * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
2827 * table entry.
2828 */
2829 uint32_t set_offset;
2830
2831 /** The descriptor set this surface corresponds to.
2832 *
2833 * The special ANV_DESCRIPTOR_SET_* values above indicate that this
2834 * binding is not a normal descriptor set but something else.
2835 */
2836 uint8_t set;
2837
2838 union {
2839 /** Plane in the binding index for images */
2840 uint8_t plane;
2841
2842 /** Input attachment index (relative to the subpass) */
2843 uint8_t input_attachment_index;
2844
2845 /** Dynamic offset index
2846 *
2847 * For dynamic UBOs and SSBOs, relative to set.
2848 */
2849 uint8_t dynamic_offset_index;
2850 };
2851 };
2852
2853 struct anv_push_range {
2854 /** Index in the descriptor set */
2855 uint32_t index;
2856
2857 /** Descriptor set index */
2858 uint8_t set;
2859
2860 /** Dynamic offset index (for dynamic UBOs), relative to set. */
2861 uint8_t dynamic_offset_index;
2862
2863 /** Start offset in units of 32B */
2864 uint8_t start;
2865
2866 /** Range in units of 32B */
2867 uint8_t length;
2868 };
2869
2870 struct anv_pipeline_sets_layout {
2871 struct anv_device *device;
2872
2873 struct {
2874 struct anv_descriptor_set_layout *layout;
2875 uint32_t dynamic_offset_start;
2876 } set[MAX_SETS];
2877
2878 enum anv_descriptor_set_layout_type type;
2879
2880 uint32_t num_sets;
2881 uint32_t num_dynamic_buffers;
2882 int push_descriptor_set_index;
2883
2884 bool independent_sets;
2885
2886 unsigned char sha1[20];
2887 };
2888
2889 void anv_pipeline_sets_layout_init(struct anv_pipeline_sets_layout *layout,
2890 struct anv_device *device,
2891 bool independent_sets);
2892
2893 void anv_pipeline_sets_layout_fini(struct anv_pipeline_sets_layout *layout);
2894
2895 void anv_pipeline_sets_layout_add(struct anv_pipeline_sets_layout *layout,
2896 uint32_t set_idx,
2897 struct anv_descriptor_set_layout *set_layout);
2898
2899 void anv_pipeline_sets_layout_hash(struct anv_pipeline_sets_layout *layout);
2900
2901 void anv_pipeline_sets_layout_print(const struct anv_pipeline_sets_layout *layout);
2902
2903 struct anv_pipeline_layout {
2904 struct vk_object_base base;
2905
2906 struct anv_pipeline_sets_layout sets_layout;
2907 };
2908
2909 const struct anv_descriptor_set_layout *
2910 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
2911 uint8_t *desc_idx);
2912
2913 struct anv_sparse_binding_data {
2914 uint64_t address;
2915 uint64_t size;
2916
2917 /* This is kept only because it's given to us by vma_alloc() and needs to be
2918 * passed back to vma_free(); we have no other particular use for it
2919 */
2920 struct util_vma_heap *vma_heap;
2921 };
2922
2923 #define ANV_SPARSE_BLOCK_SIZE (64 * 1024)
2924
2925 static inline bool
2926 anv_sparse_binding_is_enabled(struct anv_device *device)
2927 {
2928 return device->vk.enabled_features.sparseBinding;
2929 }
2930
2931 static inline bool
2932 anv_sparse_residency_is_enabled(struct anv_device *device)
2933 {
2934 return device->vk.enabled_features.sparseResidencyBuffer ||
2935 device->vk.enabled_features.sparseResidencyImage2D ||
2936 device->vk.enabled_features.sparseResidencyImage3D ||
2937 device->vk.enabled_features.sparseResidency2Samples ||
2938 device->vk.enabled_features.sparseResidency4Samples ||
2939 device->vk.enabled_features.sparseResidency8Samples ||
2940 device->vk.enabled_features.sparseResidency16Samples ||
2941 device->vk.enabled_features.sparseResidencyAliased;
2942 }
2943
2944 VkResult anv_init_sparse_bindings(struct anv_device *device,
2945 uint64_t size,
2946 struct anv_sparse_binding_data *sparse,
2947 enum anv_bo_alloc_flags alloc_flags,
2948 uint64_t client_address,
2949 struct anv_address *out_address);
2950 VkResult anv_free_sparse_bindings(struct anv_device *device,
2951 struct anv_sparse_binding_data *sparse);
2952 VkResult anv_sparse_bind_buffer(struct anv_device *device,
2953 struct anv_buffer *buffer,
2954 const VkSparseMemoryBind *vk_bind,
2955 struct anv_sparse_submission *submit);
2956 VkResult anv_sparse_bind_image_opaque(struct anv_device *device,
2957 struct anv_image *image,
2958 const VkSparseMemoryBind *vk_bind,
2959 struct anv_sparse_submission *submit);
2960 VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
2961 struct anv_image *image,
2962 const VkSparseImageMemoryBind *bind,
2963 struct anv_sparse_submission *submit);
2964 VkResult anv_sparse_bind(struct anv_device *device,
2965 struct anv_sparse_submission *sparse_submit);
2966
2967 VkSparseImageFormatProperties
2968 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
2969 VkImageAspectFlags aspect,
2970 VkImageType vk_image_type,
2971 struct isl_surf *surf);
2972 void anv_sparse_calc_miptail_properties(struct anv_device *device,
2973 struct anv_image *image,
2974 VkImageAspectFlags vk_aspect,
2975 uint32_t *imageMipTailFirstLod,
2976 VkDeviceSize *imageMipTailSize,
2977 VkDeviceSize *imageMipTailOffset,
2978 VkDeviceSize *imageMipTailStride);
2979 VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
2980 VkImageCreateFlags flags,
2981 VkImageTiling tiling,
2982 VkSampleCountFlagBits samples,
2983 VkImageType type,
2984 VkFormat format);
2985 VkResult anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
2986 struct anv_trtt_batch_bo **out_trtt_bbo);
2987
2988 struct anv_buffer {
2989 struct vk_buffer vk;
2990
2991 /* Set when bound */
2992 struct anv_address address;
2993
2994 struct anv_sparse_binding_data sparse_data;
2995 };
2996
2997 static inline bool
2998 anv_buffer_is_sparse(struct anv_buffer *buffer)
2999 {
3000 return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
3001 }
3002
3003 enum anv_cmd_dirty_bits {
3004 ANV_CMD_DIRTY_PIPELINE = 1 << 0,
3005 ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 1,
3006 ANV_CMD_DIRTY_RENDER_AREA = 1 << 2,
3007 ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 3,
3008 ANV_CMD_DIRTY_XFB_ENABLE = 1 << 4,
3009 ANV_CMD_DIRTY_RESTART_INDEX = 1 << 5,
3010 ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE = 1 << 6,
3011 };
3012 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
3013
3014 enum anv_pipe_bits {
3015 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
3016 ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
3017 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
3018 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
3019 ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
3020 ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
3021 ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6),
3022 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
3023 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
3024 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
3025 ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
3026
3027 /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
3028 * cache work has completed. Available on Gfx12+. For earlier Gfx we
3029 * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
3030 */
3031 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14),
3032 ANV_PIPE_PSS_STALL_SYNC_BIT = (1 << 15),
3033
3034 /*
3035 * This bit flushes the data-port's Untyped L1 data cache (LSC L1).
3036 */
3037 ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16),
3038
3039 /* This bit controls the flushing of the engine (Render, Compute) specific
3040 * entries from the compression cache.
3041 */
3042 ANV_PIPE_CCS_CACHE_FLUSH_BIT = (1 << 17),
3043
3044 ANV_PIPE_CS_STALL_BIT = (1 << 20),
3045 ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),
3046
3047 /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
3048 * a flush has happened but not a CS stall. The next time we do any sort
3049 * of invalidation we need to insert a CS stall at that time. Otherwise,
3050 * we would have to CS stall on every flush which could be bad.
3051 */
3052 ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22),
3053
3054 /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
3055 * AUX-TT data has changed and we need to invalidate AUX-TT data. This is
3056 * done by writing the AUX-TT register.
3057 */
3058 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 23),
3059
3060 /* This bit does not exist directly in PIPE_CONTROL. It means that a
3061 * PIPE_CONTROL with a post-sync operation will follow. This is used to
3062 * implement a workaround for Gfx9.
3063 */
3064 ANV_PIPE_POST_SYNC_BIT = (1 << 24),
3065 };
3066
3067 /* These bits track the state of buffer writes for queries. They get cleared
3068 * based on PIPE_CONTROL emissions.
3069 */
3070 enum anv_query_bits {
3071 ANV_QUERY_WRITES_RT_FLUSH = (1 << 0),
3072
3073 ANV_QUERY_WRITES_TILE_FLUSH = (1 << 1),
3074
3075 ANV_QUERY_WRITES_CS_STALL = (1 << 2),
3076
3077 ANV_QUERY_WRITES_DATA_FLUSH = (1 << 3),
3078 };
3079
3080 /* It's not clear why DG2 doesn't have issues with L3/CS coherency. But it's
3081 * likely related to performance workaround 14015868140.
3082 *
3083 * For now we enable this only on DG2 and platforms prior to Gfx12 where there
3084 * is no tile cache.
3085 */
3086 #define ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) \
3087 (intel_device_info_is_dg2(devinfo))
3088
3089 /* Things we need to flush before accessing query data using the command
3090 * streamer.
3091 *
3092 * Prior to DG2 experiments show that the command streamer is not coherent
3093 * with the tile cache so we need to flush it to make any data visible to CS.
3094 *
3095 * Otherwise we want to flush the RT cache which is where blorp writes, either
3096 * for clearing the query buffer or for clearing the destination buffer in
3097 * vkCopyQueryPoolResults().
3098 */
3099 #define ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo) \
3100 (((!ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) && \
3101 devinfo->ver >= 12) ? \
3102 ANV_QUERY_WRITES_TILE_FLUSH : 0) | \
3103 ANV_QUERY_WRITES_RT_FLUSH | \
3104 ANV_QUERY_WRITES_CS_STALL)
3105 #define ANV_QUERY_COMPUTE_WRITES_PENDING_BITS \
3106 (ANV_QUERY_WRITES_DATA_FLUSH | \
3107 ANV_QUERY_WRITES_CS_STALL)
3108
3109 #define ANV_PIPE_QUERY_BITS(pending_query_bits) ( \
3110 ((pending_query_bits & ANV_QUERY_WRITES_RT_FLUSH) ? \
3111 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0) | \
3112 ((pending_query_bits & ANV_QUERY_WRITES_TILE_FLUSH) ? \
3113 ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \
3114 ((pending_query_bits & ANV_QUERY_WRITES_CS_STALL) ? \
3115 ANV_PIPE_CS_STALL_BIT : 0) | \
3116 ((pending_query_bits & ANV_QUERY_WRITES_DATA_FLUSH) ? \
3117 (ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3118 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3119 ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) : 0))
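/* Illustrative sketch (the variable holding the pending bits is an
 * assumption): before accessing query data with the command streamer, the
 * accumulated ANV_QUERY_WRITES_* bits are translated into PIPE_CONTROL
 * bits, e.g.:
 *
 *    enum anv_pipe_bits flush_bits =
 *       ANV_PIPE_QUERY_BITS(pending_query_write_bits);
 */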
3120
3121 #define ANV_PIPE_FLUSH_BITS ( \
3122 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3123 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3124 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3125 ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
3126 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3127 ANV_PIPE_TILE_CACHE_FLUSH_BIT)
3128
3129 #define ANV_PIPE_STALL_BITS ( \
3130 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3131 ANV_PIPE_DEPTH_STALL_BIT | \
3132 ANV_PIPE_CS_STALL_BIT | \
3133 ANV_PIPE_PSS_STALL_SYNC_BIT)
3134
3135 #define ANV_PIPE_INVALIDATE_BITS ( \
3136 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
3137 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
3138 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
3139 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
3140 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
3141 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
3142
3143 /* PIPE_CONTROL bits that should be set only in 3D RCS mode.
3144 * For more details see genX(emit_apply_pipe_flushes).
3145 */
3146 #define ANV_PIPE_GFX_BITS ( \
3147 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3148 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3149 ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
3150 ANV_PIPE_DEPTH_STALL_BIT | \
3151 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3152 (GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0) | \
3153 ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
3154
3155 /* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode.
3156 * For more details see genX(emit_apply_pipe_flushes).
3157 *
3158 * Documentation says that untyped L1 dataport cache flush is controlled by
3159 * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register:
3160 *
3161 * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
3162 *
3163 * "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to
3164 * "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1
3165 * cache based on the programming of HDC_Chicken0 register bits 13:11."
3166 *
3167 * "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1
3168 * cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the
3169 * PIPE_CONTROL command."
3170 *
3171 * As part of Wa_22010960976 & Wa_14013347512, i915 is programming
3172 * HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol
3173 * Dataport flush, and UAV coherency barrier event"). So there is no need
3174 * to set "Untyped Data-Port Cache" in 3D mode.
3175 *
3176 * On MTL the HDC_CHICKEN0 default values changed to match what was programmed
3177 * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the
3178 * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0
3179 * register to force L1 untyped flush with HDC pipeline flush has no effect on
3180 * MTL.
3181 *
3182 * It seems like the HW change completely disconnected L1 untyped flush from
3183 * HDC pipeline flush with no way to bring that behavior back. So leave the L1
3184 * untyped flush active in 3D mode on all platforms since it doesn't seem to
3185 * cause issues there either.
3186 *
3187 * Maybe we'll have some GPGPU only bits here at some point.
3188 */
3189 #define ANV_PIPE_GPGPU_BITS (0)
3190
3191 enum intel_ds_stall_flag
3192 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
3193
3194 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
3195 VK_IMAGE_ASPECT_PLANE_0_BIT | \
3196 VK_IMAGE_ASPECT_PLANE_1_BIT | \
3197 VK_IMAGE_ASPECT_PLANE_2_BIT)
3198
3199 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
3200 VK_IMAGE_ASPECT_COLOR_BIT | \
3201 VK_IMAGE_ASPECT_PLANES_BITS_ANV)
3202
3203 struct anv_vertex_binding {
3204 struct anv_buffer * buffer;
3205 VkDeviceSize offset;
3206 VkDeviceSize size;
3207 };
3208
3209 struct anv_xfb_binding {
3210 struct anv_buffer * buffer;
3211 VkDeviceSize offset;
3212 VkDeviceSize size;
3213 };
3214
3215 struct anv_push_constants {
3216 /** Push constant data provided by the client through vkPushConstants */
3217 uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
3218
3219 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
3220 #define ANV_DESCRIPTOR_SET_OFFSET_MASK (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
3221
3222 /**
3223 * Base offsets for descriptor sets from
3224 * INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
3225 *
3226 * In bits [0:5]: dynamic offset index in dynamic_offsets[] for the set
3227 *
3228 * In bits [6:63]: descriptor set address
3229 */
3230 uint32_t desc_surface_offsets[MAX_SETS];
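/* Illustrative decode sketch (push/set_idx are assumed local names): the
 * masks above split each entry into a dynamic-offset index and a
 * descriptor-set offset:
 *
 *    uint32_t entry     = push->desc_surface_offsets[set_idx];
 *    uint32_t dyn_index = entry & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
 *    uint32_t set_off   = entry & ANV_DESCRIPTOR_SET_OFFSET_MASK;
 */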
3231
3232 /**
3233 * Base offsets for descriptor sets from
3234 */
3235 uint32_t desc_sampler_offsets[MAX_SETS];
3236
3237 /** Dynamic offsets for dynamic UBOs and SSBOs */
3238 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
3239
3240 union {
3241 struct {
3242 /** Dynamic MSAA value */
3243 uint32_t fs_msaa_flags;
3244
3245 /** Dynamic TCS input vertices */
3246 uint32_t tcs_input_vertices;
3247 } gfx;
3248
3249 struct {
3250 /** Base workgroup ID
3251 *
3252 * Used for vkCmdDispatchBase.
3253 */
3254 uint32_t base_work_group_id[3];
3255
3256 /** Subgroup ID
3257 *
3258 * This is never set by software but is implicitly filled out when
3259 * uploading the push constants for compute shaders.
3260 */
3261 uint32_t subgroup_id;
3262 } cs;
3263 };
3264
3265 /* Robust access pushed registers. */
3266 uint64_t push_reg_mask[MESA_SHADER_STAGES];
3267
3268 /** Ray query globals (RT_DISPATCH_GLOBALS) */
3269 uint64_t ray_query_globals;
3270 };
3271
3272 struct anv_surface_state {
3273 /** Surface state allocated from the bindless heap
3274 *
3275 * Can be NULL if unused.
3276 */
3277 struct anv_state state;
3278
3279 /** Surface state after genxml packing
3280 *
3281 * Same data as in state.
3282 */
3283 struct anv_surface_state_data state_data;
3284
3285 /** Address of the surface referred to by this state
3286 *
3287 * This address is relative to the start of the BO.
3288 */
3289 struct anv_address address;
3290 /* Address of the aux surface, if any
3291 *
3292 * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
3293 *
3294 * With the exception of gfx8, the bottom 12 bits of this address' offset
3295 * include extra aux information.
3296 */
3297 struct anv_address aux_address;
3298 /* Address of the clear color, if any
3299 *
3300 * This address is relative to the start of the BO.
3301 */
3302 struct anv_address clear_address;
3303 };
3304
3305 struct anv_attachment {
3306 VkFormat vk_format;
3307 const struct anv_image_view *iview;
3308 VkImageLayout layout;
3309 enum isl_aux_usage aux_usage;
3310 struct anv_surface_state surface_state;
3311
3312 VkResolveModeFlagBits resolve_mode;
3313 const struct anv_image_view *resolve_iview;
3314 VkImageLayout resolve_layout;
3315 };
3316
3317 /** State tracking for vertex buffer flushes
3318 *
3319 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
3320 * addresses. If you happen to have two vertex buffers which get placed
3321 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
3322 * collisions. In order to solve this problem, we track vertex address ranges
3323 * which are live in the cache and invalidate the cache if one ever exceeds 32
3324 * bits.
3325 */
3326 struct anv_vb_cache_range {
3327 /* Virtual address at which the live vertex buffer cache range starts for
3328 * this vertex buffer index.
3329 */
3330 uint64_t start;
3331
3332 /* Virtual address of the byte just past where the vertex buffer cache range
3333 * ends. This is exclusive such that end - start is the size of the range.
3334 */
3335 uint64_t end;
3336 };
3337
3338 static inline void
3339 anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty,
3340 const struct anv_vb_cache_range *bound)
3341 {
3342 if (dirty->start == dirty->end) {
3343 *dirty = *bound;
3344 } else if (bound->start != bound->end) {
3345 dirty->start = MIN2(dirty->start, bound->start);
3346 dirty->end = MAX2(dirty->end, bound->end);
3347 }
3348 }
3349
3350 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround */
3351 static inline bool
3352 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
3353 struct anv_vb_cache_range *dirty,
3354 struct anv_address vb_address,
3355 uint32_t vb_size)
3356 {
3357 if (vb_size == 0) {
3358 bound->start = 0;
3359 bound->end = 0;
3360 return false;
3361 }
3362
3363 bound->start = intel_48b_address(anv_address_physical(vb_address));
3364 bound->end = bound->start + vb_size;
3365 assert(bound->end > bound->start); /* No overflow */
3366
3367 /* Align everything to a cache line */
3368 bound->start &= ~(64ull - 1ull);
3369 bound->end = align64(bound->end, 64);
3370
3371 anv_merge_vb_cache_range(dirty, bound);
3372
3373 /* If our range is larger than 32 bits, we have to flush */
3374 assert(bound->end - bound->start <= (1ull << 32));
3375 return (dirty->end - dirty->start) > (1ull << 32);
3376 }
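/* Illustrative call pattern (gfx, vb, vb_address, vb_size and
 * pending_pipe_bits are assumed names): at draw time each bound vertex
 * buffer range is checked and, if the merged dirty range crosses a 32-bit
 * boundary, a VF cache invalidation is queued:
 *
 *    if (anv_gfx8_9_vb_cache_range_needs_workaround(&gfx->vb_bound_ranges[vb],
 *                                                   &gfx->vb_dirty_ranges[vb],
 *                                                   vb_address, vb_size))
 *       pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 */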
3377
3378 /**
3379 * State tracking for simple internal shaders
3380 */
3381 struct anv_simple_shader {
3382 /* The device associated with this emission */
3383 struct anv_device *device;
3384 /* The command buffer associated with this emission (can be NULL) */
3385 struct anv_cmd_buffer *cmd_buffer;
3386 /* State stream used for various internal allocations */
3387 struct anv_state_stream *dynamic_state_stream;
3388 struct anv_state_stream *general_state_stream;
3389 /* Where to emit the commands (can be different from cmd_buffer->batch) */
3390 struct anv_batch *batch;
3391 /* Shader to use */
3392 struct anv_shader_bin *kernel;
3393 /* L3 config used by the shader */
3394 const struct intel_l3_config *l3_config;
3395 /* Current URB config */
3396 const struct intel_urb_config *urb_cfg;
3397
3398 /* Managed by the simple shader helper */
3399 struct anv_state bt_state;
3400 };
3401
3402 /** State tracking for particular pipeline bind point
3403 *
3404 * This struct is the base struct for anv_cmd_graphics_state and
3405 * anv_cmd_compute_state. These are used to track state which is bound to a
3406 * particular type of pipeline. Generic state that applies per-stage such as
3407 * binding table offsets and push constants is tracked generically with a
3408 * per-stage array in anv_cmd_state.
3409 */
3410 struct anv_cmd_pipeline_state {
3411 struct anv_descriptor_set *descriptors[MAX_SETS];
3412 struct anv_push_descriptor_set push_descriptor;
3413
3414 struct anv_push_constants push_constants;
3415
3416 /* Push constant state allocated when flushing push constants. */
3417 struct anv_state push_constants_state;
3418
3419 /**
3420 * Dynamic buffer offsets.
3421 *
3422 * We have a maximum of MAX_DYNAMIC_BUFFERS per pipeline, but with
3423 * independent sets we cannot know how much in total is going to be
3424 * used. As a result we need to store the maximum possible number per set.
3425 *
3426 * Those values are written into anv_push_constants::dynamic_offsets at
3427 * flush time when we have the pipeline with the final
3428 * anv_pipeline_sets_layout.
3429 */
3430 struct {
3431 uint32_t offsets[MAX_DYNAMIC_BUFFERS];
3432 } dynamic_offsets[MAX_SETS];
3433
3434 /**
3435 * The current bound pipeline.
3436 */
3437 struct anv_pipeline *pipeline;
3438 };
3439
3440 /** State tracking for graphics pipeline
3441 *
3442 * This has anv_cmd_pipeline_state as a base struct to track things which get
3443 * bound to a graphics pipeline. Along with general pipeline bind point state
3444 * which is in the anv_cmd_pipeline_state base struct, it also contains other
3445 * state which is graphics-specific.
3446 */
3447 struct anv_cmd_graphics_state {
3448 struct anv_cmd_pipeline_state base;
3449
3450 VkRenderingFlags rendering_flags;
3451 VkRect2D render_area;
3452 uint32_t layer_count;
3453 uint32_t samples;
3454 uint32_t view_mask;
3455 uint32_t color_att_count;
3456 struct anv_state att_states;
3457 struct anv_attachment color_att[MAX_RTS];
3458 struct anv_attachment depth_att;
3459 struct anv_attachment stencil_att;
3460 struct anv_state null_surface_state;
3461
3462 anv_cmd_dirty_mask_t dirty;
3463 uint32_t vb_dirty;
3464
3465 struct anv_vb_cache_range ib_bound_range;
3466 struct anv_vb_cache_range ib_dirty_range;
3467 struct anv_vb_cache_range vb_bound_ranges[33];
3468 struct anv_vb_cache_range vb_dirty_ranges[33];
3469
3470 uint32_t restart_index;
3471
3472 VkShaderStageFlags push_constant_stages;
3473
3474 uint32_t primitive_topology;
3475 bool used_task_shader;
3476
3477 struct anv_buffer *index_buffer;
3478 uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
3479 uint32_t index_offset;
3480 uint32_t index_size;
3481
3482 struct vk_vertex_input_state vertex_input;
3483 struct vk_sample_locations_state sample_locations;
3484
3485 /**
3486 * The latest BLEND_STATE structure packed in dynamic state heap
3487 */
3488 struct anv_state blend_states;
3489
3490 bool object_preemption;
3491 bool has_uint_rt;
3492
3493 /* State tracking for Wa_14018912822. */
3494 bool color_blend_zero;
3495 bool alpha_blend_zero;
3496
3497 /**
3498 * DEPTH and STENCIL attachment write state for Wa_18019816803.
3499 */
3500 bool ds_write_state;
3501
3502 /**
3503 * State tracking for Wa_18020335297.
3504 */
3505 bool viewport_set;
3506
3507 struct intel_urb_config urb_cfg;
3508
3509 uint32_t n_occlusion_queries;
3510
3511 struct anv_gfx_dynamic_state dyn_state;
3512 };
3513
3514 enum anv_depth_reg_mode {
3515 ANV_DEPTH_REG_MODE_UNKNOWN = 0,
3516 ANV_DEPTH_REG_MODE_HW_DEFAULT,
3517 ANV_DEPTH_REG_MODE_D16_1X_MSAA,
3518 };
3519
3520 /** State tracking for compute pipeline
3521 *
3522 * This has anv_cmd_pipeline_state as a base struct to track things which get
3523 * bound to a compute pipeline. Along with general pipeline bind point state
3524 * which is in the anv_cmd_pipeline_state base struct, it also contains other
3525 * state which is compute-specific.
3526 */
3527 struct anv_cmd_compute_state {
3528 struct anv_cmd_pipeline_state base;
3529
3530 bool pipeline_dirty;
3531
3532 struct anv_state push_data;
3533
3534 struct anv_address num_workgroups;
3535
3536 uint32_t scratch_size;
3537 };
3538
3539 struct anv_cmd_ray_tracing_state {
3540 struct anv_cmd_pipeline_state base;
3541
3542 bool pipeline_dirty;
3543
3544 struct {
3545 struct anv_bo *bo;
3546 struct brw_rt_scratch_layout layout;
3547 } scratch;
3548
3549 struct anv_address build_priv_mem_addr;
3550 size_t build_priv_mem_size;
3551 };
3552
3553 /** State required while building cmd buffer */
3554 struct anv_cmd_state {
3555 /* PIPELINE_SELECT.PipelineSelection */
3556 uint32_t current_pipeline;
3557 const struct intel_l3_config * current_l3_config;
3558 uint32_t last_aux_map_state;
3559
3560 struct anv_cmd_graphics_state gfx;
3561 struct anv_cmd_compute_state compute;
3562 struct anv_cmd_ray_tracing_state rt;
3563
3564 enum anv_pipe_bits pending_pipe_bits;
3565
3566 struct {
3567 /**
3568 * Tracks operations that could interfere with queries in the
3569 * destination buffer of vkCmdCopyQueryResults; those operations need to
3570 * have completed before we do the work of vkCmdCopyQueryResults.
3571 */
3572 enum anv_query_bits buffer_write_bits;
3573
3574 /**
3575 * Tracks clear operations of query buffers that can interact with
3576 * vkCmdQueryBegin*, vkCmdWriteTimestamp*,
3577 * vkCmdWriteAccelerationStructuresPropertiesKHR, etc...
3578 *
3579 * We need the clearing of the buffer to have completed before we write data with
3580 * the command streamer or a shader.
3581 */
3582 enum anv_query_bits clear_bits;
3583 } queries;
3584
3585 VkShaderStageFlags descriptors_dirty;
3586 VkShaderStageFlags push_descriptors_dirty;
3587 VkShaderStageFlags push_constants_dirty;
3588
3589 struct anv_vertex_binding vertex_bindings[MAX_VBS];
3590 bool xfb_enabled;
3591 struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
3592 struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
3593 struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];
3594
3595 unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3596 unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3597 unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3598
3599 /**
3600 * Whether or not the gfx8 PMA fix is enabled. We ensure that it is disabled
3601 * at the top of any command buffer by disabling it in EndCommandBuffer and
3602 * before invoking secondaries in ExecuteCommands.
3603 */
3604 bool pma_fix_enabled;
3605
3606 /**
3607 * Whether or not we know for certain that HiZ is enabled for the current
3608 * subpass. If, for whatever reason, we are unsure as to whether HiZ is
3609 * enabled or not, this will be false.
3610 */
3611 bool hiz_enabled;
3612
3613 /* We ensure the registers for the gfx12 D16 fix are initialized at the
3614 * first non-NULL depth stencil packet emission of every command buffer.
3615 * For secondary command buffer execution, we transfer the state from the
3616 * last command buffer to the primary (if known).
3617 */
3618 enum anv_depth_reg_mode depth_reg_mode;
3619
3620 /**
3621 * Whether RHWO optimization is enabled (Wa_1508744258).
3622 */
3623 bool rhwo_optimization_enabled;
3624
3625 /**
3626 * Pending state of the RHWO optimization, to be applied at the next
3627 * genX(cmd_buffer_apply_pipe_flushes).
3628 */
3629 bool pending_rhwo_optimization_enabled;
3630
3631 bool conditional_render_enabled;
3632
3633 /**
3634 * Last rendering scale argument provided to
3635 * genX(cmd_buffer_emit_hashing_mode)().
3636 */
3637 unsigned current_hash_scale;
3638
3639 /**
3640 * A buffer used for spill/fill of ray queries.
3641 */
3642 struct anv_bo * ray_query_shadow_bo;
3643 };
3644
3645 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
3646 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
3647
3648 enum anv_cmd_buffer_exec_mode {
3649 ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
3650 ANV_CMD_BUFFER_EXEC_MODE_EMIT,
3651 ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
3652 ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
3653 ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
3654 ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
3655 };
3656
3657 struct anv_measure_batch;
3658
3659 struct anv_cmd_buffer {
3660 struct vk_command_buffer vk;
3661
3662 struct anv_device * device;
3663 struct anv_queue_family * queue_family;
3664
3665 /** Batch where the main commands live */
3666 struct anv_batch batch;
3667
3668 /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
3669 * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
3670 * rewrite the end to chain multiple batches together at vkQueueSubmit().
3671 */
3672 void * batch_end;
3673
3674 /* Fields required for the actual chain of anv_batch_bo's.
3675 *
3676 * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
3677 */
3678 struct list_head batch_bos;
3679 enum anv_cmd_buffer_exec_mode exec_mode;
3680
3681 /* A vector of anv_batch_bo pointers for every batch or surface buffer
3682 * referenced by this command buffer
3683 *
3684 * initialized by anv_cmd_buffer_init_batch_bo_chain()
3685 */
3686 struct u_vector seen_bbos;
3687
3688 /* A vector of int32_t's for every block of binding tables.
3689 *
3690 * initialized by anv_cmd_buffer_init_batch_bo_chain()
3691 */
3692 struct u_vector bt_block_states;
3693 struct anv_state bt_next;
3694
3695 struct anv_reloc_list surface_relocs;
3696
3697 /* Serial for tracking buffer completion */
3698 uint32_t serial;
3699
3700 /* Stream objects for storing temporary data */
3701 struct anv_state_stream surface_state_stream;
3702 struct anv_state_stream dynamic_state_stream;
3703 struct anv_state_stream general_state_stream;
3704 struct anv_state_stream indirect_push_descriptor_stream;
3705
3706 VkCommandBufferUsageFlags usage_flags;
3707
3708 struct anv_query_pool *perf_query_pool;
3709
3710 struct anv_cmd_state state;
3711
3712 struct anv_address return_addr;
3713
3714 /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
3715 uint64_t intel_perf_marker;
3716
3717 struct anv_measure_batch *measure;
3718
3719 /**
3720 * KHR_performance_query requires self-modifying command buffers; this
3721 * array holds the locations of the commands that modify the query begin and end
3722 * instructions storing performance counters. The array length is
3723 * anv_physical_device::n_perf_query_commands.
3724 */
3725 struct mi_address_token *self_mod_locations;
3726
3727 /**
3728 * Index tracking which of the self_mod_locations items have already been
3729 * used.
3730 */
3731 uint32_t perf_reloc_idx;
3732
3733 /**
3734 * Sum of all the anv_batch_bo written sizes for this command buffer
3735 * including any executed secondary command buffer.
3736 */
3737 uint32_t total_batch_size;
3738
3739 struct {
3740 /** Batch generating part of the anv_cmd_buffer::batch */
3741 struct anv_batch batch;
3742
3743 /**
3744 * Location in anv_cmd_buffer::batch at which we left some space to
3745 * insert a MI_BATCH_BUFFER_START into the
3746 * anv_cmd_buffer::generation::batch if needed.
3747 */
3748 struct anv_address jump_addr;
3749
3750 /**
3751 * Location in anv_cmd_buffer::batch at which the generation batch
3752 * should jump back to.
3753 */
3754 struct anv_address return_addr;
3755
3756 /** List of anv_batch_bo used for generation
3757 *
3758 * We have to keep this separate from the anv_cmd_buffer::batch_bos that
3759 * is used for a chaining optimization.
3760 */
3761 struct list_head batch_bos;
3762
3763 /** Ring buffer of generated commands
3764 *
3765 * When generating draws in ring mode, this buffer will hold generated
3766 * 3DPRIMITIVE commands.
3767 */
3768 struct anv_bo *ring_bo;
3769
3770 /**
3771 * State tracking of the generation shader (only used for the non-ring
3772 * mode).
3773 */
3774 struct anv_simple_shader shader_state;
3775 } generation;
3776
3777 /**
3778 * A vector of anv_bo pointers for chunks of memory used by the command
3779 * buffer that are too large to be allocated through dynamic_state_stream.
3780 * This is the case for large enough acceleration structures.
3781 *
3782 * initialized by anv_cmd_buffer_init_batch_bo_chain()
3783 */
3784 struct u_vector dynamic_bos;
3785
3786 /**
3787 * Structure holding tracepoints recorded in the command buffer.
3788 */
3789 struct u_trace trace;
3790
3791 /** Pointer to the last emitted COMPUTE_WALKER.
3792 *
3793 * This is used to edit the instruction post emission to replace the "Post
3794 * Sync" field for utrace timestamp emission.
3795 */
3796 void *last_compute_walker;
3797
3798 /** Pointer to the last emitted EXECUTE_INDIRECT_DISPATCH.
3799 *
3800 * This is used to edit the instruction post emission to replace the "Post
3801 * Sync" field for utrace timestamp emission.
3802 */
3803 void *last_indirect_dispatch;
3804
3805 struct {
3806 struct anv_video_session *vid;
3807 struct anv_video_session_params *params;
3808 } video;
3809
3810 /**
3811 * Companion RCS command buffer to support the MSAA operations on the compute
3812 * queue.
3813 */
3814 struct anv_cmd_buffer *companion_rcs_cmd_buffer;
3815
3816 /**
3817 * Whether this command buffer is a companion command buffer of a compute one.
3818 */
3819 bool is_companion_rcs_cmd_buffer;
3820
3821 };
3822
3823 extern const struct vk_command_buffer_ops anv_cmd_buffer_ops;
3824
3825 /* Determine whether we can chain a given cmd_buffer to another one. We need
3826 * to make sure that we can edit the end of the batch to point to the next one,
3827 * which requires the command buffer to not be used simultaneously.
3828 *
3829 * We could in theory also implement chaining with companion command buffers,
3830 * but let's spare ourselves some pain and misery. This optimization has no
3831 * benefit on the brand new Xe kernel driver.
3832 */
3833 static inline bool
3834 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
3835 {
3836 return !(cmd_buffer->usage_flags &
3837 VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) &&
3838 !(cmd_buffer->is_companion_rcs_cmd_buffer);
3839 }
3840
3841 static inline bool
3842 anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer *cmd_buffer)
3843 {
3844 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3845 return (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
3846 }
3847
3848 static inline bool
3849 anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer *cmd_buffer)
3850 {
3851 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3852 return (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) != 0;
3853 }
3854
3855 static inline bool
3856 anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
3857 {
3858 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3859 return queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
3860 }
3861
3862 static inline bool
3863 anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer *cmd_buffer)
3864 {
3865 struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3866 return queue_family->engine_class == INTEL_ENGINE_CLASS_COPY;
3867 }
3868
3869 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3870 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3871 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3872 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3873 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3874 struct anv_cmd_buffer *secondary);
3875 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3876 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3877 struct anv_cmd_buffer *cmd_buffer,
3878 const VkSemaphore *in_semaphores,
3879 const uint64_t *in_wait_values,
3880 uint32_t num_in_semaphores,
3881 const VkSemaphore *out_semaphores,
3882 const uint64_t *out_signal_values,
3883 uint32_t num_out_semaphores,
3884 VkFence fence,
3885 int perf_query_pass);
3886
3887 void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
3888 UNUSED VkCommandBufferResetFlags flags);
3889
3890 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3891 const void *data, uint32_t size, uint32_t alignment);
3892 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3893 uint32_t *a, uint32_t *b,
3894 uint32_t dwords, uint32_t alignment);
3895
3896 struct anv_address
3897 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3898 struct anv_state
3899 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3900 uint32_t entries, uint32_t *state_offset);
3901 struct anv_state
3902 anv_cmd_buffer_alloc_surface_states(struct anv_cmd_buffer *cmd_buffer,
3903 uint32_t count);
3904 struct anv_state
3905 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3906 uint32_t size, uint32_t alignment);
3907 struct anv_state
3908 anv_cmd_buffer_alloc_general_state(struct anv_cmd_buffer *cmd_buffer,
3909 uint32_t size, uint32_t alignment);
3910
3911 void
3912 anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
3913 uint32_t num_cmd_buffers);
3914 void
3915 anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
3916 uint32_t cmd_buffer_count,
3917 struct anv_cmd_buffer **cmd_buffers,
3918 struct anv_query_pool *perf_query_pool,
3919 uint32_t perf_query_pass);
3920 void
3921 anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
3922 uint32_t num_cmd_buffers);
3923
3924 void
3925 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
3926 enum anv_pipe_bits flushed_bits);
3927
3928 /**
3929 * An allocation tied to a command buffer.
3930 *
3931 * Don't use anv_cmd_alloc::address::map to write memory from userspace, use
3932 * anv_cmd_alloc::map instead.
3933 */
3934 struct anv_cmd_alloc {
3935 struct anv_address address;
3936 void *map;
3937 size_t size;
3938 };
3939
3940 #define ANV_EMPTY_ALLOC ((struct anv_cmd_alloc) { .map = NULL, .size = 0 })
3941
3942 static inline bool
3943 anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)
3944 {
3945 return alloc.size == 0;
3946 }
3947
3948 struct anv_cmd_alloc
3949 anv_cmd_buffer_alloc_space(struct anv_cmd_buffer *cmd_buffer,
3950 size_t size, uint32_t alignment,
3951 bool private);
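/* Usage sketch for anv_cmd_buffer_alloc_space() together with
 * anv_cmd_alloc_is_empty() above. The size, alignment and data variables are
 * illustrative assumptions:
 *
 *    struct anv_cmd_alloc alloc =
 *       anv_cmd_buffer_alloc_space(cmd_buffer, data_size, 64, false);
 *    if (anv_cmd_alloc_is_empty(alloc))
 *       return;   // allocation failed
 *    memcpy(alloc.map, data, data_size);   // CPU writes go through alloc.map
 *    // GPU access uses alloc.address (see struct anv_cmd_alloc above)
 */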
3952
3953 VkResult
3954 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3955
3956 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3957
3958 struct anv_state
3959 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
3960 struct anv_state
3961 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3962
3963 VkResult
3964 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3965 uint32_t num_entries,
3966 uint32_t *state_offset,
3967 struct anv_state *bt_state);
3968
3969 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3970
3971 static inline unsigned
3972 anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
3973 {
3974 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
3975 return MAX2(1, util_bitcount(gfx->view_mask));
3976 }
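/* Worked examples (sketch): a multiview render with view_mask 0b0101 counts
 * as 2 views, while a non-multiview render (view_mask == 0) still counts as
 * a single view:
 *
 *    gfx->view_mask = 0x5;   anv_cmd_buffer_get_view_count(cmd_buffer) == 2
 *    gfx->view_mask = 0x0;   anv_cmd_buffer_get_view_count(cmd_buffer) == 1
 */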
3977
3978 /* Save/restore cmd buffer states for meta operations */
3979 enum anv_cmd_saved_state_flags {
3980 ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE = BITFIELD_BIT(0),
3981 ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 = BITFIELD_BIT(1),
3982 ANV_CMD_SAVED_STATE_PUSH_CONSTANTS = BITFIELD_BIT(2),
3983 };
3984
3985 struct anv_cmd_saved_state {
3986 uint32_t flags;
3987
3988 struct anv_pipeline *pipeline;
3989 struct anv_descriptor_set *descriptor_set;
3990 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
3991 };
3992
3993 void anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
3994 uint32_t flags,
3995 struct anv_cmd_saved_state *state);
3996
3997 void anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
3998 struct anv_cmd_saved_state *state);
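/* Minimal sketch of the expected save/restore pattern around an internal
 * (meta) compute dispatch; the flag combination and the work in between are
 * only illustrative:
 *
 *    struct anv_cmd_saved_state saved;
 *    anv_cmd_buffer_save_state(cmd_buffer,
 *                              ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
 *                              ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
 *                              ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
 *                              &saved);
 *    ... bind the internal pipeline/descriptor set and dispatch ...
 *    anv_cmd_buffer_restore_state(cmd_buffer, &saved);
 */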
3999
4000 enum anv_bo_sync_state {
4001 /** Indicates that this is a new (or newly reset fence) */
4002 ANV_BO_SYNC_STATE_RESET,
4003
4004 /** Indicates that this fence has been submitted to the GPU but is still
4005 * (as far as we know) in use by the GPU.
4006 */
4007 ANV_BO_SYNC_STATE_SUBMITTED,
4008
4009 ANV_BO_SYNC_STATE_SIGNALED,
4010 };
4011
4012 struct anv_bo_sync {
4013 struct vk_sync sync;
4014
4015 enum anv_bo_sync_state state;
4016 struct anv_bo *bo;
4017 };
4018
4019 extern const struct vk_sync_type anv_bo_sync_type;
4020
4021 static inline bool
4022 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
4023 {
4024 return sync->type == &anv_bo_sync_type;
4025 }
4026
4027 VkResult anv_create_sync_for_memory(struct vk_device *device,
4028 VkDeviceMemory memory,
4029 bool signal_memory,
4030 struct vk_sync **sync_out);
4031
4032 struct anv_event {
4033 struct vk_object_base base;
4034 uint64_t semaphore;
4035 struct anv_state state;
4036 };
4037
4038 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
4039
4040 #define anv_foreach_stage(stage, stage_bits) \
4041 for (gl_shader_stage stage, \
4042 __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
4043 stage = __builtin_ffs(__tmp) - 1, __tmp; \
4044 __tmp &= ~(1 << (stage)))
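/* Example: iterate the shader stages named by a stage mask. The macro
 * declares the iteration variable itself; the mask and the per-stage helper
 * below are assumptions for illustration:
 *
 *    anv_foreach_stage(s, cmd_buffer->state.descriptors_dirty) {
 *       // `s` is the gl_shader_stage of each bit set in the mask
 *       flush_descriptors_for_stage(cmd_buffer, s);   // hypothetical helper
 *    }
 */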
4045
4046 struct anv_pipeline_bind_map {
4047 unsigned char surface_sha1[20];
4048 unsigned char sampler_sha1[20];
4049 unsigned char push_sha1[20];
4050
4051 uint32_t surface_count;
4052 uint32_t sampler_count;
4053 uint16_t kernel_args_size;
4054 uint16_t kernel_arg_count;
4055
4056 struct anv_pipeline_binding * surface_to_descriptor;
4057 struct anv_pipeline_binding * sampler_to_descriptor;
4058 struct brw_kernel_arg_desc * kernel_args;
4059
4060 struct anv_push_range push_ranges[4];
4061 };
4062
4063 struct anv_push_descriptor_info {
4064 /* A bitfield of descriptors used. */
4065 uint32_t used_descriptors;
4066
4067 /* A bitfield of UBO bindings fully promoted to push constants. */
4068 uint32_t fully_promoted_ubo_descriptors;
4069
4070 /* */
4071 uint8_t used_set_buffer;
4072 };
4073
4074 /* A list of values we push to implement some of the dynamic states */
4075 enum anv_dynamic_push_bits {
4076 ANV_DYNAMIC_PUSH_INPUT_VERTICES = BITFIELD_BIT(0),
4077 };
4078
4079 struct anv_shader_upload_params {
4080 gl_shader_stage stage;
4081
4082 const void *key_data;
4083 uint32_t key_size;
4084
4085 const void *kernel_data;
4086 uint32_t kernel_size;
4087
4088 const struct brw_stage_prog_data *prog_data;
4089 uint32_t prog_data_size;
4090
4091 const struct brw_compile_stats *stats;
4092 uint32_t num_stats;
4093
4094 const struct nir_xfb_info *xfb_info;
4095
4096 const struct anv_pipeline_bind_map *bind_map;
4097
4098 const struct anv_push_descriptor_info *push_desc_info;
4099
4100 enum anv_dynamic_push_bits dynamic_push_values;
4101 };
4102
4103 struct anv_shader_bin {
4104 struct vk_pipeline_cache_object base;
4105
4106 gl_shader_stage stage;
4107
4108 struct anv_state kernel;
4109 uint32_t kernel_size;
4110
4111 const struct brw_stage_prog_data *prog_data;
4112 uint32_t prog_data_size;
4113
4114 struct brw_compile_stats stats[3];
4115 uint32_t num_stats;
4116
4117 struct nir_xfb_info *xfb_info;
4118
4119 struct anv_push_descriptor_info push_desc_info;
4120
4121 struct anv_pipeline_bind_map bind_map;
4122
4123 enum anv_dynamic_push_bits dynamic_push_values;
4124 };
4125
4126 struct anv_shader_bin *
4127 anv_shader_bin_create(struct anv_device *device,
4128 gl_shader_stage stage,
4129 const void *key, uint32_t key_size,
4130 const void *kernel, uint32_t kernel_size,
4131 const struct brw_stage_prog_data *prog_data,
4132 uint32_t prog_data_size,
4133 const struct brw_compile_stats *stats, uint32_t num_stats,
4134 const struct nir_xfb_info *xfb_info,
4135 const struct anv_pipeline_bind_map *bind_map,
4136 const struct anv_push_descriptor_info *push_desc_info,
4137 enum anv_dynamic_push_bits dynamic_push_values);
4138
4139
4140 static inline struct anv_shader_bin *
4141 anv_shader_bin_ref(struct anv_shader_bin *shader)
4142 {
4143 vk_pipeline_cache_object_ref(&shader->base);
4144
4145 return shader;
4146 }
4147
4148 static inline void
4149 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
4150 {
4151 vk_pipeline_cache_object_unref(&device->vk, &shader->base);
4152 }
4153
4154 struct anv_pipeline_executable {
4155 gl_shader_stage stage;
4156
4157 struct brw_compile_stats stats;
4158
4159 char *nir;
4160 char *disasm;
4161 };
4162
4163 enum anv_pipeline_type {
4164 ANV_PIPELINE_GRAPHICS,
4165 ANV_PIPELINE_GRAPHICS_LIB,
4166 ANV_PIPELINE_COMPUTE,
4167 ANV_PIPELINE_RAY_TRACING,
4168 };
4169
4170 struct anv_pipeline {
4171 struct vk_object_base base;
4172
4173 struct anv_device * device;
4174
4175 struct anv_batch batch;
4176 struct anv_reloc_list batch_relocs;
4177
4178 void * mem_ctx;
4179
4180 enum anv_pipeline_type type;
4181 VkPipelineCreateFlags flags;
4182
4183 VkPipelineCreateFlags2KHR active_stages;
4184
4185 uint32_t ray_queries;
4186
4187 /**
4188 * Mask of stages that are accessing push descriptors.
4189 */
4190 VkShaderStageFlags use_push_descriptor;
4191
4192 /**
4193 * Mask of stages that are accessing the push descriptors buffer.
4194 */
4195 VkShaderStageFlags use_push_descriptor_buffer;
4196
4197 /**
4198 * Maximum scratch size for all shaders in this pipeline.
4199 */
4200 uint32_t scratch_size;
4201
4202 /* Layout of the sets used by the pipeline. */
4203 struct anv_pipeline_sets_layout layout;
4204
4205 struct util_dynarray executables;
4206
4207 const struct intel_l3_config * l3_config;
4208 };
4209
4210 /* The base graphics pipeline object only holds shaders. */
4211 struct anv_graphics_base_pipeline {
4212 struct anv_pipeline base;
4213
4214 struct vk_sample_locations_state sample_locations;
4215
4216 /* Shaders */
4217 struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4218
4219 /* A small hash based on shader_info::source_sha1 for identifying
4220 * shaders in renderdoc/shader-db.
4221 */
4222 uint32_t source_hashes[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4223
4224 /* Feedback index in
4225 * VkPipelineCreationFeedbackCreateInfo::pPipelineStageCreationFeedbacks
4226 *
4227 * For pipeline libraries, we need to remember the order at creation when
4228 * included into a linked pipeline.
4229 */
4230 uint32_t feedback_index[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4231
4232 /* Robustness flags used by the shaders
4233 */
4234 enum brw_robustness_flags robust_flags[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4235
4236 /* True if at the time the fragment shader was compiled, it didn't have all
4237 * the information to avoid INTEL_MSAA_FLAG_ENABLE_DYNAMIC.
4238 */
4239 bool fragment_dynamic;
4240 };
4241
4242 /* The library graphics pipeline object has a partial graphics state and
4243 * possibly some shaders. If requested, shaders are also present in early NIR
4244 * form.
4245 */
4246 struct anv_graphics_lib_pipeline {
4247 struct anv_graphics_base_pipeline base;
4248
4249 VkGraphicsPipelineLibraryFlagsEXT lib_flags;
4250
4251 struct vk_graphics_pipeline_all_state all_state;
4252 struct vk_graphics_pipeline_state state;
4253
4254 /* Retained shaders for link optimization. */
4255 struct {
4256 /* This hash is the same as computed in
4257 * anv_graphics_pipeline_gather_shaders().
4258 */
4259 unsigned char shader_sha1[20];
4260
4261 enum gl_subgroup_size subgroup_size_type;
4262
4263 /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
4264 * constants.
4265 */
4266 nir_shader * nir;
4267 } retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4268
4269 /* Whether the shaders have been retained */
4270 bool retain_shaders;
4271 };
4272
4273 struct anv_gfx_state_ptr {
4274 /* Both in dwords */
4275 uint16_t offset;
4276 uint16_t len;
4277 };
4278
4279 /* The final graphics pipeline object has all the graphics state ready to be
4280 * programmed into HW packets (dynamic_state field) or fully baked in its
4281 * batch.
4282 */
4283 struct anv_graphics_pipeline {
4284 struct anv_graphics_base_pipeline base;
4285
4286 struct vk_vertex_input_state vertex_input;
4287 struct vk_sample_locations_state sample_locations;
4288 struct vk_dynamic_graphics_state dynamic_state;
4289
4290 /* If true, the patch control points are passed through push constants
4291 * (anv_push_constants::gfx::tcs_input_vertices)
4292 */
4293 bool dynamic_patch_control_points;
4294
4295 /* This field is required with dynamic primitive topology;
4296 * rasterization_samples is used only with gen < 8.
4297 */
4298 uint32_t rasterization_samples;
4299
4300 uint32_t view_mask;
4301 uint32_t instance_multiplier;
4302
4303 bool rp_has_ds_self_dep;
4304
4305 bool kill_pixel;
4306 bool force_fragment_thread_dispatch;
4307 bool uses_xfb;
4308
4309 /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */
4310 uint32_t vs_input_elements;
4311
4312 /* Number of VERTEX_ELEMENT_STATE elements we need to implement some of the
4313 * draw parameters
4314 */
4315 uint32_t svgs_count;
4316
4317 /* Pre computed VERTEX_ELEMENT_STATE structures for the vertex input that
4318 * can be copied into the anv_cmd_buffer behind a 3DSTATE_VERTEX_BUFFER.
4319 *
4320 * When MESA_VK_DYNAMIC_VI is not dynamic
4321 *
4322 * vertex_input_elems = vs_input_elements + svgs_count
4323 *
4324 * All the VERTEX_ELEMENT_STATE can be directly copied behind a
4325 * 3DSTATE_VERTEX_ELEMENTS instruction in the command buffer. Otherwise
4326 * this array only holds the svgs_count elements.
4327 */
4328 uint32_t vertex_input_elems;
4329 uint32_t vertex_input_data[2 * 31 /* MAX_VES + 2 internal */];
4330
4331 enum intel_msaa_flags fs_msaa_flags;
4332
4333 /* Pre computed CS instructions that can directly be copied into
4334 * anv_cmd_buffer.
4335 */
4336 uint32_t batch_data[416];
4337
4338 /* Urb setup utilized by this pipeline. */
4339 struct intel_urb_config urb_cfg;
4340
4341 /* Fully baked instructions, ready to be emitted in the anv_cmd_buffer */
4342 struct {
4343 struct anv_gfx_state_ptr urb;
4344 struct anv_gfx_state_ptr vf_statistics;
4345 struct anv_gfx_state_ptr vf_sgvs;
4346 struct anv_gfx_state_ptr vf_sgvs_2;
4347 struct anv_gfx_state_ptr vf_sgvs_instancing;
4348 struct anv_gfx_state_ptr vf_instancing;
4349 struct anv_gfx_state_ptr primitive_replication;
4350 struct anv_gfx_state_ptr sbe;
4351 struct anv_gfx_state_ptr sbe_swiz;
4352 struct anv_gfx_state_ptr so_decl_list;
4353 struct anv_gfx_state_ptr ms;
4354 struct anv_gfx_state_ptr vs;
4355 struct anv_gfx_state_ptr hs;
4356 struct anv_gfx_state_ptr ds;
4357 struct anv_gfx_state_ptr ps;
4358
4359 struct anv_gfx_state_ptr task_control;
4360 struct anv_gfx_state_ptr task_shader;
4361 struct anv_gfx_state_ptr task_redistrib;
4362 struct anv_gfx_state_ptr clip_mesh;
4363 struct anv_gfx_state_ptr mesh_control;
4364 struct anv_gfx_state_ptr mesh_shader;
4365 struct anv_gfx_state_ptr mesh_distrib;
4366 struct anv_gfx_state_ptr sbe_mesh;
4367 } final;
4368
4369 /* Pre packed CS instructions & structures that need to be merged later
4370 * with dynamic state.
4371 */
4372 struct {
4373 struct anv_gfx_state_ptr clip;
4374 struct anv_gfx_state_ptr sf;
4375 struct anv_gfx_state_ptr raster;
4376 struct anv_gfx_state_ptr ps_extra;
4377 struct anv_gfx_state_ptr wm;
4378 struct anv_gfx_state_ptr so;
4379 struct anv_gfx_state_ptr gs;
4380 struct anv_gfx_state_ptr te;
4381 struct anv_gfx_state_ptr vfg;
4382 } partial;
4383 };
4384
4385 #define anv_batch_merge_pipeline_state(batch, dwords0, pipeline, state) \
4386 do { \
4387 uint32_t *dw; \
4388 \
4389 assert(ARRAY_SIZE(dwords0) == (pipeline)->state.len); \
4390 dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \
4391 if (!dw) \
4392 break; \
4393 for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \
4394 dw[i] = (dwords0)[i] | \
4395 (pipeline)->batch_data[(pipeline)->state.offset + i]; \
4396 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4)); \
4397 } while (0)
4398
4399 #define anv_batch_emit_pipeline_state(batch, pipeline, state) \
4400 do { \
4401 if ((pipeline)->state.len == 0) \
4402 break; \
4403 uint32_t *dw; \
4404 dw = anv_batch_emit_dwords((batch), (pipeline)->state.len); \
4405 if (!dw) \
4406 break; \
4407 memcpy(dw, &(pipeline)->batch_data[(pipeline)->state.offset], \
4408 4 * (pipeline)->state.len); \
4409 } while (0)
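/* Illustrative use of the two macros above from genX command-buffer code.
 * The chosen state fields and the dwords0 array are examples only:
 *
 *    // Fully baked packets are copied verbatim into the batch:
 *    anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vs);
 *
 *    // Partially packed packets are OR'ed, dword by dword, with packets
 *    // packed from dynamic state at record time:
 *    anv_batch_merge_pipeline_state(&cmd_buffer->batch, clip_dwords,
 *                                   pipeline, partial.clip);
 */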
4410
4411
4412 struct anv_compute_pipeline {
4413 struct anv_pipeline base;
4414
4415 struct anv_shader_bin * cs;
4416 uint32_t batch_data[9];
4417 uint32_t interface_descriptor_data[8];
4418
4419 /* A small hash based on shader_info::source_sha1 for identifying shaders
4420 * in renderdoc/shader-db.
4421 */
4422 uint32_t source_hash;
4423 };
4424
4425 struct anv_rt_shader_group {
4426 VkRayTracingShaderGroupTypeKHR type;
4427
4428 /* Whether this group was imported from another pipeline */
4429 bool imported;
4430
4431 struct anv_shader_bin *general;
4432 struct anv_shader_bin *closest_hit;
4433 struct anv_shader_bin *any_hit;
4434 struct anv_shader_bin *intersection;
4435
4436 /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
4437 uint32_t handle[8];
4438 };
4439
4440 struct anv_ray_tracing_pipeline {
4441 struct anv_pipeline base;
4442
4443 /* All shaders in the pipeline */
4444 struct util_dynarray shaders;
4445
4446 uint32_t group_count;
4447 struct anv_rt_shader_group * groups;
4448
4449 /* If non-zero, this is the default computed stack size as per the stack
4450 * size computation in the Vulkan spec. If zero, that indicates that the
4451 * client has requested a dynamic stack size.
4452 */
4453 uint32_t stack_size;
4454 };
4455
4456 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
4457 static inline struct anv_##pipe_type##_pipeline * \
4458 anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
4459 { \
4460 assert(pipeline->type == pipe_enum); \
4461 return (struct anv_##pipe_type##_pipeline *) pipeline; \
4462 }
4463
4464 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
4465 ANV_DECL_PIPELINE_DOWNCAST(graphics_base, ANV_PIPELINE_GRAPHICS)
4466 ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
4467 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
4468 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
4469
4470 static inline bool
4471 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
4472 gl_shader_stage stage)
4473 {
4474 return (pipeline->base.base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4475 }
4476
4477 static inline bool
4478 anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
4479 gl_shader_stage stage)
4480 {
4481 return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4482 }
4483
4484 static inline bool
4485 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
4486 {
4487 return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
4488 }
4489
4490 static inline bool
4491 anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
4492 {
4493 return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
4494 }
4495
4496 static inline bool
4497 anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
4498 {
4499 const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
4500 const struct vk_dynamic_graphics_state *dyn =
4501 &cmd_buffer->vk.dynamic_graphics_state;
4502 uint8_t color_writes = dyn->cb.color_write_enables;
4503
4504 /* All writes disabled through vkCmdSetColorWriteEnableEXT */
4505 if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
4506 return true;
4507
4508 /* Or all write masks are empty */
4509 for (uint32_t i = 0; i < state->color_att_count; i++) {
4510 if (dyn->cb.attachments[i].write_mask != 0)
4511 return false;
4512 }
4513
4514 return true;
4515 }
4516
4517 static inline void
4518 anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state)
4519 {
4520 state->has_uint_rt = false;
4521 for (unsigned a = 0; a < state->color_att_count; a++) {
4522 if (vk_format_is_int(state->color_att[a].vk_format)) {
4523 state->has_uint_rt = true;
4524 break;
4525 }
4526 }
4527 }
4528
4529 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
4530 static inline const struct brw_##prefix##_prog_data * \
4531 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
4532 { \
4533 if (anv_pipeline_has_stage(pipeline, stage)) { \
4534 return (const struct brw_##prefix##_prog_data *) \
4535 pipeline->base.shaders[stage]->prog_data; \
4536 } else { \
4537 return NULL; \
4538 } \
4539 }
4540
4541 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
4542 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
4543 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
4544 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
4545 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
4546 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
4547 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
4548
4549 static inline const struct brw_cs_prog_data *
4550 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
4551 {
4552 assert(pipeline->cs);
4553 return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
4554 }
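/* Example use of the generated accessors above; a NULL return means the
 * stage is not present in the pipeline (sketch only):
 *
 *    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 *    if (wm_prog_data != NULL && wm_prog_data->uses_kill) {
 *       // fragment shader is present and may discard pixels
 *    }
 */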
4555
4556 static inline const struct brw_vue_prog_data *
4557 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
4558 {
4559 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
4560 return &get_gs_prog_data(pipeline)->base;
4561 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
4562 return &get_tes_prog_data(pipeline)->base;
4563 else
4564 return &get_vs_prog_data(pipeline)->base;
4565 }
4566
4567 VkResult
4568 anv_device_init_rt_shaders(struct anv_device *device);
4569
4570 void
4571 anv_device_finish_rt_shaders(struct anv_device *device);
4572
4573 struct anv_kernel_arg {
4574 bool is_ptr;
4575 uint16_t size;
4576
4577 union {
4578 uint64_t u64;
4579 void *ptr;
4580 };
4581 };
4582
4583 struct anv_kernel {
4584 #ifndef NDEBUG
4585 const char *name;
4586 #endif
4587 struct anv_shader_bin *bin;
4588 const struct intel_l3_config *l3_config;
4589 };
4590
4591 struct anv_format_plane {
4592 enum isl_format isl_format:16;
4593 struct isl_swizzle swizzle;
4594
4595 /* What aspect is associated with this plane */
4596 VkImageAspectFlags aspect;
4597 };
4598
4599 struct anv_format {
4600 struct anv_format_plane planes[3];
4601 VkFormat vk_format;
4602 uint8_t n_planes;
4603 bool can_ycbcr;
4604 bool can_video;
4605 };
4606
4607 static inline void
4608 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
4609 {
4610 if (util_bitcount(aspects) == 1) {
4611 assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
4612 VK_IMAGE_ASPECT_DEPTH_BIT |
4613 VK_IMAGE_ASPECT_STENCIL_BIT |
4614 VK_IMAGE_ASPECT_PLANE_0_BIT |
4615 VK_IMAGE_ASPECT_PLANE_1_BIT |
4616 VK_IMAGE_ASPECT_PLANE_2_BIT));
4617 } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
4618 assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
4619 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
4620 VK_IMAGE_ASPECT_PLANE_1_BIT) ||
4621 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
4622 VK_IMAGE_ASPECT_PLANE_1_BIT |
4623 VK_IMAGE_ASPECT_PLANE_2_BIT));
4624 } else {
4625 assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
4626 VK_IMAGE_ASPECT_STENCIL_BIT));
4627 }
4628 }
4629
4630 /**
4631 * Return the aspect's plane relative to all_aspects. For an image, for
4632 * instance, all_aspects would be the set of aspects in the image. For
4633 * an image view, all_aspects would be the subset of aspects represented
4634 * by that particular view.
4635 */
4636 static inline uint32_t
4637 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
4638 VkImageAspectFlagBits aspect)
4639 {
4640 anv_assert_valid_aspect_set(all_aspects);
4641 assert(util_bitcount(aspect) == 1);
4642 assert(!(aspect & ~all_aspects));
4643
4644 /* Because we always put image and view planes in aspect-bit-order, the
4645 * plane index is the number of bits in all_aspects before aspect.
4646 */
4647 return util_bitcount(all_aspects & (aspect - 1));
4648 }
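/* Worked examples (sketch): with a 3-plane image,
 *
 *    anv_aspect_to_plane(VK_IMAGE_ASPECT_PLANE_0_BIT |
 *                        VK_IMAGE_ASPECT_PLANE_1_BIT |
 *                        VK_IMAGE_ASPECT_PLANE_2_BIT,
 *                        VK_IMAGE_ASPECT_PLANE_2_BIT) == 2
 *
 * and with a combined depth/stencil image,
 *
 *    anv_aspect_to_plane(VK_IMAGE_ASPECT_DEPTH_BIT |
 *                        VK_IMAGE_ASPECT_STENCIL_BIT,
 *                        VK_IMAGE_ASPECT_STENCIL_BIT) == 1
 */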
4649
4650 #define anv_foreach_image_aspect_bit(b, image, aspects) \
4651 u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
4652
4653 const struct anv_format *
4654 anv_get_format(VkFormat format);
4655
4656 static inline uint32_t
4657 anv_get_format_planes(VkFormat vk_format)
4658 {
4659 const struct anv_format *format = anv_get_format(vk_format);
4660
4661 return format != NULL ? format->n_planes : 0;
4662 }
4663
4664 struct anv_format_plane
4665 anv_get_format_plane(const struct intel_device_info *devinfo,
4666 VkFormat vk_format, uint32_t plane,
4667 VkImageTiling tiling);
4668
4669 struct anv_format_plane
4670 anv_get_format_aspect(const struct intel_device_info *devinfo,
4671 VkFormat vk_format,
4672 VkImageAspectFlagBits aspect, VkImageTiling tiling);
4673
4674 static inline enum isl_format
4675 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
4676 VkImageAspectFlags aspect, VkImageTiling tiling)
4677 {
4678 return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
4679 }
4680
4681 bool anv_format_supports_ccs_e(const struct intel_device_info *devinfo,
4682 const enum isl_format format);
4683
4684 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
4685 VkImageCreateFlags create_flags,
4686 VkFormat vk_format, VkImageTiling vk_tiling,
4687 VkImageUsageFlags vk_usage,
4688 const VkImageFormatListCreateInfo *fmt_list);
4689
4690 extern VkFormat
4691 vk_format_from_android(unsigned android_format, unsigned android_usage);
4692
4693 static inline VkFormat
4694 anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format)
4695 {
4696 if (pdevice->flush_astc_ldr_void_extent_denorms) {
4697 const struct util_format_description *desc =
4698 vk_format_description(format);
4699 if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC &&
4700 desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB)
4701 return format;
4702 }
4703
4704 if (pdevice->emu_astc_ldr)
4705 return vk_texcompress_astc_emulation_format(format);
4706
4707 return VK_FORMAT_UNDEFINED;
4708 }
4709
4710 static inline bool
4711 anv_is_format_emulated(const struct anv_physical_device *pdevice, VkFormat format)
4712 {
4713 return anv_get_emulation_format(pdevice, format) != VK_FORMAT_UNDEFINED;
4714 }
4715
4716 static inline struct isl_swizzle
4717 anv_swizzle_for_render(struct isl_swizzle swizzle)
4718 {
4719 /* Sometimes the swizzle will have alpha map to one. We do this to fake
4720 * RGB as RGBA for texturing
4721 */
4722 assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
4723 swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
4724
4725 /* But it doesn't matter what we render to that channel */
4726 swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
4727
4728 return swizzle;
4729 }
4730
4731 void
4732 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
4733
4734 /**
4735 * Describes how each part of anv_image will be bound to memory.
4736 */
4737 struct anv_image_memory_range {
4738 /**
4739 * Disjoint bindings into which each portion of the image will be bound.
4740 *
4741 * Binding images to memory can be complicated and involve binding different
4742 * portions of the image to different memory objects or regions. For most
4743 * images, everything lives in the MAIN binding and gets bound by
4744 * vkBindImageMemory. For disjoint multi-planar images, each plane has
4745 * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
4746 * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are
4747 * implicit or driver-managed and live in special-case bindings.
4748 */
4749 enum anv_image_memory_binding {
4750 /**
4751 * Used if and only if image is not multi-planar disjoint. Bound by
4752 * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
4753 */
4754 ANV_IMAGE_MEMORY_BINDING_MAIN,
4755
4756 /**
4757 * Used if and only if image is multi-planar disjoint. Bound by
4758 * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
4759 */
4760 ANV_IMAGE_MEMORY_BINDING_PLANE_0,
4761 ANV_IMAGE_MEMORY_BINDING_PLANE_1,
4762 ANV_IMAGE_MEMORY_BINDING_PLANE_2,
4763
4764 /**
4765 * Driver-private bo. In special cases we may store the aux surface and/or
4766 * aux state in this binding.
4767 */
4768 ANV_IMAGE_MEMORY_BINDING_PRIVATE,
4769
4770 /** Sentinel */
4771 ANV_IMAGE_MEMORY_BINDING_END,
4772 } binding;
4773
4774 /**
4775 * Offset is relative to the start of the binding created by
4776 * vkBindImageMemory, not to the start of the bo.
4777 */
4778 uint64_t offset;
4779
4780 uint64_t size;
4781 uint32_t alignment;
4782 };
4783
4784 /**
4785 * Subsurface of an anv_image.
4786 */
4787 struct anv_surface {
4788 struct isl_surf isl;
4789 struct anv_image_memory_range memory_range;
4790 };
4791
4792 static inline bool MUST_CHECK
4793 anv_surface_is_valid(const struct anv_surface *surface)
4794 {
4795 return surface->isl.size_B > 0 && surface->memory_range.size > 0;
4796 }
4797
4798 struct anv_image {
4799 struct vk_image vk;
4800
4801 uint32_t n_planes;
4802
4803 /**
4804 * Image has multi-planar format and was created with
4805 * VK_IMAGE_CREATE_DISJOINT_BIT.
4806 */
4807 bool disjoint;
4808
4809 /**
4810 * Image is a WSI image
4811 */
4812 bool from_wsi;
4813
4814 /**
4815 * Image was imported from a struct AHardwareBuffer. We have to delay
4816 * final image creation until bind time.
4817 */
4818 bool from_ahb;
4819
4820 /**
4821 * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
4822 * must be released when the image is destroyed.
4823 */
4824 bool from_gralloc;
4825
4826 /**
4827 * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC
4828 * LDR workaround or emulation.
4829 */
4830 VkFormat emu_plane_format;
4831
4832 /**
4833 * The memory bindings created by vkCreateImage and vkBindImageMemory.
4834 *
4835 * For details on the image's memory layout, see check_memory_bindings().
4836 *
4837 * vkCreateImage constructs the `memory_range` for each
4838 * anv_image_memory_binding. After vkCreateImage, each binding is valid if
4839 * and only if `memory_range::size > 0`.
4840 *
4841 * vkBindImageMemory binds each valid `memory_range` to an `address`.
4842 * Usually, the app will provide the address via the parameters of
4843 * vkBindImageMemory. However, special-case bindings may be bound to
4844 * driver-private memory.
4845 */
4846 struct anv_image_binding {
4847 struct anv_image_memory_range memory_range;
4848 struct anv_address address;
4849 struct anv_sparse_binding_data sparse_data;
4850 } bindings[ANV_IMAGE_MEMORY_BINDING_END];
4851
4852 /**
4853 * Image subsurfaces
4854 *
4855 * For each plane x, anv_image::planes[x] is valid if and only if
4856 * anv_image::aspects contains the corresponding aspect. Refer to
4857 * anv_image_aspect_to_plane() to find the plane number for a given aspect.
4858 *
4859 * The hardware requires that the depth buffer and stencil buffer be
4860 * separate surfaces. From Vulkan's perspective, though, depth and stencil
4861 * reside in the same VkImage. To satisfy both the hardware and Vulkan, we
4862 * allocate the depth and stencil buffers as separate surfaces in the same
4863 * bo.
4864 */
4865 struct anv_image_plane {
4866 struct anv_surface primary_surface;
4867
4868 /**
4869 * The base aux usage for this image. For color images, this can be
4870 * either CCS_E or CCS_D depending on whether or not we can reliably
4871 * leave CCS on all the time.
4872 */
4873 enum isl_aux_usage aux_usage;
4874
4875 struct anv_surface aux_surface;
4876
4877 /** Location of the compression control surface. */
4878 struct anv_image_memory_range compr_ctrl_memory_range;
4879
4880 /** Location of the fast clear state. */
4881 struct anv_image_memory_range fast_clear_memory_range;
4882
4883 /**
4884 * Whether this image can be fast cleared with non-zero clear colors.
4885 * This can happen with mutable images when formats of different bit
4886 * sizes per component are used.
4887 *
4888 * On Gfx9+, because the clear colors are stored as four 32-bit component
4889 * values, we can clear in R16G16_UNORM (store 2 16bit values in the
4890 * components 0 & 1 of the clear color) and then draw in R32_UINT which
4891 * would interpret the clear color as a single component value, using
4892 * only the first 16bit component of the previous written clear color.
4893 *
4894 * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported, this
4895 * boolean will prevent the usage of CC_ONE.
4896 */
4897 bool can_non_zero_fast_clear;
4898
4899 struct {
4900 /** Whether the image has CCS data mapped through AUX-TT. */
4901 bool mapped;
4902
4903 /** Main address of the mapping. */
4904 uint64_t addr;
4905
4906 /** Size of the mapping. */
4907 uint64_t size;
4908 } aux_tt;
4909 } planes[3];
4910
4911 struct anv_image_memory_range vid_dmv_top_surface;
4912
4913 /* Link in the anv_device.image_private_objects list */
4914 struct list_head link;
4915 };
4916
4917 static inline bool
4918 anv_image_is_sparse(struct anv_image *image)
4919 {
4920 return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
4921 }
4922
4923 static inline bool
4924 anv_image_is_externally_shared(const struct anv_image *image)
4925 {
4926 return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
4927 image->vk.external_handle_types != 0;
4928 }
4929
4930 static inline bool
4931 anv_image_has_private_binding(const struct anv_image *image)
4932 {
4933 const struct anv_image_binding private_binding =
4934 image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
4935 return private_binding.memory_range.size != 0;
4936 }
4937
4938 static inline bool
4939 anv_image_format_is_d16_or_s8(const struct anv_image *image)
4940 {
4941 return image->vk.format == VK_FORMAT_D16_UNORM ||
4942 image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT ||
4943 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
4944 image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
4945 image->vk.format == VK_FORMAT_S8_UINT;
4946 }
4947
4948 /* The ordering of this enum is important */
4949 enum anv_fast_clear_type {
4950 /** Image does not have/support any fast-clear blocks */
4951 ANV_FAST_CLEAR_NONE = 0,
4952 /** Image has/supports fast-clear but only to the default value */
4953 ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
4954 /** Image has/supports fast-clear with an arbitrary fast-clear value */
4955 ANV_FAST_CLEAR_ANY = 2,
4956 };
4957
4958 /**
4959 * Return the aspect's _format_ plane, not its _memory_ plane (using the
4960 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
4961 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
4962 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
4963 */
4964 static inline uint32_t
4965 anv_image_aspect_to_plane(const struct anv_image *image,
4966 VkImageAspectFlagBits aspect)
4967 {
4968 return anv_aspect_to_plane(image->vk.aspects, aspect);
4969 }
4970
4971 /* Returns the number of auxiliary buffer levels attached to an image. */
4972 static inline uint8_t
4973 anv_image_aux_levels(const struct anv_image * const image,
4974 VkImageAspectFlagBits aspect)
4975 {
4976 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4977 if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
4978 return 0;
4979
4980 return image->vk.mip_levels;
4981 }
4982
4983 /* Returns the number of auxiliary buffer layers attached to an image. */
4984 static inline uint32_t
4985 anv_image_aux_layers(const struct anv_image * const image,
4986 VkImageAspectFlagBits aspect,
4987 const uint8_t miplevel)
4988 {
4989 assert(image);
4990
4991 /* The miplevel must exist in the main buffer. */
4992 assert(miplevel < image->vk.mip_levels);
4993
4994 if (miplevel >= anv_image_aux_levels(image, aspect)) {
4995 /* There are no layers with auxiliary data because the miplevel has no
4996 * auxiliary data.
4997 */
4998 return 0;
4999 }
5000
5001 return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
5002 }
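/* Worked example (sketch): a 3D image with extent.depth == 64 has
 * 64 >> 2 == 16 aux layers at miplevel 2, while a 2D array image with 8
 * layers has 8 aux layers at every miplevel that carries aux data.
 */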
5003
5004 static inline struct anv_address MUST_CHECK
5005 anv_image_address(const struct anv_image *image,
5006 const struct anv_image_memory_range *mem_range)
5007 {
5008 const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
5009 assert(binding->memory_range.offset == 0);
5010
5011 if (mem_range->size == 0)
5012 return ANV_NULL_ADDRESS;
5013
5014 return anv_address_add(binding->address, mem_range->offset);
5015 }
5016
5017 static inline struct anv_address
5018 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
5019 const struct anv_image *image,
5020 VkImageAspectFlagBits aspect)
5021 {
5022 assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
5023 VK_IMAGE_ASPECT_DEPTH_BIT));
5024
5025 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5026 const struct anv_image_memory_range *mem_range =
5027 &image->planes[plane].fast_clear_memory_range;
5028
5029 return anv_image_address(image, mem_range);
5030 }
5031
5032 static inline struct anv_address
5033 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
5034 const struct anv_image *image,
5035 VkImageAspectFlagBits aspect)
5036 {
5037 struct anv_address addr =
5038 anv_image_get_clear_color_addr(device, image, aspect);
5039
5040 unsigned clear_color_state_size;
5041 if (device->info->ver >= 11) {
5042 /* The fast clear type and the first compression state are stored in the
5043 * last 2 dwords of the clear color struct. Refer to the comment in
5044 * add_aux_state_tracking_buffer().
5045 */
5046 assert(device->isl_dev.ss.clear_color_state_size >= 32);
5047 clear_color_state_size = device->isl_dev.ss.clear_color_state_size - 8;
5048 } else
5049 clear_color_state_size = device->isl_dev.ss.clear_value_size;
5050 return anv_address_add(addr, clear_color_state_size);
5051 }
5052
5053 static inline struct anv_address
5054 anv_image_get_compression_state_addr(const struct anv_device *device,
5055 const struct anv_image *image,
5056 VkImageAspectFlagBits aspect,
5057 uint32_t level, uint32_t array_layer)
5058 {
5059 assert(level < anv_image_aux_levels(image, aspect));
5060 assert(array_layer < anv_image_aux_layers(image, aspect, level));
5061 UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5062 assert(isl_aux_usage_has_ccs_e(image->planes[plane].aux_usage));
5063
5064 /* Relative to start of the plane's fast clear type */
5065 uint32_t offset;
5066
5067 offset = 4; /* Go past the fast clear type */
5068
5069 if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
5070 for (uint32_t l = 0; l < level; l++)
5071 offset += u_minify(image->vk.extent.depth, l) * 4;
5072 } else {
5073 offset += level * image->vk.array_layers * 4;
5074 }
5075
5076 offset += array_layer * 4;
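/* Worked example (hypothetical values): for a 2D image with array_layers == 3,
 * level == 2 and array_layer == 1 give offset = 4 + 2 * 3 * 4 + 1 * 4 = 32
 * bytes past the plane's fast clear type.
 */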
5077
5078 assert(offset < image->planes[plane].fast_clear_memory_range.size);
5079
5080 return anv_address_add(
5081 anv_image_get_fast_clear_type_addr(device, image, aspect),
5082 offset);
5083 }
5084
5085 static inline const struct anv_image_memory_range *
5086 anv_image_get_aux_memory_range(const struct anv_image *image,
5087 uint32_t plane)
5088 {
5089 if (image->planes[plane].aux_surface.memory_range.size > 0)
5090 return &image->planes[plane].aux_surface.memory_range;
5091 else
5092 return &image->planes[plane].compr_ctrl_memory_range;
5093 }
5094
5095 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
5096 static inline bool
5097 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
5098 const struct anv_image *image)
5099 {
5100 if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
5101 return false;
5102
5103 /* For Gfx8-11, there are some restrictions around sampling from HiZ.
5104 * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
5105 * say:
5106 *
5107 * "If this field is set to AUX_HIZ, Number of Multisamples must
5108 * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
5109 */
5110 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
5111 return false;
5112
5113 if (!devinfo->has_sample_with_hiz)
5114 return false;
5115
5116 return image->vk.samples == 1;
5117 }
5118
5119 /* Returns true if an MCS-enabled buffer can be sampled from while fast-cleared. */
5120 static inline bool
5121 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
5122 const struct anv_image *image)
5123 {
5124 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
5125 const uint32_t plane =
5126 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
5127
5128 assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
5129
5130 const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
5131
5132 /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
5133 * See HSD 1707282275, wa_14013111325. Due to the use of
5134 * format-reinterpretation, a simplified workaround is implemented.
5135 */
5136 if (intel_needs_workaround(devinfo, 14013111325) &&
5137 isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
5138 return false;
5139 }
5140
5141 return true;
5142 }
5143
5144 static inline bool
5145 anv_image_plane_uses_aux_map(const struct anv_device *device,
5146 const struct anv_image *image,
5147 uint32_t plane)
5148 {
5149 return device->info->has_aux_map &&
5150 isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
5151 }
5152
5153 static inline bool
5154 anv_image_uses_aux_map(const struct anv_device *device,
5155 const struct anv_image *image)
5156 {
5157 for (uint32_t p = 0; p < image->n_planes; ++p) {
5158 if (anv_image_plane_uses_aux_map(device, image, p))
5159 return true;
5160 }
5161
5162 return false;
5163 }
5164
5165 static inline bool
5166 anv_bo_allows_aux_map(const struct anv_device *device,
5167 const struct anv_bo *bo)
5168 {
5169 if (device->aux_map_ctx == NULL)
5170 return false;
5171
5172 return (bo->alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) != 0;
5173 }
5174
5175 static inline bool
5176 anv_address_allows_aux_map(const struct anv_device *device,
5177 struct anv_address addr)
5178 {
5179 if (device->aux_map_ctx == NULL)
5180 return false;
5181
5182 /* Technically, we really only care about what offset the image is bound
5183 * into on the BO, but we don't have that information here. As a heuristic,
5184 * rely on the BO offset instead.
5185 */
5186 if (anv_address_physical(addr) %
5187 intel_aux_map_get_alignment(device->aux_map_ctx) != 0)
5188 return false;
5189
5190 return true;
5191 }
5192
5193 void
5194 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
5195 const struct anv_image *image,
5196 VkImageAspectFlagBits aspect,
5197 enum isl_aux_usage aux_usage,
5198 uint32_t level,
5199 uint32_t base_layer,
5200 uint32_t layer_count);
5201
5202 void
5203 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
5204 const struct anv_image *image,
5205 const enum isl_format format,
5206 union isl_color_value clear_color);
5207
5208 void
5209 anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
5210 struct anv_state state,
5211 const struct anv_image *image);
5212
5213 struct anv_image_binding *
5214 anv_image_aspect_to_binding(struct anv_image *image,
5215 VkImageAspectFlags aspect);
5216
5217 void
5218 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
5219 const struct anv_image *image,
5220 VkImageAspectFlagBits aspect,
5221 enum isl_aux_usage aux_usage,
5222 enum isl_format format, struct isl_swizzle swizzle,
5223 uint32_t level, uint32_t base_layer, uint32_t layer_count,
5224 VkRect2D area, union isl_color_value clear_color);
5225 void
5226 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
5227 const struct anv_image *image,
5228 VkImageAspectFlags aspects,
5229 enum isl_aux_usage depth_aux_usage,
5230 uint32_t level,
5231 uint32_t base_layer, uint32_t layer_count,
5232 VkRect2D area,
5233 float depth_value, uint8_t stencil_value);
5234 void
5235 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
5236 const struct anv_image *src_image,
5237 enum isl_aux_usage src_aux_usage,
5238 uint32_t src_level, uint32_t src_base_layer,
5239 const struct anv_image *dst_image,
5240 enum isl_aux_usage dst_aux_usage,
5241 uint32_t dst_level, uint32_t dst_base_layer,
5242 VkImageAspectFlagBits aspect,
5243 uint32_t src_x, uint32_t src_y,
5244 uint32_t dst_x, uint32_t dst_y,
5245 uint32_t width, uint32_t height,
5246 uint32_t layer_count,
5247 enum blorp_filter filter);
5248 void
5249 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
5250 const struct anv_image *image,
5251 VkImageAspectFlagBits aspect, uint32_t level,
5252 uint32_t base_layer, uint32_t layer_count,
5253 enum isl_aux_op hiz_op);
5254 void
5255 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
5256 const struct anv_image *image,
5257 VkImageAspectFlags aspects,
5258 uint32_t level,
5259 uint32_t base_layer, uint32_t layer_count,
5260 VkRect2D area, uint8_t stencil_value);
5261 void
5262 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
5263 const struct anv_image *image,
5264 enum isl_format format, struct isl_swizzle swizzle,
5265 VkImageAspectFlagBits aspect,
5266 uint32_t base_layer, uint32_t layer_count,
5267 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
5268 bool predicate);
5269 void
5270 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
5271 const struct anv_image *image,
5272 enum isl_format format, struct isl_swizzle swizzle,
5273 VkImageAspectFlagBits aspect, uint32_t level,
5274 uint32_t base_layer, uint32_t layer_count,
5275 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
5276 bool predicate);
5277
5278 isl_surf_usage_flags_t
5279 anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
5280 VkImageCreateFlags vk_create_flags,
5281 VkImageUsageFlags vk_usage,
5282 isl_surf_usage_flags_t isl_extra_usage,
5283 VkImageAspectFlagBits aspect);
5284
5285 void
5286 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
5287 struct anv_address address,
5288 VkDeviceSize size,
5289 uint32_t data);
5290
5291 VkResult
5292 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
5293
5294 bool
5295 anv_can_hiz_clear_ds_view(struct anv_device *device,
5296 const struct anv_image_view *iview,
5297 VkImageLayout layout,
5298 VkImageAspectFlags clear_aspects,
5299 float depth_clear_value,
5300 VkRect2D render_area,
5301 const VkQueueFlagBits queue_flags);
5302
5303 bool
5304 anv_can_fast_clear_color_view(struct anv_device *device,
5305 struct anv_image_view *iview,
5306 VkImageLayout layout,
5307 union isl_color_value clear_color,
5308 uint32_t num_layers,
5309 VkRect2D render_area,
5310 const VkQueueFlagBits queue_flags);
5311
5312 enum isl_aux_state ATTRIBUTE_PURE
5313 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
5314 const struct anv_image *image,
5315 const VkImageAspectFlagBits aspect,
5316 const VkImageLayout layout,
5317 const VkQueueFlagBits queue_flags);
5318
5319 enum isl_aux_usage ATTRIBUTE_PURE
5320 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
5321 const struct anv_image *image,
5322 const VkImageAspectFlagBits aspect,
5323 const VkImageUsageFlagBits usage,
5324 const VkImageLayout layout,
5325 const VkQueueFlagBits queue_flags);
5326
5327 enum anv_fast_clear_type ATTRIBUTE_PURE
5328 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
5329 const struct anv_image * const image,
5330 const VkImageAspectFlagBits aspect,
5331 const VkImageLayout layout,
5332 const VkQueueFlagBits queue_flags);
5333
5334 bool ATTRIBUTE_PURE
5335 anv_layout_has_untracked_aux_writes(const struct intel_device_info * const devinfo,
5336 const struct anv_image * const image,
5337 const VkImageAspectFlagBits aspect,
5338 const VkImageLayout layout,
5339 const VkQueueFlagBits queue_flags);
5340
5341 static inline bool
5342 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
5343 VkImageAspectFlags aspects2)
5344 {
5345 if (aspects1 == aspects2)
5346 return true;
5347
5348 /* Color aspect masks that select the same number of aspects are compatible (e.g. COLOR_BIT and PLANE_0_BIT). */
5349 if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5350 (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5351 util_bitcount(aspects1) == util_bitcount(aspects2))
5352 return true;
5353
5354 return false;
5355 }
5356
5357 struct anv_image_view {
5358 struct vk_image_view vk;
5359
5360 const struct anv_image *image; /**< VkImageViewCreateInfo::image */
5361
5362 unsigned n_planes;
5363
5364 /**
5365 * True if the surface states (if any) are owned by some anv_state_stream
5366 * from internal_surface_state_pool.
5367 */
5368 bool use_surface_state_stream;
5369
5370 struct {
5371 struct isl_view isl;
5372
5373 /**
5374 * A version of the image view for storage usage (can apply 3D image
5375 * slicing).
5376 */
5377 struct isl_view isl_storage;
5378
5379 /**
5380 * RENDER_SURFACE_STATE when using image as a sampler surface with an
5381 * image layout of SHADER_READ_ONLY_OPTIMAL or
5382 * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
5383 */
5384 struct anv_surface_state optimal_sampler;
5385
5386 /**
5387 * RENDER_SURFACE_STATE when using image as a sampler surface with an
5388 * image layout of GENERAL.
5389 */
5390 struct anv_surface_state general_sampler;
5391
5392 /**
5393 * RENDER_SURFACE_STATE when using image as a storage image.
5394 */
5395 struct anv_surface_state storage;
5396 } planes[3];
5397 };
5398
5399 enum anv_image_view_state_flags {
5400 ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 0),
5401 };
5402
5403 void anv_image_fill_surface_state(struct anv_device *device,
5404 const struct anv_image *image,
5405 VkImageAspectFlagBits aspect,
5406 const struct isl_view *view,
5407 isl_surf_usage_flags_t view_usage,
5408 enum isl_aux_usage aux_usage,
5409 const union isl_color_value *clear_color,
5410 enum anv_image_view_state_flags flags,
5411 struct anv_surface_state *state_inout);
5412
5413
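/* Pick the sampler RENDER_SURFACE_STATE matching the layout the image is in:
 * VK_IMAGE_LAYOUT_GENERAL uses the general_sampler variant, every other
 * sampled layout the optimal_sampler one (see the anv_image_view planes
 * above).
 */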
5414 static inline const struct anv_surface_state *
5415 anv_image_view_texture_surface_state(const struct anv_image_view *iview,
5416 uint32_t plane, VkImageLayout layout)
5417 {
5418 return layout == VK_IMAGE_LAYOUT_GENERAL ?
5419 &iview->planes[plane].general_sampler :
5420 &iview->planes[plane].optimal_sampler;
5421 }
5422
5423 static inline const struct anv_surface_state *
5424 anv_image_view_storage_surface_state(const struct anv_image_view *iview)
5425 {
5426 return &iview->planes[0].storage;
5427 }
5428
5429 static inline bool
5430 anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state *state,
5431 const struct anv_image *image)
5432 {
5433 for (unsigned a = 0; a < state->color_att_count; a++) {
5434 if (state->color_att[a].iview &&
5435 state->color_att[a].iview->image == image)
5436 return true;
5437 }
5438
5439 if (state->depth_att.iview && state->depth_att.iview->image == image)
5440 return true;
5441 if (state->stencil_att.iview && state->stencil_att.iview->image == image)
5442 return true;
5443
5444 return false;
5445 }
5446
5447 struct anv_image_create_info {
5448 const VkImageCreateInfo *vk_info;
5449
5450 /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
5451 isl_tiling_flags_t isl_tiling_flags;
5452
5453 /** These flags will be added to those derived from VkImageCreateInfo. */
5454 isl_surf_usage_flags_t isl_extra_usage_flags;
5455
5456 /** An opt-in stride in pixels, should be 0 for implicit layouts */
5457 uint32_t stride;
5458
5459 /** Set to skip allocating the private binding */
5460 bool no_private_binding_alloc;
5461 };
5462
5463 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
5464 const struct anv_image_create_info *create_info);
5465
5466 void anv_image_finish(struct anv_image *image);
5467
5468 void anv_image_get_memory_requirements(struct anv_device *device,
5469 struct anv_image *image,
5470 VkImageAspectFlags aspects,
5471 VkMemoryRequirements2 *pMemoryRequirements);
5472
5473 void anv_image_view_init(struct anv_device *device,
5474 struct anv_image_view *iview,
5475 const VkImageViewCreateInfo *pCreateInfo,
5476 struct anv_state_stream *state_stream);
5477
5478 void anv_image_view_finish(struct anv_image_view *iview);
5479
5480 enum isl_format
5481 anv_isl_format_for_descriptor_type(const struct anv_device *device,
5482 VkDescriptorType type);
5483
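/* Map a buffer descriptor type to the ISL usage its surface state is built
 * with: uniform buffers get the constant-buffer bit, every other descriptor
 * type falls through to storage.
 */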
5484 static inline isl_surf_usage_flags_t
5485 anv_isl_usage_for_descriptor_type(const VkDescriptorType type)
5486 {
5487 switch(type) {
5488 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
5489 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
5490 return ISL_SURF_USAGE_CONSTANT_BUFFER_BIT;
5491 default:
5492 return ISL_SURF_USAGE_STORAGE_BIT;
5493 }
5494 }
5495
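/* Whether line antialiasing should be enabled: only smooth
 * (RECTANGULAR_SMOOTH) lines drawn with VK_POLYGON_MODE_LINE take the
 * hardware AA path.
 */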
5496 static inline uint32_t
5497 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
5498 VkLineRasterizationModeKHR line_mode)
5499 {
5500 if (raster_mode == VK_POLYGON_MODE_LINE &&
5501 line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
5502 return true;
5503 return false;
5504 }
5505
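/* Resolve VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR into the concrete mode used
 * for drawing: rectangular lines when multisampling, Bresenham lines
 * otherwise. Explicit (non-default) modes are returned unchanged.
 */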
5506 static inline VkLineRasterizationModeKHR
5507 anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
5508 unsigned rasterization_samples)
5509 {
5510 if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
5511 if (rasterization_samples > 1) {
5512 return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
5513 } else {
5514 return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
5515 }
5516 }
5517 return line_mode;
5518 }
5519
5520 static inline bool
5521 anv_is_dual_src_blend_factor(VkBlendFactor factor)
5522 {
5523 return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
5524 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
5525 factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
5526 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
5527 }
5528
5529 static inline bool
5530 anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
5531 {
5532 return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
5533 anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
5534 anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
5535 anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
5536 }
5537
5538 VkFormatFeatureFlags2
5539 anv_get_image_format_features2(const struct anv_physical_device *physical_device,
5540 VkFormat vk_format,
5541 const struct anv_format *anv_format,
5542 VkImageTiling vk_tiling,
5543 const struct isl_drm_modifier_info *isl_mod_info);
5544
5545 void anv_fill_buffer_surface_state(struct anv_device *device,
5546 void *surface_state_ptr,
5547 enum isl_format format,
5548 struct isl_swizzle swizzle,
5549 isl_surf_usage_flags_t usage,
5550 struct anv_address address,
5551 uint32_t range, uint32_t stride);
5552
5553
5554 struct gfx8_border_color {
5555 union {
5556 float float32[4];
5557 uint32_t uint32[4];
5558 };
5559 /* Pad out to 64 bytes */
5560 uint32_t _pad[12];
5561 };
5562
5563 struct anv_sampler {
5564 struct vk_sampler vk;
5565
5566 uint32_t state[3][4];
5567 uint32_t n_planes;
5568
5569 /* Blob of sampler state data that is guaranteed to be 32-byte aligned
5570 * and laid out with a 32-byte stride, for use as bindless samplers.
5571 */
5572 struct anv_state bindless_state;
5573
5574 struct anv_state custom_border_color;
5575 };
5576
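/* Mask covering the 11 core VkQueryPipelineStatisticFlagBits counters
 * (bits 0..10).
 */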
5577 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
5578
5579 struct anv_query_pool {
5580 struct vk_query_pool vk;
5581
5582 /** Stride between slots, in bytes */
5583 uint32_t stride;
5584 /** BO backing the query slot data */
5585 struct anv_bo * bo;
5586
5587 /** Location for the KHR_performance_query small batch updating
5588 * ANV_PERF_QUERY_OFFSET_REG
5589 */
5590 uint32_t khr_perf_preambles_offset;
5591
5592 /** Size of each small batch */
5593 uint32_t khr_perf_preamble_stride;
5594
5595 /* KHR perf queries: */
5596 uint32_t pass_size;
5597 uint32_t data_offset;
5598 uint32_t snapshot_size;
5599 uint32_t n_counters;
5600 struct intel_perf_counter_pass *counter_pass;
5601 uint32_t n_passes;
5602 struct intel_perf_query_info **pass_query;
5603 };
5604
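/* Offset within the query pool BO of the small batch that updates
 * ANV_PERF_QUERY_OFFSET_REG for the given counter pass.
 */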
5605 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
5606 uint32_t pass)
5607 {
5608 return pool->khr_perf_preambles_offset +
5609 pool->khr_perf_preamble_stride * pass;
5610 }
5611
5612 struct anv_vid_mem {
5613 struct anv_device_memory *mem;
5614 VkDeviceSize offset;
5615 VkDeviceSize size;
5616 };
5617
5618 #define ANV_VIDEO_MEM_REQS_H264 4
5619 #define ANV_VIDEO_MEM_REQS_H265 9
5620 #define ANV_MB_WIDTH 16
5621 #define ANV_MB_HEIGHT 16
5622 #define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16
5623 #define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16
5624 #define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8
5625 #define ANV_MAX_H265_CTB_SIZE 64
5626
5627 enum anv_vid_mem_h264_types {
5628 ANV_VID_MEM_H264_INTRA_ROW_STORE,
5629 ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE,
5630 ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH,
5631 ANV_VID_MEM_H264_MPR_ROW_SCRATCH,
5632 ANV_VID_MEM_H264_MAX,
5633 };
5634
5635 enum anv_vid_mem_h265_types {
5636 ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE,
5637 ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE,
5638 ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN,
5639 ANV_VID_MEM_H265_METADATA_LINE,
5640 ANV_VID_MEM_H265_METADATA_TILE_LINE,
5641 ANV_VID_MEM_H265_METADATA_TILE_COLUMN,
5642 ANV_VID_MEM_H265_SAO_LINE,
5643 ANV_VID_MEM_H265_SAO_TILE_LINE,
5644 ANV_VID_MEM_H265_SAO_TILE_COLUMN,
5645 ANV_VID_MEM_H265_MAX,
5646 };
5647
5648 struct anv_video_session {
5649 struct vk_video_session vk;
5650
5651 /* the decoder needs some private memory allocations */
5652 struct anv_vid_mem vid_mem[ANV_VID_MEM_H265_MAX];
5653 };
5654
5655 struct anv_video_session_params {
5656 struct vk_video_session_parameters vk;
5657 };
5658
5659 void
5660 anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f);
5661
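/* Accumulate PIPE_CONTROL bits to be emitted at the next flush point; when
 * INTEL_DEBUG(DEBUG_PIPE_CONTROL) is enabled, each addition is logged along
 * with the caller-supplied reason string.
 */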
5662 static inline void
5663 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
5664 enum anv_pipe_bits bits,
5665 const char* reason)
5666 {
5667 cmd_buffer->state.pending_pipe_bits |= bits;
5668 if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) {
5669 fputs("pc: add ", stdout);
5670 anv_dump_pipe_bits(bits, stdout);
5671 fprintf(stdout, "reason: %s\n", reason);
5672 }
5673 }
5674
5675 struct anv_performance_configuration_intel {
5676 struct vk_object_base base;
5677
5678 struct intel_perf_registers *register_config;
5679
5680 uint64_t config_id;
5681 };
5682
5683 void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
5684 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
5685 void anv_device_perf_init(struct anv_device *device);
5686 void anv_perf_write_pass_results(struct intel_perf_config *perf,
5687 struct anv_query_pool *pool, uint32_t pass,
5688 const struct intel_perf_query_result *accumulated_results,
5689 union VkPerformanceCounterResultKHR *results);
5690
5691 void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
5692 struct nir_shader *fs_nir,
5693 struct anv_device *device,
5694 const VkGraphicsPipelineCreateInfo *info);
5695
5696 /* Used to emit a series of memcpy operations */
5697 struct anv_memcpy_state {
5698 struct anv_device *device;
5699 struct anv_batch *batch;
5700
5701 struct anv_vb_cache_range vb_bound;
5702 struct anv_vb_cache_range vb_dirty;
5703 };
5704
5705 VkResult anv_device_init_internal_kernels(struct anv_device *device);
5706 void anv_device_finish_internal_kernels(struct anv_device *device);
5707
5708 VkResult anv_device_init_astc_emu(struct anv_device *device);
5709 void anv_device_finish_astc_emu(struct anv_device *device);
5710 void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
5711 struct anv_image *image,
5712 VkImageLayout layout,
5713 const VkImageSubresourceLayers *subresource,
5714 VkOffset3D block_offset,
5715 VkExtent3D block_extent);
5716
5717 /* This structure is used in 2 scenarios:
5718 *
5719 * - copy utrace timestamps from command buffer so that command buffer can
5720 * be resubmitted multiple times without the recorded timestamps being
5721 * overwritten before they're read back
5722 *
5723 * - emit trace points for queue debug tagging
5724 * (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
5725 */
5726 struct anv_utrace_submit {
5727 /* Needs to be the first field */
5728 struct intel_ds_flush_data ds;
5729
5730 /* Batch state used to implement the copy of timestamps recorded in
5731 * another buffer.
5732 */
5733 struct anv_reloc_list relocs;
5734 struct anv_batch batch;
5735 struct util_dynarray batch_bos;
5736
5737 /* Stream for temporary allocations */
5738 struct anv_state_stream dynamic_state_stream;
5739 struct anv_state_stream general_state_stream;
5740
5741 /* Syncobj to be signaled when the batch completes */
5742 struct vk_sync *sync;
5743
5744 /* Queue on which all the recorded traces are submitted */
5745 struct anv_queue *queue;
5746
5747 /* Buffer of 64-bit timestamps (only used for timestamp copies) */
5748 struct anv_bo *trace_bo;
5749
5750 /* Last fully read 64-bit timestamp (used to rebuild the upper bits of
5751 * 32-bit timestamps)
5752 */
5753 uint64_t last_full_timestamp;
5754
5755 /* Memcpy state tracking (only used for timestamp copies on render engine) */
5756 struct anv_memcpy_state memcpy_state;
5757
5758 /* Memcpy state tracking (only used for timestamp copies on compute engine) */
5759 struct anv_simple_shader simple_state;
5760 };
5761
5762 void anv_device_utrace_init(struct anv_device *device);
5763 void anv_device_utrace_finish(struct anv_device *device);
5764 VkResult
5765 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
5766 uint32_t cmd_buffer_count,
5767 struct anv_cmd_buffer **cmd_buffers,
5768 struct anv_utrace_submit **out_submit);
5769
5770 static inline bool
5771 anv_has_cooperative_matrix(const struct anv_physical_device *device)
5772 {
5773 return device->has_cooperative_matrix;
5774 }
5775
5776 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
5777 VK_FROM_HANDLE(__anv_type, __name, __handle)
5778
5779 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
5780 VK_OBJECT_TYPE_COMMAND_BUFFER)
5781 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
5782 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
5783 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
5784 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
5785 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
5786
5787 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
5788 VK_OBJECT_TYPE_BUFFER)
5789 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, vk.base, VkBufferView,
5790 VK_OBJECT_TYPE_BUFFER_VIEW)
5791 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
5792 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
5793 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
5794 VK_OBJECT_TYPE_DESCRIPTOR_SET)
5795 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
5796 VkDescriptorSetLayout,
5797 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
5798 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, vk.base, VkDeviceMemory,
5799 VK_OBJECT_TYPE_DEVICE_MEMORY)
5800 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
5801 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
5802 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
5803 VK_OBJECT_TYPE_IMAGE_VIEW)
5804 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
5805 VK_OBJECT_TYPE_PIPELINE)
5806 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
5807 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
5808 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
5809 VK_OBJECT_TYPE_QUERY_POOL)
5810 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
5811 VK_OBJECT_TYPE_SAMPLER)
5812 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
5813 VkPerformanceConfigurationINTEL,
5814 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
5815 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
5816 VkVideoSessionKHR,
5817 VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
5818 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session_params, vk.base,
5819 VkVideoSessionParametersKHR,
5820 VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
5821
5822 #define anv_genX(devinfo, thing) ({ \
5823 __typeof(&gfx9_##thing) genX_thing; \
5824 switch ((devinfo)->verx10) { \
5825 case 90: \
5826 genX_thing = &gfx9_##thing; \
5827 break; \
5828 case 110: \
5829 genX_thing = &gfx11_##thing; \
5830 break; \
5831 case 120: \
5832 genX_thing = &gfx12_##thing; \
5833 break; \
5834 case 125: \
5835 genX_thing = &gfx125_##thing; \
5836 break; \
5837 case 200: \
5838 genX_thing = &gfx20_##thing; \
5839 break; \
5840 default: \
5841 unreachable("Unknown hardware generation"); \
5842 } \
5843 genX_thing; \
5844 })
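/* Typical usage (hypothetical function name): the returned pointer is called
 * immediately, e.g.
 *
 *    anv_genX(device->info, emit_something)(cmd_buffer);
 *
 * which dispatches to gfx9_emit_something, gfx11_emit_something, etc. based
 * on the device's verx10. The real per-gen entry points are declared in
 * anv_genX.h below.
 */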
5845
5846 /* Gen-specific function declarations */
5847 #ifdef genX
5848 # include "anv_genX.h"
5849 #else
5850 # define genX(x) gfx9_##x
5851 # include "anv_genX.h"
5852 # undef genX
5853 # define genX(x) gfx11_##x
5854 # include "anv_genX.h"
5855 # undef genX
5856 # define genX(x) gfx12_##x
5857 # include "anv_genX.h"
5858 # undef genX
5859 # define genX(x) gfx125_##x
5860 # include "anv_genX.h"
5861 # undef genX
5862 # define genX(x) gfx20_##x
5863 # include "anv_genX.h"
5864 # undef genX
5865 #endif
5866
5867 #ifdef __cplusplus
5868 }
5869 #endif
5870
5871 #endif /* ANV_PRIVATE_H */
5872