• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/drm_fourcc.h"
34 
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #else
40 #define VG(x) ((void)0)
41 #endif
42 
43 #include "common/intel_aux_map.h"
44 #include "common/intel_bind_timeline.h"
45 #include "common/intel_engine.h"
46 #include "common/intel_gem.h"
47 #include "common/intel_l3_config.h"
48 #include "common/intel_measure.h"
49 #include "common/intel_mem.h"
50 #include "common/intel_sample_positions.h"
51 #include "decoder/intel_decoder.h"
52 #include "dev/intel_device_info.h"
53 #include "blorp/blorp.h"
54 #include "compiler/brw_compiler.h"
55 #include "compiler/brw_kernel.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/detect_os.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/perf/u_trace.h"
65 #include "util/set.h"
66 #include "util/sparse_array.h"
67 #include "util/u_atomic.h"
68 #if DETECT_OS_ANDROID
69 #include "util/u_gralloc/u_gralloc.h"
70 #endif
71 #include "util/u_vector.h"
72 #include "util/u_math.h"
73 #include "util/vma.h"
74 #include "util/xmlconfig.h"
75 #include "vk_acceleration_structure.h"
76 #include "vk_alloc.h"
77 #include "vk_buffer.h"
78 #include "vk_buffer_view.h"
79 #include "vk_command_buffer.h"
80 #include "vk_command_pool.h"
81 #include "vk_debug_report.h"
82 #include "vk_descriptor_update_template.h"
83 #include "vk_device.h"
84 #include "vk_device_memory.h"
85 #include "vk_drm_syncobj.h"
86 #include "vk_enum_defines.h"
87 #include "vk_format.h"
88 #include "vk_framebuffer.h"
89 #include "vk_graphics_state.h"
90 #include "vk_image.h"
91 #include "vk_instance.h"
92 #include "vk_pipeline_cache.h"
93 #include "vk_physical_device.h"
94 #include "vk_sampler.h"
95 #include "vk_shader_module.h"
96 #include "vk_sync.h"
97 #include "vk_sync_timeline.h"
98 #include "vk_texcompress_astc.h"
99 #include "vk_util.h"
100 #include "vk_query_pool.h"
101 #include "vk_queue.h"
102 #include "vk_log.h"
103 #include "vk_ycbcr_conversion.h"
104 #include "vk_video.h"
105 
106 #ifdef __cplusplus
107 extern "C" {
108 #endif
109 
110 /* Pre-declarations needed for WSI entrypoints */
111 struct wl_surface;
112 struct wl_display;
113 typedef struct xcb_connection_t xcb_connection_t;
114 typedef uint32_t xcb_visualid_t;
115 typedef uint32_t xcb_window_t;
116 
117 struct anv_batch;
118 struct anv_buffer;
119 struct anv_buffer_view;
120 struct anv_image_view;
121 struct anv_instance;
122 
123 struct intel_aux_map_context;
124 struct intel_perf_config;
125 struct intel_perf_counter_pass;
126 struct intel_perf_query_result;
127 
128 #include <vulkan/vulkan.h>
129 #include <vulkan/vk_icd.h>
130 
131 #include "anv_android.h"
132 #include "anv_entrypoints.h"
133 #include "anv_kmd_backend.h"
134 #include "anv_rmv.h"
135 #include "isl/isl.h"
136 
137 #include "dev/intel_debug.h"
138 #undef MESA_LOG_TAG
139 #define MESA_LOG_TAG "MESA-INTEL"
140 #include "util/log.h"
141 #include "wsi_common.h"
142 
143 #define NSEC_PER_SEC 1000000000ull
144 
145 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
146 
147 /* Allowing different clear colors requires us to perform a depth resolve at
148  * the end of certain render passes. This is because while slow clears store
149  * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
150  * See the PRMs for examples describing when additional resolves would be
151  * necessary. To enable fast clears without requiring extra resolves, we set
152  * the clear value to a globally-defined one. We could allow different values
153  * if the user doesn't expect coherent data during or after a render passes
154  * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
155  * don't seem to exist yet. In almost all Vulkan applications tested thus far,
156  * 1.0f seems to be the only value used. The only application that doesn't set
157  * this value does so through the usage of an seemingly uninitialized clear
158  * value.
159  */
160 #define ANV_HZ_FC_VAL 1.0f
161 
162 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
163 #define MAX_VBS         (33 - 2)
164 
165 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
166  * only supports the push model of VS inputs, and we only have 128 GRFs,
167  * minus the g0 and g1 payload, which gives us a maximum of 31 VEs.  Plus,
168  * we use two of them for SGVs.
169  */
170 #define MAX_VES         (31 - 2)
171 
172 #define MAX_XFB_BUFFERS  4
173 #define MAX_XFB_STREAMS  4
174 #define MAX_SETS         8
175 #define MAX_RTS          8
176 #define MAX_VIEWPORTS   16
177 #define MAX_SCISSORS    16
178 #define MAX_PUSH_CONSTANTS_SIZE 128
179 #define MAX_DYNAMIC_BUFFERS 16
180 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
181 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
182 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
183 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
184  * use 64 here to avoid cache issues. This could most likely bring it back to
185  * 32 if we had different virtual addresses for the different views on a given
186  * GEM object.
187  */
188 #define ANV_UBO_ALIGNMENT 64
189 #define ANV_SSBO_ALIGNMENT 4
190 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
191 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
192 #define MAX_SAMPLE_LOCATIONS 16
193 
194 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
195  * and we can't put anything else there we use 64b.
196  */
197 #define ANV_SURFACE_STATE_SIZE (64)
198 
199 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
200  *
201  *    "The surface state model is used when a Binding Table Index (specified
202  *    in the message descriptor) of less than 240 is specified. In this model,
203  *    the Binding Table Index is used to index into the binding table, and the
204  *    binding table entry contains a pointer to the SURFACE_STATE."
205  *
206  * Binding table values above 240 are used for various things in the hardware
207  * such as stateless, stateless with incoherent cache, SLM, and bindless.
208  */
209 #define MAX_BINDING_TABLE_SIZE 240
210 
211 #define ANV_SVGS_VB_INDEX    MAX_VBS
212 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
213 
214 /* We reserve this MI ALU register for the purpose of handling predication.
215  * Other code which uses the MI ALU should leave it alone.
216  */
217 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
218 
219 /* We reserve this MI ALU register to pass around an offset computed from
220  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
221  * Other code which uses the MI ALU should leave it alone.
222  */
223 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
224 
225 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
226 
227 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
228  * and we can't put anything else there we use 64b.
229  */
230 #define ANV_SURFACE_STATE_SIZE (64)
231 #define ANV_SAMPLER_STATE_SIZE (32)
232 
233 /* For gfx12 we set the streamout buffers using 4 separate commands
234  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
235  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
236  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
237  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
238  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
239  * 3DSTATE_SO_BUFFER_INDEX_0.
240  */
241 #define SO_BUFFER_INDEX_0_CMD 0x60
242 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
243 
244 /* The TR-TT L1 page table entries may contain these values instead of actual
245  * pointers to indicate the regions are either NULL or invalid. We program
246  * these values to TR-TT registers, so we could change them, but it's super
247  * convenient to have the NULL value be 0 because everything is
248  * zero-initialized when allocated.
249  *
250  * Since we reserve these values for NULL/INVALID, then we can't use them as
251  * destinations for TR-TT address translation. Both values are shifted by 16
252  * bits, wich results in graphic addresses 0 and 64k. On Anv the first vma
253  * starts at 2MB, so we already don't use 0 and 64k for anything, so there's
254  * nothing really to reserve. We could instead just reserve random 64kb
255  * ranges from any of the non-TR-TT vmas and use their addresses.
256  */
257 #define ANV_TRTT_L1_NULL_TILE_VAL 0
258 #define ANV_TRTT_L1_INVALID_TILE_VAL 1
259 
260 static inline uint32_t
align_down_npot_u32(uint32_t v,uint32_t a)261 align_down_npot_u32(uint32_t v, uint32_t a)
262 {
263    return v - (v % a);
264 }
265 
266 /** Alignment must be a power of 2. */
267 static inline bool
anv_is_aligned(uintmax_t n,uintmax_t a)268 anv_is_aligned(uintmax_t n, uintmax_t a)
269 {
270    assert(a == (a & -a));
271    return (n & (a - 1)) == 0;
272 }
273 
274 static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)275 vk_to_isl_color(VkClearColorValue color)
276 {
277    return (union isl_color_value) {
278       .u32 = {
279          color.uint32[0],
280          color.uint32[1],
281          color.uint32[2],
282          color.uint32[3],
283       },
284    };
285 }
286 
287 static inline union isl_color_value
vk_to_isl_color_with_format(VkClearColorValue color,enum isl_format format)288 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
289 {
290    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
291    union isl_color_value isl_color = { .u32 = {0, } };
292 
293 #define COPY_COLOR_CHANNEL(c, i) \
294    if (fmtl->channels.c.bits) \
295       isl_color.u32[i] = color.uint32[i]
296 
297    COPY_COLOR_CHANNEL(r, 0);
298    COPY_COLOR_CHANNEL(g, 1);
299    COPY_COLOR_CHANNEL(b, 2);
300    COPY_COLOR_CHANNEL(a, 3);
301 
302 #undef COPY_COLOR_CHANNEL
303 
304    return isl_color;
305 }
306 
307 /**
308  * Warn on ignored extension structs.
309  *
310  * The Vulkan spec requires us to ignore unsupported or unknown structs in
311  * a pNext chain.  In debug mode, emitting warnings for ignored structs may
312  * help us discover structs that we should not have ignored.
313  *
314  *
315  * From the Vulkan 1.0.38 spec:
316  *
317  *    Any component of the implementation (the loader, any enabled layers,
318  *    and drivers) must skip over, without processing (other than reading the
319  *    sType and pNext members) any chained structures with sType values not
320  *    defined by extensions supported by that component.
321  */
322 #define anv_debug_ignored_stype(sType) \
323    mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
324 
325 void __anv_perf_warn(struct anv_device *device,
326                      const struct vk_object_base *object,
327                      const char *file, int line, const char *format, ...)
328    anv_printflike(5, 6);
329 
330 /**
331  * Print a FINISHME message, including its source location.
332  */
333 #define anv_finishme(format, ...) \
334    do { \
335       static bool reported = false; \
336       if (!reported) { \
337          mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
338                     ##__VA_ARGS__); \
339          reported = true; \
340       } \
341    } while (0)
342 
343 /**
344  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
345  */
346 #define anv_perf_warn(objects_macro, format, ...)   \
347    do { \
348       static bool reported = false; \
349       if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
350          __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
351                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
352                   objects_macro, __FILE__, __LINE__,                    \
353                   format, ## __VA_ARGS__);                              \
354          reported = true; \
355       } \
356    } while (0)
357 
358 /* A non-fatal assert.  Useful for debugging. */
359 #ifdef DEBUG
360 #define anv_assert(x) ({ \
361    if (unlikely(!(x))) \
362       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
363 })
364 #else
365 #define anv_assert(x)
366 #endif
367 
368 enum anv_bo_alloc_flags {
369    /** Specifies that the BO must have a 32-bit address
370     *
371     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
372     */
373    ANV_BO_ALLOC_32BIT_ADDRESS =           (1 << 0),
374 
375    /** Specifies that the BO may be shared externally */
376    ANV_BO_ALLOC_EXTERNAL =                (1 << 1),
377 
378    /** Specifies that the BO should be mapped */
379    ANV_BO_ALLOC_MAPPED =                  (1 << 2),
380 
381    /** Specifies that the BO should be coherent.
382     *
383     * Note: In platforms with LLC where HOST_CACHED + HOST_COHERENT is free,
384     * bo can get upgraded to HOST_CACHED_COHERENT
385     */
386    ANV_BO_ALLOC_HOST_COHERENT =           (1 << 3),
387 
388    /** Specifies that the BO should be captured in error states */
389    ANV_BO_ALLOC_CAPTURE =                 (1 << 4),
390 
391    /** Specifies that the BO will have an address assigned by the caller
392     *
393     * Such BOs do not exist in any VMA heap.
394     */
395    ANV_BO_ALLOC_FIXED_ADDRESS =           (1 << 5),
396 
397    /** Enables implicit synchronization on the BO
398     *
399     * This is the opposite of EXEC_OBJECT_ASYNC.
400     */
401    ANV_BO_ALLOC_IMPLICIT_SYNC =           (1 << 6),
402 
403    /** Enables implicit synchronization on the BO
404     *
405     * This is equivalent to EXEC_OBJECT_WRITE.
406     */
407    ANV_BO_ALLOC_IMPLICIT_WRITE =          (1 << 7),
408 
409    /** Has an address which is visible to the client */
410    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS =  (1 << 8),
411 
412    /** Align the BO's virtual address to match AUX-TT requirements */
413    ANV_BO_ALLOC_AUX_TT_ALIGNED =          (1 << 9),
414 
415    /** This buffer is allocated from local memory and should be cpu visible */
416    ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE =   (1 << 10),
417 
418    /** For non device local allocations */
419    ANV_BO_ALLOC_NO_LOCAL_MEM =            (1 << 11),
420 
421    /** This buffer will be scanout to display */
422    ANV_BO_ALLOC_SCANOUT =                 (1 << 12),
423 
424    /** For descriptor pools */
425    ANV_BO_ALLOC_DESCRIPTOR_POOL =         (1 << 13),
426 
427    /** For buffers that will be bound using TR-TT.
428     *
429     * Not for buffers used as the TR-TT page tables.
430     */
431    ANV_BO_ALLOC_TRTT =                    (1 << 14),
432 
433    /** Protected buffer */
434    ANV_BO_ALLOC_PROTECTED =               (1 << 15),
435 
436    /** Specifies that the BO should be cached and incoherent. */
437    ANV_BO_ALLOC_HOST_CACHED =             (1 << 16),
438 
439    /** For sampler pools */
440    ANV_BO_ALLOC_SAMPLER_POOL =            (1 << 17),
441 
442    /** Specifies that the BO is imported.
443     *
444     * Imported BOs must also be marked as ANV_BO_ALLOC_EXTERNAL
445     */
446    ANV_BO_ALLOC_IMPORTED =                (1 << 18),
447 
448    /** Specify whether this BO is internal to the driver */
449    ANV_BO_ALLOC_INTERNAL =                (1 << 19),
450 
451    /** Allocate with CCS AUX requirements
452     *
453     * This pads the BO include CCS data mapppable through the AUX-TT and
454     * aligned to the AUX-TT requirements.
455     */
456    ANV_BO_ALLOC_AUX_CCS =                 (1 << 20),
457 };
458 
459 /** Specifies that the BO should be cached and coherent. */
460 #define ANV_BO_ALLOC_HOST_CACHED_COHERENT (ANV_BO_ALLOC_HOST_COHERENT | \
461                                            ANV_BO_ALLOC_HOST_CACHED)
462 
463 
464 struct anv_bo {
465    const char *name;
466 
467    /* The VMA heap in anv_device from which this BO takes its offset.
468     *
469     * This can only be NULL when has_fixed_address is true.
470     */
471    struct util_vma_heap *vma_heap;
472 
473    /* All userptr bos in Xe KMD has gem_handle set to workaround_bo->gem_handle */
474    uint32_t gem_handle;
475 
476    uint32_t refcount;
477 
478    /* Index into the current validation list.  This is used by the
479     * validation list building algorithm to track which buffers are already
480     * in the validation list so that we can ensure uniqueness.
481     */
482    uint32_t exec_obj_index;
483 
484    /* Index for use with util_sparse_array_free_list */
485    uint32_t free_index;
486 
487    /* Last known offset.  This value is provided by the kernel when we
488     * execbuf and is used as the presumed offset for the next bunch of
489     * relocations, in canonical address format.
490     */
491    uint64_t offset;
492 
493    /** Size of the buffer */
494    uint64_t size;
495 
496    /** Offset at which the CCS data is stored */
497    uint64_t ccs_offset;
498 
499    /* Map for internally mapped BOs.
500     *
501     * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
502     * BO.
503     */
504    void *map;
505 
506    /* The actual size of bo allocated by kmd, basically:
507     * align(size, mem_alignment)
508     */
509    uint64_t actual_size;
510 
511    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
512    uint32_t flags;
513 
514    enum anv_bo_alloc_flags alloc_flags;
515 
516    /** True if this BO wraps a host pointer */
517    bool from_host_ptr:1;
518 
519    /** True if this BO is mapped in the GTT (only used for RMV) */
520    bool gtt_mapped:1;
521 };
522 
523 static inline bool
anv_bo_is_external(const struct anv_bo * bo)524 anv_bo_is_external(const struct anv_bo *bo)
525 {
526    return bo->alloc_flags & ANV_BO_ALLOC_EXTERNAL;
527 }
528 
529 static inline bool
anv_bo_is_vram_only(const struct anv_bo * bo)530 anv_bo_is_vram_only(const struct anv_bo *bo)
531 {
532    return !(bo->alloc_flags & (ANV_BO_ALLOC_NO_LOCAL_MEM |
533                                ANV_BO_ALLOC_MAPPED |
534                                ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
535                                ANV_BO_ALLOC_IMPORTED));
536 }
537 
538 static inline struct anv_bo *
anv_bo_ref(struct anv_bo * bo)539 anv_bo_ref(struct anv_bo *bo)
540 {
541    p_atomic_inc(&bo->refcount);
542    return bo;
543 }
544 
545 enum intel_device_info_mmap_mode
546 anv_bo_get_mmap_mode(struct anv_device *device, struct anv_bo *bo);
547 
548 static inline bool
anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)549 anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)
550 {
551    return (alloc_flags & (ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT)) ==
552           ANV_BO_ALLOC_HOST_CACHED;
553 }
554 
555 struct anv_address {
556    struct anv_bo *bo;
557    int64_t offset;
558 };
559 
560 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
561 
562 static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)563 anv_address_from_u64(uint64_t addr_u64)
564 {
565    assert(addr_u64 == intel_canonical_address(addr_u64));
566    return (struct anv_address) {
567       .bo = NULL,
568       .offset = addr_u64,
569    };
570 }
571 
572 static inline bool
anv_address_is_null(struct anv_address addr)573 anv_address_is_null(struct anv_address addr)
574 {
575    return addr.bo == NULL && addr.offset == 0;
576 }
577 
578 static inline uint64_t
anv_address_physical(struct anv_address addr)579 anv_address_physical(struct anv_address addr)
580 {
581    uint64_t address = (addr.bo ? addr.bo->offset : 0ull) + addr.offset;
582    return intel_canonical_address(address);
583 }
584 
585 static inline struct anv_address
anv_address_add(struct anv_address addr,uint64_t offset)586 anv_address_add(struct anv_address addr, uint64_t offset)
587 {
588    addr.offset += offset;
589    return addr;
590 }
591 
592 static inline void *
anv_address_map(struct anv_address addr)593 anv_address_map(struct anv_address addr)
594 {
595    if (addr.bo == NULL)
596       return NULL;
597 
598    if (addr.bo->map == NULL)
599       return NULL;
600 
601    return addr.bo->map + addr.offset;
602 }
603 
604 /* Represent a virtual address range */
605 struct anv_va_range {
606    uint64_t addr;
607    uint64_t size;
608 };
609 
610 /* Represents a lock-free linked list of "free" things.  This is used by
611  * both the block pool and the state pools.  Unfortunately, in order to
612  * solve the ABA problem, we can't use a single uint32_t head.
613  */
614 union anv_free_list {
615    struct {
616       uint32_t offset;
617 
618       /* A simple count that is incremented every time the head changes. */
619       uint32_t count;
620    };
621    /* Make sure it's aligned to 64 bits. This will make atomic operations
622     * faster on 32 bit platforms.
623     */
624    alignas(8) uint64_t u64;
625 };
626 
627 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
628 
629 struct anv_block_state {
630    union {
631       struct {
632          uint32_t next;
633          uint32_t end;
634       };
635       /* Make sure it's aligned to 64 bits. This will make atomic operations
636        * faster on 32 bit platforms.
637        */
638       alignas(8) uint64_t u64;
639    };
640 };
641 
642 #define anv_block_pool_foreach_bo(bo, pool)  \
643    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
644         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
645         _pp_bo++)
646 
647 #define ANV_MAX_BLOCK_POOL_BOS 20
648 
649 struct anv_block_pool {
650    const char *name;
651 
652    struct anv_device *device;
653 
654    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
655    struct anv_bo *bo;
656    uint32_t nbos;
657 
658    /* Maximum size of the pool */
659    uint64_t max_size;
660 
661    /* Current size of the pool */
662    uint64_t size;
663 
664    /* The canonical address where the start of the pool is pinned. The various bos that
665     * are created as the pool grows will have addresses in the range
666     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
667     */
668    uint64_t start_address;
669 
670    /* The offset from the start of the bo to the "center" of the block
671     * pool.  Pointers to allocated blocks are given by
672     * bo.map + center_bo_offset + offsets.
673     */
674    uint32_t center_bo_offset;
675 
676    struct anv_block_state state;
677 
678    enum anv_bo_alloc_flags bo_alloc_flags;
679 };
680 
681 /* Block pools are backed by a fixed-size 1GB memfd */
682 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
683 
684 /* The center of the block pool is also the middle of the memfd.  This may
685  * change in the future if we decide differently for some reason.
686  */
687 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
688 
689 static inline uint32_t
anv_block_pool_size(struct anv_block_pool * pool)690 anv_block_pool_size(struct anv_block_pool *pool)
691 {
692    return pool->state.end;
693 }
694 
695 struct anv_state {
696    int64_t offset;
697    uint32_t alloc_size;
698    uint32_t idx;
699    void *map;
700 };
701 
702 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
703 
704 struct anv_fixed_size_state_pool {
705    union anv_free_list free_list;
706    struct anv_block_state block;
707 };
708 
709 #define ANV_MIN_STATE_SIZE_LOG2 6
710 #define ANV_MAX_STATE_SIZE_LOG2 22
711 
712 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
713 
714 struct anv_free_entry {
715    uint32_t next;
716    struct anv_state state;
717 };
718 
719 struct anv_state_table {
720    struct anv_device *device;
721    int fd;
722    struct anv_free_entry *map;
723    uint32_t size;
724    uint64_t max_size;
725    struct anv_block_state state;
726    struct u_vector cleanups;
727 };
728 
729 struct anv_state_pool {
730    struct anv_block_pool block_pool;
731 
732    /* Offset into the relevant state base address where the state pool starts
733     * allocating memory.
734     */
735    int64_t start_offset;
736 
737    struct anv_state_table table;
738 
739    /* The size of blocks which will be allocated from the block pool */
740    uint32_t block_size;
741 
742    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
743 };
744 
745 struct anv_state_reserved_pool {
746    struct anv_state_pool *pool;
747    union anv_free_list reserved_blocks;
748    uint32_t count;
749 };
750 
751 struct anv_state_stream {
752    struct anv_state_pool *state_pool;
753 
754    /* The size of blocks to allocate from the state pool */
755    uint32_t block_size;
756 
757    /* Current block we're allocating from */
758    struct anv_state block;
759 
760    /* Offset into the current block at which to allocate the next state */
761    uint32_t next;
762 
763    /* Sum of all the blocks in all_blocks */
764    uint32_t total_size;
765 
766    /* List of all blocks allocated from this pool */
767    struct util_dynarray all_blocks;
768 };
769 
770 struct anv_sparse_submission {
771    struct anv_queue *queue;
772 
773    struct anv_vm_bind *binds;
774    int binds_len;
775    int binds_capacity;
776 
777    uint32_t wait_count;
778    uint32_t signal_count;
779 
780    struct vk_sync_wait *waits;
781    struct vk_sync_signal *signals;
782 };
783 
784 struct anv_trtt_bind {
785    uint64_t pte_addr;
786    uint64_t entry_addr;
787 };
788 
789 struct anv_trtt_submission {
790    struct anv_sparse_submission *sparse;
791 
792    struct anv_trtt_bind *l3l2_binds;
793    struct anv_trtt_bind *l1_binds;
794 
795    int l3l2_binds_len;
796    int l1_binds_len;
797 };
798 
799 /* The block_pool functions exported for testing only.  The block pool should
800  * only be used via a state pool (see below).
801  */
802 VkResult anv_block_pool_init(struct anv_block_pool *pool,
803                              struct anv_device *device,
804                              const char *name,
805                              uint64_t start_address,
806                              uint32_t initial_size,
807                              uint32_t max_size);
808 void anv_block_pool_finish(struct anv_block_pool *pool);
809 VkResult anv_block_pool_alloc(struct anv_block_pool *pool,
810                               uint32_t block_size,
811                               int64_t *offset,
812                               uint32_t *padding);
813 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
814 size);
815 
816 struct anv_state_pool_params {
817    const char *name;
818    uint64_t    base_address;
819    int64_t     start_offset;
820    uint32_t    block_size;
821    uint32_t    max_size;
822 };
823 
824 VkResult anv_state_pool_init(struct anv_state_pool *pool,
825                              struct anv_device *device,
826                              const struct anv_state_pool_params *params);
827 void anv_state_pool_finish(struct anv_state_pool *pool);
828 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
829                                       uint32_t state_size, uint32_t alignment);
830 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
831 
832 static inline struct anv_address
anv_state_pool_state_address(struct anv_state_pool * pool,struct anv_state state)833 anv_state_pool_state_address(struct anv_state_pool *pool, struct anv_state state)
834 {
835    return (struct anv_address) {
836       .bo = pool->block_pool.bo,
837       .offset = state.offset - pool->start_offset,
838    };
839 }
840 
841 void anv_state_stream_init(struct anv_state_stream *stream,
842                            struct anv_state_pool *state_pool,
843                            uint32_t block_size);
844 void anv_state_stream_finish(struct anv_state_stream *stream);
845 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
846                                         uint32_t size, uint32_t alignment);
847 
848 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
849                                       struct anv_state_pool *parent,
850                                       uint32_t count, uint32_t size,
851                                       uint32_t alignment);
852 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
853 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
854 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
855                                   struct anv_state state);
856 
857 VkResult anv_state_table_init(struct anv_state_table *table,
858                              struct anv_device *device,
859                              uint32_t initial_entries);
860 void anv_state_table_finish(struct anv_state_table *table);
861 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
862                              uint32_t count);
863 void anv_free_list_push(union anv_free_list *list,
864                         struct anv_state_table *table,
865                         uint32_t idx, uint32_t count);
866 struct anv_state* anv_free_list_pop(union anv_free_list *list,
867                                     struct anv_state_table *table);
868 
869 
870 static inline struct anv_state *
anv_state_table_get(struct anv_state_table * table,uint32_t idx)871 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
872 {
873    return &table->map[idx].state;
874 }
875 /**
876  * Implements a pool of re-usable BOs.  The interface is identical to that
877  * of block_pool except that each block is its own BO.
878  */
879 struct anv_bo_pool {
880    const char *name;
881 
882    struct anv_device *device;
883 
884    enum anv_bo_alloc_flags bo_alloc_flags;
885 
886    struct util_sparse_array_free_list free_list[16];
887 };
888 
889 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
890                       const char *name, enum anv_bo_alloc_flags alloc_flags);
891 void anv_bo_pool_finish(struct anv_bo_pool *pool);
892 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
893                            struct anv_bo **bo_out);
894 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
895 
896 struct anv_scratch_pool {
897    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
898    struct anv_bo *bos[16][MESA_SHADER_STAGES];
899    uint32_t surfs[16];
900    struct anv_state surf_states[16];
901 };
902 
903 void anv_scratch_pool_init(struct anv_device *device,
904                            struct anv_scratch_pool *pool);
905 void anv_scratch_pool_finish(struct anv_device *device,
906                              struct anv_scratch_pool *pool);
907 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
908                                       struct anv_scratch_pool *pool,
909                                       gl_shader_stage stage,
910                                       unsigned per_thread_scratch);
911 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
912                                    struct anv_scratch_pool *pool,
913                                    unsigned per_thread_scratch);
914 
915 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
916 struct anv_bo_cache {
917    struct util_sparse_array bo_map;
918    pthread_mutex_t mutex;
919 };
920 
921 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
922                            struct anv_device *device);
923 void anv_bo_cache_finish(struct anv_bo_cache *cache);
924 
925 struct anv_queue_family {
926    /* Standard bits passed on to the client */
927    VkQueueFlags   queueFlags;
928    uint32_t       queueCount;
929 
930    enum intel_engine_class engine_class;
931 };
932 
933 #define ANV_MAX_QUEUE_FAMILIES 5
934 
935 struct anv_memory_type {
936    /* Standard bits passed on to the client */
937    VkMemoryPropertyFlags   propertyFlags;
938    uint32_t                heapIndex;
939 };
940 
941 struct anv_memory_heap {
942    /* Standard bits passed on to the client */
943    VkDeviceSize      size;
944    VkMemoryHeapFlags flags;
945 
946    /** Driver-internal book-keeping.
947     *
948     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
949     */
950    alignas(8) VkDeviceSize used;
951 
952    bool              is_local_mem;
953 };
954 
955 struct anv_memregion {
956    const struct intel_memory_class_instance *region;
957    uint64_t size;
958    uint64_t available;
959 };
960 
961 enum anv_timestamp_capture_type {
962     ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE,
963     ANV_TIMESTAMP_CAPTURE_END_OF_PIPE,
964     ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
965     ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
966     ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
967 };
968 
969 struct anv_physical_device {
970     struct vk_physical_device                   vk;
971 
972     /* Link in anv_instance::physical_devices */
973     struct list_head                            link;
974 
975     struct anv_instance *                       instance;
976     char                                        path[20];
977     struct intel_device_info                      info;
978 
979     bool                                        video_decode_enabled;
980 
981     struct brw_compiler *                       compiler;
982     struct isl_device                           isl_dev;
983     struct intel_perf_config *                    perf;
984     /*
985      * Number of commands required to implement a performance query begin +
986      * end.
987      */
988     uint32_t                                    n_perf_query_commands;
989     bool                                        has_exec_async;
990     bool                                        has_exec_capture;
991     VkQueueGlobalPriorityKHR                    max_context_priority;
992     uint64_t                                    gtt_size;
993 
994     bool                                        always_use_bindless;
995     bool                                        use_call_secondary;
996 
997     /** True if we can use timeline semaphores through execbuf */
998     bool                                        has_exec_timeline;
999 
1000     /** True if we can read the GPU timestamp register
1001      *
1002      * When running in a virtual context, the timestamp register is unreadable
1003      * on Gfx12+.
1004      */
1005     bool                                        has_reg_timestamp;
1006 
1007     /** True if we can create protected contexts. */
1008     bool                                        has_protected_contexts;
1009 
1010     /** Whether the i915 driver has the ability to create VM objects */
1011     bool                                        has_vm_control;
1012 
1013     /** True if we have the means to do sparse binding (e.g., a Kernel driver
1014      * a vm_bind ioctl).
1015      */
1016     bool                                        has_sparse;
1017     bool                                        sparse_uses_trtt;
1018 
1019     /** True if HW supports ASTC LDR */
1020     bool                                        has_astc_ldr;
1021     /** True if denorms in void extents should be flushed to zero */
1022     bool                                        flush_astc_ldr_void_extent_denorms;
1023     /** True if ASTC LDR is supported via emulation */
1024     bool                                        emu_astc_ldr;
1025     /* true if FCV optimization should be disabled. */
1026     bool                                        disable_fcv;
1027     /**/
1028     bool                                        uses_ex_bso;
1029 
1030     bool                                        always_flush_cache;
1031 
1032     /** True if application memory is allocated with extra AUX memory
1033      *
1034      * Applications quite often pool image allocations together in a single
1035      * VkDeviceMemory object. On platforms like MTL, the alignment of images
1036      * with compression mapped through the AUX translation tables is large :
1037      * 1MB. This can create a lot of wasted space in the application memory
1038      * objects.
1039      *
1040      * To workaround this problem, we allocate CCS data at the end of
1041      * VkDeviceMemory objects. This would not work well for TGL-like platforms
1042      * because the AUX translation tables also contain the format of the
1043      * images, but on MTL the HW ignore those values. So we can share the AUX
1044      * TT entries between different images without problem.
1045      *
1046      * This should be only true for platforms with AUX TT.
1047      */
1048     bool                                         alloc_aux_tt_mem;
1049 
1050     /**
1051      * True if the descriptors buffers are holding one of the following :
1052      *    - anv_sampled_image_descriptor
1053      *    - anv_storage_image_descriptor
1054      *    - anv_address_range_descriptor
1055      *
1056      * Accessing the descriptors in a bindless fashion from the shader
1057      * requires an indirection in the shader, first fetch one of the structure
1058      * listed above from the descriptor buffer, then emit the send message to
1059      * the fixed function (sampler, dataport, etc...) with the handle fetched
1060      * above.
1061      *
1062      * We need to do things this way prior to DG2 because the bindless surface
1063      * state space is limited to 64Mb and some application will allocate more
1064      * than what HW can support. On DG2+ we get 4Gb of bindless surface state
1065      * and so we can reference directly RENDER_SURFACE_STATE/SAMPLER_STATE
1066      * structures instead.
1067      */
1068     bool                                        indirect_descriptors;
1069 
1070     bool                                        uses_relocs;
1071 
1072     /** Can the platform support cooperative matrices and is it enabled? */
1073     bool                                        has_cooperative_matrix;
1074 
1075     struct {
1076       uint32_t                                  family_count;
1077       struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
1078     } queue;
1079 
1080     struct {
1081       uint32_t                                  type_count;
1082       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
1083       uint32_t                                  heap_count;
1084       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
1085 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1086       bool                                      need_flush;
1087 #endif
1088     } memory;
1089 
1090     struct {
1091        /**
1092         * General state pool
1093         */
1094        struct anv_va_range                      general_state_pool;
1095        /**
1096         * Low 32bit heap
1097         */
1098        struct anv_va_range                      low_heap;
1099        /**
1100         * Binding table pool
1101         */
1102        struct anv_va_range                      binding_table_pool;
1103        /**
1104         * Internal surface states for blorp & push descriptors.
1105         */
1106        struct anv_va_range                      internal_surface_state_pool;
1107        /**
1108         * Scratch surfaces (overlaps with internal_surface_state_pool).
1109         */
1110        struct anv_va_range                      scratch_surface_state_pool;
1111        /**
1112         * Bindless surface states (indirectly referred to by indirect
1113         * descriptors or for direct descriptors)
1114         */
1115        struct anv_va_range                      bindless_surface_state_pool;
1116        /**
1117         * Dynamic state pool
1118         */
1119        struct anv_va_range                      dynamic_state_pool;
1120        /**
1121         * Sampler state pool
1122         */
1123        struct anv_va_range                      sampler_state_pool;
1124        /**
1125         * Indirect descriptor pool
1126         */
1127        struct anv_va_range                      indirect_descriptor_pool;
1128        /**
1129         * Indirect push descriptor pool
1130         */
1131        struct anv_va_range                      indirect_push_descriptor_pool;
1132        /**
1133         * Instruction state pool
1134         */
1135        struct anv_va_range                      instruction_state_pool;
1136        /**
1137         * Client heap
1138         */
1139        struct anv_va_range                      high_heap;
1140        struct anv_va_range                      trtt;
1141     } va;
1142 
1143     /* Either we have a single vram region and it's all mappable, or we have
1144      * both mappable & non-mappable parts. System memory is always available.
1145      */
1146     struct anv_memregion                        vram_mappable;
1147     struct anv_memregion                        vram_non_mappable;
1148     struct anv_memregion                        sys;
1149     uint8_t                                     driver_build_sha1[20];
1150     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
1151     uint8_t                                     driver_uuid[VK_UUID_SIZE];
1152     uint8_t                                     device_uuid[VK_UUID_SIZE];
1153     uint8_t                                     rt_uuid[VK_UUID_SIZE];
1154 
1155     /* Maximum amount of scratch space used by all the GRL kernels */
1156     uint32_t                                    max_grl_scratch_size;
1157 
1158     struct vk_sync_type                         sync_syncobj_type;
1159     struct vk_sync_timeline_type                sync_timeline_type;
1160     const struct vk_sync_type *                 sync_types[4];
1161 
1162     struct wsi_device                       wsi_device;
1163     int                                         local_fd;
1164     bool                                        has_local;
1165     int64_t                                     local_major;
1166     int64_t                                     local_minor;
1167     int                                         master_fd;
1168     bool                                        has_master;
1169     int64_t                                     master_major;
1170     int64_t                                     master_minor;
1171     struct intel_query_engine_info *            engine_info;
1172 
1173     void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
1174                                enum anv_timestamp_capture_type, void *);
1175     struct intel_measure_device                 measure_device;
1176 };
1177 
1178 static inline uint32_t
anv_physical_device_bindless_heap_size(const struct anv_physical_device * device)1179 anv_physical_device_bindless_heap_size(const struct anv_physical_device *device)
1180 {
1181    return device->uses_ex_bso ?
1182       128 * 1024 * 1024 /* 128 MiB */ :
1183       64 * 1024 * 1024 /* 64 MiB */;
1184 }
1185 
1186 static inline bool
anv_physical_device_has_vram(const struct anv_physical_device * device)1187 anv_physical_device_has_vram(const struct anv_physical_device *device)
1188 {
1189    return device->vram_mappable.size > 0;
1190 }
1191 
1192 struct anv_instance {
1193     struct vk_instance                          vk;
1194 
1195     struct driOptionCache                       dri_options;
1196     struct driOptionCache                       available_dri_options;
1197 
1198     int                                         mesh_conv_prim_attrs_to_vert_attrs;
1199     bool                                        enable_tbimr;
1200     bool                                        external_memory_implicit_sync;
1201 
1202     /**
1203      * Workarounds for game bugs.
1204      */
1205     uint8_t                                     assume_full_subgroups;
1206     bool                                        limit_trig_input_range;
1207     bool                                        sample_mask_out_opengl_behaviour;
1208     bool                                        force_filter_addr_rounding;
1209     bool                                        fp64_workaround_enabled;
1210     float                                       lower_depth_range_rate;
1211     unsigned                                    generated_indirect_threshold;
1212     unsigned                                    generated_indirect_ring_threshold;
1213     unsigned                                    query_clear_with_blorp_threshold;
1214     unsigned                                    query_copy_with_shader_threshold;
1215     unsigned                                    force_vk_vendor;
1216     bool                                        has_fake_sparse;
1217     bool                                        disable_fcv;
1218 
1219     /* HW workarounds */
1220     bool                                        no_16bit;
1221     bool                                        intel_enable_wa_14018912822;
1222 };
1223 
1224 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1225 void anv_finish_wsi(struct anv_physical_device *physical_device);
1226 
1227 struct anv_queue {
1228    struct vk_queue                           vk;
1229 
1230    struct anv_device *                       device;
1231 
1232    const struct anv_queue_family *           family;
1233 
1234    struct intel_batch_decode_ctx *           decoder;
1235 
1236    union {
1237       uint32_t                               exec_flags; /* i915 */
1238       uint32_t                               context_id; /* i915 */
1239       uint32_t                               exec_queue_id; /* Xe */
1240    };
1241 
1242    /** Context/Engine id which executes companion RCS command buffer */
1243    uint32_t                                  companion_rcs_id;
1244 
1245    /** Synchronization object for debug purposes (DEBUG_SYNC) */
1246    struct vk_sync                           *sync;
1247 
1248    /** Companion synchronization object
1249     *
1250     * Vulkan command buffers can be destroyed as soon as their lifecycle moved
1251     * from the Pending state to the Invalid/Executable state. This transition
1252     * happens when the VkFence/VkSemaphore associated with the completion of
1253     * the command buffer work is signaled.
1254     *
1255     * When we're using a companion command buffer to execute part of another
1256     * command buffer, we need to tie the 2 work submissions together to ensure
1257     * when the associated VkFence/VkSemaphore is signaled, both command
1258     * buffers are actually unused by the HW. To do this, we run an empty batch
1259     * buffer that we use to signal after both submissions :
1260     *
1261     *   CCS -->    main   ---> empty_batch (with wait on companion) --> signal
1262     *   RCS --> companion -|
1263     *
1264     * When companion batch completes, it signals companion_sync and allow
1265     * empty_batch to execute. Since empty_batch is running on the main engine,
1266     * we're guaranteed that upon completion both main & companion command
1267     * buffers are not used by HW anymore.
1268     */
1269    struct vk_sync                           *companion_sync;
1270 
1271    struct intel_ds_queue                     ds;
1272 };
1273 
1274 struct nir_xfb_info;
1275 struct anv_pipeline_bind_map;
1276 struct anv_push_descriptor_info;
1277 enum anv_dynamic_push_bits;
1278 
1279 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1280 
1281 struct anv_shader_bin *
1282 anv_device_search_for_kernel(struct anv_device *device,
1283                              struct vk_pipeline_cache *cache,
1284                              const void *key_data, uint32_t key_size,
1285                              bool *user_cache_bit);
1286 
1287 struct anv_shader_upload_params;
1288 
1289 struct anv_shader_bin *
1290 anv_device_upload_kernel(struct anv_device *device,
1291                          struct vk_pipeline_cache *cache,
1292                          const struct anv_shader_upload_params *params);
1293 
1294 struct nir_shader;
1295 struct nir_shader_compiler_options;
1296 
1297 struct nir_shader *
1298 anv_device_search_for_nir(struct anv_device *device,
1299                           struct vk_pipeline_cache *cache,
1300                           const struct nir_shader_compiler_options *nir_options,
1301                           unsigned char sha1_key[20],
1302                           void *mem_ctx);
1303 
1304 void
1305 anv_device_upload_nir(struct anv_device *device,
1306                       struct vk_pipeline_cache *cache,
1307                       const struct nir_shader *nir,
1308                       unsigned char sha1_key[20]);
1309 
1310 void
1311 anv_load_fp64_shader(struct anv_device *device);
1312 
1313 /**
1314  * This enum tracks the various HW instructions that hold graphics state
1315  * needing to be reprogrammed. Some instructions are grouped together as they
1316  * pretty much need to be emitted together (like 3DSTATE_URB_*).
1317  *
1318  * Not all bits apply to all platforms. We build a dirty state based on
1319  * enabled extensions & generation on anv_device.
1320  */
1321 enum anv_gfx_state_bits {
1322    /* Pipeline states */
1323    ANV_GFX_STATE_URB, /* All legacy stages, including mesh */
1324    ANV_GFX_STATE_VF_STATISTICS,
1325    ANV_GFX_STATE_VF_SGVS,
1326    ANV_GFX_STATE_VF_SGVS_2,
1327    ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
1328    ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
1329    ANV_GFX_STATE_PRIMITIVE_REPLICATION,
1330    ANV_GFX_STATE_MULTISAMPLE,
1331    ANV_GFX_STATE_SBE,
1332    ANV_GFX_STATE_SBE_SWIZ,
1333    ANV_GFX_STATE_SO_DECL_LIST,
1334    ANV_GFX_STATE_VS,
1335    ANV_GFX_STATE_HS,
1336    ANV_GFX_STATE_DS,
1337    ANV_GFX_STATE_GS,
1338    ANV_GFX_STATE_PS,
1339    ANV_GFX_STATE_SBE_MESH,
1340    ANV_GFX_STATE_CLIP_MESH,
1341    ANV_GFX_STATE_MESH_CONTROL,
1342    ANV_GFX_STATE_MESH_SHADER,
1343    ANV_GFX_STATE_MESH_DISTRIB,
1344    ANV_GFX_STATE_TASK_CONTROL,
1345    ANV_GFX_STATE_TASK_SHADER,
1346    ANV_GFX_STATE_TASK_REDISTRIB,
1347    /* Dynamic states */
1348    ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */
1349    ANV_GFX_STATE_BLEND_STATE_POINTERS, /* The pointer to the dynamic state */
1350    ANV_GFX_STATE_CLIP,
1351    ANV_GFX_STATE_CC_STATE,
1352    ANV_GFX_STATE_CPS,
1353    ANV_GFX_STATE_DEPTH_BOUNDS,
1354    ANV_GFX_STATE_INDEX_BUFFER,
1355    ANV_GFX_STATE_LINE_STIPPLE,
1356    ANV_GFX_STATE_PS_BLEND,
1357    ANV_GFX_STATE_RASTER,
1358    ANV_GFX_STATE_SAMPLE_MASK,
1359    ANV_GFX_STATE_SAMPLE_PATTERN,
1360    ANV_GFX_STATE_SCISSOR,
1361    ANV_GFX_STATE_SF,
1362    ANV_GFX_STATE_STREAMOUT,
1363    ANV_GFX_STATE_TE,
1364    ANV_GFX_STATE_VERTEX_INPUT,
1365    ANV_GFX_STATE_VF,
1366    ANV_GFX_STATE_VF_TOPOLOGY,
1367    ANV_GFX_STATE_VFG,
1368    ANV_GFX_STATE_VIEWPORT_CC,
1369    ANV_GFX_STATE_VIEWPORT_SF_CLIP,
1370    ANV_GFX_STATE_WM,
1371    ANV_GFX_STATE_WM_DEPTH_STENCIL,
1372    ANV_GFX_STATE_PS_EXTRA,
1373    ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
1374    ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
1375    ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
1376 
1377    ANV_GFX_STATE_MAX,
1378 };
1379 
1380 const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state);
1381 
1382 /* This structure tracks the values to program in HW instructions for
1383  * corresponding to dynamic states of the Vulkan API. Only fields that need to
1384  * be reemitted outside of the VkPipeline object are tracked here.
1385  */
1386 struct anv_gfx_dynamic_state {
1387    /* 3DSTATE_BLEND_STATE_POINTERS */
1388    struct {
1389       bool AlphaToCoverageEnable;
1390       bool AlphaToOneEnable;
1391       bool IndependentAlphaBlendEnable;
1392       struct {
1393          bool     WriteDisableAlpha;
1394          bool     WriteDisableRed;
1395          bool     WriteDisableGreen;
1396          bool     WriteDisableBlue;
1397 
1398          uint32_t LogicOpFunction;
1399          bool     LogicOpEnable;
1400 
1401          bool     ColorBufferBlendEnable;
1402          uint32_t ColorClampRange;
1403          bool     PreBlendColorClampEnable;
1404          bool     PostBlendColorClampEnable;
1405          uint32_t SourceBlendFactor;
1406          uint32_t DestinationBlendFactor;
1407          uint32_t ColorBlendFunction;
1408          uint32_t SourceAlphaBlendFactor;
1409          uint32_t DestinationAlphaBlendFactor;
1410          uint32_t AlphaBlendFunction;
1411       } rts[MAX_RTS];
1412   } blend;
1413 
1414    /* 3DSTATE_CC_STATE_POINTERS */
1415    struct {
1416       float BlendConstantColorRed;
1417       float BlendConstantColorGreen;
1418       float BlendConstantColorBlue;
1419       float BlendConstantColorAlpha;
1420    } cc;
1421 
1422    /* 3DSTATE_CLIP */
1423    struct {
1424       uint32_t APIMode;
1425       uint32_t ViewportXYClipTestEnable;
1426       uint32_t MaximumVPIndex;
1427       uint32_t TriangleStripListProvokingVertexSelect;
1428       uint32_t LineStripListProvokingVertexSelect;
1429       uint32_t TriangleFanProvokingVertexSelect;
1430    } clip;
1431 
1432    /* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */
1433    struct {
1434       /* Gfx11 */
1435       uint32_t CoarsePixelShadingMode;
1436       float    MinCPSizeX;
1437       float    MinCPSizeY;
1438       /* Gfx12+ */
1439       uint32_t CoarsePixelShadingStateArrayPointer;
1440    } cps;
1441 
1442    /* 3DSTATE_DEPTH_BOUNDS */
1443    struct {
1444       bool     DepthBoundsTestEnable;
1445       float    DepthBoundsTestMinValue;
1446       float    DepthBoundsTestMaxValue;
1447    } db;
1448 
1449    /* 3DSTATE_GS */
1450    struct {
1451       uint32_t ReorderMode;
1452    } gs;
1453 
1454    /* 3DSTATE_LINE_STIPPLE */
1455    struct {
1456       uint32_t LineStipplePattern;
1457       float    LineStippleInverseRepeatCount;
1458       uint32_t LineStippleRepeatCount;
1459    } ls;
1460 
1461    /* 3DSTATE_PS_EXTRA */
1462    struct {
1463       bool PixelShaderKillsPixel;
1464    } ps_extra;
1465 
1466    /* 3DSTATE_PS_BLEND */
1467    struct {
1468       bool     HasWriteableRT;
1469       bool     ColorBufferBlendEnable;
1470       uint32_t SourceAlphaBlendFactor;
1471       uint32_t DestinationAlphaBlendFactor;
1472       uint32_t SourceBlendFactor;
1473       uint32_t DestinationBlendFactor;
1474       bool     AlphaTestEnable;
1475       bool     IndependentAlphaBlendEnable;
1476       bool     AlphaToCoverageEnable;
1477    } ps_blend;
1478 
1479    /* 3DSTATE_RASTER */
1480    struct {
1481       uint32_t APIMode;
1482       bool     DXMultisampleRasterizationEnable;
1483       bool     AntialiasingEnable;
1484       uint32_t CullMode;
1485       uint32_t FrontWinding;
1486       bool     GlobalDepthOffsetEnableSolid;
1487       bool     GlobalDepthOffsetEnableWireframe;
1488       bool     GlobalDepthOffsetEnablePoint;
1489       float    GlobalDepthOffsetConstant;
1490       float    GlobalDepthOffsetScale;
1491       float    GlobalDepthOffsetClamp;
1492       uint32_t FrontFaceFillMode;
1493       uint32_t BackFaceFillMode;
1494       bool     ViewportZFarClipTestEnable;
1495       bool     ViewportZNearClipTestEnable;
1496       bool     ConservativeRasterizationEnable;
1497    } raster;
1498 
1499    /* 3DSTATE_SCISSOR_STATE_POINTERS */
1500    struct {
1501       uint32_t count;
1502       struct {
1503          uint32_t ScissorRectangleYMin;
1504          uint32_t ScissorRectangleXMin;
1505          uint32_t ScissorRectangleYMax;
1506          uint32_t ScissorRectangleXMax;
1507       } elem[MAX_SCISSORS];
1508    } scissor;
1509 
1510    /* 3DSTATE_SF */
1511    struct {
1512       float    LineWidth;
1513       uint32_t TriangleStripListProvokingVertexSelect;
1514       uint32_t LineStripListProvokingVertexSelect;
1515       uint32_t TriangleFanProvokingVertexSelect;
1516       bool     LegacyGlobalDepthBiasEnable;
1517    } sf;
1518 
1519    /* 3DSTATE_STREAMOUT */
1520    struct {
1521       bool     RenderingDisable;
1522       uint32_t RenderStreamSelect;
1523       uint32_t ReorderMode;
1524       uint32_t ForceRendering;
1525    } so;
1526 
1527    /* 3DSTATE_SAMPLE_MASK */
1528    struct {
1529       uint32_t SampleMask;
1530    } sm;
1531 
1532    /* 3DSTATE_TE */
1533    struct {
1534       uint32_t OutputTopology;
1535    } te;
1536 
1537    /* 3DSTATE_VF */
1538    struct {
1539       bool     IndexedDrawCutIndexEnable;
1540       uint32_t CutIndex;
1541    } vf;
1542 
1543    /* 3DSTATE_VFG */
1544    struct {
1545       uint32_t DistributionMode;
1546       bool     ListCutIndexEnable;
1547    } vfg;
1548 
1549    /* 3DSTATE_VF_TOPOLOGY */
1550    struct {
1551       uint32_t PrimitiveTopologyType;
1552    } vft;
1553 
1554    /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
1555    struct {
1556       uint32_t count;
1557       struct {
1558          float MinimumDepth;
1559          float MaximumDepth;
1560       } elem[MAX_VIEWPORTS];
1561    } vp_cc;
1562 
1563    /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
1564    struct {
1565       uint32_t count;
1566       struct {
1567          float ViewportMatrixElementm00;
1568          float ViewportMatrixElementm11;
1569          float ViewportMatrixElementm22;
1570          float ViewportMatrixElementm30;
1571          float ViewportMatrixElementm31;
1572          float ViewportMatrixElementm32;
1573          float XMinClipGuardband;
1574          float XMaxClipGuardband;
1575          float YMinClipGuardband;
1576          float YMaxClipGuardband;
1577          float XMinViewPort;
1578          float XMaxViewPort;
1579          float YMinViewPort;
1580          float YMaxViewPort;
1581       } elem[MAX_VIEWPORTS];
1582    } vp_sf_clip;
1583 
1584    /* 3DSTATE_WM */
1585    struct {
1586       uint32_t ForceThreadDispatchEnable;
1587       bool     LineStippleEnable;
1588    } wm;
1589 
1590    /* 3DSTATE_WM_DEPTH_STENCIL */
1591    struct {
1592       bool     DoubleSidedStencilEnable;
1593       uint32_t StencilTestMask;
1594       uint32_t StencilWriteMask;
1595       uint32_t BackfaceStencilTestMask;
1596       uint32_t BackfaceStencilWriteMask;
1597       uint32_t StencilReferenceValue;
1598       uint32_t BackfaceStencilReferenceValue;
1599       bool     DepthTestEnable;
1600       bool     DepthBufferWriteEnable;
1601       uint32_t DepthTestFunction;
1602       bool     StencilTestEnable;
1603       bool     StencilBufferWriteEnable;
1604       uint32_t StencilFailOp;
1605       uint32_t StencilPassDepthPassOp;
1606       uint32_t StencilPassDepthFailOp;
1607       uint32_t StencilTestFunction;
1608       uint32_t BackfaceStencilFailOp;
1609       uint32_t BackfaceStencilPassDepthPassOp;
1610       uint32_t BackfaceStencilPassDepthFailOp;
1611       uint32_t BackfaceStencilTestFunction;
1612    } ds;
1613 
1614    /* 3DSTATE_TBIMR_TILE_PASS_INFO */
1615    struct {
1616       unsigned TileRectangleHeight;
1617       unsigned TileRectangleWidth;
1618       unsigned VerticalTileCount;
1619       unsigned HorizontalTileCount;
1620       unsigned TBIMRBatchSize;
1621       unsigned TileBoxCheck;
1622    } tbimr;
1623    bool use_tbimr;
1624 
1625    bool pma_fix;
1626 
1627    BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX);
1628 };
1629 
1630 enum anv_internal_kernel_name {
1631    ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
1632    ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
1633    ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
1634    ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
1635 
1636    ANV_INTERNAL_KERNEL_COUNT,
1637 };
1638 
1639 enum anv_rt_bvh_build_method {
1640    ANV_BVH_BUILD_METHOD_TRIVIAL,
1641    ANV_BVH_BUILD_METHOD_NEW_SAH,
1642 };
1643 
1644 struct anv_device_astc_emu {
1645     struct vk_texcompress_astc_state           *texcompress;
1646 
1647     /* for flush_astc_ldr_void_extent_denorms */
1648     simple_mtx_t mutex;
1649     VkDescriptorSetLayout ds_layout;
1650     VkPipelineLayout pipeline_layout;
1651     VkPipeline pipeline;
1652 };
1653 
1654 struct anv_trtt_batch_bo {
1655    struct anv_bo *bo;
1656    uint32_t size;
1657 
1658    /* Once device->trtt.timeline_handle signals timeline_val as complete we
1659     * can free this struct and its members.
1660     */
1661    uint64_t timeline_val;
1662 
1663    /* Part of device->trtt.in_flight_batches. */
1664    struct list_head link;
1665 };
1666 
1667 struct anv_device {
1668     struct vk_device                            vk;
1669 
1670     struct anv_physical_device *                physical;
1671     const struct intel_device_info *            info;
1672     const struct anv_kmd_backend *              kmd_backend;
1673     struct isl_device                           isl_dev;
1674     union {
1675        uint32_t                                 context_id; /* i915 */
1676        uint32_t                                 vm_id; /* Xe */
1677     };
1678     int                                         fd;
1679 
1680     pthread_mutex_t                             vma_mutex;
1681     struct util_vma_heap                        vma_lo;
1682     struct util_vma_heap                        vma_hi;
1683     struct util_vma_heap                        vma_desc;
1684     struct util_vma_heap                        vma_samplers;
1685     struct util_vma_heap                        vma_trtt;
1686 
1687     /** List of all anv_device_memory objects */
1688     struct list_head                            memory_objects;
1689 
1690     /** List of anv_image objects with a private binding for implicit CCS */
1691     struct list_head                            image_private_objects;
1692 
1693     /** Memory pool for batch buffers */
1694     struct anv_bo_pool                          batch_bo_pool;
1695     /** Memory pool for utrace timestamp buffers */
1696     struct anv_bo_pool                          utrace_bo_pool;
1697     /** Memory pool for BVH build buffers */
1698     struct anv_bo_pool                          bvh_bo_pool;
1699 
1700     struct anv_bo_cache                         bo_cache;
1701 
1702     struct anv_state_pool                       general_state_pool;
1703     struct anv_state_pool                       dynamic_state_pool;
1704     struct anv_state_pool                       instruction_state_pool;
1705     struct anv_state_pool                       binding_table_pool;
1706     struct anv_state_pool                       scratch_surface_state_pool;
1707     struct anv_state_pool                       internal_surface_state_pool;
1708     struct anv_state_pool                       bindless_surface_state_pool;
1709     struct anv_state_pool                       indirect_push_descriptor_pool;
1710 
1711     struct anv_state_reserved_pool              custom_border_colors;
1712 
1713     /** BO used for various workarounds
1714      *
1715      * There are a number of workarounds on our hardware which require writing
1716      * data somewhere and it doesn't really matter where.  For that, we use
1717      * this BO and just write to the first dword or so.
1718      *
1719      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1720      * For that, we use the high bytes (>= 1024) of the workaround BO.
1721      */
1722     struct anv_bo *                             workaround_bo;
1723     struct anv_address                          workaround_address;
1724 
1725     /**
1726      * Workarounds for game bugs.
1727      */
1728     struct {
1729        struct set *                             doom64_images;
1730     } workarounds;
1731 
1732     struct anv_bo *                             trivial_batch_bo;
1733     struct anv_state                            null_surface_state;
1734 
1735     /**
1736      * NULL surface state copy stored in host memory for use as a fast
1737      * memcpy() source.
1738      */
1739     char                                        host_null_surface_state[ANV_SURFACE_STATE_SIZE];
1740 
1741     struct vk_pipeline_cache *                  default_pipeline_cache;
1742     struct vk_pipeline_cache *                  internal_cache;
1743     struct blorp_context                        blorp;
1744 
1745     struct anv_state                            border_colors;
1746 
1747     struct anv_state                            slice_hash;
1748 
1749     /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1750      *
1751      * We need to emit CPS_STATE structures for each viewport accessible by a
1752      * pipeline. So rather than write many identical CPS_STATE structures
1753      * dynamically, we can enumerate all possible combinaisons and then just
1754      * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1755      * array.
1756      */
1757     struct anv_state                            cps_states;
1758 
1759     uint32_t                                    queue_count;
1760     struct anv_queue  *                         queues;
1761 
1762     struct anv_scratch_pool                     scratch_pool;
1763     struct anv_bo                              *rt_scratch_bos[16];
1764     struct anv_bo                              *btd_fifo_bo;
1765     struct anv_address                          rt_uuid_addr;
1766 
1767     /** A pre packed VERTEX_ELEMENT_STATE feeding 0s to the VS stage
1768      *
1769      * For use when a pipeline has no VS input
1770      */
1771     uint32_t                                    empty_vs_input[2];
1772 
1773     bool                                        robust_buffer_access;
1774 
1775     uint32_t                                    protected_session_id;
1776 
1777     /** Shadow ray query BO
1778      *
1779      * The ray_query_bo only holds the current ray being traced. When using
1780      * more than 1 ray query per thread, we cannot fit all the queries in
1781      * there, so we need a another buffer to hold query data that is not
1782      * currently being used by the HW for tracing, similar to a scratch space.
1783      *
1784      * The size of the shadow buffer depends on the number of queries per
1785      * shader.
1786      */
1787     struct anv_bo                              *ray_query_shadow_bos[16];
1788     /** Ray query buffer used to communicated with HW unit.
1789      */
1790     struct anv_bo                              *ray_query_bo;
1791 
1792     struct anv_shader_bin                      *rt_trampoline;
1793     struct anv_shader_bin                      *rt_trivial_return;
1794 
1795     enum anv_rt_bvh_build_method                bvh_build_method;
1796 
1797     /** Draw generation shader
1798      *
1799      * Generates direct draw calls out of indirect parameters. Used to
1800      * workaround slowness with indirect draw calls.
1801      */
1802     struct anv_shader_bin                      *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
1803     const struct intel_l3_config               *internal_kernels_l3_config;
1804 
1805     pthread_mutex_t                             mutex;
1806     pthread_cond_t                              queue_submit;
1807 
1808     struct intel_batch_decode_ctx               decoder[ANV_MAX_QUEUE_FAMILIES];
1809     /*
1810      * When decoding a anv_cmd_buffer, we might need to search for BOs through
1811      * the cmd_buffer's list.
1812      */
1813     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
1814 
1815     int                                         perf_fd; /* -1 if no opened */
1816     uint64_t                                    perf_metric; /* 0 if unset */
1817 
1818     struct intel_aux_map_context                *aux_map_ctx;
1819 
1820     const struct intel_l3_config                *l3_config;
1821 
1822     struct intel_debug_block_frame              *debug_frame_desc;
1823 
1824     struct intel_ds_device                       ds;
1825 
1826     nir_shader                                  *fp64_nir;
1827 
1828     uint32_t                                    draw_call_count;
1829     struct anv_state                            breakpoint;
1830 #if DETECT_OS_ANDROID
1831     struct u_gralloc                            *u_gralloc;
1832 #endif
1833 
1834     /** Precompute all dirty graphics bits
1835      *
1836      * Depending on platforms, some of the dirty bits don't apply (for example
1837      * 3DSTATE_PRIMITIVE_REPLICATION is only Gfx12.0+). Disabling some
1838      * extensions like Mesh shaders also allow us to avoid emitting any
1839      * mesh/task related instructions (we only initialize them once at device
1840      * initialization).
1841      */
1842     BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX);
1843 
1844     /*
1845      * Command pool for companion RCS command buffer.
1846      */
1847     VkCommandPool                               companion_rcs_cmd_pool;
1848 
1849     struct anv_trtt {
1850        pthread_mutex_t mutex;
1851 
1852        /* Sometimes we need to run batches from places where we don't have a
1853         * queue coming from the API, so we use this.
1854         */
1855        struct anv_queue *queue;
1856 
1857        /* There's only one L3 table, so if l3_addr is zero that means we
1858         * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT
1859         * yet in this context).
1860         */
1861        uint64_t l3_addr;
1862 
1863        /* We don't want to access the page tables from the CPU, so just
1864         * maintain a mirror that we can use.
1865         */
1866        uint64_t *l3_mirror;
1867        uint64_t *l2_mirror;
1868 
1869        /* We keep a dynamic list of page table bos, and each bo can store
1870         * multiple page tables.
1871         */
1872        struct anv_bo **page_table_bos;
1873        int num_page_table_bos;
1874        int page_table_bos_capacity;
1875 
1876        /* These are used to keep track of space available for more page tables
1877         * within a bo.
1878         */
1879        struct anv_bo *cur_page_table_bo;
1880        uint64_t next_page_table_bo_offset;
1881 
1882        /* Timeline syncobj used to track completion of the TR-TT batch BOs. */
1883        uint32_t timeline_handle;
1884        uint64_t timeline_val;
1885 
1886        /* List of struct anv_trtt_batch_bo batches that are in flight and can
1887         * be freed once their timeline gets signaled.
1888         */
1889        struct list_head in_flight_batches;
1890     } trtt;
1891 
1892     /* This is true if the user ever bound a sparse resource to memory. This
1893      * is used for a workaround that makes every memoryBarrier flush more
1894      * things than it should. Many applications request for the sparse
1895      * featuers to be enabled but don't use them, and some create sparse
1896      * resources but never use them.
1897      */
1898     bool                                         using_sparse;
1899 
1900     struct anv_device_astc_emu                   astc_emu;
1901 
1902     struct intel_bind_timeline bind_timeline; /* Xe only */
1903 };
1904 
1905 static inline uint32_t
anv_get_first_render_queue_index(struct anv_physical_device * pdevice)1906 anv_get_first_render_queue_index(struct anv_physical_device *pdevice)
1907 {
1908    assert(pdevice != NULL);
1909 
1910    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
1911       if (pdevice->queue.families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
1912          return i;
1913       }
1914    }
1915 
1916    unreachable("Graphics capable queue family not found");
1917 }
1918 
1919 static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device * device)1920 anv_binding_table_pool_alloc(struct anv_device *device)
1921 {
1922    return anv_state_pool_alloc(&device->binding_table_pool,
1923                                device->binding_table_pool.block_size, 0);
1924 }
1925 
1926 static inline void
anv_binding_table_pool_free(struct anv_device * device,struct anv_state state)1927 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
1928 {
1929    anv_state_pool_free(&device->binding_table_pool, state);
1930 }
1931 
1932 static inline struct anv_state
anv_null_surface_state_for_binding_table(struct anv_device * device)1933 anv_null_surface_state_for_binding_table(struct anv_device *device)
1934 {
1935    struct anv_state state = device->null_surface_state;
1936    if (device->physical->indirect_descriptors) {
1937       state.offset += device->physical->va.bindless_surface_state_pool.addr -
1938                       device->physical->va.internal_surface_state_pool.addr;
1939    }
1940    return state;
1941 }
1942 
1943 static inline struct anv_state
anv_bindless_state_for_binding_table(struct anv_device * device,struct anv_state state)1944 anv_bindless_state_for_binding_table(struct anv_device *device,
1945                                      struct anv_state state)
1946 {
1947    state.offset += device->physical->va.bindless_surface_state_pool.addr -
1948                    device->physical->va.internal_surface_state_pool.addr;
1949    return state;
1950 }
1951 
1952 static inline uint32_t
anv_mocs(const struct anv_device * device,const struct anv_bo * bo,isl_surf_usage_flags_t usage)1953 anv_mocs(const struct anv_device *device,
1954          const struct anv_bo *bo,
1955          isl_surf_usage_flags_t usage)
1956 {
1957    return isl_mocs(&device->isl_dev, usage, bo && anv_bo_is_external(bo));
1958 }
1959 
1960 static inline uint32_t
anv_mocs_for_address(const struct anv_device * device,struct anv_address * addr)1961 anv_mocs_for_address(const struct anv_device *device,
1962                      struct anv_address *addr)
1963 {
1964    return anv_mocs(device, addr->bo, 0);
1965 }
1966 
1967 void anv_device_init_blorp(struct anv_device *device);
1968 void anv_device_finish_blorp(struct anv_device *device);
1969 
1970 VkResult anv_device_alloc_bo(struct anv_device *device,
1971                              const char *name, uint64_t size,
1972                              enum anv_bo_alloc_flags alloc_flags,
1973                              uint64_t explicit_address,
1974                              struct anv_bo **bo);
1975 VkResult anv_device_map_bo(struct anv_device *device,
1976                            struct anv_bo *bo,
1977                            uint64_t offset,
1978                            size_t size,
1979                            void **map_out);
1980 void anv_device_unmap_bo(struct anv_device *device,
1981                          struct anv_bo *bo,
1982                          void *map, size_t map_size);
1983 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1984                                             void *host_ptr, uint32_t size,
1985                                             enum anv_bo_alloc_flags alloc_flags,
1986                                             uint64_t client_address,
1987                                             struct anv_bo **bo_out);
1988 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1989                               enum anv_bo_alloc_flags alloc_flags,
1990                               uint64_t client_address,
1991                               struct anv_bo **bo);
1992 VkResult anv_device_export_bo(struct anv_device *device,
1993                               struct anv_bo *bo, int *fd_out);
1994 VkResult anv_device_get_bo_tiling(struct anv_device *device,
1995                                   struct anv_bo *bo,
1996                                   enum isl_tiling *tiling_out);
1997 VkResult anv_device_set_bo_tiling(struct anv_device *device,
1998                                   struct anv_bo *bo,
1999                                   uint32_t row_pitch_B,
2000                                   enum isl_tiling tiling);
2001 void anv_device_release_bo(struct anv_device *device,
2002                            struct anv_bo *bo);
2003 
anv_device_set_physical(struct anv_device * device,struct anv_physical_device * physical_device)2004 static inline void anv_device_set_physical(struct anv_device *device,
2005                                            struct anv_physical_device *physical_device)
2006 {
2007    device->physical = physical_device;
2008    device->info = &physical_device->info;
2009    device->isl_dev = physical_device->isl_dev;
2010 }
2011 
2012 static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device * device,uint32_t gem_handle)2013 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
2014 {
2015    return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
2016 }
2017 
2018 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2019                          int64_t timeout);
2020 
2021 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
2022                         const VkDeviceQueueCreateInfo *pCreateInfo,
2023                         uint32_t index_in_family);
2024 void anv_queue_finish(struct anv_queue *queue);
2025 
2026 VkResult anv_queue_submit(struct vk_queue *queue,
2027                           struct vk_queue_submit *submit);
2028 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
2029                                        struct anv_batch *batch,
2030                                        bool is_companion_rcs_batch);
2031 VkResult anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit,
2032                                      struct anv_batch *batch);
2033 
2034 static inline void
anv_trtt_batch_bo_free(struct anv_device * device,struct anv_trtt_batch_bo * trtt_bbo)2035 anv_trtt_batch_bo_free(struct anv_device *device,
2036                        struct anv_trtt_batch_bo *trtt_bbo)
2037 {
2038    anv_bo_pool_free(&device->batch_bo_pool, trtt_bbo->bo);
2039    list_del(&trtt_bbo->link);
2040    vk_free(&device->vk.alloc, trtt_bbo);
2041 }
2042 
2043 void anv_queue_trace(struct anv_queue *queue, const char *label,
2044                      bool frame, bool begin);
2045 
2046 static inline VkResult
anv_queue_post_submit(struct anv_queue * queue,VkResult submit_result)2047 anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result)
2048 {
2049    if (submit_result != VK_SUCCESS)
2050       return submit_result;
2051 
2052    VkResult result = VK_SUCCESS;
2053    if (queue->sync) {
2054       result = vk_sync_wait(&queue->device->vk, queue->sync, 0,
2055                             VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
2056       if (result != VK_SUCCESS)
2057          result = vk_queue_set_lost(&queue->vk, "sync wait failed");
2058    }
2059 
2060    return result;
2061 }
2062 
2063 void *
2064 anv_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
2065              uint64_t size);
2066 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
2067 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
2068 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
2069                        uint32_t stride, uint32_t tiling);
2070 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
2071 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
2072 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
2073 int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
2074                               uint64_t value);
2075 VkResult
2076 anv_gem_import_bo_alloc_flags_to_bo_flags(struct anv_device *device,
2077                                           struct anv_bo *bo,
2078                                           enum anv_bo_alloc_flags alloc_flags,
2079                                           uint32_t *bo_flags);
2080 const struct intel_device_info_pat_entry *
2081 anv_device_get_pat_entry(struct anv_device *device,
2082                          enum anv_bo_alloc_flags alloc_flags);
2083 
2084 uint64_t anv_vma_alloc(struct anv_device *device,
2085                        uint64_t size, uint64_t align,
2086                        enum anv_bo_alloc_flags alloc_flags,
2087                        uint64_t client_address,
2088                        struct util_vma_heap **out_vma_heap);
2089 void anv_vma_free(struct anv_device *device,
2090                   struct util_vma_heap *vma_heap,
2091                   uint64_t address, uint64_t size);
2092 
2093 struct anv_reloc_list {
2094    bool                                         uses_relocs;
2095    uint32_t                                     dep_words;
2096    BITSET_WORD *                                deps;
2097    const VkAllocationCallbacks                  *alloc;
2098 };
2099 
2100 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
2101                              const VkAllocationCallbacks *alloc,
2102                              bool uses_relocs);
2103 void anv_reloc_list_finish(struct anv_reloc_list *list);
2104 
2105 VkResult
2106 anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
2107 
2108 static inline VkResult
anv_reloc_list_add_bo(struct anv_reloc_list * list,struct anv_bo * target_bo)2109 anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
2110 {
2111    return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
2112 }
2113 
2114 VkResult anv_reloc_list_append(struct anv_reloc_list *list,
2115                                struct anv_reloc_list *other);
2116 
2117 struct anv_batch_bo {
2118    /* Link in the anv_cmd_buffer.owned_batch_bos list */
2119    struct list_head                             link;
2120 
2121    struct anv_bo *                              bo;
2122 
2123    /* Bytes actually consumed in this batch BO */
2124    uint32_t                                     length;
2125 
2126    /* When this batch BO is used as part of a primary batch buffer, this
2127     * tracked whether it is chained to another primary batch buffer.
2128     *
2129     * If this is the case, the relocation list's last entry points the
2130     * location of the MI_BATCH_BUFFER_START chaining to the next batch.
2131     */
2132    bool                                         chained;
2133 
2134    struct anv_reloc_list                        relocs;
2135 };
2136 
2137 struct anv_batch {
2138    const VkAllocationCallbacks *                alloc;
2139 
2140    /**
2141     * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2142     * Used to increase allocation size for long command buffers.
2143     */
2144    size_t                                       allocated_batch_size;
2145 
2146    struct anv_address                           start_addr;
2147 
2148    void *                                       start;
2149    void *                                       end;
2150    void *                                       next;
2151 
2152    struct anv_reloc_list *                      relocs;
2153 
2154    /* This callback is called (with the associated user data) in the event
2155     * that the batch runs out of space.
2156     */
2157    VkResult (*extend_cb)(struct anv_batch *, uint32_t, void *);
2158    void *                                       user_data;
2159 
2160    /**
2161     * Current error status of the command buffer. Used to track inconsistent
2162     * or incomplete command buffer states that are the consequence of run-time
2163     * errors such as out of memory scenarios. We want to track this in the
2164     * batch because the command buffer object is not visible to some parts
2165     * of the driver.
2166     */
2167    VkResult                                     status;
2168 
2169    enum intel_engine_class                      engine_class;
2170 
2171    /**
2172     * Number of 3DPRIMITIVE's emitted for WA 16014538804
2173     */
2174    uint8_t num_3d_primitives_emitted;
2175 };
2176 
2177 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
2178 VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
2179 void anv_batch_advance(struct anv_batch *batch, uint32_t size);
2180 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
2181 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
2182 
2183 static inline struct anv_address
anv_batch_current_address(struct anv_batch * batch)2184 anv_batch_current_address(struct anv_batch *batch)
2185 {
2186    return anv_batch_address(batch, batch->next);
2187 }
2188 
2189 static inline void
anv_batch_set_storage(struct anv_batch * batch,struct anv_address addr,void * map,size_t size)2190 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
2191                       void *map, size_t size)
2192 {
2193    batch->start_addr = addr;
2194    batch->next = batch->start = map;
2195    batch->end = map + size;
2196 }
2197 
2198 static inline VkResult
anv_batch_set_error(struct anv_batch * batch,VkResult error)2199 anv_batch_set_error(struct anv_batch *batch, VkResult error)
2200 {
2201    assert(error != VK_SUCCESS);
2202    if (batch->status == VK_SUCCESS)
2203       batch->status = error;
2204    return batch->status;
2205 }
2206 
2207 static inline bool
anv_batch_has_error(struct anv_batch * batch)2208 anv_batch_has_error(struct anv_batch *batch)
2209 {
2210    return batch->status != VK_SUCCESS;
2211 }
2212 
2213 static inline uint64_t
_anv_combine_address(struct anv_batch * batch,void * location,const struct anv_address address,uint32_t delta)2214 _anv_combine_address(struct anv_batch *batch, void *location,
2215                      const struct anv_address address, uint32_t delta)
2216 {
2217    if (address.bo == NULL)
2218       return address.offset + delta;
2219 
2220    if (batch)
2221       anv_reloc_list_add_bo(batch->relocs, address.bo);
2222 
2223    return anv_address_physical(anv_address_add(address, delta));
2224 }
2225 
2226 #define __gen_address_type struct anv_address
2227 #define __gen_user_data struct anv_batch
2228 #define __gen_combine_address _anv_combine_address
2229 
2230 /* Wrapper macros needed to work around preprocessor argument issues.  In
2231  * particular, arguments don't get pre-evaluated if they are concatenated.
2232  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
2233  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
2234  * We can work around this easily enough with these helpers.
2235  */
2236 #define __anv_cmd_length(cmd) cmd ## _length
2237 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
2238 #define __anv_cmd_header(cmd) cmd ## _header
2239 #define __anv_cmd_pack(cmd) cmd ## _pack
2240 #define __anv_reg_num(reg) reg ## _num
2241 
2242 #define anv_pack_struct(dst, struc, ...) do {                              \
2243       struct struc __template = {                                          \
2244          __VA_ARGS__                                                       \
2245       };                                                                   \
2246       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
2247       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
2248    } while (0)
2249 
2250 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
2251       void *__dst = anv_batch_emit_dwords(batch, n);       \
2252       if (__dst) {                                         \
2253          struct cmd __template = {                         \
2254             __anv_cmd_header(cmd),                         \
2255            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
2256             __VA_ARGS__                                    \
2257          };                                                \
2258          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
2259       }                                                    \
2260       __dst;                                               \
2261    })
2262 
2263 #define anv_batch_emit_merge(batch, cmd, pipeline, state, name)         \
2264    for (struct cmd name = { 0 },                                        \
2265         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2266         __builtin_expect(_dst != NULL, 1);                              \
2267         ({ uint32_t _partial[__anv_cmd_length(cmd)];                    \
2268            assert((pipeline)->state.len == __anv_cmd_length(cmd));      \
2269            __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
2270            for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {       \
2271               ((uint32_t *)_dst)[i] = _partial[i] |                     \
2272                  (pipeline)->batch_data[(pipeline)->state.offset + i];  \
2273            }                                                            \
2274            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2275            _dst = NULL;                                                 \
2276          }))
2277 
2278 #define anv_batch_emit(batch, cmd, name)                            \
2279    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
2280         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2281         __builtin_expect(_dst != NULL, 1);                              \
2282         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
2283            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2284            _dst = NULL;                                                 \
2285          }))
2286 
2287 #define anv_batch_write_reg(batch, reg, name)                           \
2288    for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
2289         ({                                                              \
2290             uint32_t _dw[__anv_cmd_length(reg)];                        \
2291             __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
2292             for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
2293                anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
2294                   lri.RegisterOffset   = __anv_reg_num(reg);            \
2295                   lri.DataDWord        = _dw[i];                        \
2296                }                                                        \
2297             }                                                           \
2298            _cont = NULL;                                                \
2299          }))
2300 
2301 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
2302 /* #define __gen_get_batch_address anv_batch_address */
2303 /* #define __gen_address_value anv_address_physical */
2304 /* #define __gen_address_offset anv_address_add */
2305 
2306 struct anv_device_memory {
2307    struct vk_device_memory                      vk;
2308 
2309    struct list_head                             link;
2310 
2311    struct anv_bo *                              bo;
2312    const struct anv_memory_type *               type;
2313 
2314    void *                                       map;
2315    size_t                                       map_size;
2316 
2317    /* The map, from the user PoV is map + map_delta */
2318    uint64_t                                     map_delta;
2319 };
2320 
2321 /**
2322  * Header for Vertex URB Entry (VUE)
2323  */
2324 struct anv_vue_header {
2325    uint32_t Reserved;
2326    uint32_t RTAIndex; /* RenderTargetArrayIndex */
2327    uint32_t ViewportIndex;
2328    float PointWidth;
2329 };
2330 
2331 /** Struct representing a sampled image descriptor
2332  *
2333  * This descriptor layout is used for sampled images, bare sampler, and
2334  * combined image/sampler descriptors.
2335  */
2336 struct anv_sampled_image_descriptor {
2337    /** Bindless image handle
2338     *
2339     * This is expected to already be shifted such that the 20-bit
2340     * SURFACE_STATE table index is in the top 20 bits.
2341     */
2342    uint32_t image;
2343 
2344    /** Bindless sampler handle
2345     *
2346     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
2347     * to the dynamic state base address.
2348     */
2349    uint32_t sampler;
2350 };
2351 
2352 /** Struct representing a storage image descriptor */
2353 struct anv_storage_image_descriptor {
2354    /** Bindless image handles
2355     *
2356     * These are expected to already be shifted such that the 20-bit
2357     * SURFACE_STATE table index is in the top 20 bits.
2358     */
2359    uint32_t vanilla;
2360 
2361    /** Image depth
2362     *
2363     * By default the HW RESINFO message allows us to query the depth of an image :
2364     *
2365     * From the Kaby Lake docs for the RESINFO message:
2366     *
2367     *    "Surface Type | ... | Blue
2368     *    --------------+-----+----------------
2369     *    SURFTYPE_3D  | ... | (Depth+1)»LOD"
2370     *
2371     * With VK_EXT_sliced_view_of_3d, we have to support a slice of a 3D image,
2372     * meaning at a depth offset with a new depth value potentially reduced
2373     * from the original image. Unfortunately if we change the Depth value of
2374     * the image, we then run into issues with Yf/Ys tilings where the HW fetch
2375     * data at incorrect locations.
2376     *
2377     * To solve this, we put the slice depth in the descriptor and recompose
2378     * the vec3 (width, height, depth) using this field for z and xy using the
2379     * RESINFO result.
2380     */
2381    uint32_t image_depth;
2382 };
2383 
2384 /** Struct representing a address/range descriptor
2385  *
2386  * The fields of this struct correspond directly to the data layout of
2387  * nir_address_format_64bit_bounded_global addresses.  The last field is the
2388  * offset in the NIR address so it must be zero so that when you load the
2389  * descriptor you get a pointer to the start of the range.
2390  */
2391 struct anv_address_range_descriptor {
2392    uint64_t address;
2393    uint32_t range;
2394    uint32_t zero;
2395 };
2396 
2397 enum anv_descriptor_data {
2398    /** The descriptor contains a BTI reference to a surface state */
2399    ANV_DESCRIPTOR_BTI_SURFACE_STATE       = BITFIELD_BIT(0),
2400    /** The descriptor contains a BTI reference to a sampler state */
2401    ANV_DESCRIPTOR_BTI_SAMPLER_STATE       = BITFIELD_BIT(1),
2402    /** The descriptor contains an actual buffer view */
2403    ANV_DESCRIPTOR_BUFFER_VIEW             = BITFIELD_BIT(2),
2404    /** The descriptor contains inline uniform data */
2405    ANV_DESCRIPTOR_INLINE_UNIFORM          = BITFIELD_BIT(3),
2406    /** anv_address_range_descriptor with a buffer address and range */
2407    ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE  = BITFIELD_BIT(4),
2408    /** Bindless surface handle (through anv_sampled_image_descriptor) */
2409    ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE  = BITFIELD_BIT(5),
2410    /** Storage image handles (through anv_storage_image_descriptor) */
2411    ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE  = BITFIELD_BIT(6),
2412    /** The descriptor contains a single RENDER_SURFACE_STATE */
2413    ANV_DESCRIPTOR_SURFACE                 = BITFIELD_BIT(7),
2414    /** The descriptor contains a SAMPLER_STATE */
2415    ANV_DESCRIPTOR_SAMPLER                 = BITFIELD_BIT(8),
2416    /** A tuple of RENDER_SURFACE_STATE & SAMPLER_STATE */
2417    ANV_DESCRIPTOR_SURFACE_SAMPLER         = BITFIELD_BIT(9),
2418 };
2419 
2420 struct anv_descriptor_set_binding_layout {
2421    /* The type of the descriptors in this binding */
2422    VkDescriptorType type;
2423 
2424    /* Flags provided when this binding was created */
2425    VkDescriptorBindingFlags flags;
2426 
2427    /* Bitfield representing the type of data this descriptor contains */
2428    enum anv_descriptor_data data;
2429 
2430    /* Maximum number of YCbCr texture/sampler planes */
2431    uint8_t max_plane_count;
2432 
2433    /* Number of array elements in this binding (or size in bytes for inline
2434     * uniform data)
2435     */
2436    uint32_t array_size;
2437 
2438    /* Index into the flattened descriptor set */
2439    uint32_t descriptor_index;
2440 
2441    /* Index into the dynamic state array for a dynamic buffer, relative to the
2442     * set.
2443     */
2444    int16_t dynamic_offset_index;
2445 
2446    /* Computed surface size from data (for one plane) */
2447    uint16_t descriptor_data_surface_size;
2448 
2449    /* Computed sampler size from data (for one plane) */
2450    uint16_t descriptor_data_sampler_size;
2451 
2452    /* Index into the descriptor set buffer views */
2453    int32_t buffer_view_index;
2454 
2455    /* Offset into the descriptor buffer where the surface descriptor lives */
2456    uint32_t descriptor_surface_offset;
2457 
2458    /* Offset into the descriptor buffer where the sampler descriptor lives */
2459    uint16_t descriptor_sampler_offset;
2460 
2461    /* Pre computed surface stride (with multiplane descriptor, the descriptor
2462     * includes all the planes)
2463     */
2464    uint16_t descriptor_surface_stride;
2465 
2466    /* Pre computed sampler stride (with multiplane descriptor, the descriptor
2467     * includes all the planes)
2468     */
2469    uint16_t descriptor_sampler_stride;
2470 
2471    /* Immutable samplers (or NULL if no immutable samplers) */
2472    struct anv_sampler **immutable_samplers;
2473 };
2474 
2475 enum anv_descriptor_set_layout_type {
2476    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN,
2477    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT,
2478    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT,
2479    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER,
2480 };
2481 
2482 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
2483                                       const struct anv_descriptor_set_binding_layout *binding,
2484                                       bool sampler);
2485 
2486 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
2487                                       const struct anv_descriptor_set_binding_layout *binding,
2488                                       bool sampler);
2489 
2490 struct anv_descriptor_set_layout {
2491    struct vk_object_base base;
2492 
2493    VkDescriptorSetLayoutCreateFlags flags;
2494 
2495    /* Type of descriptor set layout */
2496    enum anv_descriptor_set_layout_type type;
2497 
2498    /* Descriptor set layouts can be destroyed at almost any time */
2499    uint32_t ref_cnt;
2500 
2501    /* Number of bindings in this descriptor set */
2502    uint32_t binding_count;
2503 
2504    /* Total number of descriptors */
2505    uint32_t descriptor_count;
2506 
2507    /* Shader stages affected by this descriptor set */
2508    uint16_t shader_stages;
2509 
2510    /* Number of buffer views in this descriptor set */
2511    uint32_t buffer_view_count;
2512 
2513    /* Number of dynamic offsets used by this descriptor set */
2514    uint16_t dynamic_offset_count;
2515 
2516    /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
2517     * this buffer
2518     */
2519    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
2520 
2521    /* Size of the descriptor buffer dedicated to surface states for this
2522     * descriptor set
2523     */
2524    uint32_t descriptor_buffer_surface_size;
2525 
2526    /* Size of the descriptor buffer dedicated to sampler states for this
2527     * descriptor set
2528     */
2529    uint32_t descriptor_buffer_sampler_size;
2530 
2531    /* Bindings in this descriptor set */
2532    struct anv_descriptor_set_binding_layout binding[0];
2533 };
2534 
2535 void anv_descriptor_set_layout_destroy(struct anv_device *device,
2536                                        struct anv_descriptor_set_layout *layout);
2537 
2538 void anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout);
2539 
2540 static inline struct anv_descriptor_set_layout *
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout * layout)2541 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2542 {
2543    assert(layout && layout->ref_cnt >= 1);
2544    p_atomic_inc(&layout->ref_cnt);
2545 
2546    return layout;
2547 }
2548 
2549 static inline void
anv_descriptor_set_layout_unref(struct anv_device * device,struct anv_descriptor_set_layout * layout)2550 anv_descriptor_set_layout_unref(struct anv_device *device,
2551                                 struct anv_descriptor_set_layout *layout)
2552 {
2553    assert(layout && layout->ref_cnt >= 1);
2554    if (p_atomic_dec_zero(&layout->ref_cnt))
2555       anv_descriptor_set_layout_destroy(device, layout);
2556 }
2557 
2558 struct anv_descriptor {
2559    VkDescriptorType type;
2560 
2561    union {
2562       struct {
2563          VkImageLayout layout;
2564          struct anv_image_view *image_view;
2565          struct anv_sampler *sampler;
2566       };
2567 
2568       struct {
2569          struct anv_buffer_view *set_buffer_view;
2570          struct anv_buffer *buffer;
2571          uint64_t offset;
2572          uint64_t range;
2573          uint64_t bind_range;
2574       };
2575 
2576       struct anv_buffer_view *buffer_view;
2577 
2578       struct vk_acceleration_structure *accel_struct;
2579    };
2580 };
2581 
2582 struct anv_descriptor_set {
2583    struct vk_object_base base;
2584 
2585    struct anv_descriptor_pool *pool;
2586    struct anv_descriptor_set_layout *layout;
2587 
2588    /* Amount of space occupied in the the pool by this descriptor set. It can
2589     * be larger than the size of the descriptor set.
2590     */
2591    uint32_t size;
2592 
2593    /* Is this descriptor set a push descriptor */
2594    bool is_push;
2595 
2596    /* Bitfield of descriptors for which we need to generate surface states.
2597     * Only valid for push descriptors
2598     */
2599    uint32_t generate_surface_states;
2600 
2601    /* State relative to anv_descriptor_pool::surface_bo */
2602    struct anv_state desc_surface_mem;
2603    /* State relative to anv_descriptor_pool::sampler_bo */
2604    struct anv_state desc_sampler_mem;
2605    /* Surface state for the descriptor buffer */
2606    struct anv_state desc_surface_state;
2607 
2608    /* Descriptor set address pointing to desc_surface_mem (we don't need one
2609     * for sampler because they're never accessed other than by the HW through
2610     * the shader sampler handle).
2611     */
2612    struct anv_address desc_surface_addr;
2613 
2614    struct anv_address desc_sampler_addr;
2615 
2616    /* Descriptor offset from the
2617     * device->va.internal_surface_state_pool.addr
2618     *
2619     * It just needs to be added to the binding table offset to be put into the
2620     * HW BTI entry.
2621     */
2622    uint32_t desc_offset;
2623 
2624    uint32_t buffer_view_count;
2625    struct anv_buffer_view *buffer_views;
2626 
2627    /* Link to descriptor pool's desc_sets list . */
2628    struct list_head pool_link;
2629 
2630    uint32_t descriptor_count;
2631    struct anv_descriptor descriptors[0];
2632 };
2633 
2634 static inline bool
anv_descriptor_set_is_push(struct anv_descriptor_set * set)2635 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2636 {
2637    return set->pool == NULL;
2638 }
2639 
2640 struct anv_surface_state_data {
2641    uint8_t data[ANV_SURFACE_STATE_SIZE];
2642 };
2643 
2644 struct anv_buffer_state {
2645    /** Surface state allocated from the bindless heap
2646     *
2647     * Only valid if anv_physical_device::indirect_descriptors is true
2648     */
2649    struct anv_state state;
2650 
2651    /** Surface state after genxml packing
2652     *
2653     * Only valid if anv_physical_device::indirect_descriptors is false
2654     */
2655    struct anv_surface_state_data state_data;
2656 };
2657 
2658 struct anv_buffer_view {
2659    struct vk_buffer_view vk;
2660 
2661    struct anv_address address;
2662 
2663    struct anv_buffer_state general;
2664    struct anv_buffer_state storage;
2665 };
2666 
2667 struct anv_push_descriptor_set {
2668    struct anv_descriptor_set set;
2669 
2670    /* Put this field right behind anv_descriptor_set so it fills up the
2671     * descriptors[0] field. */
2672    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2673 
2674    /** True if the descriptor set buffer has been referenced by a draw or
2675     * dispatch command.
2676     */
2677    bool set_used_on_gpu;
2678 
2679    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2680 };
2681 
2682 static inline struct anv_address
anv_descriptor_set_address(struct anv_descriptor_set * set)2683 anv_descriptor_set_address(struct anv_descriptor_set *set)
2684 {
2685    if (anv_descriptor_set_is_push(set)) {
2686       /* We have to flag push descriptor set as used on the GPU
2687        * so that the next time we push descriptors, we grab a new memory.
2688        */
2689       struct anv_push_descriptor_set *push_set =
2690          (struct anv_push_descriptor_set *)set;
2691       push_set->set_used_on_gpu = true;
2692    }
2693 
2694    return set->desc_surface_addr;
2695 }
2696 
2697 struct anv_descriptor_pool_heap {
2698    /* BO allocated to back the pool (unused for host pools) */
2699    struct anv_bo        *bo;
2700 
2701    /* Host memory allocated to back a host pool */
2702    void                 *host_mem;
2703 
2704    /* Heap tracking allocations in bo/host_mem */
2705    struct util_vma_heap  heap;
2706 
2707    /* Size of the heap */
2708    uint32_t              size;
2709 };
2710 
2711 struct anv_descriptor_pool {
2712    struct vk_object_base base;
2713 
2714    struct anv_descriptor_pool_heap surfaces;
2715    struct anv_descriptor_pool_heap samplers;
2716 
2717    struct anv_state_stream surface_state_stream;
2718    void *surface_state_free_list;
2719 
2720    /** List of anv_descriptor_set. */
2721    struct list_head desc_sets;
2722 
2723    /** Heap over host_mem */
2724    struct util_vma_heap host_heap;
2725 
2726    /** Allocated size of host_mem */
2727    uint32_t host_mem_size;
2728 
2729    /**
2730     * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
2731     * surface_state_stream is unused.
2732     */
2733    bool host_only;
2734 
2735    char host_mem[0];
2736 };
2737 
2738 bool
2739 anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
2740                              struct anv_push_descriptor_set *push_set,
2741                              struct anv_descriptor_set_layout *layout);
2742 
2743 void
2744 anv_push_descriptor_set_finish(struct anv_push_descriptor_set *push_set);
2745 
2746 void
2747 anv_descriptor_set_write_image_view(struct anv_device *device,
2748                                     struct anv_descriptor_set *set,
2749                                     const VkDescriptorImageInfo * const info,
2750                                     VkDescriptorType type,
2751                                     uint32_t binding,
2752                                     uint32_t element);
2753 
2754 void
2755 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2756                                      struct anv_descriptor_set *set,
2757                                      VkDescriptorType type,
2758                                      struct anv_buffer_view *buffer_view,
2759                                      uint32_t binding,
2760                                      uint32_t element);
2761 
2762 void
2763 anv_descriptor_set_write_buffer(struct anv_device *device,
2764                                 struct anv_descriptor_set *set,
2765                                 VkDescriptorType type,
2766                                 struct anv_buffer *buffer,
2767                                 uint32_t binding,
2768                                 uint32_t element,
2769                                 VkDeviceSize offset,
2770                                 VkDeviceSize range);
2771 
2772 void
2773 anv_descriptor_write_surface_state(struct anv_device *device,
2774                                    struct anv_descriptor *desc,
2775                                    struct anv_state surface_state);
2776 
2777 void
2778 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
2779                                                 struct anv_descriptor_set *set,
2780                                                 struct vk_acceleration_structure *accel,
2781                                                 uint32_t binding,
2782                                                 uint32_t element);
2783 
2784 void
2785 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2786                                              struct anv_descriptor_set *set,
2787                                              uint32_t binding,
2788                                              const void *data,
2789                                              size_t offset,
2790                                              size_t size);
2791 
2792 void
2793 anv_descriptor_set_write(struct anv_device *device,
2794                          struct anv_descriptor_set *set_override,
2795                          uint32_t write_count,
2796                          const VkWriteDescriptorSet *writes);
2797 
2798 void
2799 anv_descriptor_set_write_template(struct anv_device *device,
2800                                   struct anv_descriptor_set *set,
2801                                   const struct vk_descriptor_update_template *template,
2802                                   const void *data);
2803 
2804 #define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 4)
2805 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 3)
2806 #define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 2)
2807 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 1)
2808 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2809 
2810 struct anv_pipeline_binding {
2811    /** Index in the descriptor set
2812     *
2813     * This is a flattened index; the descriptor set layout is already taken
2814     * into account.
2815     */
2816    uint32_t index;
2817 
2818    /** Binding in the descriptor set. Not valid for any of the
2819     * ANV_DESCRIPTOR_SET_*
2820     */
2821    uint32_t binding;
2822 
2823    /** Offset in the descriptor buffer
2824     *
2825     * Relative to anv_descriptor_set::desc_addr. This is useful for
2826     * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
2827     * table entry.
2828     */
2829    uint32_t set_offset;
2830 
2831    /** The descriptor set this surface corresponds to.
2832     *
2833     * The special ANV_DESCRIPTOR_SET_* values above indicates that this
2834     * binding is not a normal descriptor set but something else.
2835     */
2836    uint8_t set;
2837 
2838    union {
2839       /** Plane in the binding index for images */
2840       uint8_t plane;
2841 
2842       /** Input attachment index (relative to the subpass) */
2843       uint8_t input_attachment_index;
2844 
2845       /** Dynamic offset index
2846        *
2847        * For dynamic UBOs and SSBOs, relative to set.
2848        */
2849       uint8_t dynamic_offset_index;
2850    };
2851 };
2852 
2853 struct anv_push_range {
2854    /** Index in the descriptor set */
2855    uint32_t index;
2856 
2857    /** Descriptor set index */
2858    uint8_t set;
2859 
2860    /** Dynamic offset index (for dynamic UBOs), relative to set. */
2861    uint8_t dynamic_offset_index;
2862 
2863    /** Start offset in units of 32B */
2864    uint8_t start;
2865 
2866    /** Range in units of 32B */
2867    uint8_t length;
2868 };
2869 
2870 struct anv_pipeline_sets_layout {
2871    struct anv_device *device;
2872 
2873    struct {
2874       struct anv_descriptor_set_layout *layout;
2875       uint32_t dynamic_offset_start;
2876    } set[MAX_SETS];
2877 
2878    enum anv_descriptor_set_layout_type type;
2879 
2880    uint32_t num_sets;
2881    uint32_t num_dynamic_buffers;
2882    int push_descriptor_set_index;
2883 
2884    bool independent_sets;
2885 
2886    unsigned char sha1[20];
2887 };
2888 
2889 void anv_pipeline_sets_layout_init(struct anv_pipeline_sets_layout *layout,
2890                                    struct anv_device *device,
2891                                    bool independent_sets);
2892 
2893 void anv_pipeline_sets_layout_fini(struct anv_pipeline_sets_layout *layout);
2894 
2895 void anv_pipeline_sets_layout_add(struct anv_pipeline_sets_layout *layout,
2896                                   uint32_t set_idx,
2897                                   struct anv_descriptor_set_layout *set_layout);
2898 
2899 void anv_pipeline_sets_layout_hash(struct anv_pipeline_sets_layout *layout);
2900 
2901 void anv_pipeline_sets_layout_print(const struct anv_pipeline_sets_layout *layout);
2902 
2903 struct anv_pipeline_layout {
2904    struct vk_object_base base;
2905 
2906    struct anv_pipeline_sets_layout sets_layout;
2907 };
2908 
2909 const struct anv_descriptor_set_layout *
2910 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
2911                                  uint8_t *desc_idx);
2912 
2913 struct anv_sparse_binding_data {
2914    uint64_t address;
2915    uint64_t size;
2916 
2917    /* This is kept only because it's given to us by vma_alloc() and need to be
2918     * passed back to vma_free(), we have no other particular use for it
2919     */
2920    struct util_vma_heap *vma_heap;
2921 };
2922 
2923 #define ANV_SPARSE_BLOCK_SIZE (64 * 1024)
2924 
2925 static inline bool
anv_sparse_binding_is_enabled(struct anv_device * device)2926 anv_sparse_binding_is_enabled(struct anv_device *device)
2927 {
2928    return device->vk.enabled_features.sparseBinding;
2929 }
2930 
2931 static inline bool
anv_sparse_residency_is_enabled(struct anv_device * device)2932 anv_sparse_residency_is_enabled(struct anv_device *device)
2933 {
2934    return device->vk.enabled_features.sparseResidencyBuffer ||
2935           device->vk.enabled_features.sparseResidencyImage2D ||
2936           device->vk.enabled_features.sparseResidencyImage3D ||
2937           device->vk.enabled_features.sparseResidency2Samples ||
2938           device->vk.enabled_features.sparseResidency4Samples ||
2939           device->vk.enabled_features.sparseResidency8Samples ||
2940           device->vk.enabled_features.sparseResidency16Samples ||
2941           device->vk.enabled_features.sparseResidencyAliased;
2942 }
2943 
2944 VkResult anv_init_sparse_bindings(struct anv_device *device,
2945                                   uint64_t size,
2946                                   struct anv_sparse_binding_data *sparse,
2947                                   enum anv_bo_alloc_flags alloc_flags,
2948                                   uint64_t client_address,
2949                                   struct anv_address *out_address);
2950 VkResult anv_free_sparse_bindings(struct anv_device *device,
2951                                   struct anv_sparse_binding_data *sparse);
2952 VkResult anv_sparse_bind_buffer(struct anv_device *device,
2953                                 struct anv_buffer *buffer,
2954                                 const VkSparseMemoryBind *vk_bind,
2955                                 struct anv_sparse_submission *submit);
2956 VkResult anv_sparse_bind_image_opaque(struct anv_device *device,
2957                                       struct anv_image *image,
2958                                       const VkSparseMemoryBind *vk_bind,
2959                                       struct anv_sparse_submission *submit);
2960 VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
2961                                       struct anv_image *image,
2962                                       const VkSparseImageMemoryBind *bind,
2963                                       struct anv_sparse_submission *submit);
2964 VkResult anv_sparse_bind(struct anv_device *device,
2965                          struct anv_sparse_submission *sparse_submit);
2966 
2967 VkSparseImageFormatProperties
2968 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
2969                                         VkImageAspectFlags aspect,
2970                                         VkImageType vk_image_type,
2971                                         struct isl_surf *surf);
2972 void anv_sparse_calc_miptail_properties(struct anv_device *device,
2973                                         struct anv_image *image,
2974                                         VkImageAspectFlags vk_aspect,
2975                                         uint32_t *imageMipTailFirstLod,
2976                                         VkDeviceSize *imageMipTailSize,
2977                                         VkDeviceSize *imageMipTailOffset,
2978                                         VkDeviceSize *imageMipTailStride);
2979 VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
2980                                         VkImageCreateFlags flags,
2981                                         VkImageTiling tiling,
2982                                         VkSampleCountFlagBits samples,
2983                                         VkImageType type,
2984                                         VkFormat format);
2985 VkResult anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size,
2986                                struct anv_trtt_batch_bo **out_trtt_bbo);
2987 
2988 struct anv_buffer {
2989    struct vk_buffer vk;
2990 
2991    /* Set when bound */
2992    struct anv_address address;
2993 
2994    struct anv_sparse_binding_data sparse_data;
2995 };
2996 
2997 static inline bool
anv_buffer_is_sparse(struct anv_buffer * buffer)2998 anv_buffer_is_sparse(struct anv_buffer *buffer)
2999 {
3000    return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
3001 }
3002 
3003 enum anv_cmd_dirty_bits {
3004    ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
3005    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
3006    ANV_CMD_DIRTY_RENDER_AREA                         = 1 << 2,
3007    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 3,
3008    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 4,
3009    ANV_CMD_DIRTY_RESTART_INDEX                       = 1 << 5,
3010    ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE              = 1 << 6,
3011 };
3012 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
3013 
3014 enum anv_pipe_bits {
3015    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
3016    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
3017    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
3018    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
3019    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
3020    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
3021    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
3022    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
3023    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
3024    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
3025    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
3026 
3027    /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
3028     * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
3029     * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
3030     */
3031    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
3032    ANV_PIPE_PSS_STALL_SYNC_BIT               = (1 << 15),
3033 
3034    /*
3035     * This bit flush data-port's Untyped L1 data cache (LSC L1).
3036     */
3037    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16),
3038 
3039    /* This bit controls the flushing of the engine (Render, Compute) specific
3040     * entries from the compression cache.
3041     */
3042    ANV_PIPE_CCS_CACHE_FLUSH_BIT              = (1 << 17),
3043 
3044    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
3045    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
3046 
3047    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
3048     * a flush has happened but not a CS stall.  The next time we do any sort
3049     * of invalidation we need to insert a CS stall at that time.  Otherwise,
3050     * we would have to CS stall on every flush which could be bad.
3051     */
3052    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
3053 
3054    /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
3055     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
3056     * done by writing the AUX-TT register.
3057     */
3058    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 23),
3059 
3060    /* This bit does not exist directly in PIPE_CONTROL. It means that a
3061     * PIPE_CONTROL with a post-sync operation will follow. This is used to
3062     * implement a workaround for Gfx9.
3063     */
3064    ANV_PIPE_POST_SYNC_BIT                    = (1 << 24),
3065 };
3066 
3067 /* These bits track the state of buffer writes for queries. They get cleared
3068  * based on PIPE_CONTROL emissions.
3069  */
3070 enum anv_query_bits {
3071    ANV_QUERY_WRITES_RT_FLUSH      = (1 << 0),
3072 
3073    ANV_QUERY_WRITES_TILE_FLUSH    = (1 << 1),
3074 
3075    ANV_QUERY_WRITES_CS_STALL      = (1 << 2),
3076 
3077    ANV_QUERY_WRITES_DATA_FLUSH    = (1 << 3),
3078 };
3079 
3080 /* It's not clear why DG2 doesn't have issues with L3/CS coherency. But it's
3081  * likely related to performance workaround 14015868140.
3082  *
3083  * For now we enable this only on DG2 and platform prior to Gfx12 where there
3084  * is no tile cache.
3085  */
3086 #define ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) \
3087    (intel_device_info_is_dg2(devinfo))
3088 
3089 /* Things we need to flush before accessing query data using the command
3090  * streamer.
3091  *
3092  * Prior to DG2 experiments show that the command streamer is not coherent
3093  * with the tile cache so we need to flush it to make any data visible to CS.
3094  *
3095  * Otherwise we want to flush the RT cache which is where blorp writes, either
3096  * for clearing the query buffer or for clearing the destination buffer in
3097  * vkCopyQueryPoolResults().
3098  */
3099 #define ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo) \
3100    (((!ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) && \
3101       devinfo->ver >= 12) ? \
3102      ANV_QUERY_WRITES_TILE_FLUSH : 0) | \
3103     ANV_QUERY_WRITES_RT_FLUSH | \
3104     ANV_QUERY_WRITES_CS_STALL)
3105 #define ANV_QUERY_COMPUTE_WRITES_PENDING_BITS \
3106    (ANV_QUERY_WRITES_DATA_FLUSH | \
3107     ANV_QUERY_WRITES_CS_STALL)
3108 
3109 #define ANV_PIPE_QUERY_BITS(pending_query_bits) ( \
3110    ((pending_query_bits & ANV_QUERY_WRITES_RT_FLUSH) ?   \
3111     ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0) | \
3112    ((pending_query_bits & ANV_QUERY_WRITES_TILE_FLUSH) ?   \
3113     ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \
3114    ((pending_query_bits & ANV_QUERY_WRITES_CS_STALL) ?   \
3115     ANV_PIPE_CS_STALL_BIT : 0) | \
3116    ((pending_query_bits & ANV_QUERY_WRITES_DATA_FLUSH) ?  \
3117     (ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3118      ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3119      ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) : 0))
3120 
3121 #define ANV_PIPE_FLUSH_BITS ( \
3122    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3123    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3124    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3125    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
3126    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3127    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
3128 
3129 #define ANV_PIPE_STALL_BITS ( \
3130    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3131    ANV_PIPE_DEPTH_STALL_BIT | \
3132    ANV_PIPE_CS_STALL_BIT | \
3133    ANV_PIPE_PSS_STALL_SYNC_BIT)
3134 
3135 #define ANV_PIPE_INVALIDATE_BITS ( \
3136    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
3137    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
3138    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
3139    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
3140    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
3141    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
3142 
3143 /* PIPE_CONTROL bits that should be set only in 3D RCS mode.
3144  * For more details see genX(emit_apply_pipe_flushes).
3145  */
3146 #define ANV_PIPE_GFX_BITS ( \
3147    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3148    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3149    ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
3150    ANV_PIPE_DEPTH_STALL_BIT | \
3151    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3152    (GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0) | \
3153    ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
3154 
3155 /* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode.
3156  * For more details see genX(emit_apply_pipe_flushes).
3157  *
3158  * Documentation says that untyped L1 dataport cache flush is controlled by
3159  * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register:
3160  *
3161  * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
3162  *
3163  *    "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to
3164  *     "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1
3165  *     cache based on the programming of HDC_Chicken0 register bits 13:11."
3166  *
3167  *    "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1
3168  *     cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the
3169  *     PIPE_CONTROL command."
3170  *
3171  *    As part of Wa_22010960976 & Wa_14013347512, i915 is programming
3172  *    HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol
3173  *    Dataport flush, and UAV coherency barrier event"). So there is no need
3174  *    to set "Untyped Data-Port Cache" in 3D mode.
3175  *
3176  * On MTL the HDC_CHICKEN0 default values changed to match what was programmed
3177  * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the
3178  * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0
3179  * register to force L1 untyped flush with HDC pipeline flush has no effect on
3180  * MTL.
3181  *
3182  * It seems like the HW change completely disconnected L1 untyped flush from
3183  * HDC pipeline flush with no way to bring that behavior back. So leave the L1
3184  * untyped flush active in 3D mode on all platforms since it doesn't seems to
3185  * cause issues there too.
3186  *
3187  * Maybe we'll have some GPGPU only bits here at some point.
3188  */
3189 #define ANV_PIPE_GPGPU_BITS (0)
3190 
3191 enum intel_ds_stall_flag
3192 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
3193 
3194 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
3195    VK_IMAGE_ASPECT_PLANE_0_BIT | \
3196    VK_IMAGE_ASPECT_PLANE_1_BIT | \
3197    VK_IMAGE_ASPECT_PLANE_2_BIT)
3198 
3199 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
3200    VK_IMAGE_ASPECT_COLOR_BIT | \
3201    VK_IMAGE_ASPECT_PLANES_BITS_ANV)
3202 
3203 struct anv_vertex_binding {
3204    struct anv_buffer *                          buffer;
3205    VkDeviceSize                                 offset;
3206    VkDeviceSize                                 size;
3207 };
3208 
3209 struct anv_xfb_binding {
3210    struct anv_buffer *                          buffer;
3211    VkDeviceSize                                 offset;
3212    VkDeviceSize                                 size;
3213 };
3214 
3215 struct anv_push_constants {
3216    /** Push constant data provided by the client through vkPushConstants */
3217    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
3218 
3219 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
3220 #define ANV_DESCRIPTOR_SET_OFFSET_MASK        (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
3221 
3222    /**
3223     * Base offsets for descriptor sets from
3224     * INDIRECT_DESCRIPTOR_POOL_MIN_ADDRESS
3225     *
3226     * In bits [0:5] : dynamic offset index in dynamic_offsets[] for the set
3227     *
3228     * In bits [6:63] : descriptor set address
3229     */
3230    uint32_t desc_surface_offsets[MAX_SETS];
3231 
3232    /**
3233     * Base offsets for descriptor sets from
3234     */
3235    uint32_t desc_sampler_offsets[MAX_SETS];
3236 
3237    /** Dynamic offsets for dynamic UBOs and SSBOs */
3238    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
3239 
3240    union {
3241       struct {
3242          /** Dynamic MSAA value */
3243          uint32_t fs_msaa_flags;
3244 
3245          /** Dynamic TCS input vertices */
3246          uint32_t tcs_input_vertices;
3247       } gfx;
3248 
3249       struct {
3250          /** Base workgroup ID
3251           *
3252           * Used for vkCmdDispatchBase.
3253           */
3254          uint32_t base_work_group_id[3];
3255 
3256          /** Subgroup ID
3257           *
3258           * This is never set by software but is implicitly filled out when
3259           * uploading the push constants for compute shaders.
3260           */
3261          uint32_t subgroup_id;
3262       } cs;
3263    };
3264 
3265    /* Robust access pushed registers. */
3266    uint64_t push_reg_mask[MESA_SHADER_STAGES];
3267 
3268    /** Ray query globals (RT_DISPATCH_GLOBALS) */
3269    uint64_t ray_query_globals;
3270 };
3271 
3272 struct anv_surface_state {
3273    /** Surface state allocated from the bindless heap
3274     *
3275     * Can be NULL if unused.
3276     */
3277    struct anv_state state;
3278 
3279    /** Surface state after genxml packing
3280     *
3281     * Same data as in state.
3282     */
3283    struct anv_surface_state_data state_data;
3284 
3285    /** Address of the surface referred to by this state
3286     *
3287     * This address is relative to the start of the BO.
3288     */
3289    struct anv_address address;
3290    /* Address of the aux surface, if any
3291     *
3292     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
3293     *
3294     * With the exception of gfx8, the bottom 12 bits of this address' offset
3295     * include extra aux information.
3296     */
3297    struct anv_address aux_address;
3298    /* Address of the clear color, if any
3299     *
3300     * This address is relative to the start of the BO.
3301     */
3302    struct anv_address clear_address;
3303 };
3304 
3305 struct anv_attachment {
3306    VkFormat vk_format;
3307    const struct anv_image_view *iview;
3308    VkImageLayout layout;
3309    enum isl_aux_usage aux_usage;
3310    struct anv_surface_state surface_state;
3311 
3312    VkResolveModeFlagBits resolve_mode;
3313    const struct anv_image_view *resolve_iview;
3314    VkImageLayout resolve_layout;
3315 };
3316 
3317 /** State tracking for vertex buffer flushes
3318  *
3319  * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
3320  * addresses.  If you happen to have two vertex buffers which get placed
3321  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
3322  * collisions.  In order to solve this problem, we track vertex address ranges
3323  * which are live in the cache and invalidate the cache if one ever exceeds 32
3324  * bits.
3325  */
3326 struct anv_vb_cache_range {
3327    /* Virtual address at which the live vertex buffer cache range starts for
3328     * this vertex buffer index.
3329     */
3330    uint64_t start;
3331 
3332    /* Virtual address of the byte after where vertex buffer cache range ends.
3333     * This is exclusive such that end - start is the size of the range.
3334     */
3335    uint64_t end;
3336 };
3337 
3338 static inline void
anv_merge_vb_cache_range(struct anv_vb_cache_range * dirty,const struct anv_vb_cache_range * bound)3339 anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty,
3340                          const struct anv_vb_cache_range *bound)
3341 {
3342    if (dirty->start == dirty->end) {
3343       *dirty = *bound;
3344    } else if (bound->start != bound->end) {
3345       dirty->start = MIN2(dirty->start, bound->start);
3346       dirty->end = MAX2(dirty->end, bound->end);
3347    }
3348 }
3349 
3350 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround*/
3351 static inline bool
anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range * bound,struct anv_vb_cache_range * dirty,struct anv_address vb_address,uint32_t vb_size)3352 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
3353                                            struct anv_vb_cache_range *dirty,
3354                                            struct anv_address vb_address,
3355                                            uint32_t vb_size)
3356 {
3357    if (vb_size == 0) {
3358       bound->start = 0;
3359       bound->end = 0;
3360       return false;
3361    }
3362 
3363    bound->start = intel_48b_address(anv_address_physical(vb_address));
3364    bound->end = bound->start + vb_size;
3365    assert(bound->end > bound->start); /* No overflow */
3366 
3367    /* Align everything to a cache line */
3368    bound->start &= ~(64ull - 1ull);
3369    bound->end = align64(bound->end, 64);
3370 
3371    anv_merge_vb_cache_range(dirty, bound);
3372 
3373    /* If our range is larger than 32 bits, we have to flush */
3374    assert(bound->end - bound->start <= (1ull << 32));
3375    return (dirty->end - dirty->start) > (1ull << 32);
3376 }
3377 
3378 /**
3379  * State tracking for simple internal shaders
3380  */
3381 struct anv_simple_shader {
3382    /* The device associated with this emission */
3383    struct anv_device *device;
3384    /* The command buffer associated with this emission (can be NULL) */
3385    struct anv_cmd_buffer *cmd_buffer;
3386    /* State stream used for various internal allocations */
3387    struct anv_state_stream *dynamic_state_stream;
3388    struct anv_state_stream *general_state_stream;
3389    /* Where to emit the commands (can be different from cmd_buffer->batch) */
3390    struct anv_batch *batch;
3391    /* Shader to use */
3392    struct anv_shader_bin *kernel;
3393    /* L3 config used by the shader */
3394    const struct intel_l3_config *l3_config;
3395    /* Current URB config */
3396    const struct intel_urb_config *urb_cfg;
3397 
3398    /* Managed by the simpler shader helper*/
3399    struct anv_state bt_state;
3400 };
3401 
3402 /** State tracking for particular pipeline bind point
3403  *
3404  * This struct is the base struct for anv_cmd_graphics_state and
3405  * anv_cmd_compute_state.  These are used to track state which is bound to a
3406  * particular type of pipeline.  Generic state that applies per-stage such as
3407  * binding table offsets and push constants is tracked generically with a
3408  * per-stage array in anv_cmd_state.
3409  */
3410 struct anv_cmd_pipeline_state {
3411    struct anv_descriptor_set *descriptors[MAX_SETS];
3412    struct anv_push_descriptor_set push_descriptor;
3413 
3414    struct anv_push_constants push_constants;
3415 
3416    /* Push constant state allocated when flushing push constants. */
3417    struct anv_state          push_constants_state;
3418 
3419    /**
3420     * Dynamic buffer offsets.
3421     *
3422     * We have a maximum of MAX_DYNAMIC_BUFFERS per pipeline, but with
3423     * independent sets we cannot know which how much in total is going to be
3424     * used. As a result we need to store the maximum possible number per set.
3425     *
3426     * Those values are written into anv_push_constants::dynamic_offsets at
3427     * flush time when have the pipeline with the final
3428     * anv_pipeline_sets_layout.
3429     */
3430    struct {
3431       uint32_t                                  offsets[MAX_DYNAMIC_BUFFERS];
3432    }                                            dynamic_offsets[MAX_SETS];
3433 
3434    /**
3435     * The current bound pipeline.
3436     */
3437    struct anv_pipeline      *pipeline;
3438 };
3439 
3440 /** State tracking for graphics pipeline
3441  *
3442  * This has anv_cmd_pipeline_state as a base struct to track things which get
3443  * bound to a graphics pipeline.  Along with general pipeline bind point state
3444  * which is in the anv_cmd_pipeline_state base struct, it also contains other
3445  * state which is graphics-specific.
3446  */
3447 struct anv_cmd_graphics_state {
3448    struct anv_cmd_pipeline_state base;
3449 
3450    VkRenderingFlags rendering_flags;
3451    VkRect2D render_area;
3452    uint32_t layer_count;
3453    uint32_t samples;
3454    uint32_t view_mask;
3455    uint32_t color_att_count;
3456    struct anv_state att_states;
3457    struct anv_attachment color_att[MAX_RTS];
3458    struct anv_attachment depth_att;
3459    struct anv_attachment stencil_att;
3460    struct anv_state null_surface_state;
3461 
3462    anv_cmd_dirty_mask_t dirty;
3463    uint32_t vb_dirty;
3464 
3465    struct anv_vb_cache_range ib_bound_range;
3466    struct anv_vb_cache_range ib_dirty_range;
3467    struct anv_vb_cache_range vb_bound_ranges[33];
3468    struct anv_vb_cache_range vb_dirty_ranges[33];
3469 
3470    uint32_t restart_index;
3471 
3472    VkShaderStageFlags push_constant_stages;
3473 
3474    uint32_t primitive_topology;
3475    bool used_task_shader;
3476 
3477    struct anv_buffer *index_buffer;
3478    uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
3479    uint32_t index_offset;
3480    uint32_t index_size;
3481 
3482    struct vk_vertex_input_state vertex_input;
3483    struct vk_sample_locations_state sample_locations;
3484 
3485    /**
3486     * The latest BLEND_STATE structure packed in dynamic state heap
3487     */
3488    struct anv_state blend_states;
3489 
3490    bool object_preemption;
3491    bool has_uint_rt;
3492 
3493    /* State tracking for Wa_14018912822. */
3494    bool color_blend_zero;
3495    bool alpha_blend_zero;
3496 
3497    /**
3498     * DEPTH and STENCIL attachment write state for Wa_18019816803.
3499     */
3500    bool ds_write_state;
3501 
3502    /**
3503     * State tracking for Wa_18020335297.
3504     */
3505    bool                                         viewport_set;
3506 
3507    struct intel_urb_config urb_cfg;
3508 
3509    uint32_t n_occlusion_queries;
3510 
3511    struct anv_gfx_dynamic_state dyn_state;
3512 };
3513 
3514 enum anv_depth_reg_mode {
3515    ANV_DEPTH_REG_MODE_UNKNOWN = 0,
3516    ANV_DEPTH_REG_MODE_HW_DEFAULT,
3517    ANV_DEPTH_REG_MODE_D16_1X_MSAA,
3518 };
3519 
3520 /** State tracking for compute pipeline
3521  *
3522  * This has anv_cmd_pipeline_state as a base struct to track things which get
3523  * bound to a compute pipeline.  Along with general pipeline bind point state
3524  * which is in the anv_cmd_pipeline_state base struct, it also contains other
3525  * state which is compute-specific.
3526  */
3527 struct anv_cmd_compute_state {
3528    struct anv_cmd_pipeline_state base;
3529 
3530    bool pipeline_dirty;
3531 
3532    struct anv_state push_data;
3533 
3534    struct anv_address num_workgroups;
3535 
3536    uint32_t scratch_size;
3537 };
3538 
3539 struct anv_cmd_ray_tracing_state {
3540    struct anv_cmd_pipeline_state base;
3541 
3542    bool pipeline_dirty;
3543 
3544    struct {
3545       struct anv_bo *bo;
3546       struct brw_rt_scratch_layout layout;
3547    } scratch;
3548 
3549    struct anv_address build_priv_mem_addr;
3550    size_t             build_priv_mem_size;
3551 };
3552 
3553 /** State required while building cmd buffer */
3554 struct anv_cmd_state {
3555    /* PIPELINE_SELECT.PipelineSelection */
3556    uint32_t                                     current_pipeline;
3557    const struct intel_l3_config *               current_l3_config;
3558    uint32_t                                     last_aux_map_state;
3559 
3560    struct anv_cmd_graphics_state                gfx;
3561    struct anv_cmd_compute_state                 compute;
3562    struct anv_cmd_ray_tracing_state             rt;
3563 
3564    enum anv_pipe_bits                           pending_pipe_bits;
3565 
3566    struct {
3567       /**
3568        * Tracks operations susceptible to interfere with queries in the
3569        * destination buffer of vkCmdCopyQueryResults, we need those operations to
3570        * have completed before we do the work of vkCmdCopyQueryResults.
3571        */
3572       enum anv_query_bits                          buffer_write_bits;
3573 
3574       /**
3575        * Tracks clear operations of query buffers that can interact with
3576        * vkCmdQueryBegin*, vkCmdWriteTimestamp*,
3577        * vkCmdWriteAccelerationStructuresPropertiesKHR, etc...
3578        *
3579        * We need the clearing of the buffer completed before with write data with
3580        * the command streamer or a shader.
3581        */
3582       enum anv_query_bits                          clear_bits;
3583    } queries;
3584 
3585    VkShaderStageFlags                           descriptors_dirty;
3586    VkShaderStageFlags                           push_descriptors_dirty;
3587    VkShaderStageFlags                           push_constants_dirty;
3588 
3589    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
3590    bool                                         xfb_enabled;
3591    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
3592    struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
3593    struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
3594 
3595    unsigned char                                sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3596    unsigned char                                surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3597    unsigned char                                push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
3598 
3599    /**
3600     * Whether or not the gfx8 PMA fix is enabled.  We ensure that, at the top
3601     * of any command buffer it is disabled by disabling it in EndCommandBuffer
3602     * and before invoking the secondary in ExecuteCommands.
3603     */
3604    bool                                         pma_fix_enabled;
3605 
3606    /**
3607     * Whether or not we know for certain that HiZ is enabled for the current
3608     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
3609     * enabled or not, this will be false.
3610     */
3611    bool                                         hiz_enabled;
3612 
3613    /* We ensure the registers for the gfx12 D16 fix are initialized at the
3614     * first non-NULL depth stencil packet emission of every command buffer.
3615     * For secondary command buffer execution, we transfer the state from the
3616     * last command buffer to the primary (if known).
3617     */
3618    enum anv_depth_reg_mode                      depth_reg_mode;
3619 
3620    /**
3621     * Whether RHWO optimization is enabled (Wa_1508744258).
3622     */
3623    bool                                         rhwo_optimization_enabled;
3624 
3625    /**
3626     * Pending state of the RHWO optimization, to be applied at the next
3627     * genX(cmd_buffer_apply_pipe_flushes).
3628     */
3629    bool                                         pending_rhwo_optimization_enabled;
3630 
3631    bool                                         conditional_render_enabled;
3632 
3633    /**
3634     * Last rendering scale argument provided to
3635     * genX(cmd_buffer_emit_hashing_mode)().
3636     */
3637    unsigned                                     current_hash_scale;
3638 
3639    /**
3640     * A buffer used for spill/fill of ray queries.
3641     */
3642    struct anv_bo *                              ray_query_shadow_bo;
3643 };
3644 
3645 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
3646 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
3647 
3648 enum anv_cmd_buffer_exec_mode {
3649    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
3650    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
3651    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
3652    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
3653    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
3654    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
3655 };
3656 
3657 struct anv_measure_batch;
3658 
3659 struct anv_cmd_buffer {
3660    struct vk_command_buffer                     vk;
3661 
3662    struct anv_device *                          device;
3663    struct anv_queue_family *                    queue_family;
3664 
3665    /** Batch where the main commands live */
3666    struct anv_batch                             batch;
3667 
3668    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
3669     * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
3670     * rewrite the end to chain multiple batch together at vkQueueSubmit().
3671     */
3672    void *                                       batch_end;
3673 
3674    /* Fields required for the actual chain of anv_batch_bo's.
3675     *
3676     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
3677     */
3678    struct list_head                             batch_bos;
3679    enum anv_cmd_buffer_exec_mode                exec_mode;
3680 
3681    /* A vector of anv_batch_bo pointers for every batch or surface buffer
3682     * referenced by this command buffer
3683     *
3684     * initialized by anv_cmd_buffer_init_batch_bo_chain()
3685     */
3686    struct u_vector                            seen_bbos;
3687 
3688    /* A vector of int32_t's for every block of binding tables.
3689     *
3690     * initialized by anv_cmd_buffer_init_batch_bo_chain()
3691     */
3692    struct u_vector                              bt_block_states;
3693    struct anv_state                             bt_next;
3694 
3695    struct anv_reloc_list                        surface_relocs;
3696 
3697    /* Serial for tracking buffer completion */
3698    uint32_t                                     serial;
3699 
3700    /* Stream objects for storing temporary data */
3701    struct anv_state_stream                      surface_state_stream;
3702    struct anv_state_stream                      dynamic_state_stream;
3703    struct anv_state_stream                      general_state_stream;
3704    struct anv_state_stream                      indirect_push_descriptor_stream;
3705 
3706    VkCommandBufferUsageFlags                    usage_flags;
3707 
3708    struct anv_query_pool                       *perf_query_pool;
3709 
3710    struct anv_cmd_state                         state;
3711 
3712    struct anv_address                           return_addr;
3713 
3714    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
3715    uint64_t                                     intel_perf_marker;
3716 
3717    struct anv_measure_batch *measure;
3718 
3719    /**
3720     * KHR_performance_query requires self modifying command buffers and this
3721     * array has the location of modifying commands to the query begin and end
3722     * instructions storing performance counters. The array length is
3723     * anv_physical_device::n_perf_query_commands.
3724     */
3725    struct mi_address_token                  *self_mod_locations;
3726 
3727    /**
3728     * Index tracking which of the self_mod_locations items have already been
3729     * used.
3730     */
3731    uint32_t                                      perf_reloc_idx;
3732 
3733    /**
3734     * Sum of all the anv_batch_bo written sizes for this command buffer
3735     * including any executed secondary command buffer.
3736     */
3737    uint32_t                                     total_batch_size;
3738 
3739    struct {
3740       /** Batch generating part of the anv_cmd_buffer::batch */
3741       struct anv_batch                          batch;
3742 
3743       /**
3744        * Location in anv_cmd_buffer::batch at which we left some space to
3745        * insert a MI_BATCH_BUFFER_START into the
3746        * anv_cmd_buffer::generation::batch if needed.
3747        */
3748       struct anv_address                        jump_addr;
3749 
3750       /**
3751        * Location in anv_cmd_buffer::batch at which the generation batch
3752        * should jump back to.
3753        */
3754       struct anv_address                        return_addr;
3755 
3756       /** List of anv_batch_bo used for generation
3757        *
3758        * We have to keep this separated of the anv_cmd_buffer::batch_bos that
3759        * is used for a chaining optimization.
3760        */
3761       struct list_head                          batch_bos;
3762 
3763       /** Ring buffer of generated commands
3764        *
3765        * When generating draws in ring mode, this buffer will hold generated
3766        * 3DPRIMITIVE commands.
3767        */
3768       struct anv_bo                            *ring_bo;
3769 
3770       /**
3771        * State tracking of the generation shader (only used for the non-ring
3772        * mode).
3773        */
3774       struct anv_simple_shader                  shader_state;
3775    } generation;
3776 
3777    /**
3778     * A vector of anv_bo pointers for chunks of memory used by the command
3779     * buffer that are too large to be allocated through dynamic_state_stream.
3780     * This is the case for large enough acceleration structures.
3781     *
3782     * initialized by anv_cmd_buffer_init_batch_bo_chain()
3783     */
3784    struct u_vector                              dynamic_bos;
3785 
3786    /**
3787     * Structure holding tracepoints recorded in the command buffer.
3788     */
3789    struct u_trace                               trace;
3790 
3791    /** Pointer to the last emitted COMPUTE_WALKER.
3792     *
3793     * This is used to edit the instruction post emission to replace the "Post
3794     * Sync" field for utrace timestamp emission.
3795     */
3796    void                                        *last_compute_walker;
3797 
3798    /** Pointer to the last emitted EXECUTE_INDIRECT_DISPATCH.
3799     *
3800     * This is used to edit the instruction post emission to replace the "Post
3801     * Sync" field for utrace timestamp emission.
3802     */
3803    void                                        *last_indirect_dispatch;
3804 
3805    struct {
3806       struct anv_video_session *vid;
3807       struct anv_video_session_params *params;
3808    } video;
3809 
3810    /**
3811     * Companion RCS command buffer to support the MSAA operations on compute
3812     * queue.
3813     */
3814    struct anv_cmd_buffer                        *companion_rcs_cmd_buffer;
3815 
3816    /**
3817     * Whether this command buffer is a companion command buffer of compute one.
3818     */
3819    bool                                         is_companion_rcs_cmd_buffer;
3820 
3821 };
3822 
3823 extern const struct vk_command_buffer_ops anv_cmd_buffer_ops;
3824 
3825 /* Determine whether we can chain a given cmd_buffer to another one. We need
3826  * to make sure that we can edit the end of the batch to point to next one,
3827  * which requires the command buffer to not be used simultaneously.
3828  *
3829  * We could in theory also implement chaining with companion command buffers,
3830  * but let's sparse ourselves some pain and misery. This optimization has no
3831  * benefit on the brand new Xe kernel driver.
3832  */
3833 static inline bool
anv_cmd_buffer_is_chainable(struct anv_cmd_buffer * cmd_buffer)3834 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
3835 {
3836    return !(cmd_buffer->usage_flags &
3837             VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) &&
3838           !(cmd_buffer->is_companion_rcs_cmd_buffer);
3839 }
3840 
3841 static inline bool
anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer * cmd_buffer)3842 anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer *cmd_buffer)
3843 {
3844    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3845    return (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
3846 }
3847 
3848 static inline bool
anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer * cmd_buffer)3849 anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer *cmd_buffer)
3850 {
3851    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3852    return (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) != 0;
3853 }
3854 
3855 static inline bool
anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer * cmd_buffer)3856 anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
3857 {
3858    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3859    return queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
3860 }
3861 
3862 static inline bool
anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer * cmd_buffer)3863 anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer *cmd_buffer)
3864 {
3865    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
3866    return queue_family->engine_class == INTEL_ENGINE_CLASS_COPY;
3867 }
3868 
3869 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3870 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3871 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3872 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3873 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3874                                   struct anv_cmd_buffer *secondary);
3875 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3876 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3877                                 struct anv_cmd_buffer *cmd_buffer,
3878                                 const VkSemaphore *in_semaphores,
3879                                 const uint64_t *in_wait_values,
3880                                 uint32_t num_in_semaphores,
3881                                 const VkSemaphore *out_semaphores,
3882                                 const uint64_t *out_signal_values,
3883                                 uint32_t num_out_semaphores,
3884                                 VkFence fence,
3885                                 int perf_query_pass);
3886 
3887 void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
3888                           UNUSED VkCommandBufferResetFlags flags);
3889 
3890 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3891                                              const void *data, uint32_t size, uint32_t alignment);
3892 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3893                                               uint32_t *a, uint32_t *b,
3894                                               uint32_t dwords, uint32_t alignment);
3895 
3896 struct anv_address
3897 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3898 struct anv_state
3899 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3900                                    uint32_t entries, uint32_t *state_offset);
3901 struct anv_state
3902 anv_cmd_buffer_alloc_surface_states(struct anv_cmd_buffer *cmd_buffer,
3903                                     uint32_t count);
3904 struct anv_state
3905 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3906                                    uint32_t size, uint32_t alignment);
3907 struct anv_state
3908 anv_cmd_buffer_alloc_general_state(struct anv_cmd_buffer *cmd_buffer,
3909                                    uint32_t size, uint32_t alignment);
3910 
3911 void
3912 anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
3913                                      uint32_t num_cmd_buffers);
3914 void
3915 anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
3916                                 uint32_t cmd_buffer_count,
3917                                 struct anv_cmd_buffer **cmd_buffers,
3918                                 struct anv_query_pool *perf_query_pool,
3919                                 uint32_t perf_query_pass);
3920 void
3921 anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
3922                        uint32_t num_cmd_buffers);
3923 
3924 void
3925 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
3926                                          enum anv_pipe_bits flushed_bits);
3927 
3928 /**
3929  * A allocation tied to a command buffer.
3930  *
3931  * Don't use anv_cmd_alloc::address::map to write memory from userspace, use
3932  * anv_cmd_alloc::map instead.
3933  */
3934 struct anv_cmd_alloc {
3935    struct anv_address  address;
3936    void               *map;
3937    size_t              size;
3938 };
3939 
3940 #define ANV_EMPTY_ALLOC ((struct anv_cmd_alloc) { .map = NULL, .size = 0 })
3941 
3942 static inline bool
anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)3943 anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)
3944 {
3945    return alloc.size == 0;
3946 }
3947 
3948 struct anv_cmd_alloc
3949 anv_cmd_buffer_alloc_space(struct anv_cmd_buffer *cmd_buffer,
3950                            size_t size, uint32_t alignment,
3951                            bool private);
3952 
3953 VkResult
3954 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3955 
3956 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3957 
3958 struct anv_state
3959 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
3960 struct anv_state
3961 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3962 
3963 VkResult
3964 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3965                                          uint32_t num_entries,
3966                                          uint32_t *state_offset,
3967                                          struct anv_state *bt_state);
3968 
3969 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3970 
3971 static inline unsigned
anv_cmd_buffer_get_view_count(struct anv_cmd_buffer * cmd_buffer)3972 anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
3973 {
3974    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
3975    return MAX2(1, util_bitcount(gfx->view_mask));
3976 }
3977 
3978 /* Save/restore cmd buffer states for meta operations */
3979 enum anv_cmd_saved_state_flags {
3980    ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE         = BITFIELD_BIT(0),
3981    ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0         = BITFIELD_BIT(1),
3982    ANV_CMD_SAVED_STATE_PUSH_CONSTANTS           = BITFIELD_BIT(2),
3983 };
3984 
3985 struct anv_cmd_saved_state {
3986    uint32_t flags;
3987 
3988    struct anv_pipeline *pipeline;
3989    struct anv_descriptor_set *descriptor_set;
3990    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
3991 };
3992 
3993 void anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
3994                                uint32_t flags,
3995                                struct anv_cmd_saved_state *state);
3996 
3997 void anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
3998                                   struct anv_cmd_saved_state *state);
3999 
4000 enum anv_bo_sync_state {
4001    /** Indicates that this is a new (or newly reset fence) */
4002    ANV_BO_SYNC_STATE_RESET,
4003 
4004    /** Indicates that this fence has been submitted to the GPU but is still
4005     * (as far as we know) in use by the GPU.
4006     */
4007    ANV_BO_SYNC_STATE_SUBMITTED,
4008 
4009    ANV_BO_SYNC_STATE_SIGNALED,
4010 };
4011 
4012 struct anv_bo_sync {
4013    struct vk_sync sync;
4014 
4015    enum anv_bo_sync_state state;
4016    struct anv_bo *bo;
4017 };
4018 
4019 extern const struct vk_sync_type anv_bo_sync_type;
4020 
4021 static inline bool
vk_sync_is_anv_bo_sync(const struct vk_sync * sync)4022 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
4023 {
4024    return sync->type == &anv_bo_sync_type;
4025 }
4026 
4027 VkResult anv_create_sync_for_memory(struct vk_device *device,
4028                                     VkDeviceMemory memory,
4029                                     bool signal_memory,
4030                                     struct vk_sync **sync_out);
4031 
4032 struct anv_event {
4033    struct vk_object_base                        base;
4034    uint64_t                                     semaphore;
4035    struct anv_state                             state;
4036 };
4037 
4038 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
4039 
4040 #define anv_foreach_stage(stage, stage_bits)                         \
4041    for (gl_shader_stage stage,                                       \
4042         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
4043         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
4044         __tmp &= ~(1 << (stage)))
4045 
4046 struct anv_pipeline_bind_map {
4047    unsigned char                                surface_sha1[20];
4048    unsigned char                                sampler_sha1[20];
4049    unsigned char                                push_sha1[20];
4050 
4051    uint32_t surface_count;
4052    uint32_t sampler_count;
4053    uint16_t kernel_args_size;
4054    uint16_t kernel_arg_count;
4055 
4056    struct anv_pipeline_binding *                surface_to_descriptor;
4057    struct anv_pipeline_binding *                sampler_to_descriptor;
4058    struct brw_kernel_arg_desc *                 kernel_args;
4059 
4060    struct anv_push_range                        push_ranges[4];
4061 };
4062 
4063 struct anv_push_descriptor_info {
4064    /* A bitfield of descriptors used. */
4065    uint32_t used_descriptors;
4066 
4067    /* A bitfield of UBOs bindings fully promoted to push constants. */
4068    uint32_t fully_promoted_ubo_descriptors;
4069 
4070    /* */
4071    uint8_t used_set_buffer;
4072 };
4073 
4074 /* A list of values we push to implement some of the dynamic states */
4075 enum anv_dynamic_push_bits {
4076    ANV_DYNAMIC_PUSH_INPUT_VERTICES = BITFIELD_BIT(0),
4077 };
4078 
4079 struct anv_shader_upload_params {
4080    gl_shader_stage stage;
4081 
4082    const void *key_data;
4083    uint32_t key_size;
4084 
4085    const void *kernel_data;
4086    uint32_t kernel_size;
4087 
4088    const struct brw_stage_prog_data *prog_data;
4089    uint32_t prog_data_size;
4090 
4091    const struct brw_compile_stats *stats;
4092    uint32_t num_stats;
4093 
4094    const struct nir_xfb_info *xfb_info;
4095 
4096    const struct anv_pipeline_bind_map *bind_map;
4097 
4098    const struct anv_push_descriptor_info *push_desc_info;
4099 
4100    enum anv_dynamic_push_bits dynamic_push_values;
4101 };
4102 
4103 struct anv_shader_bin {
4104    struct vk_pipeline_cache_object base;
4105 
4106    gl_shader_stage stage;
4107 
4108    struct anv_state kernel;
4109    uint32_t kernel_size;
4110 
4111    const struct brw_stage_prog_data *prog_data;
4112    uint32_t prog_data_size;
4113 
4114    struct brw_compile_stats stats[3];
4115    uint32_t num_stats;
4116 
4117    struct nir_xfb_info *xfb_info;
4118 
4119    struct anv_push_descriptor_info push_desc_info;
4120 
4121    struct anv_pipeline_bind_map bind_map;
4122 
4123    enum anv_dynamic_push_bits dynamic_push_values;
4124 };
4125 
4126 struct anv_shader_bin *
4127 anv_shader_bin_create(struct anv_device *device,
4128                       gl_shader_stage stage,
4129                       const void *key, uint32_t key_size,
4130                       const void *kernel, uint32_t kernel_size,
4131                       const struct brw_stage_prog_data *prog_data,
4132                       uint32_t prog_data_size,
4133                       const struct brw_compile_stats *stats, uint32_t num_stats,
4134                       const struct nir_xfb_info *xfb_info,
4135                       const struct anv_pipeline_bind_map *bind_map,
4136                       const struct anv_push_descriptor_info *push_desc_info,
4137                       enum anv_dynamic_push_bits dynamic_push_values);
4138 
4139 
4140 static inline struct anv_shader_bin *
anv_shader_bin_ref(struct anv_shader_bin * shader)4141 anv_shader_bin_ref(struct anv_shader_bin *shader)
4142 {
4143    vk_pipeline_cache_object_ref(&shader->base);
4144 
4145    return shader;
4146 }
4147 
4148 static inline void
anv_shader_bin_unref(struct anv_device * device,struct anv_shader_bin * shader)4149 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
4150 {
4151    vk_pipeline_cache_object_unref(&device->vk, &shader->base);
4152 }
4153 
4154 struct anv_pipeline_executable {
4155    gl_shader_stage stage;
4156 
4157    struct brw_compile_stats stats;
4158 
4159    char *nir;
4160    char *disasm;
4161 };
4162 
4163 enum anv_pipeline_type {
4164    ANV_PIPELINE_GRAPHICS,
4165    ANV_PIPELINE_GRAPHICS_LIB,
4166    ANV_PIPELINE_COMPUTE,
4167    ANV_PIPELINE_RAY_TRACING,
4168 };
4169 
4170 struct anv_pipeline {
4171    struct vk_object_base                        base;
4172 
4173    struct anv_device *                          device;
4174 
4175    struct anv_batch                             batch;
4176    struct anv_reloc_list                        batch_relocs;
4177 
4178    void *                                       mem_ctx;
4179 
4180    enum anv_pipeline_type                       type;
4181    VkPipelineCreateFlags                        flags;
4182 
4183    VkPipelineCreateFlags2KHR                    active_stages;
4184 
4185    uint32_t                                     ray_queries;
4186 
4187    /**
4188     * Mask of stages that are accessing push descriptors.
4189     */
4190    VkShaderStageFlags                           use_push_descriptor;
4191 
4192    /**
4193     * Mask of stages that are accessing the push descriptors buffer.
4194     */
4195    VkShaderStageFlags                           use_push_descriptor_buffer;
4196 
4197    /**
4198     * Maximum scratch size for all shaders in this pipeline.
4199     */
4200    uint32_t                                     scratch_size;
4201 
4202    /* Layout of the sets used by the pipeline. */
4203    struct anv_pipeline_sets_layout              layout;
4204 
4205    struct util_dynarray                         executables;
4206 
4207    const struct intel_l3_config *               l3_config;
4208 };
4209 
4210 /* The base graphics pipeline object only hold shaders. */
4211 struct anv_graphics_base_pipeline {
4212    struct anv_pipeline                          base;
4213 
4214    struct vk_sample_locations_state             sample_locations;
4215 
4216    /* Shaders */
4217    struct anv_shader_bin *                      shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4218 
4219    /* A small hash based of shader_info::source_sha1 for identifying
4220     * shaders in renderdoc/shader-db.
4221     */
4222    uint32_t                                     source_hashes[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4223 
4224    /* Feedback index in
4225     * VkPipelineCreationFeedbackCreateInfo::pPipelineStageCreationFeedbacks
4226     *
4227     * For pipeline libraries, we need to remember the order at creation when
4228     * included into a linked pipeline.
4229     */
4230    uint32_t                                     feedback_index[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4231 
4232    /* Robustness flags used shaders
4233     */
4234    enum brw_robustness_flags                    robust_flags[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4235 
4236    /* True if at the time the fragment shader was compiled, it didn't have all
4237     * the information to avoid INTEL_MSAA_FLAG_ENABLE_DYNAMIC.
4238     */
4239    bool                                         fragment_dynamic;
4240 };
4241 
4242 /* The library graphics pipeline object has a partial graphic state and
4243  * possibly some shaders. If requested, shaders are also present in NIR early
4244  * form.
4245  */
4246 struct anv_graphics_lib_pipeline {
4247    struct anv_graphics_base_pipeline            base;
4248 
4249    VkGraphicsPipelineLibraryFlagsEXT            lib_flags;
4250 
4251    struct vk_graphics_pipeline_all_state        all_state;
4252    struct vk_graphics_pipeline_state            state;
4253 
4254    /* Retained shaders for link optimization. */
4255    struct {
4256       /* This hash is the same as computed in
4257        * anv_graphics_pipeline_gather_shaders().
4258        */
4259       unsigned char                             shader_sha1[20];
4260 
4261       enum gl_subgroup_size                     subgroup_size_type;
4262 
4263       /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
4264        * constants.
4265        */
4266       nir_shader *                              nir;
4267    }                                            retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4268 
4269    /* Whether the shaders have been retained */
4270    bool                                         retain_shaders;
4271 };
4272 
4273 struct anv_gfx_state_ptr {
4274    /* Both in dwords */
4275    uint16_t  offset;
4276    uint16_t  len;
4277 };
4278 
4279 /* The final graphics pipeline object has all the graphics state ready to be
4280  * programmed into HW packets (dynamic_state field) or fully baked in its
4281  * batch.
4282  */
4283 struct anv_graphics_pipeline {
4284    struct anv_graphics_base_pipeline            base;
4285 
4286    struct vk_vertex_input_state                 vertex_input;
4287    struct vk_sample_locations_state             sample_locations;
4288    struct vk_dynamic_graphics_state             dynamic_state;
4289 
4290    /* If true, the patch control points are passed through push constants
4291     * (anv_push_constants::gfx::tcs_input_vertices)
4292     */
4293    bool                                         dynamic_patch_control_points;
4294 
4295    /* This field is required with dynamic primitive topology,
4296     * rasterization_samples used only with gen < 8.
4297     */
4298    uint32_t                                     rasterization_samples;
4299 
4300    uint32_t                                     view_mask;
4301    uint32_t                                     instance_multiplier;
4302 
4303    bool                                         rp_has_ds_self_dep;
4304 
4305    bool                                         kill_pixel;
4306    bool                                         force_fragment_thread_dispatch;
4307    bool                                         uses_xfb;
4308 
4309    /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */
4310    uint32_t                                     vs_input_elements;
4311 
4312    /* Number of VERTEX_ELEMENT_STATE elements we need to implement some of the
4313     * draw parameters
4314     */
4315    uint32_t                                     svgs_count;
4316 
4317    /* Pre computed VERTEX_ELEMENT_STATE structures for the vertex input that
4318     * can be copied into the anv_cmd_buffer behind a 3DSTATE_VERTEX_BUFFER.
4319     *
4320     * When MESA_VK_DYNAMIC_VI is not dynamic
4321     *
4322     *     vertex_input_elems = vs_input_elements + svgs_count
4323     *
4324     * All the VERTEX_ELEMENT_STATE can be directly copied behind a
4325     * 3DSTATE_VERTEX_ELEMENTS instruction in the command buffer. Otherwise
4326     * this array only holds the svgs_count elements.
4327     */
4328    uint32_t                                     vertex_input_elems;
4329    uint32_t                                     vertex_input_data[2 * 31 /* MAX_VES + 2 internal */];
4330 
4331    enum intel_msaa_flags                        fs_msaa_flags;
4332 
4333    /* Pre computed CS instructions that can directly be copied into
4334     * anv_cmd_buffer.
4335     */
4336    uint32_t                                     batch_data[416];
4337 
4338    /* Urb setup utilized by this pipeline. */
4339    struct intel_urb_config urb_cfg;
4340 
4341    /* Fully backed instructions, ready to be emitted in the anv_cmd_buffer */
4342    struct {
4343       struct anv_gfx_state_ptr                  urb;
4344       struct anv_gfx_state_ptr                  vf_statistics;
4345       struct anv_gfx_state_ptr                  vf_sgvs;
4346       struct anv_gfx_state_ptr                  vf_sgvs_2;
4347       struct anv_gfx_state_ptr                  vf_sgvs_instancing;
4348       struct anv_gfx_state_ptr                  vf_instancing;
4349       struct anv_gfx_state_ptr                  primitive_replication;
4350       struct anv_gfx_state_ptr                  sbe;
4351       struct anv_gfx_state_ptr                  sbe_swiz;
4352       struct anv_gfx_state_ptr                  so_decl_list;
4353       struct anv_gfx_state_ptr                  ms;
4354       struct anv_gfx_state_ptr                  vs;
4355       struct anv_gfx_state_ptr                  hs;
4356       struct anv_gfx_state_ptr                  ds;
4357       struct anv_gfx_state_ptr                  ps;
4358 
4359       struct anv_gfx_state_ptr                  task_control;
4360       struct anv_gfx_state_ptr                  task_shader;
4361       struct anv_gfx_state_ptr                  task_redistrib;
4362       struct anv_gfx_state_ptr                  clip_mesh;
4363       struct anv_gfx_state_ptr                  mesh_control;
4364       struct anv_gfx_state_ptr                  mesh_shader;
4365       struct anv_gfx_state_ptr                  mesh_distrib;
4366       struct anv_gfx_state_ptr                  sbe_mesh;
4367    } final;
4368 
4369    /* Pre packed CS instructions & structures that need to be merged later
4370     * with dynamic state.
4371     */
4372    struct {
4373       struct anv_gfx_state_ptr                  clip;
4374       struct anv_gfx_state_ptr                  sf;
4375       struct anv_gfx_state_ptr                  raster;
4376       struct anv_gfx_state_ptr                  ps_extra;
4377       struct anv_gfx_state_ptr                  wm;
4378       struct anv_gfx_state_ptr                  so;
4379       struct anv_gfx_state_ptr                  gs;
4380       struct anv_gfx_state_ptr                  te;
4381       struct anv_gfx_state_ptr                  vfg;
4382    } partial;
4383 };
4384 
4385 #define anv_batch_merge_pipeline_state(batch, dwords0, pipeline, state) \
4386    do {                                                                 \
4387       uint32_t *dw;                                                     \
4388                                                                         \
4389       assert(ARRAY_SIZE(dwords0) == (pipeline)->state.len);             \
4390       dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
4391       if (!dw)                                                          \
4392          break;                                                         \
4393       for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
4394          dw[i] = (dwords0)[i] |                                         \
4395             (pipeline)->batch_data[(pipeline)->state.offset + i];       \
4396       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));   \
4397    } while (0)
4398 
4399 #define anv_batch_emit_pipeline_state(batch, pipeline, state)           \
4400    do {                                                                 \
4401       if ((pipeline)->state.len == 0)                                   \
4402          break;                                                         \
4403       uint32_t *dw;                                                     \
4404       dw = anv_batch_emit_dwords((batch), (pipeline)->state.len);       \
4405       if (!dw)                                                          \
4406          break;                                                         \
4407       memcpy(dw, &(pipeline)->batch_data[(pipeline)->state.offset],     \
4408              4 * (pipeline)->state.len);                                \
4409    } while (0)
4410 
4411 
4412 struct anv_compute_pipeline {
4413    struct anv_pipeline                          base;
4414 
4415    struct anv_shader_bin *                      cs;
4416    uint32_t                                     batch_data[9];
4417    uint32_t                                     interface_descriptor_data[8];
4418 
4419    /* A small hash based of shader_info::source_sha1 for identifying shaders
4420     * in renderdoc/shader-db.
4421     */
4422    uint32_t                                     source_hash;
4423 };
4424 
4425 struct anv_rt_shader_group {
4426    VkRayTracingShaderGroupTypeKHR type;
4427 
4428    /* Whether this group was imported from another pipeline */
4429    bool imported;
4430 
4431    struct anv_shader_bin *general;
4432    struct anv_shader_bin *closest_hit;
4433    struct anv_shader_bin *any_hit;
4434    struct anv_shader_bin *intersection;
4435 
4436    /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
4437    uint32_t handle[8];
4438 };
4439 
4440 struct anv_ray_tracing_pipeline {
4441    struct anv_pipeline                          base;
4442 
4443    /* All shaders in the pipeline */
4444    struct util_dynarray                         shaders;
4445 
4446    uint32_t                                     group_count;
4447    struct anv_rt_shader_group *                 groups;
4448 
4449    /* If non-zero, this is the default computed stack size as per the stack
4450     * size computation in the Vulkan spec.  If zero, that indicates that the
4451     * client has requested a dynamic stack size.
4452     */
4453    uint32_t                                     stack_size;
4454 };
4455 
4456 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
4457    static inline struct anv_##pipe_type##_pipeline *                 \
4458    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
4459    {                                                                 \
4460       assert(pipeline->type == pipe_enum);                           \
4461       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
4462    }
4463 
ANV_DECL_PIPELINE_DOWNCAST(graphics,ANV_PIPELINE_GRAPHICS)4464 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
4465 ANV_DECL_PIPELINE_DOWNCAST(graphics_base, ANV_PIPELINE_GRAPHICS)
4466 ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
4467 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
4468 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
4469 
4470 static inline bool
4471 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
4472                        gl_shader_stage stage)
4473 {
4474    return (pipeline->base.base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4475 }
4476 
4477 static inline bool
anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline * pipeline,gl_shader_stage stage)4478 anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
4479                             gl_shader_stage stage)
4480 {
4481    return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
4482 }
4483 
4484 static inline bool
anv_pipeline_is_primitive(const struct anv_graphics_pipeline * pipeline)4485 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
4486 {
4487    return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
4488 }
4489 
4490 static inline bool
anv_pipeline_is_mesh(const struct anv_graphics_pipeline * pipeline)4491 anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
4492 {
4493    return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
4494 }
4495 
4496 static inline bool
anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer * cmd_buffer)4497 anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
4498 {
4499    const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
4500    const struct vk_dynamic_graphics_state *dyn =
4501       &cmd_buffer->vk.dynamic_graphics_state;
4502    uint8_t color_writes = dyn->cb.color_write_enables;
4503 
4504    /* All writes disabled through vkCmdSetColorWriteEnableEXT */
4505    if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
4506       return true;
4507 
4508    /* Or all write masks are empty */
4509    for (uint32_t i = 0; i < state->color_att_count; i++) {
4510       if (dyn->cb.attachments[i].write_mask != 0)
4511          return false;
4512    }
4513 
4514    return true;
4515 }
4516 
4517 static inline void
anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state * state)4518 anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state)
4519 {
4520    state->has_uint_rt = false;
4521    for (unsigned a = 0; a < state->color_att_count; a++) {
4522       if (vk_format_is_int(state->color_att[a].vk_format)) {
4523          state->has_uint_rt = true;
4524          break;
4525       }
4526    }
4527 }
4528 
4529 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
4530 static inline const struct brw_##prefix##_prog_data *                   \
4531 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
4532 {                                                                       \
4533    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
4534       return (const struct brw_##prefix##_prog_data *)                  \
4535          pipeline->base.shaders[stage]->prog_data;                      \
4536    } else {                                                             \
4537       return NULL;                                                      \
4538    }                                                                    \
4539 }
4540 
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs,MESA_SHADER_VERTEX)4541 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
4542 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
4543 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
4544 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
4545 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
4546 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
4547 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
4548 
4549 static inline const struct brw_cs_prog_data *
4550 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
4551 {
4552    assert(pipeline->cs);
4553    return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
4554 }
4555 
4556 static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline * pipeline)4557 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
4558 {
4559    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
4560       return &get_gs_prog_data(pipeline)->base;
4561    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
4562       return &get_tes_prog_data(pipeline)->base;
4563    else
4564       return &get_vs_prog_data(pipeline)->base;
4565 }
4566 
4567 VkResult
4568 anv_device_init_rt_shaders(struct anv_device *device);
4569 
4570 void
4571 anv_device_finish_rt_shaders(struct anv_device *device);
4572 
4573 struct anv_kernel_arg {
4574    bool is_ptr;
4575    uint16_t size;
4576 
4577    union {
4578       uint64_t u64;
4579       void *ptr;
4580    };
4581 };
4582 
4583 struct anv_kernel {
4584 #ifndef NDEBUG
4585    const char *name;
4586 #endif
4587    struct anv_shader_bin *bin;
4588    const struct intel_l3_config *l3_config;
4589 };
4590 
4591 struct anv_format_plane {
4592    enum isl_format isl_format:16;
4593    struct isl_swizzle swizzle;
4594 
4595    /* What aspect is associated to this plane */
4596    VkImageAspectFlags aspect;
4597 };
4598 
4599 struct anv_format {
4600    struct anv_format_plane planes[3];
4601    VkFormat vk_format;
4602    uint8_t n_planes;
4603    bool can_ycbcr;
4604    bool can_video;
4605 };
4606 
4607 static inline void
anv_assert_valid_aspect_set(VkImageAspectFlags aspects)4608 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
4609 {
4610    if (util_bitcount(aspects) == 1) {
4611       assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
4612                         VK_IMAGE_ASPECT_DEPTH_BIT |
4613                         VK_IMAGE_ASPECT_STENCIL_BIT |
4614                         VK_IMAGE_ASPECT_PLANE_0_BIT |
4615                         VK_IMAGE_ASPECT_PLANE_1_BIT |
4616                         VK_IMAGE_ASPECT_PLANE_2_BIT));
4617    } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
4618       assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
4619              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
4620                          VK_IMAGE_ASPECT_PLANE_1_BIT) ||
4621              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
4622                          VK_IMAGE_ASPECT_PLANE_1_BIT |
4623                          VK_IMAGE_ASPECT_PLANE_2_BIT));
4624    } else {
4625       assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
4626                          VK_IMAGE_ASPECT_STENCIL_BIT));
4627    }
4628 }
4629 
4630 /**
4631  * Return the aspect's plane relative to all_aspects.  For an image, for
4632  * instance, all_aspects would be the set of aspects in the image.  For
4633  * an image view, all_aspects would be the subset of aspects represented
4634  * by that particular view.
4635  */
4636 static inline uint32_t
anv_aspect_to_plane(VkImageAspectFlags all_aspects,VkImageAspectFlagBits aspect)4637 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
4638                     VkImageAspectFlagBits aspect)
4639 {
4640    anv_assert_valid_aspect_set(all_aspects);
4641    assert(util_bitcount(aspect) == 1);
4642    assert(!(aspect & ~all_aspects));
4643 
4644    /* Because we always put image and view planes in aspect-bit-order, the
4645     * plane index is the number of bits in all_aspects before aspect.
4646     */
4647    return util_bitcount(all_aspects & (aspect - 1));
4648 }
4649 
4650 #define anv_foreach_image_aspect_bit(b, image, aspects) \
4651    u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
4652 
4653 const struct anv_format *
4654 anv_get_format(VkFormat format);
4655 
4656 static inline uint32_t
anv_get_format_planes(VkFormat vk_format)4657 anv_get_format_planes(VkFormat vk_format)
4658 {
4659    const struct anv_format *format = anv_get_format(vk_format);
4660 
4661    return format != NULL ? format->n_planes : 0;
4662 }
4663 
4664 struct anv_format_plane
4665 anv_get_format_plane(const struct intel_device_info *devinfo,
4666                      VkFormat vk_format, uint32_t plane,
4667                      VkImageTiling tiling);
4668 
4669 struct anv_format_plane
4670 anv_get_format_aspect(const struct intel_device_info *devinfo,
4671                       VkFormat vk_format,
4672                       VkImageAspectFlagBits aspect, VkImageTiling tiling);
4673 
4674 static inline enum isl_format
anv_get_isl_format(const struct intel_device_info * devinfo,VkFormat vk_format,VkImageAspectFlags aspect,VkImageTiling tiling)4675 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
4676                    VkImageAspectFlags aspect, VkImageTiling tiling)
4677 {
4678    return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
4679 }
4680 
4681 bool anv_format_supports_ccs_e(const struct intel_device_info *devinfo,
4682                                const enum isl_format format);
4683 
4684 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
4685                                   VkImageCreateFlags create_flags,
4686                                   VkFormat vk_format, VkImageTiling vk_tiling,
4687                                   VkImageUsageFlags vk_usage,
4688                                   const VkImageFormatListCreateInfo *fmt_list);
4689 
4690 extern VkFormat
4691 vk_format_from_android(unsigned android_format, unsigned android_usage);
4692 
4693 static inline VkFormat
anv_get_emulation_format(const struct anv_physical_device * pdevice,VkFormat format)4694 anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format)
4695 {
4696    if (pdevice->flush_astc_ldr_void_extent_denorms) {
4697       const struct util_format_description *desc =
4698          vk_format_description(format);
4699       if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC &&
4700           desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB)
4701          return format;
4702    }
4703 
4704    if (pdevice->emu_astc_ldr)
4705       return vk_texcompress_astc_emulation_format(format);
4706 
4707    return VK_FORMAT_UNDEFINED;
4708 }
4709 
4710 static inline bool
anv_is_format_emulated(const struct anv_physical_device * pdevice,VkFormat format)4711 anv_is_format_emulated(const struct anv_physical_device *pdevice, VkFormat format)
4712 {
4713    return anv_get_emulation_format(pdevice, format) != VK_FORMAT_UNDEFINED;
4714 }
4715 
4716 static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)4717 anv_swizzle_for_render(struct isl_swizzle swizzle)
4718 {
4719    /* Sometimes the swizzle will have alpha map to one.  We do this to fake
4720     * RGB as RGBA for texturing
4721     */
4722    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
4723           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
4724 
4725    /* But it doesn't matter what we render to that channel */
4726    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
4727 
4728    return swizzle;
4729 }
4730 
4731 void
4732 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
4733 
4734 /**
4735  * Describes how each part of anv_image will be bound to memory.
4736  */
4737 struct anv_image_memory_range {
4738    /**
4739     * Disjoint bindings into which each portion of the image will be bound.
4740     *
4741     * Binding images to memory can be complicated and invold binding different
4742     * portions of the image to different memory objects or regions.  For most
4743     * images, everything lives in the MAIN binding and gets bound by
4744     * vkBindImageMemory.  For disjoint multi-planar images, each plane has
4745     * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
4746     * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which are
4747     * implicit or driver-managed and live in special-case bindings.
4748     */
4749    enum anv_image_memory_binding {
4750       /**
4751        * Used if and only if image is not multi-planar disjoint. Bound by
4752        * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
4753        */
4754       ANV_IMAGE_MEMORY_BINDING_MAIN,
4755 
4756       /**
4757        * Used if and only if image is multi-planar disjoint.  Bound by
4758        * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
4759        */
4760       ANV_IMAGE_MEMORY_BINDING_PLANE_0,
4761       ANV_IMAGE_MEMORY_BINDING_PLANE_1,
4762       ANV_IMAGE_MEMORY_BINDING_PLANE_2,
4763 
4764       /**
4765        * Driver-private bo. In special cases we may store the aux surface and/or
4766        * aux state in this binding.
4767        */
4768       ANV_IMAGE_MEMORY_BINDING_PRIVATE,
4769 
4770       /** Sentinel */
4771       ANV_IMAGE_MEMORY_BINDING_END,
4772    } binding;
4773 
4774    /**
4775     * Offset is relative to the start of the binding created by
4776     * vkBindImageMemory, not to the start of the bo.
4777     */
4778    uint64_t offset;
4779 
4780    uint64_t size;
4781    uint32_t alignment;
4782 };
4783 
4784 /**
4785  * Subsurface of an anv_image.
4786  */
4787 struct anv_surface {
4788    struct isl_surf isl;
4789    struct anv_image_memory_range memory_range;
4790 };
4791 
4792 static inline bool MUST_CHECK
anv_surface_is_valid(const struct anv_surface * surface)4793 anv_surface_is_valid(const struct anv_surface *surface)
4794 {
4795    return surface->isl.size_B > 0 && surface->memory_range.size > 0;
4796 }
4797 
4798 struct anv_image {
4799    struct vk_image vk;
4800 
4801    uint32_t n_planes;
4802 
4803    /**
4804     * Image has multi-planar format and was created with
4805     * VK_IMAGE_CREATE_DISJOINT_BIT.
4806     */
4807    bool disjoint;
4808 
4809    /**
4810     * Image is a WSI image
4811     */
4812    bool from_wsi;
4813 
4814    /**
4815     * Image was imported from an struct AHardwareBuffer.  We have to delay
4816     * final image creation until bind time.
4817     */
4818    bool from_ahb;
4819 
4820    /**
4821     * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
4822     * must be released when the image is destroyed.
4823     */
4824    bool from_gralloc;
4825 
4826    /**
4827     * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC
4828     * LDR workaround or emulation.
4829     */
4830    VkFormat emu_plane_format;
4831 
4832    /**
4833     * The memory bindings created by vkCreateImage and vkBindImageMemory.
4834     *
4835     * For details on the image's memory layout, see check_memory_bindings().
4836     *
4837     * vkCreateImage constructs the `memory_range` for each
4838     * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
4839     * and only if `memory_range::size > 0`.
4840     *
4841     * vkBindImageMemory binds each valid `memory_range` to an `address`.
4842     * Usually, the app will provide the address via the parameters of
4843     * vkBindImageMemory.  However, special-case bindings may be bound to
4844     * driver-private memory.
4845     */
4846    struct anv_image_binding {
4847       struct anv_image_memory_range memory_range;
4848       struct anv_address address;
4849       struct anv_sparse_binding_data sparse_data;
4850    } bindings[ANV_IMAGE_MEMORY_BINDING_END];
4851 
4852    /**
4853     * Image subsurfaces
4854     *
4855     * For each foo, anv_image::planes[x].surface is valid if and only if
4856     * anv_image::aspects has a x aspect. Refer to anv_image_aspect_to_plane()
4857     * to figure the number associated with a given aspect.
4858     *
4859     * The hardware requires that the depth buffer and stencil buffer be
4860     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
4861     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
4862     * allocate the depth and stencil buffers as separate surfaces in the same
4863     * bo.
4864     */
4865    struct anv_image_plane {
4866       struct anv_surface primary_surface;
4867 
4868       /**
4869        * The base aux usage for this image.  For color images, this can be
4870        * either CCS_E or CCS_D depending on whether or not we can reliably
4871        * leave CCS on all the time.
4872        */
4873       enum isl_aux_usage aux_usage;
4874 
4875       struct anv_surface aux_surface;
4876 
4877       /** Location of the compression control surface.  */
4878       struct anv_image_memory_range compr_ctrl_memory_range;
4879 
4880       /** Location of the fast clear state.  */
4881       struct anv_image_memory_range fast_clear_memory_range;
4882 
4883       /**
4884        * Whether this image can be fast cleared with non-zero clear colors.
4885        * This can happen with mutable images when formats of different bit
4886        * sizes per components are used.
4887        *
4888        * On Gfx9+, because the clear colors are stored as a 4 components 32bit
4889        * values, we can clear in R16G16_UNORM (store 2 16bit values in the
4890        * components 0 & 1 of the clear color) and then draw in R32_UINT which
4891        * would interpret the clear color as a single component value, using
4892        * only the first 16bit component of the previous written clear color.
4893        *
4894        * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported, this
4895        * boolean will prevent the usage of CC_ONE.
4896        */
4897       bool can_non_zero_fast_clear;
4898 
4899       struct {
4900          /** Whether the image has CCS data mapped through AUX-TT. */
4901          bool mapped;
4902 
4903          /** Main address of the mapping. */
4904          uint64_t addr;
4905 
4906          /** Size of the mapping. */
4907          uint64_t size;
4908       } aux_tt;
4909    } planes[3];
4910 
4911    struct anv_image_memory_range vid_dmv_top_surface;
4912 
4913    /* Link in the anv_device.image_private_objects list */
4914    struct list_head link;
4915 };
4916 
4917 static inline bool
anv_image_is_sparse(struct anv_image * image)4918 anv_image_is_sparse(struct anv_image *image)
4919 {
4920    return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
4921 }
4922 
4923 static inline bool
anv_image_is_externally_shared(const struct anv_image * image)4924 anv_image_is_externally_shared(const struct anv_image *image)
4925 {
4926    return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
4927           image->vk.external_handle_types != 0;
4928 }
4929 
4930 static inline bool
anv_image_has_private_binding(const struct anv_image * image)4931 anv_image_has_private_binding(const struct anv_image *image)
4932 {
4933    const struct anv_image_binding private_binding =
4934       image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
4935    return private_binding.memory_range.size != 0;
4936 }
4937 
4938 static inline bool
anv_image_format_is_d16_or_s8(const struct anv_image * image)4939 anv_image_format_is_d16_or_s8(const struct anv_image *image)
4940 {
4941    return image->vk.format == VK_FORMAT_D16_UNORM ||
4942       image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT ||
4943       image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
4944       image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
4945       image->vk.format == VK_FORMAT_S8_UINT;
4946 }
4947 
4948 /* The ordering of this enum is important */
4949 enum anv_fast_clear_type {
4950    /** Image does not have/support any fast-clear blocks */
4951    ANV_FAST_CLEAR_NONE = 0,
4952    /** Image has/supports fast-clear but only to the default value */
4953    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
4954    /** Image has/supports fast-clear with an arbitrary fast-clear value */
4955    ANV_FAST_CLEAR_ANY = 2,
4956 };
4957 
4958 /**
4959  * Return the aspect's _format_ plane, not its _memory_ plane (using the
4960  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
4961  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
4962  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
4963  */
4964 static inline uint32_t
anv_image_aspect_to_plane(const struct anv_image * image,VkImageAspectFlagBits aspect)4965 anv_image_aspect_to_plane(const struct anv_image *image,
4966                           VkImageAspectFlagBits aspect)
4967 {
4968    return anv_aspect_to_plane(image->vk.aspects, aspect);
4969 }
4970 
4971 /* Returns the number of auxiliary buffer levels attached to an image. */
4972 static inline uint8_t
anv_image_aux_levels(const struct anv_image * const image,VkImageAspectFlagBits aspect)4973 anv_image_aux_levels(const struct anv_image * const image,
4974                      VkImageAspectFlagBits aspect)
4975 {
4976    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4977    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
4978       return 0;
4979 
4980    return image->vk.mip_levels;
4981 }
4982 
4983 /* Returns the number of auxiliary buffer layers attached to an image. */
4984 static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,VkImageAspectFlagBits aspect,const uint8_t miplevel)4985 anv_image_aux_layers(const struct anv_image * const image,
4986                      VkImageAspectFlagBits aspect,
4987                      const uint8_t miplevel)
4988 {
4989    assert(image);
4990 
4991    /* The miplevel must exist in the main buffer. */
4992    assert(miplevel < image->vk.mip_levels);
4993 
4994    if (miplevel >= anv_image_aux_levels(image, aspect)) {
4995       /* There are no layers with auxiliary data because the miplevel has no
4996        * auxiliary data.
4997        */
4998       return 0;
4999    }
5000 
5001    return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
5002 }
5003 
5004 static inline struct anv_address MUST_CHECK
anv_image_address(const struct anv_image * image,const struct anv_image_memory_range * mem_range)5005 anv_image_address(const struct anv_image *image,
5006                   const struct anv_image_memory_range *mem_range)
5007 {
5008    const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
5009    assert(binding->memory_range.offset == 0);
5010 
5011    if (mem_range->size == 0)
5012       return ANV_NULL_ADDRESS;
5013 
5014    return anv_address_add(binding->address, mem_range->offset);
5015 }
5016 
5017 static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)5018 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
5019                                const struct anv_image *image,
5020                                VkImageAspectFlagBits aspect)
5021 {
5022    assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
5023                                VK_IMAGE_ASPECT_DEPTH_BIT));
5024 
5025    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5026    const struct anv_image_memory_range *mem_range =
5027       &image->planes[plane].fast_clear_memory_range;
5028 
5029    return anv_image_address(image, mem_range);
5030 }
5031 
5032 static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)5033 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
5034                                    const struct anv_image *image,
5035                                    VkImageAspectFlagBits aspect)
5036 {
5037    struct anv_address addr =
5038       anv_image_get_clear_color_addr(device, image, aspect);
5039 
5040    unsigned clear_color_state_size;
5041    if (device->info->ver >= 11) {
5042       /* The fast clear type and the first compression state are stored in the
5043        * last 2 dwords of the clear color struct. Refer to the comment in
5044        * add_aux_state_tracking_buffer().
5045        */
5046       assert(device->isl_dev.ss.clear_color_state_size >= 32);
5047       clear_color_state_size = device->isl_dev.ss.clear_color_state_size - 8;
5048    } else
5049       clear_color_state_size = device->isl_dev.ss.clear_value_size;
5050    return anv_address_add(addr, clear_color_state_size);
5051 }
5052 
5053 static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect,uint32_t level,uint32_t array_layer)5054 anv_image_get_compression_state_addr(const struct anv_device *device,
5055                                      const struct anv_image *image,
5056                                      VkImageAspectFlagBits aspect,
5057                                      uint32_t level, uint32_t array_layer)
5058 {
5059    assert(level < anv_image_aux_levels(image, aspect));
5060    assert(array_layer < anv_image_aux_layers(image, aspect, level));
5061    UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5062    assert(isl_aux_usage_has_ccs_e(image->planes[plane].aux_usage));
5063 
5064    /* Relative to start of the plane's fast clear type */
5065    uint32_t offset;
5066 
5067    offset = 4; /* Go past the fast clear type */
5068 
5069    if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
5070       for (uint32_t l = 0; l < level; l++)
5071          offset += u_minify(image->vk.extent.depth, l) * 4;
5072    } else {
5073       offset += level * image->vk.array_layers * 4;
5074    }
5075 
5076    offset += array_layer * 4;
5077 
5078    assert(offset < image->planes[plane].fast_clear_memory_range.size);
5079 
5080    return anv_address_add(
5081       anv_image_get_fast_clear_type_addr(device, image, aspect),
5082       offset);
5083 }
5084 
5085 static inline const struct anv_image_memory_range *
anv_image_get_aux_memory_range(const struct anv_image * image,uint32_t plane)5086 anv_image_get_aux_memory_range(const struct anv_image *image,
5087                                uint32_t plane)
5088 {
5089    if (image->planes[plane].aux_surface.memory_range.size > 0)
5090      return &image->planes[plane].aux_surface.memory_range;
5091    else
5092      return &image->planes[plane].compr_ctrl_memory_range;
5093 }
5094 
5095 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
5096 static inline bool
anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,const struct anv_image * image)5097 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
5098                         const struct anv_image *image)
5099 {
5100    if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
5101       return false;
5102 
5103    /* For Gfx8-11, there are some restrictions around sampling from HiZ.
5104     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
5105     * say:
5106     *
5107     *    "If this field is set to AUX_HIZ, Number of Multisamples must
5108     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
5109     */
5110    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
5111       return false;
5112 
5113    if (!devinfo->has_sample_with_hiz)
5114       return false;
5115 
5116    return image->vk.samples == 1;
5117 }
5118 
5119 /* Returns true if an MCS-enabled buffer can be sampled from. */
5120 static inline bool
anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,const struct anv_image * image)5121 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
5122                               const struct anv_image *image)
5123 {
5124    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
5125    const uint32_t plane =
5126       anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
5127 
5128    assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
5129 
5130    const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
5131 
5132    /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
5133     * See HSD 1707282275, wa_14013111325. Due to the use of
5134     * format-reinterpretation, a simplified workaround is implemented.
5135     */
5136    if (intel_needs_workaround(devinfo, 14013111325) &&
5137        isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
5138       return false;
5139    }
5140 
5141    return true;
5142 }
5143 
5144 static inline bool
anv_image_plane_uses_aux_map(const struct anv_device * device,const struct anv_image * image,uint32_t plane)5145 anv_image_plane_uses_aux_map(const struct anv_device *device,
5146                              const struct anv_image *image,
5147                              uint32_t plane)
5148 {
5149    return device->info->has_aux_map &&
5150       isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
5151 }
5152 
5153 static inline bool
anv_image_uses_aux_map(const struct anv_device * device,const struct anv_image * image)5154 anv_image_uses_aux_map(const struct anv_device *device,
5155                        const struct anv_image *image)
5156 {
5157    for (uint32_t p = 0; p < image->n_planes; ++p) {
5158       if (anv_image_plane_uses_aux_map(device, image, p))
5159          return true;
5160    }
5161 
5162    return false;
5163 }
5164 
5165 static inline bool
anv_bo_allows_aux_map(const struct anv_device * device,const struct anv_bo * bo)5166 anv_bo_allows_aux_map(const struct anv_device *device,
5167                       const struct anv_bo *bo)
5168 {
5169    if (device->aux_map_ctx == NULL)
5170       return false;
5171 
5172    return (bo->alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) != 0;
5173 }
5174 
5175 static inline bool
anv_address_allows_aux_map(const struct anv_device * device,struct anv_address addr)5176 anv_address_allows_aux_map(const struct anv_device *device,
5177                            struct anv_address addr)
5178 {
5179    if (device->aux_map_ctx == NULL)
5180       return false;
5181 
5182    /* Technically, we really only care about what offset the image is bound
5183     * into on the BO, but we don't have that information here. As a heuristic,
5184     * rely on the BO offset instead.
5185     */
5186    if (anv_address_physical(addr) %
5187        intel_aux_map_get_alignment(device->aux_map_ctx) != 0)
5188       return false;
5189 
5190    return true;
5191 }
5192 
5193 void
5194 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
5195                                   const struct anv_image *image,
5196                                   VkImageAspectFlagBits aspect,
5197                                   enum isl_aux_usage aux_usage,
5198                                   uint32_t level,
5199                                   uint32_t base_layer,
5200                                   uint32_t layer_count);
5201 
5202 void
5203 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
5204                                        const struct anv_image *image,
5205                                        const enum isl_format format,
5206                                        union isl_color_value clear_color);
5207 
5208 void
5209 anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
5210                                            struct anv_state state,
5211                                            const struct anv_image *image);
5212 
5213 struct anv_image_binding *
5214 anv_image_aspect_to_binding(struct anv_image *image,
5215                             VkImageAspectFlags aspect);
5216 
5217 void
5218 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
5219                       const struct anv_image *image,
5220                       VkImageAspectFlagBits aspect,
5221                       enum isl_aux_usage aux_usage,
5222                       enum isl_format format, struct isl_swizzle swizzle,
5223                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
5224                       VkRect2D area, union isl_color_value clear_color);
5225 void
5226 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
5227                               const struct anv_image *image,
5228                               VkImageAspectFlags aspects,
5229                               enum isl_aux_usage depth_aux_usage,
5230                               uint32_t level,
5231                               uint32_t base_layer, uint32_t layer_count,
5232                               VkRect2D area,
5233                               float depth_value, uint8_t stencil_value);
5234 void
5235 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
5236                        const struct anv_image *src_image,
5237                        enum isl_aux_usage src_aux_usage,
5238                        uint32_t src_level, uint32_t src_base_layer,
5239                        const struct anv_image *dst_image,
5240                        enum isl_aux_usage dst_aux_usage,
5241                        uint32_t dst_level, uint32_t dst_base_layer,
5242                        VkImageAspectFlagBits aspect,
5243                        uint32_t src_x, uint32_t src_y,
5244                        uint32_t dst_x, uint32_t dst_y,
5245                        uint32_t width, uint32_t height,
5246                        uint32_t layer_count,
5247                        enum blorp_filter filter);
5248 void
5249 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
5250                  const struct anv_image *image,
5251                  VkImageAspectFlagBits aspect, uint32_t level,
5252                  uint32_t base_layer, uint32_t layer_count,
5253                  enum isl_aux_op hiz_op);
5254 void
5255 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
5256                     const struct anv_image *image,
5257                     VkImageAspectFlags aspects,
5258                     uint32_t level,
5259                     uint32_t base_layer, uint32_t layer_count,
5260                     VkRect2D area, uint8_t stencil_value);
5261 void
5262 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
5263                  const struct anv_image *image,
5264                  enum isl_format format, struct isl_swizzle swizzle,
5265                  VkImageAspectFlagBits aspect,
5266                  uint32_t base_layer, uint32_t layer_count,
5267                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
5268                  bool predicate);
5269 void
5270 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
5271                  const struct anv_image *image,
5272                  enum isl_format format, struct isl_swizzle swizzle,
5273                  VkImageAspectFlagBits aspect, uint32_t level,
5274                  uint32_t base_layer, uint32_t layer_count,
5275                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
5276                  bool predicate);
5277 
5278 isl_surf_usage_flags_t
5279 anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
5280                                 VkImageCreateFlags vk_create_flags,
5281                                 VkImageUsageFlags vk_usage,
5282                                 isl_surf_usage_flags_t isl_extra_usage,
5283                                 VkImageAspectFlagBits aspect);
5284 
5285 void
5286 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
5287                          struct anv_address address,
5288                          VkDeviceSize size,
5289                          uint32_t data);
5290 
5291 VkResult
5292 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
5293 
5294 bool
5295 anv_can_hiz_clear_ds_view(struct anv_device *device,
5296                           const struct anv_image_view *iview,
5297                           VkImageLayout layout,
5298                           VkImageAspectFlags clear_aspects,
5299                           float depth_clear_value,
5300                           VkRect2D render_area,
5301                           const VkQueueFlagBits queue_flags);
5302 
5303 bool
5304 anv_can_fast_clear_color_view(struct anv_device *device,
5305                               struct anv_image_view *iview,
5306                               VkImageLayout layout,
5307                               union isl_color_value clear_color,
5308                               uint32_t num_layers,
5309                               VkRect2D render_area,
5310                               const VkQueueFlagBits queue_flags);
5311 
5312 enum isl_aux_state ATTRIBUTE_PURE
5313 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
5314                         const struct anv_image *image,
5315                         const VkImageAspectFlagBits aspect,
5316                         const VkImageLayout layout,
5317                         const VkQueueFlagBits queue_flags);
5318 
5319 enum isl_aux_usage ATTRIBUTE_PURE
5320 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
5321                         const struct anv_image *image,
5322                         const VkImageAspectFlagBits aspect,
5323                         const VkImageUsageFlagBits usage,
5324                         const VkImageLayout layout,
5325                         const VkQueueFlagBits queue_flags);
5326 
5327 enum anv_fast_clear_type ATTRIBUTE_PURE
5328 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
5329                               const struct anv_image * const image,
5330                               const VkImageAspectFlagBits aspect,
5331                               const VkImageLayout layout,
5332                               const VkQueueFlagBits queue_flags);
5333 
5334 bool ATTRIBUTE_PURE
5335 anv_layout_has_untracked_aux_writes(const struct intel_device_info * const devinfo,
5336                                     const struct anv_image * const image,
5337                                     const VkImageAspectFlagBits aspect,
5338                                     const VkImageLayout layout,
5339                                     const VkQueueFlagBits queue_flags);
5340 
5341 static inline bool
anv_image_aspects_compatible(VkImageAspectFlags aspects1,VkImageAspectFlags aspects2)5342 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
5343                              VkImageAspectFlags aspects2)
5344 {
5345    if (aspects1 == aspects2)
5346       return true;
5347 
5348    /* Only 1 color aspects are compatibles. */
5349    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5350        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
5351        util_bitcount(aspects1) == util_bitcount(aspects2))
5352       return true;
5353 
5354    return false;
5355 }
5356 
5357 struct anv_image_view {
5358    struct vk_image_view vk;
5359 
5360    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
5361 
5362    unsigned n_planes;
5363 
5364    /**
5365     * True if the surface states (if any) are owned by some anv_state_stream
5366     * from internal_surface_state_pool.
5367     */
5368    bool use_surface_state_stream;
5369 
5370    struct {
5371       struct isl_view isl;
5372 
5373       /**
5374        * A version of the image view for storage usage (can apply 3D image
5375        * slicing).
5376        */
5377       struct isl_view isl_storage;
5378 
5379       /**
5380        * RENDER_SURFACE_STATE when using image as a sampler surface with an
5381        * image layout of SHADER_READ_ONLY_OPTIMAL or
5382        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
5383        */
5384       struct anv_surface_state optimal_sampler;
5385 
5386       /**
5387        * RENDER_SURFACE_STATE when using image as a sampler surface with an
5388        * image layout of GENERAL.
5389        */
5390       struct anv_surface_state general_sampler;
5391 
5392       /**
5393        * RENDER_SURFACE_STATE when using image as a storage image.
5394        */
5395       struct anv_surface_state storage;
5396    } planes[3];
5397 };
5398 
5399 enum anv_image_view_state_flags {
5400    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 0),
5401 };
5402 
5403 void anv_image_fill_surface_state(struct anv_device *device,
5404                                   const struct anv_image *image,
5405                                   VkImageAspectFlagBits aspect,
5406                                   const struct isl_view *view,
5407                                   isl_surf_usage_flags_t view_usage,
5408                                   enum isl_aux_usage aux_usage,
5409                                   const union isl_color_value *clear_color,
5410                                   enum anv_image_view_state_flags flags,
5411                                   struct anv_surface_state *state_inout);
5412 
5413 
5414 static inline const struct anv_surface_state *
anv_image_view_texture_surface_state(const struct anv_image_view * iview,uint32_t plane,VkImageLayout layout)5415 anv_image_view_texture_surface_state(const struct anv_image_view *iview,
5416                                      uint32_t plane, VkImageLayout layout)
5417 {
5418    return layout == VK_IMAGE_LAYOUT_GENERAL ?
5419           &iview->planes[plane].general_sampler :
5420           &iview->planes[plane].optimal_sampler;
5421 }
5422 
5423 static inline const struct anv_surface_state *
anv_image_view_storage_surface_state(const struct anv_image_view * iview)5424 anv_image_view_storage_surface_state(const struct anv_image_view *iview)
5425 {
5426    return &iview->planes[0].storage;
5427 }
5428 
5429 static inline bool
anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state * state,const struct anv_image * image)5430 anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state *state,
5431                                                const struct anv_image *image)
5432 {
5433    for (unsigned a = 0; a < state->color_att_count; a++) {
5434       if (state->color_att[a].iview &&
5435           state->color_att[a].iview->image == image)
5436          return true;
5437    }
5438 
5439    if (state->depth_att.iview && state->depth_att.iview->image == image)
5440       return true;
5441    if (state->stencil_att.iview && state->stencil_att.iview->image == image)
5442       return true;
5443 
5444    return false;
5445 }
5446 
5447 struct anv_image_create_info {
5448    const VkImageCreateInfo *vk_info;
5449 
5450    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
5451    isl_tiling_flags_t isl_tiling_flags;
5452 
5453    /** These flags will be added to any derived from VkImageCreateInfo. */
5454    isl_surf_usage_flags_t isl_extra_usage_flags;
5455 
5456    /** An opt-in stride in pixels, should be 0 for implicit layouts */
5457    uint32_t stride;
5458 
5459    /** Whether to allocate private binding */
5460    bool no_private_binding_alloc;
5461 };
5462 
5463 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
5464                         const struct anv_image_create_info *create_info);
5465 
5466 void anv_image_finish(struct anv_image *image);
5467 
5468 void anv_image_get_memory_requirements(struct anv_device *device,
5469                                        struct anv_image *image,
5470                                        VkImageAspectFlags aspects,
5471                                        VkMemoryRequirements2 *pMemoryRequirements);
5472 
5473 void anv_image_view_init(struct anv_device *device,
5474                          struct anv_image_view *iview,
5475                          const VkImageViewCreateInfo *pCreateInfo,
5476                          struct anv_state_stream *state_stream);
5477 
5478 void anv_image_view_finish(struct anv_image_view *iview);
5479 
5480 enum isl_format
5481 anv_isl_format_for_descriptor_type(const struct anv_device *device,
5482                                    VkDescriptorType type);
5483 
5484 static inline isl_surf_usage_flags_t
anv_isl_usage_for_descriptor_type(const VkDescriptorType type)5485 anv_isl_usage_for_descriptor_type(const VkDescriptorType type)
5486 {
5487    switch(type) {
5488       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
5489       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
5490          return ISL_SURF_USAGE_CONSTANT_BUFFER_BIT;
5491       default:
5492          return ISL_SURF_USAGE_STORAGE_BIT;
5493    }
5494 }
5495 
5496 static inline uint32_t
anv_rasterization_aa_mode(VkPolygonMode raster_mode,VkLineRasterizationModeKHR line_mode)5497 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
5498                           VkLineRasterizationModeKHR line_mode)
5499 {
5500    if (raster_mode == VK_POLYGON_MODE_LINE &&
5501        line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
5502       return true;
5503    return false;
5504 }
5505 
5506 static inline VkLineRasterizationModeKHR
anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,unsigned rasterization_samples)5507 anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
5508                             unsigned rasterization_samples)
5509 {
5510    if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
5511       if (rasterization_samples > 1) {
5512          return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
5513       } else {
5514          return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
5515       }
5516    }
5517    return line_mode;
5518 }
5519 
5520 static inline bool
anv_is_dual_src_blend_factor(VkBlendFactor factor)5521 anv_is_dual_src_blend_factor(VkBlendFactor factor)
5522 {
5523    return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
5524           factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
5525           factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
5526           factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
5527 }
5528 
5529 static inline bool
anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state * cb)5530 anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
5531 {
5532    return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
5533           anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
5534           anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
5535           anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
5536 }
5537 
5538 VkFormatFeatureFlags2
5539 anv_get_image_format_features2(const struct anv_physical_device *physical_device,
5540                                VkFormat vk_format,
5541                                const struct anv_format *anv_format,
5542                                VkImageTiling vk_tiling,
5543                                const struct isl_drm_modifier_info *isl_mod_info);
5544 
5545 void anv_fill_buffer_surface_state(struct anv_device *device,
5546                                    void *surface_state_ptr,
5547                                    enum isl_format format,
5548                                    struct isl_swizzle swizzle,
5549                                    isl_surf_usage_flags_t usage,
5550                                    struct anv_address address,
5551                                    uint32_t range, uint32_t stride);
5552 
5553 
5554 struct gfx8_border_color {
5555    union {
5556       float float32[4];
5557       uint32_t uint32[4];
5558    };
5559    /* Pad out to 64 bytes */
5560    uint32_t _pad[12];
5561 };
5562 
5563 struct anv_sampler {
5564    struct vk_sampler            vk;
5565 
5566    uint32_t                     state[3][4];
5567    uint32_t                     n_planes;
5568 
5569    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
5570     * and with a 32-byte stride for use as bindless samplers.
5571     */
5572    struct anv_state             bindless_state;
5573 
5574    struct anv_state             custom_border_color;
5575 };
5576 
5577 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
5578 
5579 struct anv_query_pool {
5580    struct vk_query_pool                         vk;
5581 
5582    /** Stride between slots, in bytes */
5583    uint32_t                                     stride;
5584    /** Number of slots in this query pool */
5585    struct anv_bo *                              bo;
5586 
5587    /** Location for the KHR_performance_query small batch updating
5588     *  ANV_PERF_QUERY_OFFSET_REG
5589     */
5590    uint32_t                                     khr_perf_preambles_offset;
5591 
5592    /** Size of each small batch */
5593    uint32_t                                     khr_perf_preamble_stride;
5594 
5595    /* KHR perf queries : */
5596    uint32_t                                     pass_size;
5597    uint32_t                                     data_offset;
5598    uint32_t                                     snapshot_size;
5599    uint32_t                                     n_counters;
5600    struct intel_perf_counter_pass                *counter_pass;
5601    uint32_t                                     n_passes;
5602    struct intel_perf_query_info                 **pass_query;
5603 };
5604 
khr_perf_query_preamble_offset(const struct anv_query_pool * pool,uint32_t pass)5605 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
5606                                                       uint32_t pass)
5607 {
5608    return pool->khr_perf_preambles_offset +
5609           pool->khr_perf_preamble_stride * pass;
5610 }
5611 
5612 struct anv_vid_mem {
5613    struct anv_device_memory *mem;
5614    VkDeviceSize       offset;
5615    VkDeviceSize       size;
5616 };
5617 
5618 #define ANV_VIDEO_MEM_REQS_H264 4
5619 #define ANV_VIDEO_MEM_REQS_H265 9
5620 #define ANV_MB_WIDTH 16
5621 #define ANV_MB_HEIGHT 16
5622 #define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16
5623 #define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16
5624 #define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8
5625 #define ANV_MAX_H265_CTB_SIZE 64
5626 
5627 enum anv_vid_mem_h264_types {
5628    ANV_VID_MEM_H264_INTRA_ROW_STORE,
5629    ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE,
5630    ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH,
5631    ANV_VID_MEM_H264_MPR_ROW_SCRATCH,
5632    ANV_VID_MEM_H264_MAX,
5633 };
5634 
5635 enum anv_vid_mem_h265_types {
5636    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE,
5637    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE,
5638    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN,
5639    ANV_VID_MEM_H265_METADATA_LINE,
5640    ANV_VID_MEM_H265_METADATA_TILE_LINE,
5641    ANV_VID_MEM_H265_METADATA_TILE_COLUMN,
5642    ANV_VID_MEM_H265_SAO_LINE,
5643    ANV_VID_MEM_H265_SAO_TILE_LINE,
5644    ANV_VID_MEM_H265_SAO_TILE_COLUMN,
5645    ANV_VID_MEM_H265_MAX,
5646 };
5647 
5648 struct anv_video_session {
5649    struct vk_video_session vk;
5650 
5651    /* the decoder needs some private memory allocations */
5652    struct anv_vid_mem vid_mem[ANV_VID_MEM_H265_MAX];
5653 };
5654 
5655 struct anv_video_session_params {
5656    struct vk_video_session_parameters vk;
5657 };
5658 
5659 void
5660 anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f);
5661 
5662 static inline void
anv_add_pending_pipe_bits(struct anv_cmd_buffer * cmd_buffer,enum anv_pipe_bits bits,const char * reason)5663 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
5664                           enum anv_pipe_bits bits,
5665                           const char* reason)
5666 {
5667    cmd_buffer->state.pending_pipe_bits |= bits;
5668    if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) {
5669       fputs("pc: add ", stdout);
5670       anv_dump_pipe_bits(bits, stdout);
5671       fprintf(stdout, "reason: %s\n", reason);
5672    }
5673 }
5674 
5675 struct anv_performance_configuration_intel {
5676    struct vk_object_base      base;
5677 
5678    struct intel_perf_registers *register_config;
5679 
5680    uint64_t                   config_id;
5681 };
5682 
5683 void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
5684 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
5685 void anv_device_perf_init(struct anv_device *device);
5686 void anv_perf_write_pass_results(struct intel_perf_config *perf,
5687                                  struct anv_query_pool *pool, uint32_t pass,
5688                                  const struct intel_perf_query_result *accumulated_results,
5689                                  union VkPerformanceCounterResultKHR *results);
5690 
5691 void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
5692                                 struct nir_shader *fs_nir,
5693                                 struct anv_device *device,
5694                                 const VkGraphicsPipelineCreateInfo *info);
5695 
5696 /* Use to emit a series of memcpy operations */
5697 struct anv_memcpy_state {
5698    struct anv_device *device;
5699    struct anv_batch *batch;
5700 
5701    struct anv_vb_cache_range vb_bound;
5702    struct anv_vb_cache_range vb_dirty;
5703 };
5704 
5705 VkResult anv_device_init_internal_kernels(struct anv_device *device);
5706 void anv_device_finish_internal_kernels(struct anv_device *device);
5707 
5708 VkResult anv_device_init_astc_emu(struct anv_device *device);
5709 void anv_device_finish_astc_emu(struct anv_device *device);
5710 void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
5711                           struct anv_image *image,
5712                           VkImageLayout layout,
5713                           const VkImageSubresourceLayers *subresource,
5714                           VkOffset3D block_offset,
5715                           VkExtent3D block_extent);
5716 
5717 /* This structure is used in 2 scenarios :
5718  *
5719  *    - copy utrace timestamps from command buffer so that command buffer can
5720  *      be resubmitted multiple times without the recorded timestamps being
5721  *      overwritten before they're read back
5722  *
5723  *    - emit trace points for queue debug tagging
5724  *      (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
5725  */
5726 struct anv_utrace_submit {
5727    /* Needs to be the first field */
5728    struct intel_ds_flush_data ds;
5729 
5730    /* Batch stuff to implement of copy of timestamps recorded in another
5731     * buffer.
5732     */
5733    struct anv_reloc_list relocs;
5734    struct anv_batch batch;
5735    struct util_dynarray batch_bos;
5736 
5737    /* Stream for temporary allocations */
5738    struct anv_state_stream dynamic_state_stream;
5739    struct anv_state_stream general_state_stream;
5740 
5741    /* Syncobj to be signaled when the batch completes */
5742    struct vk_sync *sync;
5743 
5744    /* Queue on which all the recorded traces are submitted */
5745    struct anv_queue *queue;
5746 
5747    /* Buffer of 64bits timestamps (only used for timestamp copies) */
5748    struct anv_bo *trace_bo;
5749 
5750    /* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit
5751     * timestamps)
5752     */
5753    uint64_t last_full_timestamp;
5754 
5755    /* Memcpy state tracking (only used for timestamp copies on render engine) */
5756    struct anv_memcpy_state memcpy_state;
5757 
5758    /* Memcpy state tracking (only used for timestamp copies on compute engine) */
5759    struct anv_simple_shader simple_state;
5760 };
5761 
5762 void anv_device_utrace_init(struct anv_device *device);
5763 void anv_device_utrace_finish(struct anv_device *device);
5764 VkResult
5765 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
5766                                     uint32_t cmd_buffer_count,
5767                                     struct anv_cmd_buffer **cmd_buffers,
5768                                     struct anv_utrace_submit **out_submit);
5769 
5770 static bool
anv_has_cooperative_matrix(const struct anv_physical_device * device)5771 anv_has_cooperative_matrix(const struct anv_physical_device *device)
5772 {
5773    return device->has_cooperative_matrix;
5774 }
5775 
5776 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
5777    VK_FROM_HANDLE(__anv_type, __name, __handle)
5778 
5779 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
5780                        VK_OBJECT_TYPE_COMMAND_BUFFER)
5781 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
5782 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
5783 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
5784                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
5785 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
5786 
5787 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
5788                                VK_OBJECT_TYPE_BUFFER)
5789 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, vk.base, VkBufferView,
5790                                VK_OBJECT_TYPE_BUFFER_VIEW)
5791 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
5792                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
5793 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
5794                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
5795 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
5796                                VkDescriptorSetLayout,
5797                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
5798 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, vk.base, VkDeviceMemory,
5799                                VK_OBJECT_TYPE_DEVICE_MEMORY)
5800 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
5801 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
5802 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
5803                                VK_OBJECT_TYPE_IMAGE_VIEW);
5804 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
5805                                VK_OBJECT_TYPE_PIPELINE)
5806 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
5807                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
5808 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
5809                                VK_OBJECT_TYPE_QUERY_POOL)
5810 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
5811                                VK_OBJECT_TYPE_SAMPLER)
5812 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
5813                                VkPerformanceConfigurationINTEL,
5814                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
5815 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
5816                                VkVideoSessionKHR,
5817                                VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
5818 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session_params, vk.base,
5819                                VkVideoSessionParametersKHR,
5820                                VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
5821 
5822 #define anv_genX(devinfo, thing) ({             \
5823    __typeof(&gfx9_##thing) genX_thing;          \
5824    switch ((devinfo)->verx10) {                 \
5825    case 90:                                     \
5826       genX_thing = &gfx9_##thing;               \
5827       break;                                    \
5828    case 110:                                    \
5829       genX_thing = &gfx11_##thing;              \
5830       break;                                    \
5831    case 120:                                    \
5832       genX_thing = &gfx12_##thing;              \
5833       break;                                    \
5834    case 125:                                    \
5835       genX_thing = &gfx125_##thing;             \
5836       break;                                    \
5837    case 200:                                    \
5838       genX_thing = &gfx20_##thing;              \
5839       break;                                    \
5840    default:                                     \
5841       unreachable("Unknown hardware generation"); \
5842    }                                            \
5843    genX_thing;                                  \
5844 })
5845 
5846 /* Gen-specific function declarations */
5847 #ifdef genX
5848 #  include "anv_genX.h"
5849 #else
5850 #  define genX(x) gfx9_##x
5851 #  include "anv_genX.h"
5852 #  undef genX
5853 #  define genX(x) gfx11_##x
5854 #  include "anv_genX.h"
5855 #  undef genX
5856 #  define genX(x) gfx12_##x
5857 #  include "anv_genX.h"
5858 #  undef genX
5859 #  define genX(x) gfx125_##x
5860 #  include "anv_genX.h"
5861 #  undef genX
5862 #  define genX(x) gfx20_##x
5863 #  include "anv_genX.h"
5864 #  undef genX
5865 #endif
5866 
5867 #ifdef __cplusplus
5868 }
5869 #endif
5870 
5871 #endif /* ANV_PRIVATE_H */
5872