1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/i915_drm.h"
34 
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #ifndef NDEBUG
40 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
41 #endif
42 #else
43 #define VG(x) ((void)0)
44 #endif
45 
46 #include "common/intel_clflush.h"
47 #include "common/intel_decoder.h"
48 #include "common/intel_gem.h"
49 #include "common/intel_l3_config.h"
50 #include "common/intel_measure.h"
51 #include "dev/intel_device_info.h"
52 #include "blorp/blorp.h"
53 #include "compiler/brw_compiler.h"
54 #include "compiler/brw_rt.h"
55 #include "util/bitset.h"
56 #include "util/bitscan.h"
57 #include "util/macros.h"
58 #include "util/hash_table.h"
59 #include "util/list.h"
60 #include "util/sparse_array.h"
61 #include "util/u_atomic.h"
62 #include "util/u_vector.h"
63 #include "util/u_math.h"
64 #include "util/vma.h"
65 #include "util/xmlconfig.h"
66 #include "vk_alloc.h"
67 #include "vk_debug_report.h"
68 #include "vk_device.h"
69 #include "vk_enum_defines.h"
70 #include "vk_image.h"
71 #include "vk_instance.h"
72 #include "vk_physical_device.h"
73 #include "vk_shader_module.h"
74 #include "vk_util.h"
75 #include "vk_command_buffer.h"
76 #include "vk_queue.h"
77 #include "vk_log.h"
78 
79 /* Pre-declarations needed for WSI entrypoints */
80 struct wl_surface;
81 struct wl_display;
82 typedef struct xcb_connection_t xcb_connection_t;
83 typedef uint32_t xcb_visualid_t;
84 typedef uint32_t xcb_window_t;
85 
86 struct anv_batch;
87 struct anv_buffer;
88 struct anv_buffer_view;
89 struct anv_image_view;
90 struct anv_acceleration_structure;
91 struct anv_instance;
92 
93 struct intel_aux_map_context;
94 struct intel_perf_config;
95 struct intel_perf_counter_pass;
96 struct intel_perf_query_result;
97 
98 #include <vulkan/vulkan.h>
99 #include <vulkan/vk_icd.h>
100 
101 #include "anv_android.h"
102 #include "anv_entrypoints.h"
103 #include "isl/isl.h"
104 
105 #include "dev/intel_debug.h"
106 #undef MESA_LOG_TAG
107 #define MESA_LOG_TAG "MESA-INTEL"
108 #include "util/log.h"
109 #include "wsi_common.h"
110 
111 #define NSEC_PER_SEC 1000000000ull
112 
113 /* anv Virtual Memory Layout
114  * =========================
115  *
116  * When the anv driver is determining the virtual graphics addresses of memory
117  * objects itself using the softpin mechanism, the following memory ranges
118  * will be used.
119  *
120  * Three special considerations to notice:
121  *
122  * (1) the dynamic state pool is located within the same 4 GiB as the low
123  * heap. This is to work around a VF cache issue described in a comment in
124  * anv_physical_device_init_heaps.
125  *
126  * (2) the binding table pool is located at lower addresses than the surface
127  * state pool, within a 4 GiB range. This allows surface state base addresses
128  * to cover both binding tables (16 bit offsets) and surface states (32 bit
129  * offsets).
130  *
131  * (3) the last 4 GiB of the address space is withheld from the high
132  * heap. Various hardware units will read past the end of an object for
133  * various reasons. This healthy margin prevents reads from wrapping around
134  * 48-bit addresses.
135  */
136 #define GENERAL_STATE_POOL_MIN_ADDRESS     0x000000010000ULL /* 64 KiB */
137 #define GENERAL_STATE_POOL_MAX_ADDRESS     0x00003fffffffULL
138 #define LOW_HEAP_MIN_ADDRESS               0x000040000000ULL /* 1 GiB */
139 #define LOW_HEAP_MAX_ADDRESS               0x00007fffffffULL
140 #define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
141 #define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
142 #define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
143 #define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
144 #define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
145 #define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
146 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
147 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
148 #define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
149 #define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
150 #define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */
151 
152 #define GENERAL_STATE_POOL_SIZE     \
153    (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
154 #define LOW_HEAP_SIZE               \
155    (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
156 #define DYNAMIC_STATE_POOL_SIZE     \
157    (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
158 #define BINDING_TABLE_POOL_SIZE     \
159    (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
160 #define SURFACE_STATE_POOL_SIZE     \
161    (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
162 #define INSTRUCTION_STATE_POOL_SIZE \
163    (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
164 #define CLIENT_VISIBLE_HEAP_SIZE               \
165    (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
166 
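/* Illustrative sanity checks (editor's sketch, not part of the driver): the
 * pool ranges above are meant to tile the softpin address space without
 * overlapping, and the *_SIZE macros fall straight out of the MIN/MAX pairs.
 * For example, the dynamic state pool spans 3 GiB..4 GiB and is therefore
 * exactly 1 GiB.  Assuming C11 static_assert:
 */
#if 0
#include <assert.h>
static_assert(LOW_HEAP_MAX_ADDRESS < DYNAMIC_STATE_POOL_MIN_ADDRESS,
              "low heap and dynamic state pool must not overlap");
static_assert(BINDING_TABLE_POOL_MAX_ADDRESS < SURFACE_STATE_POOL_MIN_ADDRESS,
              "binding tables must sit below surface states (see note (2) above)");
static_assert(DYNAMIC_STATE_POOL_SIZE == (1ull << 30),
              "dynamic state pool is exactly 1 GiB");
#endif
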
167 /* Allowing different clear colors requires us to perform a depth resolve at
168  * the end of certain render passes. This is because while slow clears store
169  * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
170  * See the PRMs for examples describing when additional resolves would be
171  * necessary. To enable fast clears without requiring extra resolves, we set
172  * the clear value to a globally-defined one. We could allow different values
173  * if the user doesn't expect coherent data during or after a render pass
174  * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
175  * don't seem to exist yet. In almost all Vulkan applications tested thus far,
176  * 1.0f seems to be the only value used. The only application that doesn't set
177  * this value does so through the use of a seemingly uninitialized clear
178  * value.
179  */
180 #define ANV_HZ_FC_VAL 1.0f
181 
182 #define MAX_VBS         28
183 #define MAX_XFB_BUFFERS  4
184 #define MAX_XFB_STREAMS  4
185 #define MAX_SETS         8
186 #define MAX_RTS          8
187 #define MAX_VIEWPORTS   16
188 #define MAX_SCISSORS    16
189 #define MAX_PUSH_CONSTANTS_SIZE 128
190 #define MAX_DYNAMIC_BUFFERS 16
191 #define MAX_IMAGES 64
192 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
193 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
194 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
195 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
196  * use 64 here to avoid cache issues. This could most likely bring it back to
197  * 32 if we had different virtual addresses for the different views on a given
198  * GEM object.
199  */
200 #define ANV_UBO_ALIGNMENT 64
201 #define ANV_SSBO_ALIGNMENT 4
202 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
203 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
204 #define MAX_SAMPLE_LOCATIONS 16
205 
206 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
207  *
208  *    "The surface state model is used when a Binding Table Index (specified
209  *    in the message descriptor) of less than 240 is specified. In this model,
210  *    the Binding Table Index is used to index into the binding table, and the
211  *    binding table entry contains a pointer to the SURFACE_STATE."
212  *
213  * Binding table values of 240 and above are used for various things in the hardware
214  * such as stateless, stateless with incoherent cache, SLM, and bindless.
215  */
216 #define MAX_BINDING_TABLE_SIZE 240
217 
218 /* The kernel relocation API has a limitation of a 32-bit delta value
219  * applied to the address before it is written which, in spite of it being
220  * unsigned, is treated as signed.  Because of the way that this maps to
221  * the Vulkan API, we cannot handle an offset into a buffer that does not
222  * fit in signed 32 bits.  The only mechanism we have for dealing with
223  * this at the moment is to limit all VkDeviceMemory objects to a maximum
224  * of 2GB each.  The Vulkan spec allows us to do this:
225  *
226  *    "Some platforms may have a limit on the maximum size of a single
227  *    allocation. For example, certain systems may fail to create
228  *    allocations with a size greater than or equal to 4GB. Such a limit is
229  *    implementation-dependent, and if such a failure occurs then the error
230  *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
231  */
232 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
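
/* Editor's sketch only: 1ull << 31 is 2 GiB, so a memory-allocation path could
 * enforce the limit described above roughly as follows (variable names are
 * hypothetical, not the driver's actual AllocateMemory code):
 */
#if 0
if (pAllocateInfo->allocationSize > MAX_MEMORY_ALLOCATION_SIZE)
   return VK_ERROR_OUT_OF_DEVICE_MEMORY;
#endif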
233 
234 #define ANV_SVGS_VB_INDEX    MAX_VBS
235 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
236 
237 /* We reserve this MI ALU register for the purpose of handling predication.
238  * Other code which uses the MI ALU should leave it alone.
239  */
240 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
241 
242 /* We reserve this MI ALU register to pass around an offset computed from
243  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
244  * Other code which uses the MI ALU should leave it alone.
245  */
246 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
247 
248 /* For gfx12 we set the streamout buffers using 4 separate commands
249  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
250  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
251  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
252  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
253  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
254  * 3DSTATE_SO_BUFFER_INDEX_0.
255  */
256 #define SO_BUFFER_INDEX_0_CMD 0x60
257 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
258 
259 static inline uint32_t
260 align_down_npot_u32(uint32_t v, uint32_t a)
261 {
262    return v - (v % a);
263 }
264 
265 static inline uint32_t
266 align_down_u32(uint32_t v, uint32_t a)
267 {
268    assert(a != 0 && a == (a & -a));
269    return v & ~(a - 1);
270 }
271 
272 static inline uint32_t
273 align_u32(uint32_t v, uint32_t a)
274 {
275    assert(a != 0 && a == (a & -a));
276    return align_down_u32(v + a - 1, a);
277 }
278 
279 static inline uint64_t
280 align_down_u64(uint64_t v, uint64_t a)
281 {
282    assert(a != 0 && a == (a & -a));
283    return v & ~(a - 1);
284 }
285 
286 static inline uint64_t
287 align_u64(uint64_t v, uint64_t a)
288 {
289    return align_down_u64(v + a - 1, a);
290 }
291 
292 static inline int32_t
293 align_i32(int32_t v, int32_t a)
294 {
295    assert(a != 0 && a == (a & -a));
296    return (v + a - 1) & ~(a - 1);
297 }
298 
299 /** Alignment must be a power of 2. */
300 static inline bool
301 anv_is_aligned(uintmax_t n, uintmax_t a)
302 {
303    assert(a == (a & -a));
304    return (n & (a - 1)) == 0;
305 }
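
/* Worked examples (editor's illustration only) for the alignment helpers above:
 */
#if 0
assert(align_u32(100, 64) == 128);            /* round up to a power-of-two */
assert(align_down_u32(100, 64) == 64);        /* round down to a power-of-two */
assert(align_down_npot_u32(100, 48) == 96);   /* also handles non-power-of-two */
assert(anv_is_aligned(128, 64));              /* 128 is a multiple of 64 */
#endif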
306 
307 static inline uint32_t
308 anv_minify(uint32_t n, uint32_t levels)
309 {
310    if (unlikely(n == 0))
311       return 0;
312    else
313       return MAX2(n >> levels, 1);
314 }
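
/* Editor's illustration of anv_minify(): the helper returns the extent of a
 * given mip level, clamped to 1 once the chain bottoms out.
 */
#if 0
assert(anv_minify(16, 0) == 16);
assert(anv_minify(16, 2) == 4);
assert(anv_minify(5, 3) == 1);   /* 5 >> 3 == 0, clamped to 1 */
#endif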
315 
316 static inline float
317 anv_clamp_f(float f, float min, float max)
318 {
319    assert(min < max);
320 
321    if (f > max)
322       return max;
323    else if (f < min)
324       return min;
325    else
326       return f;
327 }
328 
329 static inline bool
330 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
331 {
332    if (*inout_mask & clear_mask) {
333       *inout_mask &= ~clear_mask;
334       return true;
335    } else {
336       return false;
337    }
338 }
339 
340 static inline union isl_color_value
341 vk_to_isl_color(VkClearColorValue color)
342 {
343    return (union isl_color_value) {
344       .u32 = {
345          color.uint32[0],
346          color.uint32[1],
347          color.uint32[2],
348          color.uint32[3],
349       },
350    };
351 }
352 
353 static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
354 {
355    uintptr_t mask = (1ull << bits) - 1;
356    *flags = ptr & mask;
357    return (void *) (ptr & ~mask);
358 }
359 
360 static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
361 {
362    uintptr_t value = (uintptr_t) ptr;
363    uintptr_t mask = (1ull << bits) - 1;
364    return value | (mask & flags);
365 }
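
/* Editor's sketch of the pack/unpack pair above: stash a small flag in the low
 * bits of a sufficiently aligned pointer and recover both parts.  The variable
 * names are illustrative only; anv_queue_submit's fence_bos array (below) is
 * the in-tree user of this trick.
 */
#if 0
struct anv_bo *bo = get_some_bo();               /* hypothetical, >= 4-byte aligned */
uintptr_t packed = anv_pack_ptr(bo, 2, 0x1);     /* keep 2 low bits for flags */
int flags;
struct anv_bo *out = anv_unpack_ptr(packed, 2, &flags);
/* out == bo and flags == 0x1 */
#endif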
366 
367 /**
368  * Warn on ignored extension structs.
369  *
370  * The Vulkan spec requires us to ignore unsupported or unknown structs in
371  * a pNext chain.  In debug mode, emitting warnings for ignored structs may
372  * help us discover structs that we should not have ignored.
373  *
374  *
375  * From the Vulkan 1.0.38 spec:
376  *
377  *    Any component of the implementation (the loader, any enabled layers,
378  *    and drivers) must skip over, without processing (other than reading the
379  *    sType and pNext members) any chained structures with sType values not
380  *    defined by extensions supported by that component.
381  */
382 #define anv_debug_ignored_stype(sType) \
383    mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
384 
385 void __anv_perf_warn(struct anv_device *device,
386                      const struct vk_object_base *object,
387                      const char *file, int line, const char *format, ...)
388    anv_printflike(5, 6);
389 
390 /**
391  * Print a FINISHME message, including its source location.
392  */
393 #define anv_finishme(format, ...) \
394    do { \
395       static bool reported = false; \
396       if (!reported) { \
397          mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
398                     ##__VA_ARGS__); \
399          reported = true; \
400       } \
401    } while (0)
402 
403 /**
404  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
405  */
406 #define anv_perf_warn(objects_macro, format, ...)   \
407    do { \
408       static bool reported = false; \
409       if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
410          __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
411                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
412                   objects_macro, __FILE__, __LINE__,                    \
413                   format, ## __VA_ARGS__);                              \
414          reported = true; \
415       } \
416    } while (0)
417 
418 /* A non-fatal assert.  Useful for debugging. */
419 #ifdef DEBUG
420 #define anv_assert(x) ({ \
421    if (unlikely(!(x))) \
422       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
423 })
424 #else
425 #define anv_assert(x)
426 #endif
427 
428 struct anv_bo {
429    const char *name;
430 
431    uint32_t gem_handle;
432 
433    uint32_t refcount;
434 
435    /* Index into the current validation list.  This is used by the
436     * validation list building algorithm to track which buffers are already
437     * in the validation list so that we can ensure uniqueness.
438     */
439    uint32_t index;
440 
441    /* Index for use with util_sparse_array_free_list */
442    uint32_t free_index;
443 
444    /* Last known offset.  This value is provided by the kernel when we
445     * execbuf and is used as the presumed offset for the next bunch of
446     * relocations.
447     */
448    uint64_t offset;
449 
450    /** Size of the buffer not including implicit aux */
451    uint64_t size;
452 
453    /* Map for internally mapped BOs.
454     *
455     * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
456     */
457    void *map;
458 
459    /** Size of the implicit CCS range at the end of the buffer
460     *
461     * On Gfx12, CCS data is always a direct 1/256 scale-down.  A single 64K
462     * page of main surface data maps to a 256B chunk of CCS data and that
463     * mapping is provided on TGL-LP by the AUX table which maps virtual memory
464     * addresses in the main surface to virtual memory addresses for CCS data.
465     *
466     * Because we can't change these maps around easily and because Vulkan
467     * allows two VkImages to be bound to overlapping memory regions (as long
468     * as the app is careful), it's not feasible to make this mapping part of
469     * the image.  (On Gfx11 and earlier, the mapping was provided via
470     * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
471     * Instead, we attach the CCS data directly to the buffer object and setup
472     * the AUX table mapping at BO creation time.
473     *
474     * This field is for internal tracking use by the BO allocator only and
475     * should not be touched by other parts of the code.  If something wants to
476     * know if a BO has implicit CCS data, it should instead look at the
477     * has_implicit_ccs boolean below.
478     *
479     * This data is not included in maps of this buffer.
480     */
481    uint32_t _ccs_size;
482 
483    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
484    uint32_t flags;
485 
486    /** True if this BO may be shared with other processes */
487    bool is_external:1;
488 
489    /** True if this BO is a wrapper
490     *
491     * When set to true, none of the fields in this BO are meaningful except
492     * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
493     * See also anv_bo_unwrap().  Wrapper BOs are not allowed when use_softpin
494     * is set in the physical device.
495     */
496    bool is_wrapper:1;
497 
498    /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
499    bool has_fixed_address:1;
500 
501    /** True if this BO wraps a host pointer */
502    bool from_host_ptr:1;
503 
504    /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
505    bool has_client_visible_address:1;
506 
507    /** True if this BO has implicit CCS data attached to it */
508    bool has_implicit_ccs:1;
509 };
510 
511 static inline struct anv_bo *
512 anv_bo_ref(struct anv_bo *bo)
513 {
514    p_atomic_inc(&bo->refcount);
515    return bo;
516 }
517 
518 static inline struct anv_bo *
519 anv_bo_unwrap(struct anv_bo *bo)
520 {
521    while (bo->is_wrapper)
522       bo = bo->map;
523    return bo;
524 }
525 
526 /* Represents a lock-free linked list of "free" things.  This is used by
527  * both the block pool and the state pools.  Unfortunately, in order to
528  * solve the ABA problem, we can't use a single uint32_t head.
529  */
530 union anv_free_list {
531    struct {
532       uint32_t offset;
533 
534       /* A simple count that is incremented every time the head changes. */
535       uint32_t count;
536    };
537    /* Make sure it's aligned to 64 bits. This will make atomic operations
538     * faster on 32 bit platforms.
539     */
540    uint64_t u64 __attribute__ ((aligned (8)));
541 };
542 
543 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
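
/* Editor's sketch of why the {offset, count} pair is compare-and-swapped as a
 * single 64-bit value: bumping `count` on every head update means a stale head
 * whose `offset` happens to match again (the ABA case) still fails the CAS.
 * This is not the driver's actual pop routine (see anv_free_list_pop() below);
 * offset_of_next_entry() is a hypothetical helper.
 */
#if 0
union anv_free_list current, new_head;
current.u64 = list->u64;
while (current.offset != UINT32_MAX) {          /* UINT32_MAX means "empty" */
   new_head.offset = offset_of_next_entry(current.offset);
   new_head.count  = current.count + 1;         /* generation bump defeats ABA */
   uint64_t old = __sync_val_compare_and_swap(&list->u64, current.u64, new_head.u64);
   if (old == current.u64)
      break;                                    /* we own the popped entry */
   current.u64 = old;                           /* lost the race; retry */
}
#endif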
544 
545 struct anv_block_state {
546    union {
547       struct {
548          uint32_t next;
549          uint32_t end;
550       };
551       /* Make sure it's aligned to 64 bits. This will make atomic operations
552        * faster on 32 bit platforms.
553        */
554       uint64_t u64 __attribute__ ((aligned (8)));
555    };
556 };
557 
558 #define anv_block_pool_foreach_bo(bo, pool)  \
559    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
560         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
561         _pp_bo++)
562 
563 #define ANV_MAX_BLOCK_POOL_BOS 20
564 
565 struct anv_block_pool {
566    const char *name;
567 
568    struct anv_device *device;
569    bool use_softpin;
570 
571    /* Wrapper BO for use in relocation lists.  This BO is simply a wrapper
572     * around the actual BO so that we can grow the pool even after the wrapper BO has
573     * been put in a relocation list.  This is only used in the non-softpin
574     * case.
575     */
576    struct anv_bo wrapper_bo;
577 
578    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
579    struct anv_bo *bo;
580    uint32_t nbos;
581 
582    uint64_t size;
583 
584    /* The address where the start of the pool is pinned. The various bos that
585     * are created as the pool grows will have addresses in the range
586     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
587     */
588    uint64_t start_address;
589 
590    /* The offset from the start of the bo to the "center" of the block
591     * pool.  Pointers to allocated blocks are given by
592     * bo.map + center_bo_offset + offsets.
593     */
594    uint32_t center_bo_offset;
595 
596    /* Current memory map of the block pool.  This pointer may or may not
597     * point to the actual beginning of the block pool memory.  If
598     * anv_block_pool_alloc_back has ever been called, then this pointer
599     * will point to the "center" position of the buffer and all offsets
600     * (negative or positive) given out by the block pool alloc functions
601     * will be valid relative to this pointer.
602     *
603     * In particular, map == bo.map + center_bo_offset
604     *
605     * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
606     * since it will handle the softpin case as well, where this points to NULL.
607     */
608    void *map;
609    int fd;
610 
611    /**
612     * Array of mmaps and gem handles owned by the block pool, reclaimed when
613     * the block pool is destroyed.
614     */
615    struct u_vector mmap_cleanups;
616 
617    struct anv_block_state state;
618 
619    struct anv_block_state back_state;
620 };
621 
622 /* Block pools are backed by a fixed-size 1GB memfd */
623 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
624 
625 /* The center of the block pool is also the middle of the memfd.  This may
626  * change in the future if we decide differently for some reason.
627  */
628 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
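
/* Editor's usage sketch for the block pool's two-ended growth (illustrative
 * only; the real callers are the state pools below).  Offsets are relative to
 * the pool's center, so front allocations come back non-negative and back
 * allocations negative, and both should be mapped through anv_block_pool_map()
 * rather than via pool->map directly.
 */
#if 0
uint32_t padding;
int32_t front = anv_block_pool_alloc(&pool, 4096, &padding);   /* >= 0 */
int32_t back  = anv_block_pool_alloc_back(&pool, 4096);        /* <  0 */
void *front_ptr = anv_block_pool_map(&pool, front, 4096);
void *back_ptr  = anv_block_pool_map(&pool, back, 4096);
#endif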
629 
630 static inline uint32_t
631 anv_block_pool_size(struct anv_block_pool *pool)
632 {
633    return pool->state.end + pool->back_state.end;
634 }
635 
636 struct anv_state {
637    int32_t offset;
638    uint32_t alloc_size;
639    void *map;
640    uint32_t idx;
641 };
642 
643 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
644 
645 struct anv_fixed_size_state_pool {
646    union anv_free_list free_list;
647    struct anv_block_state block;
648 };
649 
650 #define ANV_MIN_STATE_SIZE_LOG2 6
651 #define ANV_MAX_STATE_SIZE_LOG2 21
652 
653 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
654 
655 struct anv_free_entry {
656    uint32_t next;
657    struct anv_state state;
658 };
659 
660 struct anv_state_table {
661    struct anv_device *device;
662    int fd;
663    struct anv_free_entry *map;
664    uint32_t size;
665    struct anv_block_state state;
666    struct u_vector cleanups;
667 };
668 
669 struct anv_state_pool {
670    struct anv_block_pool block_pool;
671 
672    /* Offset into the relevant state base address where the state pool starts
673     * allocating memory.
674     */
675    int32_t start_offset;
676 
677    struct anv_state_table table;
678 
679    /* The size of blocks which will be allocated from the block pool */
680    uint32_t block_size;
681 
682    /** Free list for "back" allocations */
683    union anv_free_list back_alloc_free_list;
684 
685    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
686 };
687 
688 struct anv_state_reserved_pool {
689    struct anv_state_pool *pool;
690    union anv_free_list reserved_blocks;
691    uint32_t count;
692 };
693 
694 struct anv_state_stream {
695    struct anv_state_pool *state_pool;
696 
697    /* The size of blocks to allocate from the state pool */
698    uint32_t block_size;
699 
700    /* Current block we're allocating from */
701    struct anv_state block;
702 
703    /* Offset into the current block at which to allocate the next state */
704    uint32_t next;
705 
706    /* List of all blocks allocated from this pool */
707    struct util_dynarray all_blocks;
708 };
709 
710 /* The block_pool functions are exported for testing only.  The block pool should
711  * only be used via a state pool (see below).
712  */
713 VkResult anv_block_pool_init(struct anv_block_pool *pool,
714                              struct anv_device *device,
715                              const char *name,
716                              uint64_t start_address,
717                              uint32_t initial_size);
718 void anv_block_pool_finish(struct anv_block_pool *pool);
719 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
720                              uint32_t block_size, uint32_t *padding);
721 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
722                                   uint32_t block_size);
723 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset,
724                          uint32_t size);
725 
726 VkResult anv_state_pool_init(struct anv_state_pool *pool,
727                              struct anv_device *device,
728                              const char *name,
729                              uint64_t base_address,
730                              int32_t start_offset,
731                              uint32_t block_size);
732 void anv_state_pool_finish(struct anv_state_pool *pool);
733 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
734                                       uint32_t state_size, uint32_t alignment);
735 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
736 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
737 void anv_state_stream_init(struct anv_state_stream *stream,
738                            struct anv_state_pool *state_pool,
739                            uint32_t block_size);
740 void anv_state_stream_finish(struct anv_state_stream *stream);
741 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
742                                         uint32_t size, uint32_t alignment);
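
/* Editor's usage sketch for the state-stream API declared above (sizes and the
 * device pointer are placeholders).  A stream grabs blocks from its parent
 * state pool and sub-allocates from them; finishing the stream returns every
 * block at once.
 */
#if 0
struct anv_state_stream stream;
anv_state_stream_init(&stream, &device->dynamic_state_pool, 16384);

struct anv_state state = anv_state_stream_alloc(&stream, 256, 64);
memcpy(state.map, data, 256);                  /* `data` is hypothetical */

anv_state_stream_finish(&stream);
#endif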
743 
744 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
745                                       struct anv_state_pool *parent,
746                                       uint32_t count, uint32_t size,
747                                       uint32_t alignment);
748 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
749 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
750 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
751                                   struct anv_state state);
752 
753 VkResult anv_state_table_init(struct anv_state_table *table,
754                              struct anv_device *device,
755                              uint32_t initial_entries);
756 void anv_state_table_finish(struct anv_state_table *table);
757 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
758                              uint32_t count);
759 void anv_free_list_push(union anv_free_list *list,
760                         struct anv_state_table *table,
761                         uint32_t idx, uint32_t count);
762 struct anv_state* anv_free_list_pop(union anv_free_list *list,
763                                     struct anv_state_table *table);
764 
765 
766 static inline struct anv_state *
767 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
768 {
769    return &table->map[idx].state;
770 }
771 /**
772  * Implements a pool of re-usable BOs.  The interface is identical to that
773  * of block_pool except that each block is its own BO.
774  */
775 struct anv_bo_pool {
776    const char *name;
777 
778    struct anv_device *device;
779 
780    struct util_sparse_array_free_list free_list[16];
781 };
782 
783 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
784                       const char *name);
785 void anv_bo_pool_finish(struct anv_bo_pool *pool);
786 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
787                            struct anv_bo **bo_out);
788 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
789 
790 struct anv_scratch_pool {
791    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
792    struct anv_bo *bos[16][MESA_SHADER_STAGES];
793    uint32_t surfs[16];
794    struct anv_state surf_states[16];
795 };
796 
797 void anv_scratch_pool_init(struct anv_device *device,
798                            struct anv_scratch_pool *pool);
799 void anv_scratch_pool_finish(struct anv_device *device,
800                              struct anv_scratch_pool *pool);
801 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
802                                       struct anv_scratch_pool *pool,
803                                       gl_shader_stage stage,
804                                       unsigned per_thread_scratch);
805 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
806                                    struct anv_scratch_pool *pool,
807                                    unsigned per_thread_scratch);
808 
809 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
810 struct anv_bo_cache {
811    struct util_sparse_array bo_map;
812    pthread_mutex_t mutex;
813 };
814 
815 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
816                            struct anv_device *device);
817 void anv_bo_cache_finish(struct anv_bo_cache *cache);
818 
819 struct anv_queue_family {
820    /* Standard bits passed on to the client */
821    VkQueueFlags   queueFlags;
822    uint32_t       queueCount;
823 
824    /* Driver internal information */
825    enum drm_i915_gem_engine_class engine_class;
826 };
827 
828 #define ANV_MAX_QUEUE_FAMILIES 3
829 
830 struct anv_memory_type {
831    /* Standard bits passed on to the client */
832    VkMemoryPropertyFlags   propertyFlags;
833    uint32_t                heapIndex;
834 };
835 
836 struct anv_memory_heap {
837    /* Standard bits passed on to the client */
838    VkDeviceSize      size;
839    VkMemoryHeapFlags flags;
840 
841    /** Driver-internal book-keeping.
842     *
843     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
844     */
845    VkDeviceSize      used __attribute__ ((aligned (8)));
846 
847    bool              is_local_mem;
848 };
849 
850 struct anv_memregion {
851    struct drm_i915_gem_memory_class_instance region;
852    uint64_t size;
853    uint64_t available;
854 };
855 
856 struct anv_physical_device {
857     struct vk_physical_device                   vk;
858 
859     /* Link in anv_instance::physical_devices */
860     struct list_head                            link;
861 
862     struct anv_instance *                       instance;
863     char                                        path[20];
864     struct {
865        uint16_t                                 domain;
866        uint8_t                                  bus;
867        uint8_t                                  device;
868        uint8_t                                  function;
869     }                                           pci_info;
870     struct intel_device_info                      info;
871     /** Amount of "GPU memory" we want to advertise
872      *
873      * Clearly, this value is bogus since Intel is a UMA architecture.  On
874      * gfx7 platforms, we are limited by GTT size unless we want to implement
875      * fine-grained tracking and GTT splitting.  On Broadwell and above we are
876      * practically unlimited.  However, we will never report more than 3/4 of
877      * the total system ram to try and avoid running out of RAM.
878      */
879     bool                                        supports_48bit_addresses;
880     struct brw_compiler *                       compiler;
881     struct isl_device                           isl_dev;
882     struct intel_perf_config *                    perf;
883    /* True if hardware support is incomplete/alpha */
884     bool                                        is_alpha;
885     /*
886      * Number of commands required to implement a performance query begin +
887      * end.
888      */
889     uint32_t                                    n_perf_query_commands;
890     int                                         cmd_parser_version;
891     bool                                        has_exec_async;
892     bool                                        has_exec_capture;
893     bool                                        has_exec_fence;
894     bool                                        has_syncobj_wait;
895     bool                                        has_syncobj_wait_available;
896     bool                                        has_context_priority;
897     bool                                        has_context_isolation;
898     bool                                        has_thread_submit;
899     bool                                        has_mmap_offset;
900     bool                                        has_userptr_probe;
901     uint64_t                                    gtt_size;
902 
903     bool                                        use_softpin;
904     bool                                        always_use_bindless;
905     bool                                        use_call_secondary;
906 
907     /** True if we can access buffers using A64 messages */
908     bool                                        has_a64_buffer_access;
909     /** True if we can use bindless access for images */
910     bool                                        has_bindless_images;
911     /** True if we can use bindless access for samplers */
912     bool                                        has_bindless_samplers;
913     /** True if we can use timeline semaphores through execbuf */
914     bool                                        has_exec_timeline;
915 
916     /** True if we can read the GPU timestamp register
917      *
918      * When running in a virtual context, the timestamp register is unreadable
919      * on Gfx12+.
920      */
921     bool                                        has_reg_timestamp;
922 
923     /** True if this device has implicit AUX
924      *
925      * If true, CCS is handled as an implicit attachment to the BO rather than
926      * as an explicitly bound surface.
927      */
928     bool                                        has_implicit_ccs;
929 
930     bool                                        always_flush_cache;
931 
932     struct {
933       uint32_t                                  family_count;
934       struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
935     } queue;
936 
937     struct {
938       uint32_t                                  type_count;
939       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
940       uint32_t                                  heap_count;
941       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
942       bool                                      need_clflush;
943     } memory;
944 
945     struct anv_memregion                        vram;
946     struct anv_memregion                        sys;
947     uint8_t                                     driver_build_sha1[20];
948     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
949     uint8_t                                     driver_uuid[VK_UUID_SIZE];
950     uint8_t                                     device_uuid[VK_UUID_SIZE];
951 
952     struct disk_cache *                         disk_cache;
953 
954     struct wsi_device                       wsi_device;
955     int                                         local_fd;
956     bool                                        has_local;
957     int64_t                                     local_major;
958     int64_t                                     local_minor;
959     int                                         master_fd;
960     bool                                        has_master;
961     int64_t                                     master_major;
962     int64_t                                     master_minor;
963     struct drm_i915_query_engine_info *         engine_info;
964 
965     void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_bo *, uint32_t );
966     struct intel_measure_device                 measure_device;
967 };
968 
969 struct anv_app_info {
970    const char*        app_name;
971    uint32_t           app_version;
972    const char*        engine_name;
973    uint32_t           engine_version;
974    uint32_t           api_version;
975 };
976 
977 struct anv_instance {
978     struct vk_instance                          vk;
979 
980     bool                                        physical_devices_enumerated;
981     struct list_head                            physical_devices;
982 
983     bool                                        pipeline_cache_enabled;
984 
985     struct driOptionCache                       dri_options;
986     struct driOptionCache                       available_dri_options;
987 };
988 
989 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
990 void anv_finish_wsi(struct anv_physical_device *physical_device);
991 
992 struct anv_queue_submit {
993    struct anv_cmd_buffer **                  cmd_buffers;
994    uint32_t                                  cmd_buffer_count;
995    uint32_t                                  cmd_buffer_array_length;
996 
997    uint32_t                                  fence_count;
998    uint32_t                                  fence_array_length;
999    struct drm_i915_gem_exec_fence *          fences;
1000    uint64_t *                                fence_values;
1001 
1002    uint32_t                                  temporary_semaphore_count;
1003    uint32_t                                  temporary_semaphore_array_length;
1004    struct anv_semaphore_impl *               temporary_semaphores;
1005 
1006    /* Allocated only with non-shareable timelines. */
1007    union {
1008       struct anv_timeline **                 wait_timelines;
1009       uint32_t *                             wait_timeline_syncobjs;
1010    };
1011    uint32_t                                  wait_timeline_count;
1012    uint32_t                                  wait_timeline_array_length;
1013    uint64_t *                                wait_timeline_values;
1014 
1015    struct anv_timeline **                    signal_timelines;
1016    uint32_t                                  signal_timeline_count;
1017    uint32_t                                  signal_timeline_array_length;
1018    uint64_t *                                signal_timeline_values;
1019 
1020    int                                       in_fence;
1021    bool                                      need_out_fence;
1022    int                                       out_fence;
1023 
1024    uint32_t                                  fence_bo_count;
1025    uint32_t                                  fence_bo_array_length;
1026    /* An array of struct anv_bo pointers with lower bit used as a flag to
1027     * signal we will wait on that BO (see anv_(un)pack_ptr).
1028     */
1029    uintptr_t *                               fence_bos;
1030 
1031    int                                       perf_query_pass;
1032    struct anv_query_pool *                   perf_query_pool;
1033 
1034    const VkAllocationCallbacks *             alloc;
1035    VkSystemAllocationScope                   alloc_scope;
1036 
1037    struct anv_bo *                           simple_bo;
1038    uint32_t                                  simple_bo_size;
1039 
1040    struct list_head                          link;
1041 };
1042 
1043 struct anv_queue {
1044    struct vk_queue                           vk;
1045 
1046    struct anv_device *                       device;
1047 
1048    const struct anv_queue_family *           family;
1049 
1050    uint32_t                                  exec_flags;
1051 
1052    /* Set once from the device api calls. */
1053    bool                                      lost_signaled;
1054 
1055    /* Only set once atomically by the queue */
1056    int                                       lost;
1057    int                                       error_line;
1058    const char *                              error_file;
1059    char                                      error_msg[80];
1060 
1061    /*
1062     * This mutex protects the variables below.
1063     */
1064    pthread_mutex_t                           mutex;
1065 
1066    pthread_t                                 thread;
1067    pthread_cond_t                            cond;
1068 
1069    /*
1070     * A list of struct anv_queue_submit to be submitted to i915.
1071     */
1072    struct list_head                          queued_submits;
1073 
1074    /* Set to true to stop the submission thread */
1075    bool                                      quit;
1076 };
1077 
1078 struct anv_pipeline_cache {
1079    struct vk_object_base                        base;
1080    struct anv_device *                          device;
1081    pthread_mutex_t                              mutex;
1082 
1083    struct hash_table *                          nir_cache;
1084 
1085    struct hash_table *                          cache;
1086 
1087    bool                                         external_sync;
1088 };
1089 
1090 struct nir_xfb_info;
1091 struct anv_pipeline_bind_map;
1092 
1093 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
1094                              struct anv_device *device,
1095                              bool cache_enabled,
1096                              bool external_sync);
1097 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
1098 
1099 struct anv_shader_bin *
1100 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
1101                           const void *key, uint32_t key_size);
1102 struct anv_shader_bin *
1103 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
1104                                  gl_shader_stage stage,
1105                                  const void *key_data, uint32_t key_size,
1106                                  const void *kernel_data, uint32_t kernel_size,
1107                                  const struct brw_stage_prog_data *prog_data,
1108                                  uint32_t prog_data_size,
1109                                  const struct brw_compile_stats *stats,
1110                                  uint32_t num_stats,
1111                                  const struct nir_xfb_info *xfb_info,
1112                                  const struct anv_pipeline_bind_map *bind_map);
1113 
1114 struct anv_shader_bin *
1115 anv_device_search_for_kernel(struct anv_device *device,
1116                              struct anv_pipeline_cache *cache,
1117                              const void *key_data, uint32_t key_size,
1118                              bool *user_cache_bit);
1119 
1120 struct anv_shader_bin *
1121 anv_device_upload_kernel(struct anv_device *device,
1122                          struct anv_pipeline_cache *cache,
1123                          gl_shader_stage stage,
1124                          const void *key_data, uint32_t key_size,
1125                          const void *kernel_data, uint32_t kernel_size,
1126                          const struct brw_stage_prog_data *prog_data,
1127                          uint32_t prog_data_size,
1128                          const struct brw_compile_stats *stats,
1129                          uint32_t num_stats,
1130                          const struct nir_xfb_info *xfb_info,
1131                          const struct anv_pipeline_bind_map *bind_map);
1132 
1133 struct nir_shader;
1134 struct nir_shader_compiler_options;
1135 
1136 struct nir_shader *
1137 anv_device_search_for_nir(struct anv_device *device,
1138                           struct anv_pipeline_cache *cache,
1139                           const struct nir_shader_compiler_options *nir_options,
1140                           unsigned char sha1_key[20],
1141                           void *mem_ctx);
1142 
1143 void
1144 anv_device_upload_nir(struct anv_device *device,
1145                       struct anv_pipeline_cache *cache,
1146                       const struct nir_shader *nir,
1147                       unsigned char sha1_key[20]);
1148 
1149 struct anv_address {
1150    struct anv_bo *bo;
1151    int64_t offset;
1152 };
1153 
1154 struct anv_device {
1155     struct vk_device                            vk;
1156 
1157     struct anv_physical_device *                physical;
1158     struct intel_device_info                      info;
1159     struct isl_device                           isl_dev;
1160     int                                         context_id;
1161     int                                         fd;
1162     bool                                        can_chain_batches;
1163     bool                                        robust_buffer_access;
1164     bool                                        has_thread_submit;
1165 
1166     pthread_mutex_t                             vma_mutex;
1167     struct util_vma_heap                        vma_lo;
1168     struct util_vma_heap                        vma_cva;
1169     struct util_vma_heap                        vma_hi;
1170 
1171     /** List of all anv_device_memory objects */
1172     struct list_head                            memory_objects;
1173 
1174     struct anv_bo_pool                          batch_bo_pool;
1175 
1176     struct anv_bo_cache                         bo_cache;
1177 
1178     struct anv_state_pool                       general_state_pool;
1179     struct anv_state_pool                       dynamic_state_pool;
1180     struct anv_state_pool                       instruction_state_pool;
1181     struct anv_state_pool                       binding_table_pool;
1182     struct anv_state_pool                       surface_state_pool;
1183 
1184     struct anv_state_reserved_pool              custom_border_colors;
1185 
1186     /** BO used for various workarounds
1187      *
1188      * There are a number of workarounds on our hardware which require writing
1189      * data somewhere and it doesn't really matter where.  For that, we use
1190      * this BO and just write to the first dword or so.
1191      *
1192      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1193      * For that, we use the high bytes (>= 1024) of the workaround BO.
1194      */
1195     struct anv_bo *                             workaround_bo;
1196     struct anv_address                          workaround_address;
1197 
1198     struct anv_bo *                             trivial_batch_bo;
1199     struct anv_state                            null_surface_state;
1200 
1201     struct anv_pipeline_cache                   default_pipeline_cache;
1202     struct blorp_context                        blorp;
1203 
1204     struct anv_state                            border_colors;
1205 
1206     struct anv_state                            slice_hash;
1207 
1208     uint32_t                                    queue_count;
1209     struct anv_queue  *                         queues;
1210 
1211     struct anv_scratch_pool                     scratch_pool;
1212     struct anv_bo                              *rt_scratch_bos[16];
1213 
1214     struct anv_shader_bin                      *rt_trampoline;
1215     struct anv_shader_bin                      *rt_trivial_return;
1216 
1217     pthread_mutex_t                             mutex;
1218     pthread_cond_t                              queue_submit;
1219     int                                         _lost;
1220     int                                         lost_reported;
1221 
1222     struct intel_batch_decode_ctx               decoder_ctx;
1223     /*
1224      * When decoding an anv_cmd_buffer, we might need to search for BOs through
1225      * the cmd_buffer's list.
1226      */
1227     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
1228 
1229     int                                         perf_fd; /* -1 if not opened */
1230     uint64_t                                    perf_metric; /* 0 if unset */
1231 
1232     struct intel_aux_map_context                *aux_map_ctx;
1233 
1234     const struct intel_l3_config                *l3_config;
1235 
1236     struct intel_debug_block_frame              *debug_frame_desc;
1237 };
1238 
1239 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1240 #define ANV_ALWAYS_SOFTPIN true
1241 #else
1242 #define ANV_ALWAYS_SOFTPIN false
1243 #endif
1244 
1245 static inline bool
1246 anv_use_softpin(const struct anv_physical_device *pdevice)
1247 {
1248 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1249    /* Sky Lake and later always use softpin */
1250    assert(pdevice->use_softpin);
1251    return true;
1252 #elif defined(GFX_VERx10) && GFX_VERx10 < 80
1253    /* Haswell and earlier never use softpin */
1254    assert(!pdevice->use_softpin);
1255    return false;
1256 #else
1257    /* If we don't have a GFX_VERx10 #define, we need to look at the physical
1258     * device.  Also, for GFX version 8, we need to look at the physical
1259     * device because Broadwell softpins but Cherryview doesn't.
1260     */
1261    return pdevice->use_softpin;
1262 #endif
1263 }
1264 
1265 static inline struct anv_state_pool *
1266 anv_binding_table_pool(struct anv_device *device)
1267 {
1268    if (anv_use_softpin(device->physical))
1269       return &device->binding_table_pool;
1270    else
1271       return &device->surface_state_pool;
1272 }
1273 
1274 static inline struct anv_state
1275 anv_binding_table_pool_alloc(struct anv_device *device)
1276 {
1277    if (anv_use_softpin(device->physical))
1278       return anv_state_pool_alloc(&device->binding_table_pool,
1279                                   device->binding_table_pool.block_size, 0);
1280    else
1281       return anv_state_pool_alloc_back(&device->surface_state_pool);
1282 }
1283 
1284 static inline void
1285 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1286    anv_state_pool_free(anv_binding_table_pool(device), state);
1287 }
1288 
1289 static inline uint32_t
1290 anv_mocs(const struct anv_device *device,
1291          const struct anv_bo *bo,
1292          isl_surf_usage_flags_t usage)
1293 {
1294    return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
1295 }
1296 
1297 void anv_device_init_blorp(struct anv_device *device);
1298 void anv_device_finish_blorp(struct anv_device *device);
1299 
1300 void _anv_device_report_lost(struct anv_device *device);
1301 VkResult _anv_device_set_lost(struct anv_device *device,
1302                               const char *file, int line,
1303                               const char *msg, ...)
1304    anv_printflike(4, 5);
1305 VkResult _anv_queue_set_lost(struct anv_queue *queue,
1306                               const char *file, int line,
1307                               const char *msg, ...)
1308    anv_printflike(4, 5);
1309 #define anv_device_set_lost(dev, ...) \
1310    _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
1311 #define anv_queue_set_lost(queue, ...) \
1312    (queue)->device->has_thread_submit ? \
1313    _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
1314    _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
1315 
1316 static inline bool
1317 anv_device_is_lost(struct anv_device *device)
1318 {
1319    int lost = p_atomic_read(&device->_lost);
1320    if (unlikely(lost && !device->lost_reported))
1321       _anv_device_report_lost(device);
1322    return lost;
1323 }
1324 
1325 VkResult anv_device_query_status(struct anv_device *device);
1326 
1327 
1328 enum anv_bo_alloc_flags {
1329    /** Specifies that the BO must have a 32-bit address
1330     *
1331     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
1332     */
1333    ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),
1334 
1335    /** Specifies that the BO may be shared externally */
1336    ANV_BO_ALLOC_EXTERNAL =       (1 << 1),
1337 
1338    /** Specifies that the BO should be mapped */
1339    ANV_BO_ALLOC_MAPPED =         (1 << 2),
1340 
1341    /** Specifies that the BO should be snooped so we get coherency */
1342    ANV_BO_ALLOC_SNOOPED =        (1 << 3),
1343 
1344    /** Specifies that the BO should be captured in error states */
1345    ANV_BO_ALLOC_CAPTURE =        (1 << 4),
1346 
1347    /** Specifies that the BO will have an address assigned by the caller
1348     *
1349     * Such BOs do not exist in any VMA heap.
1350     */
1351    ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
1352 
1353    /** Enables implicit synchronization on the BO
1354     *
1355     * This is the opposite of EXEC_OBJECT_ASYNC.
1356     */
1357    ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),
1358 
1359    /** Enables implicit synchronization on the BO
1360     *
1361     * This is equivalent to EXEC_OBJECT_WRITE.
1362     */
1363    ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
1364 
1365    /** Has an address which is visible to the client */
1366    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
1367 
1368    /** This buffer has implicit CCS data attached to it */
1369    ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
1370 
1371    /** This buffer is allocated from local memory */
1372    ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
1373 };
1374 
1375 VkResult anv_device_alloc_bo(struct anv_device *device,
1376                              const char *name, uint64_t size,
1377                              enum anv_bo_alloc_flags alloc_flags,
1378                              uint64_t explicit_address,
1379                              struct anv_bo **bo);
1380 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1381                                             void *host_ptr, uint32_t size,
1382                                             enum anv_bo_alloc_flags alloc_flags,
1383                                             uint64_t client_address,
1384                                             struct anv_bo **bo_out);
1385 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1386                               enum anv_bo_alloc_flags alloc_flags,
1387                               uint64_t client_address,
1388                               struct anv_bo **bo);
1389 VkResult anv_device_export_bo(struct anv_device *device,
1390                               struct anv_bo *bo, int *fd_out);
1391 void anv_device_release_bo(struct anv_device *device,
1392                            struct anv_bo *bo);
1393 
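/* Illustrative sketch (not part of the driver): a typical allocation of a
 * small, CPU-mapped BO that should also show up in GPU error states, using
 * the helpers declared above.  The "device" pointer and the 4096-byte size
 * are hypothetical; explicit_address is 0 because no fixed address is
 * requested.
 *
 *    struct anv_bo *bo;
 *    VkResult result =
 *       anv_device_alloc_bo(device, "example-bo", 4096,
 *                           ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_CAPTURE,
 *                           0, &bo);
 *    if (result == VK_SUCCESS) {
 *       ...use the BO...
 *       anv_device_release_bo(device, bo);
 *    }
 */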
1394 static inline struct anv_bo *
1395 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
1396 {
1397    return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
1398 }
1399 
1400 VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
1401 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1402                          int64_t timeout);
1403 
1404 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
1405                         uint32_t exec_flags,
1406                         const VkDeviceQueueCreateInfo *pCreateInfo,
1407                         uint32_t index_in_family);
1408 void anv_queue_finish(struct anv_queue *queue);
1409 
1410 VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
1411 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
1412                                        struct anv_batch *batch);
1413 
1414 uint64_t anv_gettime_ns(void);
1415 uint64_t anv_get_absolute_timeout(uint64_t timeout);
1416 
1417 void* anv_gem_mmap(struct anv_device *device,
1418                    uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
1419 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
1420 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
1421 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
1422 uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
1423                                 uint32_t num_regions,
1424                                 struct drm_i915_gem_memory_class_instance *regions);
1425 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
1426 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
1427 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
1428 int anv_gem_execbuffer(struct anv_device *device,
1429                        struct drm_i915_gem_execbuffer2 *execbuf);
1430 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
1431                        uint32_t stride, uint32_t tiling);
1432 int anv_gem_create_context(struct anv_device *device);
1433 int anv_gem_create_context_engines(struct anv_device *device,
1434                                    const struct drm_i915_query_engine_info *info,
1435                                    int num_engines,
1436                                    uint16_t *engine_classes);
1437 bool anv_gem_has_context_priority(int fd);
1438 int anv_gem_destroy_context(struct anv_device *device, int context);
1439 int anv_gem_set_context_param(int fd, int context, uint32_t param,
1440                               uint64_t value);
1441 int anv_gem_get_context_param(int fd, int context, uint32_t param,
1442                               uint64_t *value);
1443 int anv_gem_get_param(int fd, uint32_t param);
1444 uint64_t anv_gem_get_drm_cap(int fd, uint32_t capability);
1445 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
1446 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
1447 int anv_gem_context_get_reset_stats(int fd, int context,
1448                                     uint32_t *active, uint32_t *pending);
1449 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
1450 int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
1451 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
1452 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
1453 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
1454                        uint32_t read_domains, uint32_t write_domain);
1455 int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);
1456 uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
1457 void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
1458 int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
1459 uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
1460 int anv_gem_syncobj_export_sync_file(struct anv_device *device,
1461                                      uint32_t handle);
1462 int anv_gem_syncobj_import_sync_file(struct anv_device *device,
1463                                      uint32_t handle, int fd);
1464 void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
1465 bool anv_gem_supports_syncobj_wait(int fd);
1466 int anv_gem_syncobj_wait(struct anv_device *device,
1467                          const uint32_t *handles, uint32_t num_handles,
1468                          int64_t abs_timeout_ns, bool wait_all);
1469 int anv_gem_syncobj_timeline_wait(struct anv_device *device,
1470                                   const uint32_t *handles, const uint64_t *points,
1471                                   uint32_t num_items, int64_t abs_timeout_ns,
1472                                   bool wait_all, bool wait_materialize);
1473 int anv_gem_syncobj_timeline_signal(struct anv_device *device,
1474                                     const uint32_t *handles, const uint64_t *points,
1475                                     uint32_t num_items);
1476 int anv_gem_syncobj_timeline_query(struct anv_device *device,
1477                                    const uint32_t *handles, uint64_t *points,
1478                                    uint32_t num_items);
1479 int anv_i915_query(int fd, uint64_t query_id, void *buffer,
1480                    int32_t *buffer_len);
1481 struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);
1482 int anv_gem_count_engines(const struct drm_i915_query_engine_info *info,
1483                           uint16_t engine_class);
1484 
1485 uint64_t anv_vma_alloc(struct anv_device *device,
1486                        uint64_t size, uint64_t align,
1487                        enum anv_bo_alloc_flags alloc_flags,
1488                        uint64_t client_address);
1489 void anv_vma_free(struct anv_device *device,
1490                   uint64_t address, uint64_t size);
1491 
1492 struct anv_reloc_list {
1493    uint32_t                                     num_relocs;
1494    uint32_t                                     array_length;
1495    struct drm_i915_gem_relocation_entry *       relocs;
1496    struct anv_bo **                             reloc_bos;
1497    uint32_t                                     dep_words;
1498    BITSET_WORD *                                deps;
1499 };
1500 
1501 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
1502                              const VkAllocationCallbacks *alloc);
1503 void anv_reloc_list_finish(struct anv_reloc_list *list,
1504                            const VkAllocationCallbacks *alloc);
1505 
1506 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
1507                             const VkAllocationCallbacks *alloc,
1508                             uint32_t offset, struct anv_bo *target_bo,
1509                             uint32_t delta, uint64_t *address_u64_out);
1510 
1511 VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
1512                                const VkAllocationCallbacks *alloc,
1513                                struct anv_bo *target_bo);
1514 
1515 struct anv_batch_bo {
1516    /* Link in the anv_cmd_buffer.owned_batch_bos list */
1517    struct list_head                             link;
1518 
1519    struct anv_bo *                              bo;
1520 
1521    /* Bytes actually consumed in this batch BO */
1522    uint32_t                                     length;
1523 
1524    /* When this batch BO is used as part of a primary batch buffer, this
1525     * tracks whether it is chained to another primary batch buffer.
1526     *
1527     * If this is the case, the relocation list's last entry points to the
1528     * location of the MI_BATCH_BUFFER_START chaining to the next batch.
1529     */
1530    bool                                         chained;
1531 
1532    struct anv_reloc_list                        relocs;
1533 };
1534 
1535 struct anv_batch {
1536    const VkAllocationCallbacks *                alloc;
1537 
1538    struct anv_address                           start_addr;
1539 
1540    void *                                       start;
1541    void *                                       end;
1542    void *                                       next;
1543 
1544    struct anv_reloc_list *                      relocs;
1545 
1546    /* This callback is called (with the associated user data) in the event
1547     * that the batch runs out of space.
1548     */
1549    VkResult (*extend_cb)(struct anv_batch *, void *);
1550    void *                                       user_data;
1551 
1552    /**
1553     * Current error status of the command buffer. Used to track inconsistent
1554     * or incomplete command buffer states that are the consequence of run-time
1555     * errors such as out of memory scenarios. We want to track this in the
1556     * batch because the command buffer object is not visible to some parts
1557     * of the driver.
1558     */
1559    VkResult                                     status;
1560 };
1561 
1562 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1563 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1564 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1565 
1566 static inline void
1567 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1568                       void *map, size_t size)
1569 {
1570    batch->start_addr = addr;
1571    batch->next = batch->start = map;
1572    batch->end = map + size;
1573 }
1574 
1575 static inline VkResult
1576 anv_batch_set_error(struct anv_batch *batch, VkResult error)
1577 {
1578    assert(error != VK_SUCCESS);
1579    if (batch->status == VK_SUCCESS)
1580       batch->status = error;
1581    return batch->status;
1582 }
1583 
1584 static inline bool
1585 anv_batch_has_error(struct anv_batch *batch)
1586 {
1587    return batch->status != VK_SUCCESS;
1588 }
1589 
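/* Illustrative sketch (not part of the driver): the usual idiom around the
 * two helpers above.  Emission paths bail out early once an error has been
 * recorded on the batch, and record the first failure they hit themselves.
 * "emit_example_state" and the "some_allocation_failed" condition are
 * hypothetical.
 *
 *    static void
 *    emit_example_state(struct anv_batch *batch)
 *    {
 *       if (anv_batch_has_error(batch))
 *          return;
 *
 *       if (some_allocation_failed) {
 *          anv_batch_set_error(batch, VK_ERROR_OUT_OF_HOST_MEMORY);
 *          return;
 *       }
 *       ...
 *    }
 */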
1590 static inline uint64_t
1591 anv_batch_emit_reloc(struct anv_batch *batch,
1592                      void *location, struct anv_bo *bo, uint32_t delta)
1593 {
1594    uint64_t address_u64 = 0;
1595    VkResult result;
1596 
1597    if (ANV_ALWAYS_SOFTPIN) {
1598       address_u64 = bo->offset + delta;
1599       result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
1600    } else {
1601       result = anv_reloc_list_add(batch->relocs, batch->alloc,
1602                                   location - batch->start, bo, delta,
1603                                   &address_u64);
1604    }
1605    if (unlikely(result != VK_SUCCESS)) {
1606       anv_batch_set_error(batch, result);
1607       return 0;
1608    }
1609 
1610    return address_u64;
1611 }
1612 
1613 
1614 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
1615 
1616 static inline struct anv_address
1617 anv_address_from_u64(uint64_t addr_u64)
1618 {
1619    assert(addr_u64 == intel_canonical_address(addr_u64));
1620    return (struct anv_address) {
1621       .bo = NULL,
1622       .offset = addr_u64,
1623    };
1624 }
1625 
1626 static inline bool
1627 anv_address_is_null(struct anv_address addr)
1628 {
1629    return addr.bo == NULL && addr.offset == 0;
1630 }
1631 
1632 static inline uint64_t
1633 anv_address_physical(struct anv_address addr)
1634 {
1635    if (addr.bo && (ANV_ALWAYS_SOFTPIN ||
1636                    (addr.bo->flags & EXEC_OBJECT_PINNED))) {
1637       assert(addr.bo->flags & EXEC_OBJECT_PINNED);
1638       return intel_canonical_address(addr.bo->offset + addr.offset);
1639    } else {
1640       return intel_canonical_address(addr.offset);
1641    }
1642 }
1643 
1644 static inline struct anv_address
1645 anv_address_add(struct anv_address addr, uint64_t offset)
1646 {
1647    addr.offset += offset;
1648    return addr;
1649 }
1650 
1651 static inline void
1652 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1653 {
1654    unsigned reloc_size = 0;
1655    if (device->info.ver >= 8) {
1656       reloc_size = sizeof(uint64_t);
1657       *(uint64_t *)p = intel_canonical_address(v);
1658    } else {
1659       reloc_size = sizeof(uint32_t);
1660       *(uint32_t *)p = v;
1661    }
1662 
1663    if (flush && !device->info.has_llc)
1664       intel_flush_range(p, reloc_size);
1665 }
1666 
1667 static inline uint64_t
1668 _anv_combine_address(struct anv_batch *batch, void *location,
1669                      const struct anv_address address, uint32_t delta)
1670 {
1671    if (address.bo == NULL) {
1672       return address.offset + delta;
1673    } else if (batch == NULL) {
1674       assert(address.bo->flags & EXEC_OBJECT_PINNED);
1675       return anv_address_physical(anv_address_add(address, delta));
1676    } else {
1677       assert(batch->start <= location && location < batch->end);
1678       /* i915 relocations are signed. */
1679       assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
1680       return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1681    }
1682 }
1683 
1684 #define __gen_address_type struct anv_address
1685 #define __gen_user_data struct anv_batch
1686 #define __gen_combine_address _anv_combine_address
1687 
1688 /* Wrapper macros needed to work around preprocessor argument issues.  In
1689  * particular, arguments don't get pre-evaluated if they are concatenated.
1690  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1691  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1692  * We can work around this easily enough with these helpers.
1693  */
1694 #define __anv_cmd_length(cmd) cmd ## _length
1695 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1696 #define __anv_cmd_header(cmd) cmd ## _header
1697 #define __anv_cmd_pack(cmd) cmd ## _pack
1698 #define __anv_reg_num(reg) reg ## _num
1699 
1700 #define anv_pack_struct(dst, struc, ...) do {                              \
1701       struct struc __template = {                                          \
1702          __VA_ARGS__                                                       \
1703       };                                                                   \
1704       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
1705       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1706    } while (0)
1707 
1708 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
1709       void *__dst = anv_batch_emit_dwords(batch, n);       \
1710       if (__dst) {                                         \
1711          struct cmd __template = {                         \
1712             __anv_cmd_header(cmd),                         \
1713            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
1714             __VA_ARGS__                                    \
1715          };                                                \
1716          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
1717       }                                                    \
1718       __dst;                                               \
1719    })
1720 
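/* Illustrative sketch (not part of the driver): anv_batch_emitn() is the
 * variable-length counterpart of anv_batch_emit() below, used for packets
 * whose length depends on a runtime count.  The GENX(3DSTATE_VERTEX_BUFFERS)
 * packet name and the "num_buffers" count are assumptions for the example.
 *
 *    const uint32_t num_dwords = 1 + num_buffers * 4;
 *    uint32_t *dw = anv_batch_emitn(batch, num_dwords,
 *                                   GENX(3DSTATE_VERTEX_BUFFERS));
 *    if (dw == NULL)
 *       return;
 *    ...pack one vertex buffer state per buffer starting at dw + 1...
 */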
1721 #define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
1722    do {                                                                 \
1723       uint32_t *dw;                                                     \
1724                                                                         \
1725       STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
1726       dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
1727       if (!dw)                                                          \
1728          break;                                                         \
1729       for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
1730          dw[i] = (dwords0)[i] | (dwords1)[i];                           \
1731       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1732    } while (0)
1733 
1734 #define anv_batch_emit(batch, cmd, name)                            \
1735    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
1736         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
1737         __builtin_expect(_dst != NULL, 1);                              \
1738         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
1739            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1740            _dst = NULL;                                                 \
1741          }))
1742 
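/* Illustrative sketch (not part of the driver): anv_batch_emit() above is
 * normally used with a genxml packet name and a block that fills in the
 * packet fields; the final loop iteration packs them into the batch.
 * GENX(PIPE_CONTROL) and its CommandStreamerStallEnable field are used here
 * as a representative example.
 *
 *    anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 */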
1743 #define anv_batch_write_reg(batch, reg, name)                           \
1744    for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
1745         ({                                                              \
1746             uint32_t _dw[__anv_cmd_length(reg)];                        \
1747             __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
1748             for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
1749                anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
1750                   lri.RegisterOffset   = __anv_reg_num(reg);            \
1751                   lri.DataDWord        = _dw[i];                        \
1752                }                                                        \
1753             }                                                           \
1754            _cont = NULL;                                                \
1755          }))
1756 
1757 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1758 /* #define __gen_get_batch_address anv_batch_address */
1759 /* #define __gen_address_value anv_address_physical */
1760 /* #define __gen_address_offset anv_address_add */
1761 
1762 struct anv_device_memory {
1763    struct vk_object_base                        base;
1764 
1765    struct list_head                             link;
1766 
1767    struct anv_bo *                              bo;
1768    const struct anv_memory_type *               type;
1769    VkDeviceSize                                 map_size;
1770    void *                                       map;
1771 
1772    /* The map, from the user's point of view, is map + map_delta */
1773    uint32_t                                     map_delta;
1774 
1775    /* If set, we are holding reference to AHardwareBuffer
1776     * which we must release when memory is freed.
1777     */
1778    struct AHardwareBuffer *                     ahw;
1779 
1780    /* If set, this memory comes from a host pointer. */
1781    void *                                       host_ptr;
1782 };
1783 
1784 /**
1785  * Header for Vertex URB Entry (VUE)
1786  */
1787 struct anv_vue_header {
1788    uint32_t Reserved;
1789    uint32_t RTAIndex; /* RenderTargetArrayIndex */
1790    uint32_t ViewportIndex;
1791    float PointWidth;
1792 };
1793 
1794 /** Struct representing a sampled image descriptor
1795  *
1796  * This descriptor layout is used for sampled images, bare samplers, and
1797  * combined image/sampler descriptors.
1798  */
1799 struct anv_sampled_image_descriptor {
1800    /** Bindless image handle
1801     *
1802     * This is expected to already be shifted such that the 20-bit
1803     * SURFACE_STATE table index is in the top 20 bits.
1804     */
1805    uint32_t image;
1806 
1807    /** Bindless sampler handle
1808     *
1809     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1810     * to the dynamic state base address.
1811     */
1812    uint32_t sampler;
1813 };
1814 
1815 struct anv_texture_swizzle_descriptor {
1816    /** Texture swizzle
1817     *
1818     * See also nir_intrinsic_channel_select_intel
1819     */
1820    uint8_t swizzle[4];
1821 
1822    /** Unused padding to ensure the struct is a multiple of 64 bits */
1823    uint32_t _pad;
1824 };
1825 
1826 /** Struct representing a storage image descriptor */
1827 struct anv_storage_image_descriptor {
1828    /** Bindless image handles
1829     *
1830     * These are expected to already be shifted such that the 20-bit
1831     * SURFACE_STATE table index is in the top 20 bits.
1832     */
1833    uint32_t vanilla;
1834    uint32_t lowered;
1835 };
1836 
1837 /** Struct representing an address/range descriptor
1838  *
1839  * The fields of this struct correspond directly to the data layout of
1840  * nir_address_format_64bit_bounded_global addresses.  The last field is the
1841  * offset in the NIR address; it must be zero so that loading the descriptor
1842  * yields a pointer to the start of the range.
1843  */
1844 struct anv_address_range_descriptor {
1845    uint64_t address;
1846    uint32_t range;
1847    uint32_t zero;
1848 };
1849 
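/* Illustrative sketch (not part of the driver): how such a descriptor is
 * typically populated for a buffer binding, using address helpers declared
 * earlier in this header and anv_buffer_get_range() declared further below.
 * The "buffer", "offset" and "range" variables are hypothetical.
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = anv_address_physical(
 *          anv_address_add(buffer->address, offset)),
 *       .range = anv_buffer_get_range(buffer, offset, range),
 *       .zero = 0,
 *    };
 */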
1850 enum anv_descriptor_data {
1851    /** The descriptor contains a BTI reference to a surface state */
1852    ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
1853    /** The descriptor contains a BTI reference to a sampler state */
1854    ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
1855    /** The descriptor contains an actual buffer view */
1856    ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
1857    /** The descriptor contains auxiliary image layout data */
1858    ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
1859    /** The descriptor contains inline uniform data */
1860    ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1861    /** anv_address_range_descriptor with a buffer address and range */
1862    ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
1863    /** Bindless surface handle */
1864    ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
1865    /** Storage image handles */
1866    ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
1867    /** Texture swizzle data (see anv_texture_swizzle_descriptor) */
1868    ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
1869 };
1870 
1871 struct anv_descriptor_set_binding_layout {
1872    /* The type of the descriptors in this binding */
1873    VkDescriptorType type;
1874 
1875    /* Flags provided when this binding was created */
1876    VkDescriptorBindingFlagsEXT flags;
1877 
1878    /* Bitfield representing the type of data this descriptor contains */
1879    enum anv_descriptor_data data;
1880 
1881    /* Maximum number of YCbCr texture/sampler planes */
1882    uint8_t max_plane_count;
1883 
1884    /* Number of array elements in this binding (or size in bytes for inline
1885     * uniform data)
1886     */
1887    uint32_t array_size;
1888 
1889    /* Index into the flattened descriptor set */
1890    uint32_t descriptor_index;
1891 
1892    /* Index into the dynamic state array for a dynamic buffer */
1893    int16_t dynamic_offset_index;
1894 
1895    /* Index into the descriptor set buffer views */
1896    int32_t buffer_view_index;
1897 
1898    /* Offset into the descriptor buffer where this descriptor lives */
1899    uint32_t descriptor_offset;
1900 
1901    /* Immutable samplers (or NULL if no immutable samplers) */
1902    struct anv_sampler **immutable_samplers;
1903 };
1904 
1905 unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);
1906 
1907 unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
1908                                   VkDescriptorType type);
1909 
1910 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1911                                       const struct anv_descriptor_set_binding_layout *binding,
1912                                       bool sampler);
1913 
1914 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1915                                       const struct anv_descriptor_set_binding_layout *binding,
1916                                       bool sampler);
1917 
1918 struct anv_descriptor_set_layout {
1919    struct vk_object_base base;
1920 
1921    /* Descriptor set layouts can be destroyed at almost any time */
1922    uint32_t ref_cnt;
1923 
1924    /* Number of bindings in this descriptor set */
1925    uint32_t binding_count;
1926 
1927    /* Total number of descriptors */
1928    uint32_t descriptor_count;
1929 
1930    /* Shader stages affected by this descriptor set */
1931    uint16_t shader_stages;
1932 
1933    /* Number of buffer views in this descriptor set */
1934    uint32_t buffer_view_count;
1935 
1936    /* Number of dynamic offsets used by this descriptor set */
1937    uint16_t dynamic_offset_count;
1938 
1939    /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
1940     * this buffer
1941     */
1942    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
1943 
1944    /* Size of the descriptor buffer for this descriptor set */
1945    uint32_t descriptor_buffer_size;
1946 
1947    /* Bindings in this descriptor set */
1948    struct anv_descriptor_set_binding_layout binding[0];
1949 };
1950 
1951 void anv_descriptor_set_layout_destroy(struct anv_device *device,
1952                                        struct anv_descriptor_set_layout *layout);
1953 
1954 static inline void
1955 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1956 {
1957    assert(layout && layout->ref_cnt >= 1);
1958    p_atomic_inc(&layout->ref_cnt);
1959 }
1960 
1961 static inline void
1962 anv_descriptor_set_layout_unref(struct anv_device *device,
1963                                 struct anv_descriptor_set_layout *layout)
1964 {
1965    assert(layout && layout->ref_cnt >= 1);
1966    if (p_atomic_dec_zero(&layout->ref_cnt))
1967       anv_descriptor_set_layout_destroy(device, layout);
1968 }
1969 
1970 struct anv_descriptor {
1971    VkDescriptorType type;
1972 
1973    union {
1974       struct {
1975          VkImageLayout layout;
1976          struct anv_image_view *image_view;
1977          struct anv_sampler *sampler;
1978       };
1979 
1980       struct {
1981          struct anv_buffer *buffer;
1982          uint64_t offset;
1983          uint64_t range;
1984       };
1985 
1986       struct anv_buffer_view *buffer_view;
1987    };
1988 };
1989 
1990 struct anv_descriptor_set {
1991    struct vk_object_base base;
1992 
1993    struct anv_descriptor_pool *pool;
1994    struct anv_descriptor_set_layout *layout;
1995 
1996    /* Amount of space occupied in the pool by this descriptor set. It can
1997     * be larger than the size of the descriptor set.
1998     */
1999    uint32_t size;
2000 
2001    /* State relative to anv_descriptor_pool::bo */
2002    struct anv_state desc_mem;
2003    /* Surface state for the descriptor buffer */
2004    struct anv_state desc_surface_state;
2005 
2006    /* Descriptor set address. */
2007    struct anv_address desc_addr;
2008 
2009    uint32_t buffer_view_count;
2010    struct anv_buffer_view *buffer_views;
2011 
2012    /* Link to the descriptor pool's desc_sets list. */
2013    struct list_head pool_link;
2014 
2015    uint32_t descriptor_count;
2016    struct anv_descriptor descriptors[0];
2017 };
2018 
2019 static inline bool
2020 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2021 {
2022    return set->pool == NULL;
2023 }
2024 
2025 struct anv_buffer_view {
2026    struct vk_object_base base;
2027 
2028    enum isl_format format; /**< VkBufferViewCreateInfo::format */
2029    uint64_t range; /**< VkBufferViewCreateInfo::range */
2030 
2031    struct anv_address address;
2032 
2033    struct anv_state surface_state;
2034    struct anv_state storage_surface_state;
2035    struct anv_state lowered_storage_surface_state;
2036 
2037    struct brw_image_param lowered_storage_image_param;
2038 };
2039 
2040 struct anv_push_descriptor_set {
2041    struct anv_descriptor_set set;
2042 
2043    /* Put this field right behind anv_descriptor_set so it fills up the
2044     * descriptors[0] field. */
2045    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2046 
2047    /** True if the descriptor set buffer has been referenced by a draw or
2048     * dispatch command.
2049     */
2050    bool set_used_on_gpu;
2051 
2052    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2053 };
2054 
2055 static inline struct anv_address
2056 anv_descriptor_set_address(struct anv_descriptor_set *set)
2057 {
2058    if (anv_descriptor_set_is_push(set)) {
2059       /* We have to flag the push descriptor set as used on the GPU
2060        * so that the next time we push descriptors, we grab new memory.
2061        */
2062       struct anv_push_descriptor_set *push_set =
2063          (struct anv_push_descriptor_set *)set;
2064       push_set->set_used_on_gpu = true;
2065    }
2066 
2067    return set->desc_addr;
2068 }
2069 
2070 struct anv_descriptor_pool {
2071    struct vk_object_base base;
2072 
2073    uint32_t size;
2074    uint32_t next;
2075    uint32_t free_list;
2076 
2077    struct anv_bo *bo;
2078    struct util_vma_heap bo_heap;
2079 
2080    struct anv_state_stream surface_state_stream;
2081    void *surface_state_free_list;
2082 
2083    struct list_head desc_sets;
2084 
2085    char data[0];
2086 };
2087 
2088 enum anv_descriptor_template_entry_type {
2089    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
2090    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
2091    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
2092 };
2093 
2094 struct anv_descriptor_template_entry {
2095    /* The type of descriptor in this entry */
2096    VkDescriptorType type;
2097 
2098    /* Binding in the descriptor set */
2099    uint32_t binding;
2100 
2101    /* Offset at which to write into the descriptor set binding */
2102    uint32_t array_element;
2103 
2104    /* Number of elements to write into the descriptor set binding */
2105    uint32_t array_count;
2106 
2107    /* Offset into the user provided data */
2108    size_t offset;
2109 
2110    /* Stride between elements into the user provided data */
2111    size_t stride;
2112 };
2113 
2114 struct anv_descriptor_update_template {
2115     struct vk_object_base base;
2116 
2117     VkPipelineBindPoint bind_point;
2118 
2119    /* The descriptor set this template corresponds to. This value is only
2120     * valid if the template was created with the templateType
2121     * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
2122     */
2123    uint8_t set;
2124 
2125    /* Number of entries in this template */
2126    uint32_t entry_count;
2127 
2128    /* Entries of the template */
2129    struct anv_descriptor_template_entry entries[0];
2130 };
2131 
2132 size_t
2133 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
2134                                uint32_t var_desc_count);
2135 
2136 uint32_t
2137 anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
2138                                                  uint32_t var_desc_count);
2139 
2140 void
2141 anv_descriptor_set_write_image_view(struct anv_device *device,
2142                                     struct anv_descriptor_set *set,
2143                                     const VkDescriptorImageInfo * const info,
2144                                     VkDescriptorType type,
2145                                     uint32_t binding,
2146                                     uint32_t element);
2147 
2148 void
2149 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2150                                      struct anv_descriptor_set *set,
2151                                      VkDescriptorType type,
2152                                      struct anv_buffer_view *buffer_view,
2153                                      uint32_t binding,
2154                                      uint32_t element);
2155 
2156 void
2157 anv_descriptor_set_write_buffer(struct anv_device *device,
2158                                 struct anv_descriptor_set *set,
2159                                 struct anv_state_stream *alloc_stream,
2160                                 VkDescriptorType type,
2161                                 struct anv_buffer *buffer,
2162                                 uint32_t binding,
2163                                 uint32_t element,
2164                                 VkDeviceSize offset,
2165                                 VkDeviceSize range);
2166 
2167 void
2168 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
2169                                                 struct anv_descriptor_set *set,
2170                                                 struct anv_acceleration_structure *accel,
2171                                                 uint32_t binding,
2172                                                 uint32_t element);
2173 
2174 void
2175 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2176                                              struct anv_descriptor_set *set,
2177                                              uint32_t binding,
2178                                              const void *data,
2179                                              size_t offset,
2180                                              size_t size);
2181 
2182 void
2183 anv_descriptor_set_write_template(struct anv_device *device,
2184                                   struct anv_descriptor_set *set,
2185                                   struct anv_state_stream *alloc_stream,
2186                                   const struct anv_descriptor_update_template *template,
2187                                   const void *data);
2188 
2189 VkResult
2190 anv_descriptor_set_create(struct anv_device *device,
2191                           struct anv_descriptor_pool *pool,
2192                           struct anv_descriptor_set_layout *layout,
2193                           uint32_t var_desc_count,
2194                           struct anv_descriptor_set **out_set);
2195 
2196 void
2197 anv_descriptor_set_destroy(struct anv_device *device,
2198                            struct anv_descriptor_pool *pool,
2199                            struct anv_descriptor_set *set);
2200 
2201 #define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
2202 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
2203 #define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
2204 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
2205 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
2206 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2207 
2208 struct anv_pipeline_binding {
2209    /** Index in the descriptor set
2210     *
2211     * This is a flattened index; the descriptor set layout is already taken
2212     * into account.
2213     */
2214    uint32_t index;
2215 
2216    /** The descriptor set this surface corresponds to.
2217     *
2218     * The special ANV_DESCRIPTOR_SET_* values above indicate that this
2219     * binding is not a normal descriptor set but something else.
2220     */
2221    uint8_t set;
2222 
2223    union {
2224       /** Plane in the binding index for images */
2225       uint8_t plane;
2226 
2227       /** Input attachment index (relative to the subpass) */
2228       uint8_t input_attachment_index;
2229 
2230       /** Dynamic offset index (for dynamic UBOs and SSBOs) */
2231       uint8_t dynamic_offset_index;
2232    };
2233 
2234    /** For a storage image, whether it requires a lowered surface */
2235    uint8_t lowered_storage_surface;
2236 
2237    /** Pad to 64 bits so that there are no holes and we can safely memcmp
2238     * assuming POD zero-initialization.
2239     */
2240    uint8_t pad;
2241 };
2242 
2243 struct anv_push_range {
2244    /** Index in the descriptor set */
2245    uint32_t index;
2246 
2247    /** Descriptor set index */
2248    uint8_t set;
2249 
2250    /** Dynamic offset index (for dynamic UBOs) */
2251    uint8_t dynamic_offset_index;
2252 
2253    /** Start offset in units of 32B */
2254    uint8_t start;
2255 
2256    /** Range in units of 32B */
2257    uint8_t length;
2258 };
2259 
2260 struct anv_pipeline_layout {
2261    struct vk_object_base base;
2262 
2263    struct {
2264       struct anv_descriptor_set_layout *layout;
2265       uint32_t dynamic_offset_start;
2266    } set[MAX_SETS];
2267 
2268    uint32_t num_sets;
2269 
2270    unsigned char sha1[20];
2271 };
2272 
2273 struct anv_buffer {
2274    struct vk_object_base                        base;
2275 
2276    struct anv_device *                          device;
2277    VkDeviceSize                                 size;
2278 
2279    VkBufferCreateFlags                          create_flags;
2280    VkBufferUsageFlags                           usage;
2281 
2282    /* Set when bound */
2283    struct anv_address                           address;
2284 };
2285 
2286 static inline uint64_t
2287 anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
2288 {
2289    assert(offset <= buffer->size);
2290    if (range == VK_WHOLE_SIZE) {
2291       return buffer->size - offset;
2292    } else {
2293       assert(range + offset >= range);
2294       assert(range + offset <= buffer->size);
2295       return range;
2296    }
2297 }
2298 
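/* Illustrative sketch (not part of the driver): a worked example of the
 * VK_WHOLE_SIZE handling above, assuming a hypothetical buffer with
 * buffer->size == 256.
 *
 *    uint64_t whole = anv_buffer_get_range(buffer, 64, VK_WHOLE_SIZE);
 *    uint64_t part  = anv_buffer_get_range(buffer, 64, 128);
 *
 * "whole" resolves to the remaining 192 bytes, while "part" is validated
 * against the buffer size and returned unchanged as 128.
 */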
2299 enum anv_cmd_dirty_bits {
2300    ANV_CMD_DIRTY_DYNAMIC_VIEWPORT                    = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
2301    ANV_CMD_DIRTY_DYNAMIC_SCISSOR                     = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
2302    ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH                  = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
2303    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS                  = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
2304    ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS             = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
2305    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS                = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
2306    ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK        = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
2307    ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK          = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
2308    ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE           = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
2309    ANV_CMD_DIRTY_PIPELINE                            = 1 << 9,
2310    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 10,
2311    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 11,
2312    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 12,
2313    ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE                = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
2314    ANV_CMD_DIRTY_DYNAMIC_CULL_MODE                   = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
2315    ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE                  = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
2316    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY          = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
2317    ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
2318    ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE           = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
2319    ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE          = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
2320    ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP            = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
2321    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE    = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
2322    ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE         = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
2323    ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP                  = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
2324    ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS            = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
2325    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE           = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
2326    ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE                = 1 << 26, /* VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR */
2327    ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE   = 1 << 27, /* VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT */
2328    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE           = 1 << 28, /* VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT */
2329    ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP                    = 1 << 29, /* VK_DYNAMIC_STATE_LOGIC_OP_EXT */
2330    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE    = 1 << 30, /* VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT */
2331 };
2332 typedef uint32_t anv_cmd_dirty_mask_t;
2333 
2334 #define ANV_CMD_DIRTY_DYNAMIC_ALL                       \
2335    (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |                    \
2336     ANV_CMD_DIRTY_DYNAMIC_SCISSOR |                     \
2337     ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |                  \
2338     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |                  \
2339     ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |             \
2340     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |                \
2341     ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |        \
2342     ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |          \
2343     ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |           \
2344     ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE |                \
2345     ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |                   \
2346     ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |                  \
2347     ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |          \
2348     ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
2349     ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |           \
2350     ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |          \
2351     ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |            \
2352     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |    \
2353     ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |         \
2354     ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP |                  \
2355     ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |            \
2356     ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |           \
2357     ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |                \
2358     ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |   \
2359     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |           \
2360     ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |                    \
2361     ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)
2362 
2363 static inline enum anv_cmd_dirty_bits
2364 anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
2365 {
2366    switch (vk_state) {
2367    case VK_DYNAMIC_STATE_VIEWPORT:
2368    case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
2369       return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
2370    case VK_DYNAMIC_STATE_SCISSOR:
2371    case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
2372       return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
2373    case VK_DYNAMIC_STATE_LINE_WIDTH:
2374       return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2375    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2376       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
2377    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2378       return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
2379    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2380       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
2381    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2382       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2383    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2384       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2385    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2386       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2387    case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
2388       return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
2389    case VK_DYNAMIC_STATE_CULL_MODE_EXT:
2390       return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
2391    case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
2392       return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
2393    case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
2394       return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
2395    case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
2396       return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
2397    case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
2398       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
2399    case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
2400       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
2401    case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
2402       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
2403    case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
2404       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
2405    case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
2406       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
2407    case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
2408       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
2409    case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
2410       return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
2411    case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2412       return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
2413    case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
2414       return ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
2415    case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT:
2416       return ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
2417    case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT:
2418       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE;
2419    case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
2420       return ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
2421    case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT:
2422       return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
2423    default:
2424       assert(!"Unsupported dynamic state");
2425       return 0;
2426    }
2427 }
2428 
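/* Illustrative sketch (not part of the driver): when processing a pipeline's
 * dynamic state, the dirty bits for all requested dynamic states can be
 * accumulated with the helper above.  "dyn_info" stands for a hypothetical
 * VkPipelineDynamicStateCreateInfo pointer.
 *
 *    anv_cmd_dirty_mask_t dynamic_states = 0;
 *    for (uint32_t i = 0; i < dyn_info->dynamicStateCount; i++) {
 *       dynamic_states |=
 *          anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
 *    }
 */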
2429 
2430 enum anv_pipe_bits {
2431    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
2432    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
2433    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
2434    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
2435    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
2436    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
2437    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
2438    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
2439    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
2440    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
2441    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
2442 
2443    /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
2444     * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
2445     * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
2446     */
2447    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
2448    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
2449    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
2450 
2451    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
2452     * a flush has happened but not a CS stall.  The next time we do any sort
2453     * of invalidation we need to insert a CS stall at that time.  Otherwise,
2454     * we would have to CS stall on every flush which could be bad.
2455     */
2456    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
2457 
2458    /* This bit does not exist directly in PIPE_CONTROL. It means that render
2459     * target operations related to transfer commands with VkBuffer as
2460     * destination are ongoing. Some operations like copies on the command
2461     * streamer might need to be aware of this to trigger the appropriate stall
2462     * before they can proceed with the copy.
2463     */
2464    ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),
2465 
2466    /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
2467     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
2468     * done by writing the AUX-TT register.
2469     */
2470    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),
2471 
2472    /* This bit does not exist directly in PIPE_CONTROL. It means that a
2473     * PIPE_CONTROL with a post-sync operation will follow. This is used to
2474     * implement a workaround for Gfx9.
2475     */
2476    ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
2477 };
2478 
2479 #define ANV_PIPE_FLUSH_BITS ( \
2480    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2481    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2482    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2483    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2484    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2485 
2486 #define ANV_PIPE_STALL_BITS ( \
2487    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2488    ANV_PIPE_DEPTH_STALL_BIT | \
2489    ANV_PIPE_CS_STALL_BIT)
2490 
2491 #define ANV_PIPE_INVALIDATE_BITS ( \
2492    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2493    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2494    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2495    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2496    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2497    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2498    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2499 
2500 static inline enum anv_pipe_bits
2501 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
2502                                      VkAccessFlags2KHR flags)
2503 {
2504    enum anv_pipe_bits pipe_bits = 0;
2505 
2506    u_foreach_bit64(b, flags) {
2507       switch ((VkAccessFlags2KHR)BITFIELD64_BIT(b)) {
2508       case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
2509       case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
2510          /* We're transitioning a buffer that was previously used as a write
2511           * destination through the data port. To make its content available
2512           * to future operations, flush the HDC pipeline.
2513           */
2514          pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2515          break;
2516       case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
2517          /* We're transitioning a buffer that was previously used as a render
2518           * target. To make its content available to future operations, flush
2519           * the render target cache.
2520           */
2521          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2522          break;
2523       case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
2524          /* We're transitioning a buffer that was previously used as a depth
2525           * buffer. To make its content available to future operations, flush
2526           * the depth cache.
2527           */
2528          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2529          break;
2530       case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
2531          /* We're transitioning a buffer that was previously used as a
2532           * transfer write destination. Generic write operations include color
2533           * & depth operations as well as buffer operations like :
2534           *     - vkCmdClearColorImage()
2535           *     - vkCmdClearDepthStencilImage()
2536           *     - vkCmdBlitImage()
2537           *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2538           *
2539           * Most of these operations are implemented using Blorp which writes
2540           * through the render target, so flush that cache to make it visible
2541           * to future operations. And for depth related operations we also
2542           * need to flush the depth cache.
2543           */
2544          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2545          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2546          break;
2547       case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
2548          /* We're transitioning a buffer for generic write operations. Flush
2549           * all the caches.
2550           */
2551          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2552          break;
2553       case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
2554          /* We're transitioning a buffer for access by the CPU. Invalidate
2555           * all the caches. Since data and tile caches don't have invalidate,
2556           * we are forced to flush those as well.
2557           */
2558          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2559          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2560          break;
2561       case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2562       case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
2563          /* We're transitioning a buffer written either from the VS stage or
2564           * from the command streamer (see CmdEndTransformFeedbackEXT); we just
2565           * need to stall the CS.
2566           */
2567          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2568          break;
2569       default:
2570          break; /* Nothing to do */
2571       }
2572    }
2573 
2574    return pipe_bits;
2575 }
2576 
2577 static inline enum anv_pipe_bits
2578 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
2579                                           VkAccessFlags2KHR flags)
2580 {
2581    enum anv_pipe_bits pipe_bits = 0;
2582 
2583    u_foreach_bit64(b, flags) {
2584       switch ((VkAccessFlags2KHR)BITFIELD64_BIT(b)) {
2585       case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
2586          /* Indirect draw commands take a buffer as input that we're going to
2587           * read from the command streamer to load some of the HW registers
2588           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2589           * command streamer stall so that all the cache flushes have
2590           * completed before the command streamer loads from memory.
2591           */
2592          pipe_bits |=  ANV_PIPE_CS_STALL_BIT;
2593          /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2594           * through a vertex buffer, so invalidate that cache.
2595           */
2596          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2597          /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
2598           * UBO from the buffer, so we need to invalidate the constant cache.
2599           */
2600          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2601          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2602          /* A tile cache flush is needed for CmdDispatchIndirect since the
2603           * command streamer and vertex fetch aren't L3 coherent.
2604           */
2605          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2606          break;
2607       case VK_ACCESS_2_INDEX_READ_BIT_KHR:
2608       case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
2609          /* We're transitioning a buffer to be used as input for vkCmdDraw*
2610           * commands, so we invalidate the VF cache to make sure there is no
2611           * stale data when we start rendering.
2612           */
2613          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2614          break;
2615       case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
2616          /* We're transitioning a buffer to be used as uniform data. Because
2617           * uniforms are accessed through the data port & sampler, we need to
2618           * invalidate the texture cache (sampler) & constant cache (data
2619           * port) to avoid stale data.
2620           */
2621          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2622          if (device->physical->compiler->indirect_ubos_use_sampler)
2623             pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2624          else
2625             pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2626          break;
2627       case VK_ACCESS_2_SHADER_READ_BIT_KHR:
2628       case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
2629       case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
2630          /* We're transitioning a buffer to be read through the sampler, so
2631           * we invalidate the texture cache; we don't want any stale data.
2632           */
2633          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2634          break;
2635       case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
2636          /* We're transitioning a buffer for generic read; invalidate all the
2637           * caches.
2638           */
2639          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2640          break;
2641       case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
2642          /* Generic write, make sure all previously written things land in
2643           * memory.
2644           */
2645          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2646          break;
2647       case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
2648       case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
2649          /* We're transitioning a buffer for conditional rendering or transform
2650           * feedback. We'll load the content of this buffer into HW registers
2651           * using the command streamer, so we need to stall the command
2652           * streamer to make sure any in-flight flush operations have
2653           * completed.
2654           */
2655          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2656          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2657          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2658          break;
2659       case VK_ACCESS_2_HOST_READ_BIT_KHR:
2660          /* We're transitioning a buffer for read access by the CPU. Flush
2661           * all the caches so the CPU can see the GPU's writes.
2662           */
2663          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2664          break;
2665       default:
2666          break; /* Nothing to do */
2667       }
2668    }
2669 
2670    return pipe_bits;
2671 }
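
/* Illustrative sketch, not the driver's actual barrier code: a pipeline
 * barrier would typically accumulate flush bits from the source access mask
 * and invalidate bits from the destination access mask, roughly:
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       anv_pipe_flush_bits_for_access_flags(device, src_access_mask) |
 *       anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
 *
 * (The flush helper name is assumed here to match the flush counterpart of
 * the invalidate helper defined above.)
 */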
2672 
2673 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
2674    VK_IMAGE_ASPECT_COLOR_BIT | \
2675    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2676    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2677    VK_IMAGE_ASPECT_PLANE_2_BIT)
2678 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2679    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2680    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2681    VK_IMAGE_ASPECT_PLANE_2_BIT)
2682 
2683 struct anv_vertex_binding {
2684    struct anv_buffer *                          buffer;
2685    VkDeviceSize                                 offset;
2686    VkDeviceSize                                 stride;
2687    VkDeviceSize                                 size;
2688 };
2689 
2690 struct anv_xfb_binding {
2691    struct anv_buffer *                          buffer;
2692    VkDeviceSize                                 offset;
2693    VkDeviceSize                                 size;
2694 };
2695 
2696 struct anv_push_constants {
2697    /** Push constant data provided by the client through vkCmdPushConstants */
2698    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2699 
2700    /** Dynamic offsets for dynamic UBOs and SSBOs */
2701    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2702 
2703    /* Robust access pushed registers. */
2704    uint64_t push_reg_mask[MESA_SHADER_STAGES];
2705 
2706    /** Pad out to a multiple of 32 bytes */
2707    uint32_t pad[2];
2708 
2709    /* Base addresses for descriptor sets */
2710    uint64_t desc_sets[MAX_SETS];
2711 
2712    struct {
2713       /** Base workgroup ID
2714        *
2715        * Used for vkCmdDispatchBase.
2716        */
2717       uint32_t base_work_group_id[3];
2718 
2719       /** Subgroup ID
2720        *
2721        * This is never set by software but is implicitly filled out when
2722        * uploading the push constants for compute shaders.
2723        */
2724       uint32_t subgroup_id;
2725    } cs;
2726 };
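
/* Illustrative note: vkCmdPushConstants(cmd, layout, stages, offset, size,
 * pValues) conceptually reduces, for each affected bind point, to
 *
 *    memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
 *
 * with the actual hardware upload deferred until push constants are flushed.
 * pipe_state here stands for the relevant anv_cmd_pipeline_state.
 */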
2727 
2728 struct anv_dynamic_state {
2729    struct {
2730       uint32_t                                  count;
2731       VkViewport                                viewports[MAX_VIEWPORTS];
2732    } viewport;
2733 
2734    struct {
2735       uint32_t                                  count;
2736       VkRect2D                                  scissors[MAX_SCISSORS];
2737    } scissor;
2738 
2739    float                                        line_width;
2740 
2741    struct {
2742       float                                     bias;
2743       float                                     clamp;
2744       float                                     slope;
2745    } depth_bias;
2746 
2747    float                                        blend_constants[4];
2748 
2749    struct {
2750       float                                     min;
2751       float                                     max;
2752    } depth_bounds;
2753 
2754    struct {
2755       uint32_t                                  front;
2756       uint32_t                                  back;
2757    } stencil_compare_mask;
2758 
2759    struct {
2760       uint32_t                                  front;
2761       uint32_t                                  back;
2762    } stencil_write_mask;
2763 
2764    struct {
2765       uint32_t                                  front;
2766       uint32_t                                  back;
2767    } stencil_reference;
2768 
2769    struct {
2770       struct {
2771          VkStencilOp fail_op;
2772          VkStencilOp pass_op;
2773          VkStencilOp depth_fail_op;
2774          VkCompareOp compare_op;
2775       } front;
2776       struct {
2777          VkStencilOp fail_op;
2778          VkStencilOp pass_op;
2779          VkStencilOp depth_fail_op;
2780          VkCompareOp compare_op;
2781       } back;
2782    } stencil_op;
2783 
2784    struct {
2785       uint32_t                                  factor;
2786       uint16_t                                  pattern;
2787    } line_stipple;
2788 
2789    struct {
2790       uint32_t                                  samples;
2791       VkSampleLocationEXT                       locations[MAX_SAMPLE_LOCATIONS];
2792    } sample_locations;
2793 
2794    VkExtent2D                                   fragment_shading_rate;
2795 
2796    VkCullModeFlags                              cull_mode;
2797    VkFrontFace                                  front_face;
2798    VkPrimitiveTopology                          primitive_topology;
2799    bool                                         depth_test_enable;
2800    bool                                         depth_write_enable;
2801    VkCompareOp                                  depth_compare_op;
2802    bool                                         depth_bounds_test_enable;
2803    bool                                         stencil_test_enable;
2804    bool                                         raster_discard;
2805    bool                                         depth_bias_enable;
2806    bool                                         primitive_restart_enable;
2807    VkLogicOp                                    logic_op;
2808    bool                                         dyn_vbo_stride;
2809    bool                                         dyn_vbo_size;
2810 
2811    /* Bitfield, one bit per render target */
2812    uint8_t                                      color_writes;
2813 };
2814 
2815 extern const struct anv_dynamic_state default_dynamic_state;
2816 
2817 uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
2818                                 const struct anv_dynamic_state *src,
2819                                 uint32_t copy_mask);
2820 
2821 struct anv_surface_state {
2822    struct anv_state state;
2823    /** Address of the surface referred to by this state
2824     *
2825     * This address is relative to the start of the BO.
2826     */
2827    struct anv_address address;
2828    /* Address of the aux surface, if any
2829     *
2830     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2831     *
2832     * With the exception of gfx8, the bottom 12 bits of this address' offset
2833     * include extra aux information.
2834     */
2835    struct anv_address aux_address;
2836    /* Address of the clear color, if any
2837     *
2838     * This address is relative to the start of the BO.
2839     */
2840    struct anv_address clear_address;
2841 };
2842 
2843 /**
2844  * Attachment state when recording a renderpass instance.
2845  *
2846  * The clear value is valid only if there exists a pending clear.
2847  */
2848 struct anv_attachment_state {
2849    enum isl_aux_usage                           aux_usage;
2850    struct anv_surface_state                     color;
2851    struct anv_surface_state                     input;
2852 
2853    VkImageLayout                                current_layout;
2854    VkImageLayout                                current_stencil_layout;
2855    VkImageAspectFlags                           pending_clear_aspects;
2856    VkImageAspectFlags                           pending_load_aspects;
2857    bool                                         fast_clear;
2858    VkClearValue                                 clear_value;
2859 
2860    /* When multiview is active, attachments with a renderpass clear
2861     * operation have their respective layers cleared on the first
2862     * subpass that uses them, and only in that subpass. We keep track
2863     * of this using a bitfield to indicate which layers of an attachment
2864     * have not been cleared yet when multiview is active.
2865     */
2866    uint32_t                                     pending_clear_views;
2867    struct anv_image_view *                      image_view;
2868 };
2869 
2870 /** State tracking for vertex buffer flushes
2871  *
2872  * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
2873  * addresses.  If you happen to have two vertex buffers which get placed
2874  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2875  * collisions.  In order to solve this problem, we track vertex address ranges
2876  * which are live in the cache and invalidate the cache if one ever exceeds 32
2877  * bits.
2878  */
2879 struct anv_vb_cache_range {
2880    /* Virtual address at which the live vertex buffer cache range starts for
2881     * this vertex buffer index.
2882     */
2883    uint64_t start;
2884 
2885    /* Virtual address of the byte after where the vertex buffer cache range ends.
2886     * This is exclusive such that end - start is the size of the range.
2887     */
2888    uint64_t end;
2889 };
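
/* A minimal sketch, assuming the tracking scheme described above (the helper
 * name is hypothetical and not part of the driver): after merging a newly
 * bound vertex buffer range into the live range, the VF cache needs
 * invalidating once the merged range no longer fits within a single 4 GiB
 * window, because only the low 32 address bits are used for lookups.
 */
static inline bool
anv_vb_cache_range_would_alias_sketch(const struct anv_vb_cache_range *bound,
                                      uint64_t new_start, uint64_t new_end)
{
   if (new_end <= new_start)
      return false;

   /* Merge the newly bound range into the currently live one; an empty live
    * range (end <= start) contributes nothing.
    */
   uint64_t start = new_start;
   uint64_t end = new_end;
   if (bound->end > bound->start) {
      start = MIN2(start, bound->start);
      end = MAX2(end, bound->end);
   }

   /* Addresses whose high 32 bits differ can collide in the cache. */
   return (start >> 32) != ((end - 1) >> 32);
}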
2890 
2891 /** State tracking for particular pipeline bind point
2892  *
2893  * This struct is the base struct for anv_cmd_graphics_state and
2894  * anv_cmd_compute_state.  These are used to track state which is bound to a
2895  * particular type of pipeline.  Generic state that applies per-stage such as
2896  * binding table offsets and push constants is tracked generically with a
2897  * per-stage array in anv_cmd_state.
2898  */
2899 struct anv_cmd_pipeline_state {
2900    struct anv_descriptor_set *descriptors[MAX_SETS];
2901    struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2902 
2903    struct anv_push_constants push_constants;
2904 
2905    /* Push constant state allocated when flushing push constants. */
2906    struct anv_state          push_constants_state;
2907 };
2908 
2909 /** State tracking for graphics pipeline
2910  *
2911  * This has anv_cmd_pipeline_state as a base struct to track things which get
2912  * bound to a graphics pipeline.  Along with general pipeline bind point state
2913  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2914  * state which is graphics-specific.
2915  */
2916 struct anv_cmd_graphics_state {
2917    struct anv_cmd_pipeline_state base;
2918 
2919    struct anv_graphics_pipeline *pipeline;
2920 
2921    anv_cmd_dirty_mask_t dirty;
2922    uint32_t vb_dirty;
2923 
2924    struct anv_vb_cache_range ib_bound_range;
2925    struct anv_vb_cache_range ib_dirty_range;
2926    struct anv_vb_cache_range vb_bound_ranges[33];
2927    struct anv_vb_cache_range vb_dirty_ranges[33];
2928 
2929    VkShaderStageFlags push_constant_stages;
2930 
2931    struct anv_dynamic_state dynamic;
2932 
2933    uint32_t primitive_topology;
2934 
2935    struct {
2936       struct anv_buffer *index_buffer;
2937       uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2938       uint32_t index_offset;
2939    } gfx7;
2940 };
2941 
2942 enum anv_depth_reg_mode {
2943    ANV_DEPTH_REG_MODE_UNKNOWN = 0,
2944    ANV_DEPTH_REG_MODE_HW_DEFAULT,
2945    ANV_DEPTH_REG_MODE_D16,
2946 };
2947 
2948 /** State tracking for compute pipeline
2949  *
2950  * This has anv_cmd_pipeline_state as a base struct to track things which get
2951  * bound to a compute pipeline.  Along with general pipeline bind point state
2952  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2953  * state which is compute-specific.
2954  */
2955 struct anv_cmd_compute_state {
2956    struct anv_cmd_pipeline_state base;
2957 
2958    struct anv_compute_pipeline *pipeline;
2959 
2960    bool pipeline_dirty;
2961 
2962    struct anv_state push_data;
2963 
2964    struct anv_address num_workgroups;
2965 };
2966 
2967 struct anv_cmd_ray_tracing_state {
2968    struct anv_cmd_pipeline_state base;
2969 
2970    struct anv_ray_tracing_pipeline *pipeline;
2971 
2972    bool pipeline_dirty;
2973 
2974    struct {
2975       struct anv_bo *bo;
2976       struct brw_rt_scratch_layout layout;
2977    } scratch;
2978 };
2979 
2980 /** State required while building cmd buffer */
2981 struct anv_cmd_state {
2982    /* PIPELINE_SELECT.PipelineSelection */
2983    uint32_t                                     current_pipeline;
2984    const struct intel_l3_config *               current_l3_config;
2985    uint32_t                                     last_aux_map_state;
2986 
2987    struct anv_cmd_graphics_state                gfx;
2988    struct anv_cmd_compute_state                 compute;
2989    struct anv_cmd_ray_tracing_state             rt;
2990 
2991    enum anv_pipe_bits                           pending_pipe_bits;
2992    VkShaderStageFlags                           descriptors_dirty;
2993    VkShaderStageFlags                           push_constants_dirty;
2994 
2995    struct anv_framebuffer *                     framebuffer;
2996    struct anv_render_pass *                     pass;
2997    struct anv_subpass *                         subpass;
2998    VkRect2D                                     render_area;
2999    uint32_t                                     restart_index;
3000    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
3001    bool                                         xfb_enabled;
3002    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
3003    struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
3004    struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
3005 
3006    unsigned char                                sampler_sha1s[MESA_SHADER_STAGES][20];
3007    unsigned char                                surface_sha1s[MESA_SHADER_STAGES][20];
3008    unsigned char                                push_sha1s[MESA_SHADER_STAGES][20];
3009 
3010    /**
3011     * Whether or not the gfx8 PMA fix is enabled.  We ensure that, at the top
3012     * of any command buffer, it is disabled by disabling it in EndCommandBuffer
3013     * and before invoking the secondary in ExecuteCommands.
3014     */
3015    bool                                         pma_fix_enabled;
3016 
3017    /**
3018     * Whether or not we know for certain that HiZ is enabled for the current
3019     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
3020     * enabled or not, this will be false.
3021     */
3022    bool                                         hiz_enabled;
3023 
3024    /* We ensure the registers for the gfx12 D16 fix are initialized at the
3025     * first non-NULL depth stencil packet emission of every command buffer.
3026     * For secondary command buffer execution, we transfer the state from the
3027     * last command buffer to the primary (if known).
3028     */
3029    enum anv_depth_reg_mode                      depth_reg_mode;
3030 
3031    bool                                         conditional_render_enabled;
3032 
3033    /**
3034     * Last rendering scale argument provided to
3035     * genX(cmd_buffer_emit_hashing_mode)().
3036     */
3037    unsigned                                     current_hash_scale;
3038 
3039    /**
3040     * Array length is anv_cmd_state::pass::attachment_count. Array content is
3041     * valid only when recording a render pass instance.
3042     */
3043    struct anv_attachment_state *                attachments;
3044 
3045    /**
3046     * Surface states for color render targets.  These are stored in a single
3047     * flat array.  For depth-stencil attachments, the surface state is simply
3048     * left blank.
3049     */
3050    struct anv_state                             attachment_states;
3051 
3052    /**
3053     * A null surface state of the right size to match the framebuffer.  This
3054     * is one of the states in attachment_states.
3055     */
3056    struct anv_state                             null_surface_state;
3057 };
3058 
3059 struct anv_cmd_pool {
3060    struct vk_object_base                        base;
3061    VkAllocationCallbacks                        alloc;
3062    struct list_head                             cmd_buffers;
3063 
3064    VkCommandPoolCreateFlags                     flags;
3065    struct anv_queue_family *                    queue_family;
3066 };
3067 
3068 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
3069 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
3070 
3071 enum anv_cmd_buffer_exec_mode {
3072    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
3073    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
3074    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
3075    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
3076    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
3077    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
3078 };
3079 
3080 struct anv_measure_batch;
3081 
3082 struct anv_cmd_buffer {
3083    struct vk_command_buffer                     vk;
3084 
3085    struct anv_device *                          device;
3086 
3087    struct anv_cmd_pool *                        pool;
3088    struct list_head                             pool_link;
3089 
3090    struct anv_batch                             batch;
3091 
3092    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
3093     * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
3094     * rewrite the end to chain multiple batches together at vkQueueSubmit().
3095     */
3096    void *                                       batch_end;
3097 
3098    /* Fields required for the actual chain of anv_batch_bo's.
3099     *
3100     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
3101     */
3102    struct list_head                             batch_bos;
3103    enum anv_cmd_buffer_exec_mode                exec_mode;
3104 
3105    /* A vector of anv_batch_bo pointers for every batch or surface buffer
3106     * referenced by this command buffer.
3107     *
3108     * Initialized by anv_cmd_buffer_init_batch_bo_chain().
3109     */
3110    struct u_vector                              seen_bbos;
3111 
3112    /* A vector of int32_t's for every block of binding tables.
3113     *
3114     * Initialized by anv_cmd_buffer_init_batch_bo_chain().
3115     */
3116    struct u_vector                              bt_block_states;
3117    struct anv_state                             bt_next;
3118 
3119    struct anv_reloc_list                        surface_relocs;
3120    /** Last seen surface state block pool center bo offset */
3121    uint32_t                                     last_ss_pool_center;
3122 
3123    /* Serial for tracking buffer completion */
3124    uint32_t                                     serial;
3125 
3126    /* Stream objects for storing temporary data */
3127    struct anv_state_stream                      surface_state_stream;
3128    struct anv_state_stream                      dynamic_state_stream;
3129    struct anv_state_stream                      general_state_stream;
3130 
3131    VkCommandBufferUsageFlags                    usage_flags;
3132    VkCommandBufferLevel                         level;
3133 
3134    struct anv_query_pool                       *perf_query_pool;
3135 
3136    struct anv_cmd_state                         state;
3137 
3138    struct anv_address                           return_addr;
3139 
3140    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
3141    uint64_t                                     intel_perf_marker;
3142 
3143    struct anv_measure_batch *measure;
3144 
3145    /**
3146     * KHR_performance_query requires self-modifying command buffers, and this
3147     * array holds the locations of the commands that modify the query begin
3148     * and end instructions storing performance counters. The array length is
3149     * anv_physical_device::n_perf_query_commands.
3150     */
3151    struct mi_address_token                  *self_mod_locations;
3152 
3153    /**
3154     * Index tracking which of the self_mod_locations items have already been
3155     * used.
3156     */
3157    uint32_t                                      perf_reloc_idx;
3158 
3159    /**
3160     * Sum of all the anv_batch_bo sizes allocated for this command buffer.
3161     * Used to increase allocation size for long command buffers.
3162     */
3163    uint32_t                                     total_batch_size;
3164 };
3165 
3166 /* Determine whether we can chain a given cmd_buffer to another one. We need
3167  * softpin and we also need to make sure that we can edit the end of the batch
3168  * to point to the next one, which requires the command buffer to not be used
3169  * simultaneously.
3170  */
3171 static inline bool
3172 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
3173 {
3174    return anv_use_softpin(cmd_buffer->device->physical) &&
3175       !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
3176 }
3177 
3178 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3179 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3180 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3181 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3182 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3183                                   struct anv_cmd_buffer *secondary);
3184 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3185 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3186                                 struct anv_cmd_buffer *cmd_buffer,
3187                                 const VkSemaphore *in_semaphores,
3188                                 const uint64_t *in_wait_values,
3189                                 uint32_t num_in_semaphores,
3190                                 const VkSemaphore *out_semaphores,
3191                                 const uint64_t *out_signal_values,
3192                                 uint32_t num_out_semaphores,
3193                                 VkFence fence,
3194                                 int perf_query_pass);
3195 
3196 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
3197 
3198 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3199                                              const void *data, uint32_t size, uint32_t alignment);
3200 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3201                                               uint32_t *a, uint32_t *b,
3202                                               uint32_t dwords, uint32_t alignment);
3203 
3204 struct anv_address
3205 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3206 struct anv_state
3207 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3208                                    uint32_t entries, uint32_t *state_offset);
3209 struct anv_state
3210 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
3211 struct anv_state
3212 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3213                                    uint32_t size, uint32_t alignment);
3214 
3215 VkResult
3216 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3217 
3218 void gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
3219 void gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
3220                                          bool depth_clamp_enable);
3221 void gfx7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
3222 
3223 void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
3224                                       struct anv_render_pass *pass,
3225                                       struct anv_framebuffer *framebuffer,
3226                                       const VkClearValue *clear_values);
3227 
3228 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3229 
3230 struct anv_state
3231 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
3232 struct anv_state
3233 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3234 
3235 const struct anv_image_view *
3236 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
3237 
3238 VkResult
3239 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3240                                          uint32_t num_entries,
3241                                          uint32_t *state_offset,
3242                                          struct anv_state *bt_state);
3243 
3244 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
3245 
3246 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3247 
3248 enum anv_fence_type {
3249    ANV_FENCE_TYPE_NONE = 0,
3250    ANV_FENCE_TYPE_BO,
3251    ANV_FENCE_TYPE_WSI_BO,
3252    ANV_FENCE_TYPE_SYNCOBJ,
3253    ANV_FENCE_TYPE_WSI,
3254 };
3255 
3256 enum anv_bo_fence_state {
3257    /** Indicates that this is a new (or newly reset) fence */
3258    ANV_BO_FENCE_STATE_RESET,
3259 
3260    /** Indicates that this fence has been submitted to the GPU but is still
3261     * (as far as we know) in use by the GPU.
3262     */
3263    ANV_BO_FENCE_STATE_SUBMITTED,
3264 
3265    ANV_BO_FENCE_STATE_SIGNALED,
3266 };
3267 
3268 struct anv_fence_impl {
3269    enum anv_fence_type type;
3270 
3271    union {
3272       /** Fence implementation for BO fences
3273        *
3274        * These fences use a BO and a set of CPU-tracked state flags.  The BO
3275        * is added to the object list of the last execbuf call in a QueueSubmit
3276        * and is marked EXEC_WRITE.  The state flags track when the BO has been
3277        * submitted to the kernel.  We need to do this because Vulkan lets you
3278        * wait on a fence that has not yet been submitted and I915_GEM_BUSY
3279        * will say it's idle in this case.
3280        */
3281       struct {
3282          struct anv_bo *bo;
3283          enum anv_bo_fence_state state;
3284       } bo;
3285 
3286       /** DRM syncobj handle for syncobj-based fences */
3287       uint32_t syncobj;
3288 
3289       /** WSI fence */
3290       struct wsi_fence *fence_wsi;
3291    };
3292 };
3293 
3294 struct anv_fence {
3295    struct vk_object_base base;
3296 
3297    /* Permanent fence state.  Every fence has some form of permanent state
3298     * (type != ANV_FENCE_TYPE_NONE).  This may be a BO to fence on (for
3299     * cross-process fences) or it could just be a dummy for use internally.
3300     */
3301    struct anv_fence_impl permanent;
3302 
3303    /* Temporary fence state.  A fence *may* have temporary state.  That state
3304     * is added to the fence by an import operation and is reset back to
3305     * ANV_FENCE_TYPE_NONE when the fence is reset.  A fence with temporary
3306     * state cannot be signaled because the fence must already be signaled
3307     * before the temporary state can be exported from the fence in the other
3308     * process and imported here.
3309     */
3310    struct anv_fence_impl temporary;
3311 };
3312 
3313 void anv_fence_reset_temporary(struct anv_device *device,
3314                                struct anv_fence *fence);
3315 
3316 struct anv_event {
3317    struct vk_object_base                        base;
3318    uint64_t                                     semaphore;
3319    struct anv_state                             state;
3320 };
3321 
3322 enum anv_semaphore_type {
3323    ANV_SEMAPHORE_TYPE_NONE = 0,
3324    ANV_SEMAPHORE_TYPE_DUMMY,
3325    ANV_SEMAPHORE_TYPE_WSI_BO,
3326    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
3327    ANV_SEMAPHORE_TYPE_TIMELINE,
3328    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
3329 };
3330 
3331 struct anv_timeline_point {
3332    struct list_head link;
3333 
3334    uint64_t serial;
3335 
3336    /* Number of waiters on this point; when > 0 the point should not be
3337     * garbage collected.
3338     */
3339    int waiting;
3340 
3341    /* BO used for synchronization. */
3342    struct anv_bo *bo;
3343 };
3344 
3345 struct anv_timeline {
3346    pthread_mutex_t mutex;
3347    pthread_cond_t  cond;
3348 
3349    uint64_t highest_past;
3350    uint64_t highest_pending;
3351 
3352    struct list_head points;
3353    struct list_head free_points;
3354 };
3355 
3356 struct anv_semaphore_impl {
3357    enum anv_semaphore_type type;
3358 
3359    union {
3360       /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
3361        * or type == ANV_SEMAPHORE_TYPE_WSI_BO.  This BO will be added to the
3362        * object list on any execbuf2 calls for which this semaphore is used as
3363        * a wait or signal fence.  When used as a signal fence or when type ==
3364        * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
3365        */
3366       struct anv_bo *bo;
3367 
3368       /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
3369        * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
3370        * import so we don't need to bother with a userspace cache.
3371        */
3372       uint32_t syncobj;
3373 
3374       /* Non-shareable timeline semaphore
3375        *
3376        * Used when the kernel doesn't support timeline semaphores.
3377        */
3378       struct anv_timeline timeline;
3379    };
3380 };
3381 
3382 struct anv_semaphore {
3383    struct vk_object_base base;
3384 
3385    /* Permanent semaphore state.  Every semaphore has some form of permanent
3386     * state (type != ANV_SEMAPHORE_TYPE_NONE).  This may be a BO to fence on
3387     * (for cross-process semaphores) or it could just be a dummy for use
3388     * internally.
3389     */
3390    struct anv_semaphore_impl permanent;
3391 
3392    /* Temporary semaphore state.  A semaphore *may* have temporary state.
3393     * That state is added to the semaphore by an import operation and is reset
3394     * back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on.  A
3395     * semaphore with temporary state cannot be signaled because the semaphore
3396     * must already be signaled before the temporary state can be exported from
3397     * the semaphore in the other process and imported here.
3398     */
3399    struct anv_semaphore_impl temporary;
3400 };
3401 
3402 void anv_semaphore_reset_temporary(struct anv_device *device,
3403                                    struct anv_semaphore *semaphore);
3404 
3405 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
3406 
3407 #define anv_foreach_stage(stage, stage_bits)                         \
3408    for (gl_shader_stage stage,                                       \
3409         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
3410         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
3411         __tmp &= ~(1 << (stage)))
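
/* Illustrative usage of anv_foreach_stage (the dirty mask and helper below
 * are hypothetical):
 *
 *    VkShaderStageFlags dirty = VK_SHADER_STAGE_VERTEX_BIT |
 *                               VK_SHADER_STAGE_FRAGMENT_BIT;
 *    anv_foreach_stage(s, dirty) {
 *       flush_stage_state(cmd_buffer, s);
 *    }
 *
 * The loop visits each set bit of the mask in increasing stage order and
 * yields the corresponding gl_shader_stage value in s.
 */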
3412 
3413 struct anv_pipeline_bind_map {
3414    unsigned char                                surface_sha1[20];
3415    unsigned char                                sampler_sha1[20];
3416    unsigned char                                push_sha1[20];
3417 
3418    uint32_t surface_count;
3419    uint32_t sampler_count;
3420 
3421    struct anv_pipeline_binding *                surface_to_descriptor;
3422    struct anv_pipeline_binding *                sampler_to_descriptor;
3423 
3424    struct anv_push_range                        push_ranges[4];
3425 };
3426 
3427 struct anv_shader_bin_key {
3428    uint32_t size;
3429    uint8_t data[0];
3430 };
3431 
3432 struct anv_shader_bin {
3433    uint32_t ref_cnt;
3434 
3435    gl_shader_stage stage;
3436 
3437    const struct anv_shader_bin_key *key;
3438 
3439    struct anv_state kernel;
3440    uint32_t kernel_size;
3441 
3442    const struct brw_stage_prog_data *prog_data;
3443    uint32_t prog_data_size;
3444 
3445    struct brw_compile_stats stats[3];
3446    uint32_t num_stats;
3447 
3448    struct nir_xfb_info *xfb_info;
3449 
3450    struct anv_pipeline_bind_map bind_map;
3451 };
3452 
3453 struct anv_shader_bin *
3454 anv_shader_bin_create(struct anv_device *device,
3455                       gl_shader_stage stage,
3456                       const void *key, uint32_t key_size,
3457                       const void *kernel, uint32_t kernel_size,
3458                       const struct brw_stage_prog_data *prog_data,
3459                       uint32_t prog_data_size,
3460                       const struct brw_compile_stats *stats, uint32_t num_stats,
3461                       const struct nir_xfb_info *xfb_info,
3462                       const struct anv_pipeline_bind_map *bind_map);
3463 
3464 void
3465 anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
3466 
3467 static inline void
3468 anv_shader_bin_ref(struct anv_shader_bin *shader)
3469 {
3470    assert(shader && shader->ref_cnt >= 1);
3471    p_atomic_inc(&shader->ref_cnt);
3472 }
3473 
3474 static inline void
3475 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3476 {
3477    assert(shader && shader->ref_cnt >= 1);
3478    if (p_atomic_dec_zero(&shader->ref_cnt))
3479       anv_shader_bin_destroy(device, shader);
3480 }
3481 
3482 #define anv_shader_bin_get_bsr(bin, local_arg_offset) ({             \
3483    assert((local_arg_offset) % 8 == 0);                              \
3484    const struct brw_bs_prog_data *prog_data =                        \
3485       brw_bs_prog_data_const(bin->prog_data);                        \
3486    assert(prog_data->simd_size == 8 || prog_data->simd_size == 16);  \
3487                                                                      \
3488    (struct GFX_BINDLESS_SHADER_RECORD) {                             \
3489       .OffsetToLocalArguments = (local_arg_offset) / 8,              \
3490       .BindlessShaderDispatchMode = prog_data->simd_size / 16,       \
3491       .KernelStartPointer = bin->kernel.offset,                      \
3492    };                                                                \
3493 })
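
/* Illustrative reading of the macro above: with simd_size == 8 the record's
 * BindlessShaderDispatchMode becomes 0 (presumably SIMD8 dispatch) and with
 * simd_size == 16 it becomes 1 (presumably SIMD16); local_arg_offset is
 * encoded in 8-byte units, which is why it must be 8-byte aligned.
 */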
3494 
3495 struct anv_pipeline_executable {
3496    gl_shader_stage stage;
3497 
3498    struct brw_compile_stats stats;
3499 
3500    char *nir;
3501    char *disasm;
3502 };
3503 
3504 enum anv_pipeline_type {
3505    ANV_PIPELINE_GRAPHICS,
3506    ANV_PIPELINE_COMPUTE,
3507    ANV_PIPELINE_RAY_TRACING,
3508 };
3509 
3510 struct anv_pipeline {
3511    struct vk_object_base                        base;
3512 
3513    struct anv_device *                          device;
3514 
3515    struct anv_batch                             batch;
3516    struct anv_reloc_list                        batch_relocs;
3517 
3518    void *                                       mem_ctx;
3519 
3520    enum anv_pipeline_type                       type;
3521    VkPipelineCreateFlags                        flags;
3522 
3523    struct util_dynarray                         executables;
3524 
3525    const struct intel_l3_config *               l3_config;
3526 };
3527 
3528 struct anv_graphics_pipeline {
3529    struct anv_pipeline                          base;
3530 
3531    uint32_t                                     batch_data[512];
3532 
3533    /* States that are part of batch_data and should not be emitted
3534     * dynamically.
3535     */
3536    anv_cmd_dirty_mask_t                         static_state_mask;
3537 
3538    /* States that need to be reemitted in cmd_buffer_flush_dynamic_state().
3539     * This might cover more than the dynamic states specified at pipeline
3540     * creation.
3541     */
3542    anv_cmd_dirty_mask_t                         dynamic_state_mask;
3543 
3544    struct anv_dynamic_state                     dynamic_state;
3545 
3546    /* States declared dynamic at pipeline creation. */
3547    anv_cmd_dirty_mask_t                         dynamic_states;
3548 
3549    uint32_t                                     topology;
3550 
3551    /* These fields are required with dynamic primitive topology;
3552     * rasterization_samples is used only with gen < 8.
3553     */
3554    VkLineRasterizationModeEXT                   line_mode;
3555    VkPolygonMode                                polygon_mode;
3556    uint32_t                                     rasterization_samples;
3557 
3558    struct anv_subpass *                         subpass;
3559 
3560    struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
3561 
3562    VkShaderStageFlags                           active_stages;
3563 
3564    bool                                         writes_depth;
3565    bool                                         depth_test_enable;
3566    bool                                         writes_stencil;
3567    bool                                         stencil_test_enable;
3568    bool                                         depth_clamp_enable;
3569    bool                                         depth_clip_enable;
3570    bool                                         sample_shading_enable;
3571    bool                                         kill_pixel;
3572    bool                                         depth_bounds_test_enable;
3573    bool                                         force_fragment_thread_dispatch;
3574 
3575    /* When primitive replication is used, subpass->view_mask will describe what
3576     * views to replicate.
3577     */
3578    bool                                         use_primitive_replication;
3579 
3580    struct anv_state                             blend_state;
3581 
3582    struct anv_state                             cps_state;
3583 
3584    uint32_t                                     vb_used;
3585    struct anv_pipeline_vertex_binding {
3586       uint32_t                                  stride;
3587       bool                                      instanced;
3588       uint32_t                                  instance_divisor;
3589    } vb[MAX_VBS];
3590 
3591    struct {
3592       uint32_t                                  sf[7];
3593       uint32_t                                  depth_stencil_state[3];
3594       uint32_t                                  clip[4];
3595       uint32_t                                  xfb_bo_pitch[4];
3596       uint32_t                                  wm[3];
3597       uint32_t                                  blend_state[MAX_RTS * 2];
3598       uint32_t                                  streamout_state[3];
3599    } gfx7;
3600 
3601    struct {
3602       uint32_t                                  sf[4];
3603       uint32_t                                  raster[5];
3604       uint32_t                                  wm_depth_stencil[3];
3605       uint32_t                                  wm[2];
3606       uint32_t                                  ps_blend[2];
3607       uint32_t                                  blend_state[1 + MAX_RTS * 2];
3608       uint32_t                                  streamout_state[5];
3609    } gfx8;
3610 
3611    struct {
3612       uint32_t                                  wm_depth_stencil[4];
3613    } gfx9;
3614 };
3615 
3616 struct anv_compute_pipeline {
3617    struct anv_pipeline                          base;
3618 
3619    struct anv_shader_bin *                      cs;
3620    uint32_t                                     batch_data[9];
3621    uint32_t                                     interface_descriptor_data[8];
3622 };
3623 
3624 struct anv_rt_shader_group {
3625    VkRayTracingShaderGroupTypeKHR type;
3626 
3627    struct anv_shader_bin *general;
3628    struct anv_shader_bin *closest_hit;
3629    struct anv_shader_bin *any_hit;
3630    struct anv_shader_bin *intersection;
3631 
3632    /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
3633    uint32_t handle[8];
3634 };
3635 
3636 struct anv_ray_tracing_pipeline {
3637    struct anv_pipeline                          base;
3638 
3639    /* All shaders in the pipeline */
3640    struct util_dynarray                         shaders;
3641 
3642    uint32_t                                     group_count;
3643    struct anv_rt_shader_group *                 groups;
3644 
3645    /* If non-zero, this is the default computed stack size as per the stack
3646     * size computation in the Vulkan spec.  If zero, that indicates that the
3647     * client has requested a dynamic stack size.
3648     */
3649    uint32_t                                     stack_size;
3650 };
3651 
3652 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
3653    static inline struct anv_##pipe_type##_pipeline *                 \
3654    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
3655    {                                                                 \
3656       assert(pipeline->type == pipe_enum);                           \
3657       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
3658    }
3659 
3660 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
3661 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
3662 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
3663 
3664 static inline bool
3665 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3666                        gl_shader_stage stage)
3667 {
3668    return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3669 }
3670 
3671 static inline bool
3672 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
3673 {
3674    return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
3675 }
3676 
3677 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
3678 static inline const struct brw_##prefix##_prog_data *                   \
3679 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
3680 {                                                                       \
3681    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
3682       return (const struct brw_##prefix##_prog_data *)                  \
3683              pipeline->shaders[stage]->prog_data;                       \
3684    } else {                                                             \
3685       return NULL;                                                      \
3686    }                                                                    \
3687 }
3688 
3689 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
3690 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
3691 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
3692 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
3693 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3694 
3695 static inline const struct brw_cs_prog_data *
3696 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3697 {
3698    assert(pipeline->cs);
3699    return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3700 }
3701 
3702 static inline const struct brw_vue_prog_data *
3703 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3704 {
3705    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3706       return &get_gs_prog_data(pipeline)->base;
3707    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3708       return &get_tes_prog_data(pipeline)->base;
3709    else
3710       return &get_vs_prog_data(pipeline)->base;
3711 }
3712 
3713 VkResult
3714 anv_device_init_rt_shaders(struct anv_device *device);
3715 
3716 void
3717 anv_device_finish_rt_shaders(struct anv_device *device);
3718 
3719 VkResult
3720 anv_pipeline_init(struct anv_pipeline *pipeline,
3721                   struct anv_device *device,
3722                   enum anv_pipeline_type type,
3723                   VkPipelineCreateFlags flags,
3724                   const VkAllocationCallbacks *pAllocator);
3725 
3726 void
3727 anv_pipeline_finish(struct anv_pipeline *pipeline,
3728                     struct anv_device *device,
3729                     const VkAllocationCallbacks *pAllocator);
3730 
3731 VkResult
3732 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
3733                            struct anv_pipeline_cache *cache,
3734                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
3735                            const VkAllocationCallbacks *alloc);
3736 
3737 VkResult
3738 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
3739                         struct anv_pipeline_cache *cache,
3740                         const VkComputePipelineCreateInfo *info,
3741                         const struct vk_shader_module *module,
3742                         const char *entrypoint,
3743                         const VkSpecializationInfo *spec_info);
3744 
3745 VkResult
3746 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3747                               struct anv_device *device,
3748                               struct anv_pipeline_cache *cache,
3749                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3750                               const VkAllocationCallbacks *alloc);
3751 
3752 struct anv_format_plane {
3753    enum isl_format isl_format:16;
3754    struct isl_swizzle swizzle;
3755 
3756    /* Whether this plane contains chroma channels */
3757    bool has_chroma;
3758 
3759    /* For downscaling of YUV planes */
3760    uint8_t denominator_scales[2];
3761 
3762    /* How to map sampled ycbcr planes to a single 4 component element. */
3763    struct isl_swizzle ycbcr_swizzle;
3764 
3765    /* What aspect is associated with this plane */
3766    VkImageAspectFlags aspect;
3767 };
3768 
3769 
3770 struct anv_format {
3771    struct anv_format_plane planes[3];
3772    VkFormat vk_format;
3773    uint8_t n_planes;
3774    bool can_ycbcr;
3775 };
3776 
3777 static inline void
3778 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
3779 {
3780    if (util_bitcount(aspects) == 1) {
3781       assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
3782                         VK_IMAGE_ASPECT_DEPTH_BIT |
3783                         VK_IMAGE_ASPECT_STENCIL_BIT |
3784                         VK_IMAGE_ASPECT_PLANE_0_BIT |
3785                         VK_IMAGE_ASPECT_PLANE_1_BIT |
3786                         VK_IMAGE_ASPECT_PLANE_2_BIT));
3787    } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
3788       assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
3789              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3790                          VK_IMAGE_ASPECT_PLANE_1_BIT) ||
3791              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3792                          VK_IMAGE_ASPECT_PLANE_1_BIT |
3793                          VK_IMAGE_ASPECT_PLANE_2_BIT));
3794    } else {
3795       assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
3796                          VK_IMAGE_ASPECT_STENCIL_BIT));
3797    }
3798 }
3799 
3800 /**
3801  * Return the aspect's plane relative to all_aspects.  For an image, for
3802  * instance, all_aspects would be the set of aspects in the image.  For
3803  * an image view, all_aspects would be the subset of aspects represented
3804  * by that particular view.
3805  */
3806 static inline uint32_t
3807 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
3808                     VkImageAspectFlagBits aspect)
3809 {
3810    anv_assert_valid_aspect_set(all_aspects);
3811    assert(util_bitcount(aspect) == 1);
3812    assert(!(aspect & ~all_aspects));
3813 
3814    /* Because we always put image and view planes in aspect-bit-order, the
3815     * plane index is the number of bits in all_aspects before aspect.
3816     */
3817    return util_bitcount(all_aspects & (aspect - 1));
3818 }
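
/* Worked example (illustrative): for a two-plane image,
 * all_aspects = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT.
 * Querying VK_IMAGE_ASPECT_PLANE_1_BIT counts the bits of all_aspects below
 * it (just PLANE_0), so the function returns plane index 1.
 */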
3819 
3820 #define anv_foreach_image_aspect_bit(b, image, aspects) \
3821    u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
3822 
3823 const struct anv_format *
3824 anv_get_format(VkFormat format);
3825 
3826 static inline uint32_t
3827 anv_get_format_planes(VkFormat vk_format)
3828 {
3829    const struct anv_format *format = anv_get_format(vk_format);
3830 
3831    return format != NULL ? format->n_planes : 0;
3832 }
3833 
3834 struct anv_format_plane
3835 anv_get_format_plane(const struct intel_device_info *devinfo,
3836                      VkFormat vk_format, uint32_t plane,
3837                      VkImageTiling tiling);
3838 
3839 struct anv_format_plane
3840 anv_get_format_aspect(const struct intel_device_info *devinfo,
3841                       VkFormat vk_format,
3842                       VkImageAspectFlagBits aspect, VkImageTiling tiling);
3843 
3844 static inline enum isl_format
3845 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
3846                    VkImageAspectFlags aspect, VkImageTiling tiling)
3847 {
3848    return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
3849 }
3850 
3851 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
3852                                   VkImageCreateFlags create_flags,
3853                                   VkFormat vk_format,
3854                                   VkImageTiling vk_tiling,
3855                                   const VkImageFormatListCreateInfoKHR *fmt_list);
3856 
3857 extern VkFormat
3858 vk_format_from_android(unsigned android_format, unsigned android_usage);
3859 
3860 static inline struct isl_swizzle
3861 anv_swizzle_for_render(struct isl_swizzle swizzle)
3862 {
3863    /* Sometimes the swizzle will have alpha map to one.  We do this to fake
3864     * RGB as RGBA for texturing
3865     */
3866    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3867           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3868 
3869    /* But it doesn't matter what we render to that channel */
3870    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3871 
3872    return swizzle;
3873 }
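
/* Illustrative example: an RGB format exposed as RGBA for sampling might use
 * swizzle { RED, GREEN, BLUE, ONE }; this helper turns it into
 * { RED, GREEN, BLUE, ALPHA } for rendering, since whatever lands in the
 * nonexistent alpha channel is ignored anyway.
 */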
3874 
3875 void
3876 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3877 
3878 /**
3879  * Describes how each part of anv_image will be bound to memory.
3880  */
3881 struct anv_image_memory_range {
3882    /**
3883     * Disjoint bindings into which each portion of the image will be bound.
3884     *
3885     * Binding images to memory can be complicated and involve binding different
3886     * portions of the image to different memory objects or regions.  For most
3887     * images, everything lives in the MAIN binding and gets bound by
3888     * vkBindImageMemory.  For disjoint multi-planar images, each plane has
3889     * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
3890     * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which are
3891     * implicit or driver-managed and live in special-case bindings.
3892     */
3893    enum anv_image_memory_binding {
3894       /**
3895        * Used if and only if image is not multi-planar disjoint. Bound by
3896        * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
3897        */
3898       ANV_IMAGE_MEMORY_BINDING_MAIN,
3899 
3900       /**
3901        * Used if and only if image is multi-planar disjoint.  Bound by
3902        * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
3903        */
3904       ANV_IMAGE_MEMORY_BINDING_PLANE_0,
3905       ANV_IMAGE_MEMORY_BINDING_PLANE_1,
3906       ANV_IMAGE_MEMORY_BINDING_PLANE_2,
3907 
3908       /**
3909        * Driver-private bo. In special cases we may store the aux surface and/or
3910        * aux state in this binding.
3911        */
3912       ANV_IMAGE_MEMORY_BINDING_PRIVATE,
3913 
3914       /** Sentinel */
3915       ANV_IMAGE_MEMORY_BINDING_END,
3916    } binding;
3917 
3918    /**
3919     * Offset is relative to the start of the binding created by
3920     * vkBindImageMemory, not to the start of the bo.
3921     */
3922    uint64_t offset;
3923 
3924    uint64_t size;
3925    uint32_t alignment;
3926 };
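
/* Illustrative example of the binding model above: a non-disjoint image puts
 * all of its surfaces in ANV_IMAGE_MEMORY_BINDING_MAIN, while a two-plane
 * disjoint image would typically place plane 0's surfaces in
 * ANV_IMAGE_MEMORY_BINDING_PLANE_0 and plane 1's in
 * ANV_IMAGE_MEMORY_BINDING_PLANE_1; driver-managed data such as aux state may
 * live in ANV_IMAGE_MEMORY_BINDING_PRIVATE.
 */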
3927 
3928 /**
3929  * Subsurface of an anv_image.
3930  */
3931 struct anv_surface {
3932    struct isl_surf isl;
3933    struct anv_image_memory_range memory_range;
3934 };
3935 
3936 static inline bool MUST_CHECK
3937 anv_surface_is_valid(const struct anv_surface *surface)
3938 {
3939    return surface->isl.size_B > 0 && surface->memory_range.size > 0;
3940 }
3941 
3942 struct anv_image {
3943    struct vk_image vk;
3944 
3945    uint32_t n_planes;
3946 
3947    /**
3948     * Image has multi-planar format and was created with
3949     * VK_IMAGE_CREATE_DISJOINT_BIT.
3950     */
3951    bool disjoint;
3952 
3953    /**
3954     * Image was imported from a struct AHardwareBuffer.  We have to delay
3955     * final image creation until bind time.
3956     */
3957    bool from_ahb;
3958 
3959    /**
3960     * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
3961     * must be released when the image is destroyed.
3962     */
3963    bool from_gralloc;
3964 
3965    /**
3966     * The memory bindings created by vkCreateImage and vkBindImageMemory.
3967     *
3968     * For details on the image's memory layout, see check_memory_bindings().
3969     *
3970     * vkCreateImage constructs the `memory_range` for each
3971     * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
3972     * and only if `memory_range::size > 0`.
3973     *
3974     * vkBindImageMemory binds each valid `memory_range` to an `address`.
3975     * Usually, the app will provide the address via the parameters of
3976     * vkBindImageMemory.  However, special-case bindings may be bound to
3977     * driver-private memory.
3978     */
3979    struct anv_image_binding {
3980       struct anv_image_memory_range memory_range;
3981       struct anv_address address;
3982    } bindings[ANV_IMAGE_MEMORY_BINDING_END];
3983 
3984    /**
3985     * Image subsurfaces
3986     *
3987     * anv_image::planes[x].surface is valid if and only if anv_image::aspects
3988     * contains the corresponding aspect.  Refer to anv_image_aspect_to_plane()
3989     * to find the plane number associated with a given aspect.
3990     *
3991     * The hardware requires that the depth buffer and stencil buffer be
3992     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
3993     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
3994     * allocate the depth and stencil buffers as separate surfaces in the same
3995     * bo.
3996     */
3997    struct anv_image_plane {
3998       struct anv_surface primary_surface;
3999 
4000       /**
4001        * A surface which shadows the main surface and may have different
4002        * tiling. This is used for sampling with a tiling that isn't supported
4003        * for other operations.
4004        */
4005       struct anv_surface shadow_surface;
4006 
4007       /**
4008        * The base aux usage for this image.  For color images, this can be
4009        * either CCS_E or CCS_D depending on whether or not we can reliably
4010        * leave CCS on all the time.
4011        */
4012       enum isl_aux_usage aux_usage;
4013 
4014       struct anv_surface aux_surface;
4015 
4016       /** Location of the fast clear state.  */
4017       struct anv_image_memory_range fast_clear_memory_range;
4018    } planes[3];
4019 };
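/* A minimal illustrative sketch, not part of the driver: the helper name below
 * is hypothetical and only demonstrates the binding invariant documented in the
 * bindings[] comment above. After vkBindImageMemory, every binding whose
 * memory_range has a non-zero size is expected to carry a usable address
 * (either app-provided or driver-private). Assumes the anv_address_is_null()
 * helper defined earlier in this header.
 */
static inline bool
anv_image_sketch_is_fully_bound(const struct anv_image *image)
{
   for (int i = 0; i < ANV_IMAGE_MEMORY_BINDING_END; i++) {
      const struct anv_image_binding *b = &image->bindings[i];
      /* A binding is valid iff vkCreateImage gave it a non-empty range. */
      if (b->memory_range.size > 0 && anv_address_is_null(b->address))
         return false;
   }
   return true;
}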
4020 
4021 /* The ordering of this enum is important */
4022 enum anv_fast_clear_type {
4023    /** Image does not have/support any fast-clear blocks */
4024    ANV_FAST_CLEAR_NONE = 0,
4025    /** Image has/supports fast-clear but only to the default value */
4026    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
4027    /** Image has/supports fast-clear with an arbitrary fast-clear value */
4028    ANV_FAST_CLEAR_ANY = 2,
4029 };
4030 
4031 /**
4032  * Return the aspect's _format_ plane, not its _memory_ plane (using the
4033  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
4034  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
4035  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
4036  */
4037 static inline uint32_t
4038 anv_image_aspect_to_plane(const struct anv_image *image,
4039                           VkImageAspectFlagBits aspect)
4040 {
4041    return anv_aspect_to_plane(image->vk.aspects, aspect);
4042 }
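/* Usage sketch (illustrative, assuming the standard anv plane ordering): for a
 * combined depth/stencil image, VK_IMAGE_ASPECT_DEPTH_BIT maps to plane 0 and
 * VK_IMAGE_ASPECT_STENCIL_BIT to plane 1; for a disjoint multi-planar image,
 * VK_IMAGE_ASPECT_PLANE_n_BIT maps to format plane n.
 *
 *    uint32_t p = anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_PLANE_1_BIT);
 *    const struct anv_surface *surf = &image->planes[p].primary_surface;
 */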
4043 
4044 /* Returns the number of auxiliary buffer levels attached to an image. */
4045 static inline uint8_t
4046 anv_image_aux_levels(const struct anv_image * const image,
4047                      VkImageAspectFlagBits aspect)
4048 {
4049    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4050    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
4051       return 0;
4052 
4053    return image->vk.mip_levels;
4054 }
4055 
4056 /* Returns the number of auxiliary buffer layers attached to an image. */
4057 static inline uint32_t
4058 anv_image_aux_layers(const struct anv_image * const image,
4059                      VkImageAspectFlagBits aspect,
4060                      const uint8_t miplevel)
4061 {
4062    assert(image);
4063 
4064    /* The miplevel must exist in the main buffer. */
4065    assert(miplevel < image->vk.mip_levels);
4066 
4067    if (miplevel >= anv_image_aux_levels(image, aspect)) {
4068       /* There are no layers with auxiliary data because the miplevel has no
4069        * auxiliary data.
4070        */
4071       return 0;
4072    }
4073 
4074    return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
4075 }
4076 
4077 static inline struct anv_address MUST_CHECK
4078 anv_image_address(const struct anv_image *image,
4079                   const struct anv_image_memory_range *mem_range)
4080 {
4081    const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
4082    assert(binding->memory_range.offset == 0);
4083 
4084    if (mem_range->size == 0)
4085       return ANV_NULL_ADDRESS;
4086 
4087    return anv_address_add(binding->address, mem_range->offset);
4088 }
4089 
4090 static inline struct anv_address
4091 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
4092                                const struct anv_image *image,
4093                                VkImageAspectFlagBits aspect)
4094 {
4095    assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
4096                                VK_IMAGE_ASPECT_DEPTH_BIT));
4097 
4098    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4099    const struct anv_image_memory_range *mem_range =
4100       &image->planes[plane].fast_clear_memory_range;
4101 
4102    return anv_image_address(image, mem_range);
4103 }
4104 
4105 static inline struct anv_address
4106 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
4107                                    const struct anv_image *image,
4108                                    VkImageAspectFlagBits aspect)
4109 {
4110    struct anv_address addr =
4111       anv_image_get_clear_color_addr(device, image, aspect);
4112 
4113    const unsigned clear_color_state_size = device->info.ver >= 10 ?
4114       device->isl_dev.ss.clear_color_state_size :
4115       device->isl_dev.ss.clear_value_size;
4116    return anv_address_add(addr, clear_color_state_size);
4117 }
4118 
4119 static inline struct anv_address
4120 anv_image_get_compression_state_addr(const struct anv_device *device,
4121                                      const struct anv_image *image,
4122                                      VkImageAspectFlagBits aspect,
4123                                      uint32_t level, uint32_t array_layer)
4124 {
4125    assert(level < anv_image_aux_levels(image, aspect));
4126    assert(array_layer < anv_image_aux_layers(image, aspect, level));
4127    UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4128    assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
4129 
4130    /* Relative to start of the plane's fast clear memory range */
4131    uint32_t offset;
4132 
4133    offset = 4; /* Go past the fast clear type */
4134 
4135    if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
4136       for (uint32_t l = 0; l < level; l++)
4137          offset += anv_minify(image->vk.extent.depth, l) * 4;
4138    } else {
4139       offset += level * image->vk.array_layers * 4;
4140    }
4141 
4142    offset += array_layer * 4;
4143 
4144    assert(offset < image->planes[plane].fast_clear_memory_range.size);
4145 
4146    return anv_address_add(
4147       anv_image_get_fast_clear_type_addr(device, image, aspect),
4148       offset);
4149 }
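/* Illustrative sketch only (the helper name is hypothetical; the real size is
 * decided at image creation time): the addressing above implies that, after
 * the clear color state and the 4-byte fast-clear type, the plane's fast
 * clear memory range holds one 4-byte compression-state dword per array layer
 * (or per 3D slice) of every level. The sketch below sums the bytes needed
 * for the fast-clear-type plus compression-state portion of that layout.
 */
static inline uint32_t
anv_sketch_compression_state_bytes(const struct anv_image *image)
{
   uint32_t bytes = 4; /* fast-clear type dword */
   for (uint32_t l = 0; l < image->vk.mip_levels; l++) {
      const uint32_t layers = image->vk.image_type == VK_IMAGE_TYPE_3D ?
         anv_minify(image->vk.extent.depth, l) : image->vk.array_layers;
      bytes += layers * 4; /* one dword per layer/slice */
   }
   return bytes;
}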
4150 
4151 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
4152 static inline bool
4153 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
4154                         const struct anv_image *image)
4155 {
4156    if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
4157       return false;
4158 
4159    /* For Gfx8-11, there are some restrictions around sampling from HiZ.
4160     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
4161     * say:
4162     *
4163     *    "If this field is set to AUX_HIZ, Number of Multisamples must
4164     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
4165     */
4166    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
4167       return false;
4168 
4169    /* Allow this feature on BDW even though it is disabled in the BDW devinfo
4170     * struct. There's documentation which suggests that this feature actually
4171     * reduces performance on BDW, but it has only been observed to help so
4172     * far. Sampling fast-cleared blocks on BDW must also be handled with care
4173     * (see depth_stencil_attachment_compute_aux_usage() for more info).
4174     */
4175    if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
4176       return false;
4177 
4178    return image->vk.samples == 1;
4179 }
4180 
4181 /* Returns true if an MCS-enabled buffer can be sampled from. */
4182 static inline bool
4183 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
4184                               const struct anv_image *image)
4185 {
4186    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
4187    const uint32_t plane =
4188       anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
4189 
4190    assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
4191 
4192    const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
4193 
4194    /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
4195     * See HSD 1707282275, wa_14013111325. Due to the use of
4196     * format-reinterpretation, a simplified workaround is implemented.
4197     */
4198    if (devinfo->ver >= 12 &&
4199        isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
4200       return false;
4201    }
4202 
4203    return true;
4204 }
4205 
4206 static inline bool
4207 anv_image_plane_uses_aux_map(const struct anv_device *device,
4208                              const struct anv_image *image,
4209                              uint32_t plane)
4210 {
4211    return device->info.has_aux_map &&
4212       isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
4213 }
4214 
4215 void
4216 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
4217                                   const struct anv_image *image,
4218                                   VkImageAspectFlagBits aspect,
4219                                   enum isl_aux_usage aux_usage,
4220                                   uint32_t level,
4221                                   uint32_t base_layer,
4222                                   uint32_t layer_count);
4223 
4224 void
4225 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
4226                       const struct anv_image *image,
4227                       VkImageAspectFlagBits aspect,
4228                       enum isl_aux_usage aux_usage,
4229                       enum isl_format format, struct isl_swizzle swizzle,
4230                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
4231                       VkRect2D area, union isl_color_value clear_color);
4232 void
4233 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
4234                               const struct anv_image *image,
4235                               VkImageAspectFlags aspects,
4236                               enum isl_aux_usage depth_aux_usage,
4237                               uint32_t level,
4238                               uint32_t base_layer, uint32_t layer_count,
4239                               VkRect2D area,
4240                               float depth_value, uint8_t stencil_value);
4241 void
4242 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
4243                        const struct anv_image *src_image,
4244                        enum isl_aux_usage src_aux_usage,
4245                        uint32_t src_level, uint32_t src_base_layer,
4246                        const struct anv_image *dst_image,
4247                        enum isl_aux_usage dst_aux_usage,
4248                        uint32_t dst_level, uint32_t dst_base_layer,
4249                        VkImageAspectFlagBits aspect,
4250                        uint32_t src_x, uint32_t src_y,
4251                        uint32_t dst_x, uint32_t dst_y,
4252                        uint32_t width, uint32_t height,
4253                        uint32_t layer_count,
4254                        enum blorp_filter filter);
4255 void
4256 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
4257                  const struct anv_image *image,
4258                  VkImageAspectFlagBits aspect, uint32_t level,
4259                  uint32_t base_layer, uint32_t layer_count,
4260                  enum isl_aux_op hiz_op);
4261 void
4262 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
4263                     const struct anv_image *image,
4264                     VkImageAspectFlags aspects,
4265                     uint32_t level,
4266                     uint32_t base_layer, uint32_t layer_count,
4267                     VkRect2D area, uint8_t stencil_value);
4268 void
4269 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
4270                  const struct anv_image *image,
4271                  enum isl_format format, struct isl_swizzle swizzle,
4272                  VkImageAspectFlagBits aspect,
4273                  uint32_t base_layer, uint32_t layer_count,
4274                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
4275                  bool predicate);
4276 void
4277 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
4278                  const struct anv_image *image,
4279                  enum isl_format format, struct isl_swizzle swizzle,
4280                  VkImageAspectFlagBits aspect, uint32_t level,
4281                  uint32_t base_layer, uint32_t layer_count,
4282                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
4283                  bool predicate);
4284 
4285 void
4286 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
4287                          const struct anv_image *image,
4288                          VkImageAspectFlagBits aspect,
4289                          uint32_t base_level, uint32_t level_count,
4290                          uint32_t base_layer, uint32_t layer_count);
4291 
4292 enum isl_aux_state ATTRIBUTE_PURE
4293 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
4294                         const struct anv_image *image,
4295                         const VkImageAspectFlagBits aspect,
4296                         const VkImageLayout layout);
4297 
4298 enum isl_aux_usage ATTRIBUTE_PURE
4299 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
4300                         const struct anv_image *image,
4301                         const VkImageAspectFlagBits aspect,
4302                         const VkImageUsageFlagBits usage,
4303                         const VkImageLayout layout);
4304 
4305 enum anv_fast_clear_type ATTRIBUTE_PURE
4306 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
4307                               const struct anv_image * const image,
4308                               const VkImageAspectFlagBits aspect,
4309                               const VkImageLayout layout);
4310 
4311 static inline bool
4312 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
4313                              VkImageAspectFlags aspects2)
4314 {
4315    if (aspects1 == aspects2)
4316       return true;
4317 
4318    /* Otherwise, only color aspects with the same aspect count are compatible. */
4319    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4320        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4321        util_bitcount(aspects1) == util_bitcount(aspects2))
4322       return true;
4323 
4324    return false;
4325 }
4326 
4327 struct anv_image_view {
4328    struct vk_image_view vk;
4329 
4330    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
4331 
4332    unsigned n_planes;
4333    struct {
4334       uint32_t image_plane;
4335 
4336       struct isl_view isl;
4337 
4338       /**
4339        * RENDER_SURFACE_STATE when using image as a sampler surface with an
4340        * image layout of SHADER_READ_ONLY_OPTIMAL or
4341        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
4342        */
4343       struct anv_surface_state optimal_sampler_surface_state;
4344 
4345       /**
4346        * RENDER_SURFACE_STATE when using image as a sampler surface with an
4347        * image layout of GENERAL.
4348        */
4349       struct anv_surface_state general_sampler_surface_state;
4350 
4351       /**
4352        * RENDER_SURFACE_STATE when using image as a storage image. Separate
4353        * states for vanilla (with the original format) and one which has been
4354        * lowered to a format suitable for reading.  This may be a raw surface
4355        * in extreme cases or simply a surface with a different format where we
4356        * expect some conversion to be done in the shader.
4357        */
4358       struct anv_surface_state storage_surface_state;
4359       struct anv_surface_state lowered_storage_surface_state;
4360 
4361       struct brw_image_param lowered_storage_image_param;
4362    } planes[3];
4363 };
4364 
4365 enum anv_image_view_state_flags {
4366    ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED      = (1 << 0),
4367    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
4368 };
4369 
4370 void anv_image_fill_surface_state(struct anv_device *device,
4371                                   const struct anv_image *image,
4372                                   VkImageAspectFlagBits aspect,
4373                                   const struct isl_view *view,
4374                                   isl_surf_usage_flags_t view_usage,
4375                                   enum isl_aux_usage aux_usage,
4376                                   const union isl_color_value *clear_color,
4377                                   enum anv_image_view_state_flags flags,
4378                                   struct anv_surface_state *state_inout,
4379                                   struct brw_image_param *image_param_out);
4380 
4381 struct anv_image_create_info {
4382    const VkImageCreateInfo *vk_info;
4383 
4384    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
4385    isl_tiling_flags_t isl_tiling_flags;
4386 
4387    /** These flags will be added to the usage flags derived from VkImageCreateInfo. */
4388    isl_surf_usage_flags_t isl_extra_usage_flags;
4389 };
4390 
4391 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
4392                         const struct anv_image_create_info *create_info);
4393 
4394 void anv_image_finish(struct anv_image *image);
4395 
4396 void anv_image_get_memory_requirements(struct anv_device *device,
4397                                        struct anv_image *image,
4398                                        VkImageAspectFlags aspects,
4399                                        VkMemoryRequirements2 *pMemoryRequirements);
4400 
4401 enum isl_format
4402 anv_isl_format_for_descriptor_type(const struct anv_device *device,
4403                                    VkDescriptorType type);
4404 
4405 static inline VkExtent3D
4406 anv_sanitize_image_extent(const VkImageType imageType,
4407                           const VkExtent3D imageExtent)
4408 {
4409    switch (imageType) {
4410    case VK_IMAGE_TYPE_1D:
4411       return (VkExtent3D) { imageExtent.width, 1, 1 };
4412    case VK_IMAGE_TYPE_2D:
4413       return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
4414    case VK_IMAGE_TYPE_3D:
4415       return imageExtent;
4416    default:
4417       unreachable("invalid image type");
4418    }
4419 }
4420 
4421 static inline VkOffset3D
4422 anv_sanitize_image_offset(const VkImageType imageType,
4423                           const VkOffset3D imageOffset)
4424 {
4425    switch (imageType) {
4426    case VK_IMAGE_TYPE_1D:
4427       return (VkOffset3D) { imageOffset.x, 0, 0 };
4428    case VK_IMAGE_TYPE_2D:
4429       return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
4430    case VK_IMAGE_TYPE_3D:
4431       return imageOffset;
4432    default:
4433       unreachable("invalid image type");
4434    }
4435 }
4436 
4437 static inline uint32_t
4438 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
4439                           VkLineRasterizationModeEXT line_mode)
4440 {
4441    if (raster_mode == VK_POLYGON_MODE_LINE &&
4442        line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
4443       return true;
4444    return false;
4445 }
4446 
4447 VkFormatFeatureFlags2KHR
4448 anv_get_image_format_features2(const struct intel_device_info *devinfo,
4449                                VkFormat vk_format,
4450                                const struct anv_format *anv_format,
4451                                VkImageTiling vk_tiling,
4452                                const struct isl_drm_modifier_info *isl_mod_info);
4453 
4454 void anv_fill_buffer_surface_state(struct anv_device *device,
4455                                    struct anv_state state,
4456                                    enum isl_format format,
4457                                    isl_surf_usage_flags_t usage,
4458                                    struct anv_address address,
4459                                    uint32_t range, uint32_t stride);
4460 
4461 static inline void
4462 anv_clear_color_from_att_state(union isl_color_value *clear_color,
4463                                const struct anv_attachment_state *att_state,
4464                                const struct anv_image_view *iview)
4465 {
4466    const struct isl_format_layout *view_fmtl =
4467       isl_format_get_layout(iview->planes[0].isl.format);
4468 
4469 #define COPY_CLEAR_COLOR_CHANNEL(c, i) \
4470    if (view_fmtl->channels.c.bits) \
4471       clear_color->u32[i] = att_state->clear_value.color.uint32[i]
4472 
4473    COPY_CLEAR_COLOR_CHANNEL(r, 0);
4474    COPY_CLEAR_COLOR_CHANNEL(g, 1);
4475    COPY_CLEAR_COLOR_CHANNEL(b, 2);
4476    COPY_CLEAR_COLOR_CHANNEL(a, 3);
4477 
4478 #undef COPY_CLEAR_COLOR_CHANNEL
4479 }
4480 
4481 
4482 /* Haswell border color is a bit of a disaster.  Float and unorm formats use a
4483  * straightforward 32-bit float color in the first 64 bytes.  Instead of using
4484  * a nice float/integer union like Gfx8+, Haswell specifies the integer border
4485  * color as a separate entry /after/ the float color.  The layout of this entry
4486  * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
4487  *
4488  * Since we don't know the format/bpp, we can't make any of the border colors
4489  * containing '1' work for all formats, as it would be in the wrong place for
4490  * some of them.  We opt to make 32-bit integers work as this seems like the
4491  * most common option.  Fortunately, transparent black works regardless, as
4492  * all zeroes is the same in every bit-size.
4493  */
4494 struct hsw_border_color {
4495    float float32[4];
4496    uint32_t _pad0[12];
4497    uint32_t uint32[4];
4498    uint32_t _pad1[108];
4499 };
4500 
4501 struct gfx8_border_color {
4502    union {
4503       float float32[4];
4504       uint32_t uint32[4];
4505    };
4506    /* Pad out to 64 bytes */
4507    uint32_t _pad[12];
4508 };
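/* Layout sanity sketch for the two structs above (assumes C11 static_assert
 * via <assert.h> and offsetof via <stddef.h>, both commonly available here):
 * the Haswell integer border color starts right after the 64-byte float area,
 * the whole Haswell entry is 512 bytes, and the Gfx8+ entry is 64 bytes.
 */
static_assert(offsetof(struct hsw_border_color, uint32) == 64,
              "HSW integer border color must follow the 64-byte float area");
static_assert(sizeof(struct hsw_border_color) == 512,
              "HSW border color entry is 512 bytes");
static_assert(sizeof(struct gfx8_border_color) == 64,
              "Gfx8+ border color entry is 64 bytes");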
4509 
4510 struct anv_ycbcr_conversion {
4511    struct vk_object_base base;
4512 
4513    const struct anv_format *        format;
4514    VkSamplerYcbcrModelConversion    ycbcr_model;
4515    VkSamplerYcbcrRange              ycbcr_range;
4516    VkComponentSwizzle               mapping[4];
4517    VkChromaLocation                 chroma_offsets[2];
4518    VkFilter                         chroma_filter;
4519    bool                             chroma_reconstruction;
4520 };
4521 
4522 struct anv_sampler {
4523    struct vk_object_base        base;
4524 
4525    uint32_t                     state[3][4];
4526    uint32_t                     n_planes;
4527    struct anv_ycbcr_conversion *conversion;
4528 
4529    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
4530     * and with a 32-byte stride for use as bindless samplers.
4531     */
4532    struct anv_state             bindless_state;
4533 
4534    struct anv_state             custom_border_color;
4535 };
4536 
4537 struct anv_framebuffer {
4538    struct vk_object_base                        base;
4539 
4540    uint32_t                                     width;
4541    uint32_t                                     height;
4542    uint32_t                                     layers;
4543 
4544    uint32_t                                     attachment_count;
4545    struct anv_image_view *                      attachments[0];
4546 };
4547 
4548 struct anv_subpass_attachment {
4549    VkImageUsageFlagBits usage;
4550    uint32_t attachment;
4551    VkImageLayout layout;
4552 
4553    /* Used only with attachment containing stencil data. */
4554    VkImageLayout stencil_layout;
4555 };
4556 
4557 struct anv_subpass {
4558    uint32_t                                     attachment_count;
4559 
4560    /**
4561     * A pointer to all attachment references used in this subpass.
4562     * Only valid if ::attachment_count > 0.
4563     */
4564    struct anv_subpass_attachment *              attachments;
4565    uint32_t                                     input_count;
4566    struct anv_subpass_attachment *              input_attachments;
4567    uint32_t                                     color_count;
4568    struct anv_subpass_attachment *              color_attachments;
4569    struct anv_subpass_attachment *              resolve_attachments;
4570 
4571    struct anv_subpass_attachment *              depth_stencil_attachment;
4572    struct anv_subpass_attachment *              ds_resolve_attachment;
4573    VkResolveModeFlagBitsKHR                     depth_resolve_mode;
4574    VkResolveModeFlagBitsKHR                     stencil_resolve_mode;
4575 
4576    uint32_t                                     view_mask;
4577 
4578    /** Subpass has a depth/stencil self-dependency */
4579    bool                                         has_ds_self_dep;
4580 
4581    /** Subpass has at least one color resolve attachment */
4582    bool                                         has_color_resolve;
4583 };
4584 
4585 static inline unsigned
4586 anv_subpass_view_count(const struct anv_subpass *subpass)
4587 {
4588    return MAX2(1, util_bitcount(subpass->view_mask));
4589 }
4590 
4591 struct anv_render_pass_attachment {
4592    /* TODO: Consider using VkAttachmentDescription instead of storing each of
4593     * its members individually.
4594     */
4595    VkFormat                                     format;
4596    uint32_t                                     samples;
4597    VkImageUsageFlags                            usage;
4598    VkAttachmentLoadOp                           load_op;
4599    VkAttachmentStoreOp                          store_op;
4600    VkAttachmentLoadOp                           stencil_load_op;
4601    VkImageLayout                                initial_layout;
4602    VkImageLayout                                final_layout;
4603    VkImageLayout                                first_subpass_layout;
4604 
4605    VkImageLayout                                stencil_initial_layout;
4606    VkImageLayout                                stencil_final_layout;
4607 
4608    /* The subpass id in which the attachment will be used last. */
4609    uint32_t                                     last_subpass_idx;
4610 };
4611 
4612 struct anv_render_pass {
4613    struct vk_object_base                        base;
4614 
4615    uint32_t                                     attachment_count;
4616    uint32_t                                     subpass_count;
4617    /* An array of subpass_count+1 flushes, one per subpass boundary */
4618    enum anv_pipe_bits *                         subpass_flushes;
4619    struct anv_render_pass_attachment *          attachments;
4620    struct anv_subpass                           subpasses[0];
4621 };
4622 
4623 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4624 
4625 struct anv_query_pool {
4626    struct vk_object_base                        base;
4627 
4628    VkQueryType                                  type;
4629    VkQueryPipelineStatisticFlags                pipeline_statistics;
4630    /** Stride between slots, in bytes */
4631    uint32_t                                     stride;
4632    /** Number of slots in this query pool */
4633    uint32_t                                     slots;
4634    struct anv_bo *                              bo;
4635 
4636    /* KHR perf queries : */
4637    uint32_t                                     pass_size;
4638    uint32_t                                     data_offset;
4639    uint32_t                                     snapshot_size;
4640    uint32_t                                     n_counters;
4641    struct intel_perf_counter_pass                *counter_pass;
4642    uint32_t                                     n_passes;
4643    struct intel_perf_query_info                 **pass_query;
4644 };
4645 
4646 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
4647                                                       uint32_t pass)
4648 {
4649    return pool->pass_size * pass + 8;
4650 }
4651 
4652 struct anv_acceleration_structure {
4653    struct vk_object_base                        base;
4654 
4655    VkDeviceSize                                 size;
4656    struct anv_address                           address;
4657 };
4658 
4659 int anv_get_instance_entrypoint_index(const char *name);
4660 int anv_get_device_entrypoint_index(const char *name);
4661 int anv_get_physical_device_entrypoint_index(const char *name);
4662 
4663 const char *anv_get_instance_entry_name(int index);
4664 const char *anv_get_physical_device_entry_name(int index);
4665 const char *anv_get_device_entry_name(int index);
4666 
4667 bool
4668 anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
4669                                    const struct vk_instance_extension_table *instance);
4670 bool
4671 anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
4672                                           const struct vk_instance_extension_table *instance);
4673 bool
4674 anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
4675                                  const struct vk_instance_extension_table *instance,
4676                                  const struct vk_device_extension_table *device);
4677 
4678 const struct vk_device_dispatch_table *
4679 anv_get_device_dispatch_table(const struct intel_device_info *devinfo);
4680 
4681 void
4682 anv_dump_pipe_bits(enum anv_pipe_bits bits);
4683 
4684 static inline void
4685 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
4686                           enum anv_pipe_bits bits,
4687                           const char* reason)
4688 {
4689    cmd_buffer->state.pending_pipe_bits |= bits;
4690    if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits)
4691    {
4692       fputs("pc: add ", stderr);
4693       anv_dump_pipe_bits(bits);
4694       fprintf(stderr, "reason: %s\n", reason);
4695    }
4696 }
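/* Usage sketch: callers accumulate PIPE_CONTROL bits together with a
 * human-readable reason that is printed only when the pipe-control debug flag
 * (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) is set, e.g. with one of the
 * enum anv_pipe_bits values defined earlier in this header:
 *
 *    anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT,
 *                              "example: stall before reprogramming state");
 */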
4697 
4698 static inline uint32_t
4699 anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
4700 {
4701    /* This function must be called from within a subpass. */
4702    assert(cmd_state->pass && cmd_state->subpass);
4703 
4704    const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;
4705 
4706    /* The id of this subpass shouldn't exceed the number of subpasses in this
4707     * render pass minus 1.
4708     */
4709    assert(subpass_id < cmd_state->pass->subpass_count);
4710    return subpass_id;
4711 }
4712 
4713 struct anv_performance_configuration_intel {
4714    struct vk_object_base      base;
4715 
4716    struct intel_perf_registers *register_config;
4717 
4718    uint64_t                   config_id;
4719 };
4720 
4721 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
4722 void anv_device_perf_init(struct anv_device *device);
4723 void anv_perf_write_pass_results(struct intel_perf_config *perf,
4724                                  struct anv_query_pool *pool, uint32_t pass,
4725                                  const struct intel_perf_query_result *accumulated_results,
4726                                  union VkPerformanceCounterResultKHR *results);
4727 
4728 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
4729    VK_FROM_HANDLE(__anv_type, __name, __handle)
4730 
4731 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
4732                        VK_OBJECT_TYPE_COMMAND_BUFFER)
4733 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
4734 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
4735 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
4736                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
4737 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
4738 
4739 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
4740                                VkAccelerationStructureKHR,
4741                                VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
4742 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool,
4743                                VK_OBJECT_TYPE_COMMAND_POOL)
4744 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer,
4745                                VK_OBJECT_TYPE_BUFFER)
4746 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
4747                                VK_OBJECT_TYPE_BUFFER_VIEW)
4748 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
4749                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
4750 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
4751                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
4752 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
4753                                VkDescriptorSetLayout,
4754                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
4755 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
4756                                VkDescriptorUpdateTemplate,
4757                                VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
4758 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
4759                                VK_OBJECT_TYPE_DEVICE_MEMORY)
4760 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
4761 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
4762 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
4763                                VK_OBJECT_TYPE_FRAMEBUFFER)
4764 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
4765 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
4766                                VK_OBJECT_TYPE_IMAGE_VIEW);
4767 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
4768                                VK_OBJECT_TYPE_PIPELINE_CACHE)
4769 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
4770                                VK_OBJECT_TYPE_PIPELINE)
4771 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
4772                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
4773 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
4774                                VK_OBJECT_TYPE_QUERY_POOL)
4775 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
4776                                VK_OBJECT_TYPE_RENDER_PASS)
4777 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
4778                                VK_OBJECT_TYPE_SAMPLER)
4779 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
4780                                VK_OBJECT_TYPE_SEMAPHORE)
4781 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
4782                                VkSamplerYcbcrConversion,
4783                                VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
4784 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
4785                                VkPerformanceConfigurationINTEL,
4786                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
4787 
4788 #define anv_genX(devinfo, thing) ({             \
4789    __typeof(&gfx9_##thing) genX_thing;          \
4790    switch ((devinfo)->verx10) {                 \
4791    case 70:                                     \
4792       genX_thing = &gfx7_##thing;               \
4793       break;                                    \
4794    case 75:                                     \
4795       genX_thing = &gfx75_##thing;              \
4796       break;                                    \
4797    case 80:                                     \
4798       genX_thing = &gfx8_##thing;               \
4799       break;                                    \
4800    case 90:                                     \
4801       genX_thing = &gfx9_##thing;               \
4802       break;                                    \
4803    case 110:                                    \
4804       genX_thing = &gfx11_##thing;              \
4805       break;                                    \
4806    case 120:                                    \
4807       genX_thing = &gfx12_##thing;              \
4808       break;                                    \
4809    case 125:                                    \
4810       genX_thing = &gfx125_##thing;             \
4811       break;                                    \
4812    default:                                     \
4813       unreachable("Unknown hardware generation"); \
4814    }                                            \
4815    genX_thing;                                  \
4816 })
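/* Usage sketch: anv_genX() resolves a per-generation entry point at run time
 * while anchoring on the gfx9 prototype for type checking, e.g. (assuming a
 * genX(init_device_state) entry point declared in anv_genX.h):
 *
 *    anv_genX(&device->info, init_device_state)(device);
 */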
4817 
4818 /* Gen-specific function declarations */
4819 #ifdef genX
4820 #  include "anv_genX.h"
4821 #else
4822 #  define genX(x) gfx7_##x
4823 #  include "anv_genX.h"
4824 #  undef genX
4825 #  define genX(x) gfx75_##x
4826 #  include "anv_genX.h"
4827 #  undef genX
4828 #  define genX(x) gfx8_##x
4829 #  include "anv_genX.h"
4830 #  undef genX
4831 #  define genX(x) gfx9_##x
4832 #  include "anv_genX.h"
4833 #  undef genX
4834 #  define genX(x) gfx11_##x
4835 #  include "anv_genX.h"
4836 #  undef genX
4837 #  define genX(x) gfx12_##x
4838 #  include "anv_genX.h"
4839 #  undef genX
4840 #  define genX(x) gfx125_##x
4841 #  include "anv_genX.h"
4842 #  undef genX
4843 #endif
4844 
4845 #endif /* ANV_PRIVATE_H */
4846