1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/i915_drm.h"
34 #include "drm-uapi/drm_fourcc.h"
35 
36 #ifdef HAVE_VALGRIND
37 #include <valgrind.h>
38 #include <memcheck.h>
39 #define VG(x) x
40 #ifndef NDEBUG
41 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
42 #endif
43 #else
44 #define VG(x) ((void)0)
45 #endif
46 
47 #include "common/intel_clflush.h"
48 #include "common/intel_decoder.h"
49 #include "common/intel_gem.h"
50 #include "common/intel_l3_config.h"
51 #include "common/intel_measure.h"
52 #include "common/intel_sample_positions.h"
53 #include "dev/intel_device_info.h"
54 #include "blorp/blorp.h"
55 #include "compiler/brw_compiler.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/macros.h"
61 #include "util/hash_table.h"
62 #include "util/list.h"
63 #include "util/perf/u_trace.h"
64 #include "util/sparse_array.h"
65 #include "util/u_atomic.h"
66 #include "util/u_vector.h"
67 #include "util/u_math.h"
68 #include "util/vma.h"
69 #include "util/xmlconfig.h"
70 #include "vk_alloc.h"
71 #include "vk_buffer.h"
72 #include "vk_command_buffer.h"
73 #include "vk_command_pool.h"
74 #include "vk_debug_report.h"
75 #include "vk_device.h"
76 #include "vk_drm_syncobj.h"
77 #include "vk_enum_defines.h"
78 #include "vk_framebuffer.h"
79 #include "vk_graphics_state.h"
80 #include "vk_image.h"
81 #include "vk_instance.h"
82 #include "vk_pipeline_cache.h"
83 #include "vk_physical_device.h"
84 #include "vk_shader_module.h"
85 #include "vk_sync.h"
86 #include "vk_sync_timeline.h"
87 #include "vk_util.h"
88 #include "vk_queue.h"
89 #include "vk_log.h"
90 
91 /* Pre-declarations needed for WSI entrypoints */
92 struct wl_surface;
93 struct wl_display;
94 typedef struct xcb_connection_t xcb_connection_t;
95 typedef uint32_t xcb_visualid_t;
96 typedef uint32_t xcb_window_t;
97 
98 struct anv_batch;
99 struct anv_buffer;
100 struct anv_buffer_view;
101 struct anv_image_view;
102 struct anv_acceleration_structure;
103 struct anv_instance;
104 
105 struct intel_aux_map_context;
106 struct intel_perf_config;
107 struct intel_perf_counter_pass;
108 struct intel_perf_query_result;
109 
110 #include <vulkan/vulkan.h>
111 #include <vulkan/vk_icd.h>
112 
113 #include "anv_android.h"
114 #include "anv_entrypoints.h"
115 #include "isl/isl.h"
116 
117 #include "dev/intel_debug.h"
118 #undef MESA_LOG_TAG
119 #define MESA_LOG_TAG "MESA-INTEL"
120 #include "util/log.h"
121 #include "wsi_common.h"
122 
123 #define NSEC_PER_SEC 1000000000ull
124 
125 /* anv Virtual Memory Layout
126  * =========================
127  *
128  * When the anv driver is determining the virtual graphics addresses of memory
129  * objects itself using the softpin mechanism, the following memory ranges
130  * will be used.
131  *
132  * Three special considerations to note:
133  *
134  * (1) the dynamic state pool is located within the same 4 GiB as the low
135  * heap. This is to work around a VF cache issue described in a comment in
136  * anv_physical_device_init_heaps.
137  *
138  * (2) the binding table pool is located at lower addresses than the surface
139  * state pool, within a 4 GiB range. This allows surface state base addresses
140  * to cover both binding tables (16-bit offsets) and surface states (32-bit
141  * offsets).
142  *
143  * (3) the last 4 GiB of the address space is withheld from the high
144  * heap. Various hardware units will read past the end of an object for
145  * various reasons. This healthy margin prevents reads from wrapping around
146  * 48-bit addresses.
147  */
148 #define GENERAL_STATE_POOL_MIN_ADDRESS     0x000000200000ULL /* 2 MiB */
149 #define GENERAL_STATE_POOL_MAX_ADDRESS     0x00003fffffffULL
150 #define LOW_HEAP_MIN_ADDRESS               0x000040000000ULL /* 1 GiB */
151 #define LOW_HEAP_MAX_ADDRESS               0x00007fffffffULL
152 #define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
153 #define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
154 #define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
155 #define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
156 #define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
157 #define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
158 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
159 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
160 #define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
161 #define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
162 #define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */
163 
164 #define GENERAL_STATE_POOL_SIZE     \
165    (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
166 #define LOW_HEAP_SIZE               \
167    (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
168 #define DYNAMIC_STATE_POOL_SIZE     \
169    (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
170 #define BINDING_TABLE_POOL_SIZE     \
171    (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
172 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
173 #define SURFACE_STATE_POOL_SIZE     \
174    (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
175 #define INSTRUCTION_STATE_POOL_SIZE \
176    (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
177 #define CLIENT_VISIBLE_HEAP_SIZE               \
178    (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
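
/* Editor's sketch (not part of the header): compile-time checks encoding two
 * of the layout invariants described above, assuming a C11 compiler.
 */
_Static_assert(DYNAMIC_STATE_POOL_MAX_ADDRESS < (1ull << 32),
               "dynamic state pool must share the low 4 GiB with the low heap");
_Static_assert(BINDING_TABLE_POOL_MIN_ADDRESS < SURFACE_STATE_POOL_MIN_ADDRESS &&
               SURFACE_STATE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS < (1ull << 32),
               "binding tables must sit below surface states within one 4 GiB range");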
179 
180 /* Allowing different clear colors requires us to perform a depth resolve at
181  * the end of certain render passes. This is because while slow clears store
182  * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
183  * See the PRMs for examples describing when additional resolves would be
184  * necessary. To enable fast clears without requiring extra resolves, we set
185  * the clear value to a globally-defined one. We could allow different values
186  * if the user doesn't expect coherent data during or after a render pass
187  * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
188  * don't seem to exist yet. In almost all Vulkan applications tested thus far,
189  * 1.0f seems to be the only value used. The only application that doesn't set
190  * this value does so through the use of a seemingly uninitialized clear
191  * value.
192  */
193 #define ANV_HZ_FC_VAL 1.0f
194 
195 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
196 #define MAX_VBS         (33 - 2)
197 
198 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
199  * only supports the push model of VS inputs, and we only have 128 GRFs,
200  * minus the g0 and g1 payload, which gives us a maximum of 31 VEs.  Plus,
201  * we use two of them for SGVs.
202  */
203 #define MAX_VES         (31 - 2)
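
/* Editor's arithmetic note for MAX_VES above, assuming each vec4 vertex
 * element occupies 4 SIMD8 GRFs: (128 - 2) / 4 = 31.5, rounded down to 31.
 */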
204 
205 #define MAX_XFB_BUFFERS  4
206 #define MAX_XFB_STREAMS  4
207 #define MAX_SETS        32
208 #define MAX_RTS          8
209 #define MAX_VIEWPORTS   16
210 #define MAX_SCISSORS    16
211 #define MAX_PUSH_CONSTANTS_SIZE 128
212 #define MAX_DYNAMIC_BUFFERS 16
213 #define MAX_IMAGES 64
214 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
215 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
216 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
217 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
218  * use 64 here to avoid cache issues. We could most likely bring it back to
219  * 32 if we had different virtual addresses for the different views on a given
220  * GEM object.
221  */
222 #define ANV_UBO_ALIGNMENT 64
223 #define ANV_SSBO_ALIGNMENT 4
224 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
225 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
226 #define MAX_SAMPLE_LOCATIONS 16
227 
228 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
229  *
230  *    "The surface state model is used when a Binding Table Index (specified
231  *    in the message descriptor) of less than 240 is specified. In this model,
232  *    the Binding Table Index is used to index into the binding table, and the
233  *    binding table entry contains a pointer to the SURFACE_STATE."
234  *
235  * Binding table values above 240 are used for various things in the hardware
236  * such as stateless, stateless with incoherent cache, SLM, and bindless.
237  */
238 #define MAX_BINDING_TABLE_SIZE 240
239 
240 /* The kernel relocation API has a limitation of a 32-bit delta value
241  * applied to the address before it is written which, in spite of it being
242  * unsigned, is treated as signed.  Because of the way this maps to
243  * the Vulkan API, we cannot handle an offset into a buffer that does not
244  * fit into signed 32 bits.  The only mechanism we have for dealing with
245  * this at the moment is to limit all VkDeviceMemory objects to a maximum
246  * of 2GB each.  The Vulkan spec allows us to do this:
247  *
248  *    "Some platforms may have a limit on the maximum size of a single
249  *    allocation. For example, certain systems may fail to create
250  *    allocations with a size greater than or equal to 4GB. Such a limit is
251  *    implementation-dependent, and if such a failure occurs then the error
252  *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
253  */
254 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
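
/* Illustration (editor's sketch; the helper below is hypothetical — anv's
 * real check lives in its VkDeviceMemory allocation path): rejecting
 * allocations over the limit defined above.
 */
static inline VkResult
example_check_allocation_size(VkDeviceSize size)
{
   if (size > MAX_MEMORY_ALLOCATION_SIZE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* per the spec text quoted above */
   return VK_SUCCESS;
}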
255 
256 #define ANV_SVGS_VB_INDEX    MAX_VBS
257 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
258 
259 /* We reserve this MI ALU register for the purpose of handling predication.
260  * Other code which uses the MI ALU should leave it alone.
261  */
262 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
263 
264 /* We reserve this MI ALU register to pass around an offset computed from
265  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
266  * Other code which uses the MI ALU should leave it alone.
267  */
268 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
269 
270 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
271 
272 /* For gfx12 we set the streamout buffers using 4 separate commands
273  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
274  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
275  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
276  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
277  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
278  * 3DSTATE_SO_BUFFER_INDEX_0.
279  */
280 #define SO_BUFFER_INDEX_0_CMD 0x60
281 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
282 
283 static inline uint32_t
284 align_down_npot_u32(uint32_t v, uint32_t a)
285 {
286    return v - (v % a);
287 }
288 
289 static inline uint32_t
290 align_down_u32(uint32_t v, uint32_t a)
291 {
292    assert(a != 0 && a == (a & -a));
293    return v & ~(a - 1);
294 }
295 
296 static inline uint32_t
297 align_u32(uint32_t v, uint32_t a)
298 {
299    assert(a != 0 && a == (a & -a));
300    return align_down_u32(v + a - 1, a);
301 }
302 
303 static inline uint64_t
304 align_down_u64(uint64_t v, uint64_t a)
305 {
306    assert(a != 0 && a == (a & -a));
307    return v & ~(a - 1);
308 }
309 
310 static inline uint64_t
311 align_u64(uint64_t v, uint64_t a)
312 {
313    return align_down_u64(v + a - 1, a);
314 }
315 
316 static inline int32_t
317 align_i32(int32_t v, int32_t a)
318 {
319    assert(a != 0 && a == (a & -a));
320    return (v + a - 1) & ~(a - 1);
321 }
322 
323 /** Alignment must be a power of 2. */
324 static inline bool
325 anv_is_aligned(uintmax_t n, uintmax_t a)
326 {
327    assert(a == (a & -a));
328    return (n & (a - 1)) == 0;
329 }
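
/* Worked examples (editor's note): align_u32(17, 16) == 32,
 * align_down_u32(17, 16) == 16, align_down_npot_u32(17, 12) == 12, and
 * anv_is_aligned(64, 16) is true.
 */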
330 
331 static inline uint32_t
332 anv_minify(uint32_t n, uint32_t levels)
333 {
334    if (unlikely(n == 0))
335       return 0;
336    else
337       return MAX2(n >> levels, 1);
338 }
339 
340 static inline float
341 anv_clamp_f(float f, float min, float max)
342 {
343    assert(min < max);
344 
345    if (f > max)
346       return max;
347    else if (f < min)
348       return min;
349    else
350       return f;
351 }
352 
353 static inline bool
354 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
355 {
356    if (*inout_mask & clear_mask) {
357       *inout_mask &= ~clear_mask;
358       return true;
359    } else {
360       return false;
361    }
362 }
363 
364 static inline union isl_color_value
365 vk_to_isl_color(VkClearColorValue color)
366 {
367    return (union isl_color_value) {
368       .u32 = {
369          color.uint32[0],
370          color.uint32[1],
371          color.uint32[2],
372          color.uint32[3],
373       },
374    };
375 }
376 
377 static inline union isl_color_value
378 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
379 {
380    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
381    union isl_color_value isl_color = { .u32 = {0, } };
382 
383 #define COPY_COLOR_CHANNEL(c, i) \
384    if (fmtl->channels.c.bits) \
385       isl_color.u32[i] = color.uint32[i]
386 
387    COPY_COLOR_CHANNEL(r, 0);
388    COPY_COLOR_CHANNEL(g, 1);
389    COPY_COLOR_CHANNEL(b, 2);
390    COPY_COLOR_CHANNEL(a, 3);
391 
392 #undef COPY_COLOR_CHANNEL
393 
394    return isl_color;
395 }
396 
397 static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
398 {
399    uintptr_t mask = (1ull << bits) - 1;
400    *flags = ptr & mask;
401    return (void *) (ptr & ~mask);
402 }
403 
404 static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
405 {
406    uintptr_t value = (uintptr_t) ptr;
407    uintptr_t mask = (1ull << bits) - 1;
408    return value | (mask & flags);
409 }
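
/* Usage sketch (editor's illustration with hypothetical values): stash two
 * flag bits in the low bits of a pointer that is at least 4-byte aligned,
 * then recover both pieces.
 *
 *    uintptr_t packed = anv_pack_ptr(ptr, 2, 0x3);
 *    int flags;
 *    void *orig = anv_unpack_ptr(packed, 2, &flags); // orig == ptr, flags == 0x3
 */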
410 
411 /**
412  * Warn on ignored extension structs.
413  *
414  * The Vulkan spec requires us to ignore unsupported or unknown structs in
415  * a pNext chain.  In debug mode, emitting warnings for ignored structs may
416  * help us discover structs that we should not have ignored.
417  *
418  *
419  * From the Vulkan 1.0.38 spec:
420  *
421  *    Any component of the implementation (the loader, any enabled layers,
422  *    and drivers) must skip over, without processing (other than reading the
423  *    sType and pNext members) any chained structures with sType values not
424  *    defined by extensions supported by that component.
425  */
426 #define anv_debug_ignored_stype(sType) \
427    mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
428 
429 void __anv_perf_warn(struct anv_device *device,
430                      const struct vk_object_base *object,
431                      const char *file, int line, const char *format, ...)
432    anv_printflike(5, 6);
433 
434 /**
435  * Print a FINISHME message, including its source location.
436  */
437 #define anv_finishme(format, ...) \
438    do { \
439       static bool reported = false; \
440       if (!reported) { \
441          mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
442                     ##__VA_ARGS__); \
443          reported = true; \
444       } \
445    } while (0)
446 
447 /**
448  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
449  */
450 #define anv_perf_warn(objects_macro, format, ...)   \
451    do { \
452       static bool reported = false; \
453       if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
454          __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
455                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
456                   objects_macro, __FILE__, __LINE__,                    \
457                   format, ## __VA_ARGS__);                              \
458          reported = true; \
459       } \
460    } while (0)
461 
462 /* A non-fatal assert.  Useful for debugging. */
463 #ifdef DEBUG
464 #define anv_assert(x) ({ \
465    if (unlikely(!(x))) \
466       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
467 })
468 #else
469 #define anv_assert(x)
470 #endif
471 
472 struct anv_bo {
473    const char *name;
474 
475    uint32_t gem_handle;
476 
477    uint32_t refcount;
478 
479    /* Index into the current validation list.  This is used by the
480     * validation list building algorithm to track which buffers are already
481     * in the validation list so that we can ensure uniqueness.
482     */
483    uint32_t exec_obj_index;
484 
485    /* Index for use with util_sparse_array_free_list */
486    uint32_t free_index;
487 
488    /* Last known offset.  This value is provided by the kernel when we
489     * execbuf and is used as the presumed offset for the next bunch of
490     * relocations.
491     */
492    uint64_t offset;
493 
494    /** Size of the buffer not including implicit aux */
495    uint64_t size;
496 
497    /* Map for internally mapped BOs.
498     *
499     * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
500     * BO. If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
501     */
502    void *map;
503 
504    /** Size of the implicit CCS range at the end of the buffer
505     *
506     * On Gfx12, CCS data is always a direct 1/256 scale-down.  A single 64K
507     * page of main surface data maps to a 256B chunk of CCS data and that
508     * mapping is provided on TGL-LP by the AUX table which maps virtual memory
509     * addresses in the main surface to virtual memory addresses for CCS data.
510     *
511     * Because we can't change these maps around easily and because Vulkan
512     * allows two VkImages to be bound to overlapping memory regions (as long
513     * as the app is careful), it's not feasible to make this mapping part of
514     * the image.  (On Gfx11 and earlier, the mapping was provided via
515     * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
516     * Instead, we attach the CCS data directly to the buffer object and setup
517     * the AUX table mapping at BO creation time.
518     *
519     * This field is for internal tracking use by the BO allocator only and
520     * should not be touched by other parts of the code.  If something wants to
521     * know if a BO has implicit CCS data, it should instead look at the
522     * has_implicit_ccs boolean below.
523     *
524     * This data is not included in maps of this buffer.
525     */
526    uint32_t _ccs_size;
527 
528    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
529    uint32_t flags;
530 
531    /** True if this BO may be shared with other processes */
532    bool is_external:1;
533 
534    /** True if this BO is a wrapper
535     *
536     * When set to true, none of the fields in this BO are meaningful except
537     * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
538     * See also anv_bo_unwrap().  Wrapper BOs are not allowed when use_softpin
539     * is set in the physical device.
540     */
541    bool is_wrapper:1;
542 
543    /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
544    bool has_fixed_address:1;
545 
546    /** True if this BO wraps a host pointer */
547    bool from_host_ptr:1;
548 
549    /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
550    bool has_client_visible_address:1;
551 
552    /** True if this BO has implicit CCS data attached to it */
553    bool has_implicit_ccs:1;
554 };
555 
556 static inline struct anv_bo *
557 anv_bo_ref(struct anv_bo *bo)
558 {
559    p_atomic_inc(&bo->refcount);
560    return bo;
561 }
562 
563 static inline struct anv_bo *
564 anv_bo_unwrap(struct anv_bo *bo)
565 {
566    while (bo->is_wrapper)
567       bo = bo->map;
568    return bo;
569 }
570 
571 static inline bool
572 anv_bo_is_pinned(struct anv_bo *bo)
573 {
574 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
575    /* Sky Lake and later always use softpin */
576    assert(bo->flags & EXEC_OBJECT_PINNED);
577    return true;
578 #elif defined(GFX_VERx10) && GFX_VERx10 < 80
579    /* Haswell and earlier never use softpin */
580    assert(!(bo->flags & EXEC_OBJECT_PINNED));
581    assert(!bo->has_fixed_address);
582    return false;
583 #else
584    /* If we don't have a GFX_VERx10 #define, we need to look at the BO.  Also,
585     * for GFX version 8, we need to look at the BO because Broadwell softpins
586     * but Cherryview doesn't.
587     */
588    assert((bo->flags & EXEC_OBJECT_PINNED) || !bo->has_fixed_address);
589    return (bo->flags & EXEC_OBJECT_PINNED) != 0;
590 #endif
591 }
592 
593 struct anv_address {
594    struct anv_bo *bo;
595    int64_t offset;
596 };
597 
598 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
599 
600 static inline struct anv_address
601 anv_address_from_u64(uint64_t addr_u64)
602 {
603    assert(addr_u64 == intel_canonical_address(addr_u64));
604    return (struct anv_address) {
605       .bo = NULL,
606       .offset = addr_u64,
607    };
608 }
609 
610 static inline bool
611 anv_address_is_null(struct anv_address addr)
612 {
613    return addr.bo == NULL && addr.offset == 0;
614 }
615 
616 static inline uint64_t
617 anv_address_physical(struct anv_address addr)
618 {
619    if (addr.bo && anv_bo_is_pinned(addr.bo)) {
620       return intel_canonical_address(addr.bo->offset + addr.offset);
621    } else {
622       return intel_canonical_address(addr.offset);
623    }
624 }
625 
626 static inline struct anv_address
627 anv_address_add(struct anv_address addr, uint64_t offset)
628 {
629    addr.offset += offset;
630    return addr;
631 }
632 
633 /* Represents a lock-free linked list of "free" things.  This is used by
634  * both the block pool and the state pools.  Unfortunately, in order to
635  * solve the ABA problem, we can't use a single uint32_t head.
636  */
637 union anv_free_list {
638    struct {
639       uint32_t offset;
640 
641       /* A simple count that is incremented every time the head changes. */
642       uint32_t count;
643    };
644    /* Make sure it's aligned to 64 bits. This will make atomic operations
645     * faster on 32 bit platforms.
646     */
647    uint64_t u64 __attribute__ ((aligned (8)));
648 };
649 
650 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
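
/* Editor's sketch of the ABA-safe pop this union enables; the real code is
 * anv_free_list_pop() in anv_allocator.c, and next_offset_of() below is a
 * hypothetical stand-in for the state-table lookup it performs.
 */
#if 0
static uint32_t
example_free_list_pop(union anv_free_list *list)
{
   union anv_free_list current, old, new;

   current.u64 = list->u64;
   while (current.offset != UINT32_MAX) {
      new.offset = next_offset_of(current.offset); /* hypothetical lookup */
      new.count = current.count + 1;  /* changes on every pop, defeating ABA */
      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
      if (old.u64 == current.u64)
         return current.offset;
      current = old;
   }
   return UINT32_MAX; /* list empty */
}
#endif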
651 
652 struct anv_block_state {
653    union {
654       struct {
655          uint32_t next;
656          uint32_t end;
657       };
658       /* Make sure it's aligned to 64 bits. This will make atomic operations
659        * faster on 32 bit platforms.
660        */
661       uint64_t u64 __attribute__ ((aligned (8)));
662    };
663 };
664 
665 #define anv_block_pool_foreach_bo(bo, pool)  \
666    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
667         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
668         _pp_bo++)
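
/* Usage sketch (editor's note): the macro declares `bo` itself, so callers
 * just pick a name:
 *
 *    anv_block_pool_foreach_bo(bo, pool)
 *       total += bo->size;
 */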
669 
670 #define ANV_MAX_BLOCK_POOL_BOS 20
671 
672 struct anv_block_pool {
673    const char *name;
674 
675    struct anv_device *device;
676    bool use_relocations;
677 
678    /* Wrapper BO for use in relocation lists.  This BO is simply a wrapper
679     * around the actual BO so that we grow the pool after the wrapper BO has
680     * been put in a relocation list.  This is only used in the non-softpin
681     * case.
682     */
683    struct anv_bo wrapper_bo;
684 
685    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
686    struct anv_bo *bo;
687    uint32_t nbos;
688 
689    uint64_t size;
690 
691    /* The address where the start of the pool is pinned. The various bos that
692     * are created as the pool grows will have addresses in the range
693     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
694     */
695    uint64_t start_address;
696 
697    /* The offset from the start of the bo to the "center" of the block
698     * pool.  Pointers to allocated blocks are given by
699     * bo.map + center_bo_offset + offsets.
700     */
701    uint32_t center_bo_offset;
702 
703    /* Current memory map of the block pool.  This pointer may or may not
704     * point to the actual beginning of the block pool memory.  If
705     * anv_block_pool_alloc_back has ever been called, then this pointer
706     * will point to the "center" position of the buffer and all offsets
707     * (negative or positive) given out by the block pool alloc functions
708     * will be valid relative to this pointer.
709     *
710     * In particular, map == bo.map + center_bo_offset
711     *
712     * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
713     * since it will handle the softpin case as well, where this points to NULL.
714     */
715    void *map;
716    int fd;
717 
718    /**
719     * Array of mmaps and gem handles owned by the block pool, reclaimed when
720     * the block pool is destroyed.
721     */
722    struct u_vector mmap_cleanups;
723 
724    struct anv_block_state state;
725 
726    struct anv_block_state back_state;
727 };
728 
729 /* Block pools are backed by a fixed-size 1GB memfd */
730 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
731 
732 /* The center of the block pool is also the middle of the memfd.  This may
733  * change in the future if we decide differently for some reason.
734  */
735 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
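
/* Editor's worked illustration of the "center" layout described in
 * anv_block_pool above: with center_bo_offset C, a front allocation at
 * offset +16 lives at map + C + 16, while a back allocation (negative
 * offset) at -32 lives at map + C - 32.
 */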
736 
737 static inline uint32_t
738 anv_block_pool_size(struct anv_block_pool *pool)
739 {
740    return pool->state.end + pool->back_state.end;
741 }
742 
743 struct anv_state {
744    int32_t offset;
745    uint32_t alloc_size;
746    void *map;
747    uint32_t idx;
748 };
749 
750 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
751 
752 struct anv_fixed_size_state_pool {
753    union anv_free_list free_list;
754    struct anv_block_state block;
755 };
756 
757 #define ANV_MIN_STATE_SIZE_LOG2 6
758 #define ANV_MAX_STATE_SIZE_LOG2 22
759 
760 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
761 
762 struct anv_free_entry {
763    uint32_t next;
764    struct anv_state state;
765 };
766 
767 struct anv_state_table {
768    struct anv_device *device;
769    int fd;
770    struct anv_free_entry *map;
771    uint32_t size;
772    struct anv_block_state state;
773    struct u_vector cleanups;
774 };
775 
776 struct anv_state_pool {
777    struct anv_block_pool block_pool;
778 
779    /* Offset into the relevant state base address where the state pool starts
780     * allocating memory.
781     */
782    int32_t start_offset;
783 
784    struct anv_state_table table;
785 
786    /* The size of blocks which will be allocated from the block pool */
787    uint32_t block_size;
788 
789    /** Free list for "back" allocations */
790    union anv_free_list back_alloc_free_list;
791 
792    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
793 };
794 
795 struct anv_state_reserved_pool {
796    struct anv_state_pool *pool;
797    union anv_free_list reserved_blocks;
798    uint32_t count;
799 };
800 
801 struct anv_state_stream {
802    struct anv_state_pool *state_pool;
803 
804    /* The size of blocks to allocate from the state pool */
805    uint32_t block_size;
806 
807    /* Current block we're allocating from */
808    struct anv_state block;
809 
810    /* Offset into the current block at which to allocate the next state */
811    uint32_t next;
812 
813    /* List of all blocks allocated from this pool */
814    struct util_dynarray all_blocks;
815 };
816 
817 /* The block_pool functions are exported for testing only.  The block pool should
818  * only be used via a state pool (see below).
819  */
820 VkResult anv_block_pool_init(struct anv_block_pool *pool,
821                              struct anv_device *device,
822                              const char *name,
823                              uint64_t start_address,
824                              uint32_t initial_size);
825 void anv_block_pool_finish(struct anv_block_pool *pool);
826 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
827                              uint32_t block_size, uint32_t *padding);
828 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
829                                   uint32_t block_size);
830 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset,
831                          uint32_t size);
832 
833 VkResult anv_state_pool_init(struct anv_state_pool *pool,
834                              struct anv_device *device,
835                              const char *name,
836                              uint64_t base_address,
837                              int32_t start_offset,
838                              uint32_t block_size);
839 void anv_state_pool_finish(struct anv_state_pool *pool);
840 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
841                                       uint32_t state_size, uint32_t alignment);
842 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
843 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
844 void anv_state_stream_init(struct anv_state_stream *stream,
845                            struct anv_state_pool *state_pool,
846                            uint32_t block_size);
847 void anv_state_stream_finish(struct anv_state_stream *stream);
848 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
849                                         uint32_t size, uint32_t alignment);
850 
851 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
852                                       struct anv_state_pool *parent,
853                                       uint32_t count, uint32_t size,
854                                       uint32_t alignment);
855 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
856 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
857 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
858                                   struct anv_state state);
859 
860 VkResult anv_state_table_init(struct anv_state_table *table,
861                              struct anv_device *device,
862                              uint32_t initial_entries);
863 void anv_state_table_finish(struct anv_state_table *table);
864 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
865                              uint32_t count);
866 void anv_free_list_push(union anv_free_list *list,
867                         struct anv_state_table *table,
868                         uint32_t idx, uint32_t count);
869 struct anv_state* anv_free_list_pop(union anv_free_list *list,
870                                     struct anv_state_table *table);
871 
872 
873 static inline struct anv_state *
874 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
875 {
876    return &table->map[idx].state;
877 }
878 /**
879  * Implements a pool of re-usable BOs.  The interface is identical to that
880  * of block_pool except that each block is its own BO.
881  */
882 struct anv_bo_pool {
883    const char *name;
884 
885    struct anv_device *device;
886 
887    struct util_sparse_array_free_list free_list[16];
888 };
889 
890 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
891                       const char *name);
892 void anv_bo_pool_finish(struct anv_bo_pool *pool);
893 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
894                            struct anv_bo **bo_out);
895 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
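
/* Usage sketch (editor's illustration; error handling elided, and
 * batch_bo_pool is just one of the pools embedded in anv_device):
 *
 *    struct anv_bo *bo;
 *    if (anv_bo_pool_alloc(&device->batch_bo_pool, 4096, &bo) == VK_SUCCESS) {
 *       ... write into bo->map ...
 *       anv_bo_pool_free(&device->batch_bo_pool, bo);
 *    }
 */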
896 
897 struct anv_scratch_pool {
898    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
899    struct anv_bo *bos[16][MESA_SHADER_STAGES];
900    uint32_t surfs[16];
901    struct anv_state surf_states[16];
902 };
903 
904 void anv_scratch_pool_init(struct anv_device *device,
905                            struct anv_scratch_pool *pool);
906 void anv_scratch_pool_finish(struct anv_device *device,
907                              struct anv_scratch_pool *pool);
908 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
909                                       struct anv_scratch_pool *pool,
910                                       gl_shader_stage stage,
911                                       unsigned per_thread_scratch);
912 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
913                                    struct anv_scratch_pool *pool,
914                                    unsigned per_thread_scratch);
915 
916 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
917 struct anv_bo_cache {
918    struct util_sparse_array bo_map;
919    pthread_mutex_t mutex;
920 };
921 
922 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
923                            struct anv_device *device);
924 void anv_bo_cache_finish(struct anv_bo_cache *cache);
925 
926 struct anv_queue_family {
927    /* Standard bits passed on to the client */
928    VkQueueFlags   queueFlags;
929    uint32_t       queueCount;
930 
931    /* Driver internal information */
932    enum drm_i915_gem_engine_class engine_class;
933 };
934 
935 #define ANV_MAX_QUEUE_FAMILIES 3
936 
937 struct anv_memory_type {
938    /* Standard bits passed on to the client */
939    VkMemoryPropertyFlags   propertyFlags;
940    uint32_t                heapIndex;
941 };
942 
943 struct anv_memory_heap {
944    /* Standard bits passed on to the client */
945    VkDeviceSize      size;
946    VkMemoryHeapFlags flags;
947 
948    /** Driver-internal book-keeping.
949     *
950     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
951     */
952    VkDeviceSize      used __attribute__ ((aligned (8)));
953 
954    bool              is_local_mem;
955 };
956 
957 struct anv_memregion {
958    struct drm_i915_gem_memory_class_instance region;
959    uint64_t size;
960    uint64_t available;
961 };
962 
963 struct anv_physical_device {
964     struct vk_physical_device                   vk;
965 
966     /* Link in anv_instance::physical_devices */
967     struct list_head                            link;
968 
969     struct anv_instance *                       instance;
970     char                                        path[20];
971     struct intel_device_info                      info;
972     /** Amount of "GPU memory" we want to advertise
973      *
974      * Clearly, this value is bogus since Intel is a UMA architecture.  On
975      * gfx7 platforms, we are limited by GTT size unless we want to implement
976      * fine-grained tracking and GTT splitting.  On Broadwell and above we are
977      * practically unlimited.  However, we will never report more than 3/4 of
978      * the total system ram to try and avoid running out of RAM.
979      */
980     bool                                        supports_48bit_addresses;
981     struct brw_compiler *                       compiler;
982     struct isl_device                           isl_dev;
983     struct intel_perf_config *                    perf;
984    /* True if hardware support is incomplete/alpha */
985     bool                                        is_alpha;
986     /*
987      * Number of commands required to implement a performance query begin +
988      * end.
989      */
990     uint32_t                                    n_perf_query_commands;
991     int                                         cmd_parser_version;
992     bool                                        has_exec_async;
993     bool                                        has_exec_capture;
994     int                                         max_context_priority;
995     bool                                        has_context_isolation;
996     bool                                        has_mmap_offset;
997     bool                                        has_userptr_probe;
998     uint64_t                                    gtt_size;
999 
1000     bool                                        use_relocations;
1001     bool                                        use_softpin;
1002     bool                                        always_use_bindless;
1003     bool                                        use_call_secondary;
1004 
1005     /** True if we can access buffers using A64 messages */
1006     bool                                        has_a64_buffer_access;
1007     /** True if we can use bindless access for images */
1008     bool                                        has_bindless_images;
1009     /** True if we can use bindless access for samplers */
1010     bool                                        has_bindless_samplers;
1011     /** True if we can use timeline semaphores through execbuf */
1012     bool                                        has_exec_timeline;
1013 
1014     /** True if we can read the GPU timestamp register
1015      *
1016      * When running in a virtual context, the timestamp register is unreadable
1017      * on Gfx12+.
1018      */
1019     bool                                        has_reg_timestamp;
1020 
1021     /** True if this device has implicit AUX
1022      *
1023      * If true, CCS is handled as an implicit attachment to the BO rather than
1024      * as an explicitly bound surface.
1025      */
1026     bool                                        has_implicit_ccs;
1027 
1028     bool                                        always_flush_cache;
1029 
1030     struct {
1031       uint32_t                                  family_count;
1032       struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
1033     } queue;
1034 
1035     struct {
1036       uint32_t                                  type_count;
1037       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
1038       uint32_t                                  heap_count;
1039       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
1040       bool                                      need_clflush;
1041     } memory;
1042 
1043     /* Either we have a single vram region and it's all mappable, or we have
1044      * both mappable & non-mappable parts. System memory is always available.
1045      */
1046     struct anv_memregion                        vram_mappable;
1047     struct anv_memregion                        vram_non_mappable;
1048     struct anv_memregion                        sys;
1049     uint8_t                                     driver_build_sha1[20];
1050     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
1051     uint8_t                                     driver_uuid[VK_UUID_SIZE];
1052     uint8_t                                     device_uuid[VK_UUID_SIZE];
1053 
1054     struct vk_sync_type                         sync_syncobj_type;
1055     struct vk_sync_timeline_type                sync_timeline_type;
1056     const struct vk_sync_type *                 sync_types[4];
1057 
1058     struct wsi_device                       wsi_device;
1059     int                                         local_fd;
1060     bool                                        has_local;
1061     int64_t                                     local_major;
1062     int64_t                                     local_minor;
1063     int                                         master_fd;
1064     bool                                        has_master;
1065     int64_t                                     master_major;
1066     int64_t                                     master_minor;
1067     struct drm_i915_query_engine_info *         engine_info;
1068 
1069     void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, bool);
1070     struct intel_measure_device                 measure_device;
1071 };
1072 
1073 static inline bool
1074 anv_physical_device_has_vram(const struct anv_physical_device *device)
1075 {
1076    return device->vram_mappable.size > 0;
1077 }
1078 
1079 struct anv_app_info {
1080    const char*        app_name;
1081    uint32_t           app_version;
1082    const char*        engine_name;
1083    uint32_t           engine_version;
1084    uint32_t           api_version;
1085 };
1086 
1087 struct anv_instance {
1088     struct vk_instance                          vk;
1089 
1090     bool                                        physical_devices_enumerated;
1091     struct list_head                            physical_devices;
1092 
1093     struct driOptionCache                       dri_options;
1094     struct driOptionCache                       available_dri_options;
1095 
1096     /**
1097      * Workarounds for game bugs.
1098      */
1099     bool                                        assume_full_subgroups;
1100     bool                                        limit_trig_input_range;
1101     bool                                        sample_mask_out_opengl_behaviour;
1102 };
1103 
1104 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1105 void anv_finish_wsi(struct anv_physical_device *physical_device);
1106 
1107 struct anv_queue {
1108    struct vk_queue                           vk;
1109 
1110    struct anv_device *                       device;
1111 
1112    const struct anv_queue_family *           family;
1113 
1114    uint32_t                                  index_in_family;
1115 
1116    uint32_t                                  exec_flags;
1117 
1118    /** Synchronization object for debug purposes (DEBUG_SYNC) */
1119    struct vk_sync                           *sync;
1120 
1121    struct intel_ds_queue *                   ds;
1122 };
1123 
1124 struct nir_xfb_info;
1125 struct anv_pipeline_bind_map;
1126 
1127 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1128 
1129 struct anv_shader_bin *
1130 anv_device_search_for_kernel(struct anv_device *device,
1131                              struct vk_pipeline_cache *cache,
1132                              const void *key_data, uint32_t key_size,
1133                              bool *user_cache_bit);
1134 
1135 struct anv_shader_bin *
1136 anv_device_upload_kernel(struct anv_device *device,
1137                          struct vk_pipeline_cache *cache,
1138                          gl_shader_stage stage,
1139                          const void *key_data, uint32_t key_size,
1140                          const void *kernel_data, uint32_t kernel_size,
1141                          const struct brw_stage_prog_data *prog_data,
1142                          uint32_t prog_data_size,
1143                          const struct brw_compile_stats *stats,
1144                          uint32_t num_stats,
1145                          const struct nir_xfb_info *xfb_info,
1146                          const struct anv_pipeline_bind_map *bind_map);
1147 
1148 struct nir_shader;
1149 struct nir_shader_compiler_options;
1150 
1151 struct nir_shader *
1152 anv_device_search_for_nir(struct anv_device *device,
1153                           struct vk_pipeline_cache *cache,
1154                           const struct nir_shader_compiler_options *nir_options,
1155                           unsigned char sha1_key[20],
1156                           void *mem_ctx);
1157 
1158 void
1159 anv_device_upload_nir(struct anv_device *device,
1160                       struct vk_pipeline_cache *cache,
1161                       const struct nir_shader *nir,
1162                       unsigned char sha1_key[20]);
1163 
1164 struct anv_device {
1165     struct vk_device                            vk;
1166 
1167     struct anv_physical_device *                physical;
1168     struct intel_device_info                      info;
1169     struct isl_device                           isl_dev;
1170     int                                         context_id;
1171     int                                         fd;
1172     bool                                        can_chain_batches;
1173     bool                                        robust_buffer_access;
1174 
1175     pthread_mutex_t                             vma_mutex;
1176     struct util_vma_heap                        vma_lo;
1177     struct util_vma_heap                        vma_cva;
1178     struct util_vma_heap                        vma_hi;
1179 
1180     /** List of all anv_device_memory objects */
1181     struct list_head                            memory_objects;
1182 
1183     struct anv_bo_pool                          batch_bo_pool;
1184     struct anv_bo_pool                          utrace_bo_pool;
1185 
1186     struct anv_bo_cache                         bo_cache;
1187 
1188     struct anv_state_pool                       general_state_pool;
1189     struct anv_state_pool                       dynamic_state_pool;
1190     struct anv_state_pool                       instruction_state_pool;
1191     struct anv_state_pool                       binding_table_pool;
1192     struct anv_state_pool                       surface_state_pool;
1193 
1194     struct anv_state_reserved_pool              custom_border_colors;
1195 
1196     /** BO used for various workarounds
1197      *
1198      * There are a number of workarounds on our hardware which require writing
1199      * data somewhere and it doesn't really matter where.  For that, we use
1200      * this BO and just write to the first dword or so.
1201      *
1202      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1203      * For that, we use the high bytes (>= 1024) of the workaround BO.
1204      */
1205     struct anv_bo *                             workaround_bo;
1206     struct anv_address                          workaround_address;
1207 
1208     struct anv_bo *                             trivial_batch_bo;
1209     struct anv_state                            null_surface_state;
1210 
1211     struct vk_pipeline_cache *                  default_pipeline_cache;
1212     struct vk_pipeline_cache *                  internal_cache;
1213     struct blorp_context                        blorp;
1214 
1215     struct anv_state                            border_colors;
1216 
1217     struct anv_state                            slice_hash;
1218 
1219     /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1220      *
1221      * We need to emit CPS_STATE structures for each viewport accessible by a
1222      * pipeline. So rather than write many identical CPS_STATE structures
1223  * dynamically, we can enumerate all possible combinations and then just
1224      * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1225      * array.
1226      */
1227     struct anv_state                            cps_states;
1228 
1229     uint32_t                                    queue_count;
1230     struct anv_queue  *                         queues;
1231 
1232     struct anv_scratch_pool                     scratch_pool;
1233     struct anv_bo                              *rt_scratch_bos[16];
1234 
1235     /** Shadow ray query BO
1236      *
1237      * The ray_query_bo only holds the current ray being traced. When using
1238      * more than 1 ray query per thread, we cannot fit all the queries in
1239  * there, so we need another buffer to hold query data that is not
1240      * currently being used by the HW for tracing, similar to a scratch space.
1241      *
1242      * The size of the shadow buffer depends on the number of queries per
1243      * shader.
1244      */
1245     struct anv_bo                              *ray_query_shadow_bos[16];
1246     /** Ray query buffer used to communicate with the HW unit.
1247      */
1248     struct anv_bo                              *ray_query_bo;
1249 
1250     struct anv_shader_bin                      *rt_trampoline;
1251     struct anv_shader_bin                      *rt_trivial_return;
1252 
1253     pthread_mutex_t                             mutex;
1254     pthread_cond_t                              queue_submit;
1255 
1256     struct intel_batch_decode_ctx               decoder_ctx;
1257     /*
1258      * When decoding an anv_cmd_buffer, we might need to search for BOs through
1259      * the cmd_buffer's list.
1260      */
1261     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
1262 
1263     int                                         perf_fd; /* -1 if not opened */
1264     uint64_t                                    perf_metric; /* 0 if unset */
1265 
1266     struct intel_aux_map_context                *aux_map_ctx;
1267 
1268     const struct intel_l3_config                *l3_config;
1269 
1270     struct intel_debug_block_frame              *debug_frame_desc;
1271 
1272     struct intel_ds_device                       ds;
1273 };
1274 
1275 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1276 #define ANV_ALWAYS_SOFTPIN true
1277 #else
1278 #define ANV_ALWAYS_SOFTPIN false
1279 #endif
1280 
1281 static inline bool
1282 anv_use_relocations(const struct anv_physical_device *pdevice)
1283 {
1284 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1285    /* Sky Lake and later always use softpin */
1286    assert(!pdevice->use_relocations);
1287    return false;
1288 #elif defined(GFX_VERx10) && GFX_VERx10 < 80
1289    /* Haswell and earlier never use softpin */
1290    assert(pdevice->use_relocations);
1291    return true;
1292 #else
1293    /* If we don't have a GFX_VERx10 #define, we need to look at the physical
1294     * device.  Also, for GFX version 8, we need to look at the physical
1295     * device because Broadwell softpins but Cherryview doesn't.
1296     */
1297    return pdevice->use_relocations;
1298 #endif
1299 }
1300 
1301 static inline struct anv_state_pool *
1302 anv_binding_table_pool(struct anv_device *device)
1303 {
1304    if (anv_use_relocations(device->physical))
1305       return &device->surface_state_pool;
1306    else
1307       return &device->binding_table_pool;
1308 }
1309 
1310 static inline struct anv_state
1311 anv_binding_table_pool_alloc(struct anv_device *device)
1312 {
1313    if (anv_use_relocations(device->physical))
1314       return anv_state_pool_alloc_back(&device->surface_state_pool);
1315    else
1316       return anv_state_pool_alloc(&device->binding_table_pool,
1317                                   device->binding_table_pool.block_size, 0);
1318 }
1319 
1320 static inline void
1321 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1322    anv_state_pool_free(anv_binding_table_pool(device), state);
1323 }
1324 
1325 static inline uint32_t
1326 anv_mocs(const struct anv_device *device,
1327          const struct anv_bo *bo,
1328          isl_surf_usage_flags_t usage)
1329 {
1330    return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
1331 }
1332 
1333 void anv_device_init_blorp(struct anv_device *device);
1334 void anv_device_finish_blorp(struct anv_device *device);
1335 
1336 enum anv_bo_alloc_flags {
1337    /** Specifies that the BO must have a 32-bit address
1338     *
1339     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
1340     */
1341    ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),
1342 
1343    /** Specifies that the BO may be shared externally */
1344    ANV_BO_ALLOC_EXTERNAL =       (1 << 1),
1345 
1346    /** Specifies that the BO should be mapped */
1347    ANV_BO_ALLOC_MAPPED =         (1 << 2),
1348 
1349    /** Specifies that the BO should be snooped so we get coherency */
1350    ANV_BO_ALLOC_SNOOPED =        (1 << 3),
1351 
1352    /** Specifies that the BO should be captured in error states */
1353    ANV_BO_ALLOC_CAPTURE =        (1 << 4),
1354 
1355    /** Specifies that the BO will have an address assigned by the caller
1356     *
1357     * Such BOs do not exist in any VMA heap.
1358     */
1359    ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
1360 
1361    /** Enables implicit synchronization on the BO
1362     *
1363     * This is the opposite of EXEC_OBJECT_ASYNC.
1364     */
1365    ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),
1366 
1367    /** Enables implicit synchronization on the BO
1368     *
1369     * This is equivalent to EXEC_OBJECT_WRITE.
1370     */
1371    ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
1372 
1373    /** Has an address which is visible to the client */
1374    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
1375 
1376    /** This buffer has implicit CCS data attached to it */
1377    ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
1378 
1379    /** This buffer is allocated from local memory */
1380    ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
1381 
1382    /** This buffer is allocated from local memory and should be cpu visible */
1383    ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11),
1384 };

VkResult anv_device_alloc_bo(struct anv_device *device,
                             const char *name, uint64_t size,
                             enum anv_bo_alloc_flags alloc_flags,
                             uint64_t explicit_address,
                             struct anv_bo **bo);
VkResult anv_device_map_bo(struct anv_device *device,
                           struct anv_bo *bo,
                           uint64_t offset,
                           size_t size,
                           uint32_t gem_flags,
                           void **map_out);
void anv_device_unmap_bo(struct anv_device *device,
                         struct anv_bo *bo,
                         void *map, size_t map_size);
VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                            void *host_ptr, uint32_t size,
                                            enum anv_bo_alloc_flags alloc_flags,
                                            uint64_t client_address,
                                            struct anv_bo **bo_out);
VkResult anv_device_import_bo(struct anv_device *device, int fd,
                              enum anv_bo_alloc_flags alloc_flags,
                              uint64_t client_address,
                              struct anv_bo **bo);
VkResult anv_device_export_bo(struct anv_device *device,
                              struct anv_bo *bo, int *fd_out);
VkResult anv_device_get_bo_tiling(struct anv_device *device,
                                  struct anv_bo *bo,
                                  enum isl_tiling *tiling_out);
VkResult anv_device_set_bo_tiling(struct anv_device *device,
                                  struct anv_bo *bo,
                                  uint32_t row_pitch_B,
                                  enum isl_tiling tiling);
void anv_device_release_bo(struct anv_device *device,
                           struct anv_bo *bo);
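
/* Illustrative usage sketch (not from this header; error handling elided):
 *
 *    struct anv_bo *bo;
 *    VkResult result = anv_device_alloc_bo(device, "example", 4096,
 *                                          ANV_BO_ALLOC_MAPPED |
 *                                          ANV_BO_ALLOC_CAPTURE,
 *                                          0, &bo); // no explicit address
 *    if (result == VK_SUCCESS)
 *       anv_device_release_bo(device, bo);
 *
 * With ANV_BO_ALLOC_FIXED_ADDRESS the explicit_address argument supplies the
 * BO's address; with ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS it is treated as a
 * client address hint.
 */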

static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
{
   return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
}

VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                         int64_t timeout);

VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
                        uint32_t exec_flags,
                        const VkDeviceQueueCreateInfo *pCreateInfo,
                        uint32_t index_in_family);
void anv_queue_finish(struct anv_queue *queue);

VkResult anv_queue_submit(struct vk_queue *queue,
                          struct vk_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
                                       struct anv_batch *batch);

void* anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
                                uint32_t flags, uint32_t num_regions,
                                struct drm_i915_gem_memory_class_instance *regions);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
                       struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
                       uint32_t stride, uint32_t tiling);
int anv_gem_create_context(struct anv_device *device);
bool anv_gem_has_context_priority(int fd, int priority);
int anv_gem_destroy_context(struct anv_device *device, int context);
int anv_gem_set_context_param(int fd, int context, uint32_t param,
                              uint64_t value);
int anv_gem_get_param(int fd, uint32_t param);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
int anv_gem_context_get_reset_stats(int fd, int context,
                                    uint32_t *active, uint32_t *pending);
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                       uint32_t read_domains, uint32_t write_domain);
int anv_i915_query(int fd, uint64_t query_id, void *buffer,
                   int32_t *buffer_len);
struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);

uint64_t anv_vma_alloc(struct anv_device *device,
                       uint64_t size, uint64_t align,
                       enum anv_bo_alloc_flags alloc_flags,
                       uint64_t client_address);
void anv_vma_free(struct anv_device *device,
                  uint64_t address, uint64_t size);

struct anv_reloc_list {
   uint32_t                                     num_relocs;
   uint32_t                                     array_length;
   struct drm_i915_gem_relocation_entry *       relocs;
   struct anv_bo **                             reloc_bos;
   uint32_t                                     dep_words;
   BITSET_WORD *                                deps;
};

VkResult anv_reloc_list_init(struct anv_reloc_list *list,
                             const VkAllocationCallbacks *alloc);
void anv_reloc_list_finish(struct anv_reloc_list *list,
                           const VkAllocationCallbacks *alloc);

VkResult anv_reloc_list_add(struct anv_reloc_list *list,
                            const VkAllocationCallbacks *alloc,
                            uint32_t offset, struct anv_bo *target_bo,
                            uint32_t delta, uint64_t *address_u64_out);

VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
                               const VkAllocationCallbacks *alloc,
                               struct anv_bo *target_bo);

struct anv_batch_bo {
   /* Link in the anv_cmd_buffer.owned_batch_bos list */
   struct list_head                             link;

   struct anv_bo *                              bo;

   /* Bytes actually consumed in this batch BO */
   uint32_t                                     length;

   /* When this batch BO is used as part of a primary batch buffer, this
    * tracks whether it is chained to another primary batch buffer.
    *
    * If this is the case, the relocation list's last entry points to the
    * location of the MI_BATCH_BUFFER_START chaining to the next batch.
    */
   bool                                         chained;

   struct anv_reloc_list                        relocs;
};

struct anv_batch {
   const VkAllocationCallbacks *                alloc;

   struct anv_address                           start_addr;

   void *                                       start;
   void *                                       end;
   void *                                       next;

   struct anv_reloc_list *                      relocs;

   /* This callback is called (with the associated user data) in the event
    * that the batch runs out of space.
    */
   VkResult (*extend_cb)(struct anv_batch *, void *);
   void *                                       user_data;

   /**
    * Current error status of the command buffer. Used to track inconsistent
    * or incomplete command buffer states that are the consequence of run-time
    * errors such as out-of-memory scenarios. We want to track this in the
    * batch because the command buffer object is not visible to some parts
    * of the driver.
    */
   VkResult                                     status;
};

void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);

static inline void
anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
                      void *map, size_t size)
{
   batch->start_addr = addr;
   batch->next = batch->start = map;
   batch->end = map + size;
}

static inline VkResult
anv_batch_set_error(struct anv_batch *batch, VkResult error)
{
   assert(error != VK_SUCCESS);
   if (batch->status == VK_SUCCESS)
      batch->status = error;
   return batch->status;
}

static inline bool
anv_batch_has_error(struct anv_batch *batch)
{
   return batch->status != VK_SUCCESS;
}
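
/* Usage sketch (illustrative): emit paths typically bail out early once a
 * batch is in an error state,
 *
 *    if (anv_batch_has_error(&cmd_buffer->batch))
 *       return;
 *
 * and record allocation failures with
 *
 *    anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
 */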

static inline uint64_t
anv_batch_emit_reloc(struct anv_batch *batch,
                     void *location, struct anv_bo *bo, uint32_t delta)
{
   uint64_t address_u64 = 0;
   VkResult result;

   if (ANV_ALWAYS_SOFTPIN) {
      address_u64 = bo->offset + delta;
      result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
   } else {
      result = anv_reloc_list_add(batch->relocs, batch->alloc,
                                  location - batch->start, bo, delta,
                                  &address_u64);
   }
   if (unlikely(result != VK_SUCCESS)) {
      anv_batch_set_error(batch, result);
      return 0;
   }

   return address_u64;
}

static inline void
write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
{
   unsigned reloc_size = 0;
   if (device->info.ver >= 8) {
      reloc_size = sizeof(uint64_t);
      *(uint64_t *)p = intel_canonical_address(v);
   } else {
      reloc_size = sizeof(uint32_t);
      *(uint32_t *)p = v;
   }

   if (flush && device->physical->memory.need_clflush)
      intel_flush_range(p, reloc_size);
}

static inline uint64_t
_anv_combine_address(struct anv_batch *batch, void *location,
                     const struct anv_address address, uint32_t delta)
{
   if (address.bo == NULL) {
      return address.offset + delta;
   } else if (batch == NULL) {
      assert(anv_bo_is_pinned(address.bo));
      return anv_address_physical(anv_address_add(address, delta));
   } else {
      assert(batch->start <= location && location < batch->end);
      /* i915 relocations are signed. */
      assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
      return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
   }
}

#define __gen_address_type struct anv_address
#define __gen_user_data struct anv_batch
#define __gen_combine_address _anv_combine_address

/* Wrapper macros needed to work around preprocessor argument issues.  In
 * particular, arguments don't get pre-evaluated if they are concatenated.
 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
 * We can work around this easily enough with these helpers.
 */
#define __anv_cmd_length(cmd) cmd ## _length
#define __anv_cmd_length_bias(cmd) cmd ## _length_bias
#define __anv_cmd_header(cmd) cmd ## _header
#define __anv_cmd_pack(cmd) cmd ## _pack
#define __anv_reg_num(reg) reg ## _num

#define anv_pack_struct(dst, struc, ...) do {                              \
      struct struc __template = {                                          \
         __VA_ARGS__                                                       \
      };                                                                   \
      __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
   } while (0)
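
/* Illustrative use of anv_pack_struct (the GENX names are assumptions from
 * the generated pack headers, not declared here):
 *
 *    uint32_t dwords[GENX(3DSTATE_SF_length)];
 *    anv_pack_struct(dwords, GENX(3DSTATE_SF), .LineWidth = 1.0f);
 *
 * Dwords packed this way can later be OR-merged with another set via
 * anv_batch_emit_merge() below.
 */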

#define anv_batch_emitn(batch, n, cmd, ...) ({             \
      void *__dst = anv_batch_emit_dwords(batch, n);       \
      if (__dst) {                                         \
         struct cmd __template = {                         \
            __anv_cmd_header(cmd),                         \
            .DWordLength = n - __anv_cmd_length_bias(cmd), \
            __VA_ARGS__                                    \
         };                                                \
         __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
      }                                                    \
      __dst;                                               \
   })
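
/* Illustrative use: variable-length packets pass the total dword count and
 * pack the trailing elements by hand (GENX name assumed):
 *
 *    const uint32_t num_dwords = 1 + num_buffers * 4;
 *    uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
 *                                  GENX(3DSTATE_VERTEX_BUFFERS));
 *
 * A NULL return means the batch failed to grow; the error is recorded on
 * the batch.
 */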

#define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
   do {                                                                 \
      uint32_t *dw;                                                     \
                                                                        \
      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
      dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
      if (!dw)                                                          \
         break;                                                         \
      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
         dw[i] = (dwords0)[i] | (dwords1)[i];                           \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));   \
   } while (0)

#define anv_batch_emit(batch, cmd, name)                                \
   for (struct cmd name = { __anv_cmd_header(cmd) },                    \
        *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
        __builtin_expect(_dst != NULL, 1);                              \
        ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
           VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
           _dst = NULL;                                                 \
         }))
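
/* Illustrative use: anv_batch_emit scopes access to the template struct,
 * which is packed into the batch when the block exits (GENX name assumed):
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *       pc.StallAtPixelScoreboard     = true;
 *    }
 */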

#define anv_batch_write_reg(batch, reg, name)                           \
   for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
        ({                                                              \
            uint32_t _dw[__anv_cmd_length(reg)];                        \
            __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
            for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
               anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
                  lri.RegisterOffset   = __anv_reg_num(reg);            \
                  lri.DataDWord        = _dw[i];                        \
               }                                                        \
            }                                                           \
            _cont = NULL;                                               \
         }))
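
/* Illustrative use (register and field names are assumptions from the
 * generated headers):
 *
 *    anv_batch_write_reg(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr) {
 *       l3cr.SLMEnable = true;
 *    }
 *
 * Each dword of the packed register value is written with an
 * MI_LOAD_REGISTER_IMM when the block exits.
 */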

/* #define __gen_get_batch_dwords anv_batch_emit_dwords */
/* #define __gen_get_batch_address anv_batch_address */
/* #define __gen_address_value anv_address_physical */
/* #define __gen_address_offset anv_address_add */

struct anv_device_memory {
   struct vk_object_base                        base;

   struct list_head                             link;

   struct anv_bo *                              bo;
   const struct anv_memory_type *               type;

   void *                                       map;
   size_t                                       map_size;

   /* The map, from the user's PoV, is map + map_delta */
   uint64_t                                     map_delta;

   /* If set, we are holding a reference to an AHardwareBuffer which we must
    * release when the memory is freed.
    */
   struct AHardwareBuffer *                     ahw;

   /* If set, this memory comes from a host pointer. */
   void *                                       host_ptr;
};

/**
 * Header for Vertex URB Entry (VUE)
 */
struct anv_vue_header {
   uint32_t Reserved;
   uint32_t RTAIndex; /* RenderTargetArrayIndex */
   uint32_t ViewportIndex;
   float PointWidth;
};

/** Struct representing a sampled image descriptor
 *
 * This descriptor layout is used for sampled images, bare samplers, and
 * combined image/sampler descriptors.
 */
struct anv_sampled_image_descriptor {
   /** Bindless image handle
    *
    * This is expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t image;

   /** Bindless sampler handle
    *
    * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
    * to the dynamic state base address.
    */
   uint32_t sampler;
};

struct anv_texture_swizzle_descriptor {
   /** Texture swizzle
    *
    * See also nir_intrinsic_channel_select_intel
    */
   uint8_t swizzle[4];

   /** Unused padding to ensure the struct is a multiple of 64 bits */
   uint32_t _pad;
};

/** Struct representing a storage image descriptor */
struct anv_storage_image_descriptor {
   /** Bindless image handles
    *
    * These are expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t vanilla;
   uint32_t lowered;
};

/** Struct representing an address/range descriptor
 *
 * The fields of this struct correspond directly to the data layout of
 * nir_address_format_64bit_bounded_global addresses.  The last field is the
 * offset component of the NIR address, so it must be zero so that loading
 * the descriptor yields a pointer to the start of the range.
 */
struct anv_address_range_descriptor {
   uint64_t address;
   uint32_t range;
   uint32_t zero;
};

enum anv_descriptor_data {
   /** The descriptor contains a BTI reference to a surface state */
   ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
   /** The descriptor contains a BTI reference to a sampler state */
   ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
   /** The descriptor contains an actual buffer view */
   ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
   /** The descriptor contains auxiliary image layout data */
   ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
   /** The descriptor contains inline uniform data */
   ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
   /** anv_address_range_descriptor with a buffer address and range */
   ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
   /** Bindless surface handle */
   ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
   /** Storage image handles */
   ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
   /** Texture swizzle (anv_texture_swizzle_descriptor) */
   ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
};

struct anv_descriptor_set_binding_layout {
   /* The type of the descriptors in this binding */
   VkDescriptorType type;

   /* Flags provided when this binding was created */
   VkDescriptorBindingFlags flags;

   /* Bitfield representing the type of data this descriptor contains */
   enum anv_descriptor_data data;

   /* Maximum number of YCbCr texture/sampler planes */
   uint8_t max_plane_count;

   /* Number of array elements in this binding (or size in bytes for inline
    * uniform data)
    */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   /* Index into the dynamic state array for a dynamic buffer */
   int16_t dynamic_offset_index;

   /* Index into the descriptor set buffer views */
   int32_t buffer_view_index;

   /* Offset into the descriptor buffer where this descriptor lives */
   uint32_t descriptor_offset;

   /* Precomputed stride */
   unsigned descriptor_stride;

   /* Immutable samplers (or NULL if no immutable samplers) */
   struct anv_sampler **immutable_samplers;
};

bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
                                      const struct anv_descriptor_set_binding_layout *binding,
                                      bool sampler);

bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
                                      const struct anv_descriptor_set_binding_layout *binding,
                                      bool sampler);

struct anv_descriptor_set_layout {
   struct vk_object_base base;

   /* Descriptor set layouts can be destroyed at almost any time */
   uint32_t ref_cnt;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total number of descriptors */
   uint32_t descriptor_count;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of buffer views in this descriptor set */
   uint32_t buffer_view_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
    * this buffer
    */
   VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];

   /* Size of the descriptor buffer for this descriptor set */
   uint32_t descriptor_buffer_size;

   /* Bindings in this descriptor set */
   struct anv_descriptor_set_binding_layout binding[0];
};

void anv_descriptor_set_layout_destroy(struct anv_device *device,
                                       struct anv_descriptor_set_layout *layout);

static inline void
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
{
   assert(layout && layout->ref_cnt >= 1);
   p_atomic_inc(&layout->ref_cnt);
}

static inline void
anv_descriptor_set_layout_unref(struct anv_device *device,
                                struct anv_descriptor_set_layout *layout)
{
   assert(layout && layout->ref_cnt >= 1);
   if (p_atomic_dec_zero(&layout->ref_cnt))
      anv_descriptor_set_layout_destroy(device, layout);
}
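
/* Illustrative lifetime sketch: anything that caches a pointer to a layout
 * takes a reference first and drops it when done,
 *
 *    anv_descriptor_set_layout_ref(layout);
 *    set->layout = layout;
 *    ...
 *    anv_descriptor_set_layout_unref(device, set->layout);
 *
 * so the layout survives until the last user releases it.
 */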

struct anv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         VkImageLayout layout;
         struct anv_image_view *image_view;
         struct anv_sampler *sampler;
      };

      struct {
         struct anv_buffer_view *set_buffer_view;
         struct anv_buffer *buffer;
         uint64_t offset;
         uint64_t range;
      };

      struct anv_buffer_view *buffer_view;

      struct anv_acceleration_structure *accel_struct;
   };
};

struct anv_descriptor_set {
   struct vk_object_base base;

   struct anv_descriptor_pool *pool;
   struct anv_descriptor_set_layout *layout;

   /* Amount of space occupied in the pool by this descriptor set. It can
    * be larger than the size of the descriptor set.
    */
   uint32_t size;

   /* State relative to anv_descriptor_pool::bo */
   struct anv_state desc_mem;
   /* Surface state for the descriptor buffer */
   struct anv_state desc_surface_state;

   /* Descriptor set address. */
   struct anv_address desc_addr;

   uint32_t buffer_view_count;
   struct anv_buffer_view *buffer_views;

   /* Link to the descriptor pool's desc_sets list. */
   struct list_head pool_link;

   uint32_t descriptor_count;
   struct anv_descriptor descriptors[0];
};

static inline bool
anv_descriptor_set_is_push(struct anv_descriptor_set *set)
{
   return set->pool == NULL;
}

struct anv_buffer_view {
   struct vk_object_base base;

   uint64_t range; /**< VkBufferViewCreateInfo::range */

   struct anv_address address;

   struct anv_state surface_state;
   struct anv_state storage_surface_state;
   struct anv_state lowered_storage_surface_state;

   struct brw_image_param lowered_storage_image_param;
};

struct anv_push_descriptor_set {
   struct anv_descriptor_set set;

   /* Put this field right behind anv_descriptor_set so it fills up the
    * descriptors[0] field.
    */
   struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];

   /** True if the descriptor set buffer has been referenced by a draw or
    * dispatch command.
    */
   bool set_used_on_gpu;

   struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
};

static inline struct anv_address
anv_descriptor_set_address(struct anv_descriptor_set *set)
{
   if (anv_descriptor_set_is_push(set)) {
      /* We have to flag the push descriptor set as used on the GPU so that
       * the next time we push descriptors, we grab new memory.
       */
      struct anv_push_descriptor_set *push_set =
         (struct anv_push_descriptor_set *)set;
      push_set->set_used_on_gpu = true;
   }

   return set->desc_addr;
}

struct anv_descriptor_pool {
   struct vk_object_base base;

   uint32_t size;
   uint32_t next;
   uint32_t free_list;

   struct anv_bo *bo;
   struct util_vma_heap bo_heap;

   struct anv_state_stream surface_state_stream;
   void *surface_state_free_list;

   struct list_head desc_sets;

   bool host_only;

   char data[0];
};

struct anv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user-provided data */
   size_t offset;

   /* Stride between elements in the user-provided data */
   size_t stride;
};

struct anv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct anv_descriptor_template_entry entries[0];
};

size_t
anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
                               uint32_t var_desc_count);

uint32_t
anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
                                                 uint32_t var_desc_count);

void
anv_descriptor_set_write_image_view(struct anv_device *device,
                                    struct anv_descriptor_set *set,
                                    const VkDescriptorImageInfo * const info,
                                    VkDescriptorType type,
                                    uint32_t binding,
                                    uint32_t element);

void
anv_descriptor_set_write_buffer_view(struct anv_device *device,
                                     struct anv_descriptor_set *set,
                                     VkDescriptorType type,
                                     struct anv_buffer_view *buffer_view,
                                     uint32_t binding,
                                     uint32_t element);

void
anv_descriptor_set_write_buffer(struct anv_device *device,
                                struct anv_descriptor_set *set,
                                struct anv_state_stream *alloc_stream,
                                VkDescriptorType type,
                                struct anv_buffer *buffer,
                                uint32_t binding,
                                uint32_t element,
                                VkDeviceSize offset,
                                VkDeviceSize range);

void
anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
                                                struct anv_descriptor_set *set,
                                                struct anv_acceleration_structure *accel,
                                                uint32_t binding,
                                                uint32_t element);

void
anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
                                             struct anv_descriptor_set *set,
                                             uint32_t binding,
                                             const void *data,
                                             size_t offset,
                                             size_t size);

void
anv_descriptor_set_write_template(struct anv_device *device,
                                  struct anv_descriptor_set *set,
                                  struct anv_state_stream *alloc_stream,
                                  const struct anv_descriptor_update_template *template,
                                  const void *data);

#define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX

struct anv_pipeline_binding {
   /** Index in the descriptor set
    *
    * This is a flattened index; the descriptor set layout is already taken
    * into account.
    */
   uint32_t index;

   /** The descriptor set this surface corresponds to.
    *
    * The special ANV_DESCRIPTOR_SET_* values above indicate that this
    * binding is not a normal descriptor set but something else.
    */
   uint8_t set;

   union {
      /** Plane in the binding index for images */
      uint8_t plane;

      /** Dynamic offset index (for dynamic UBOs and SSBOs) */
      uint8_t dynamic_offset_index;
   };

   /** For a storage image, whether it requires a lowered surface */
   uint8_t lowered_storage_surface;

   /** Pad to 64 bits so that there are no holes and we can safely memcmp
    * assuming POD zero-initialization.
    */
   uint8_t pad;
};

struct anv_push_range {
   /** Index in the descriptor set */
   uint32_t index;

   /** Descriptor set index */
   uint8_t set;

   /** Dynamic offset index (for dynamic UBOs) */
   uint8_t dynamic_offset_index;

   /** Start offset in units of 32B */
   uint8_t start;

   /** Range in units of 32B */
   uint8_t length;
};

struct anv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct anv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   unsigned char sha1[20];
};

struct anv_buffer {
   struct vk_buffer vk;

   /* Set when bound */
   struct anv_address address;
};

enum anv_cmd_dirty_bits {
   ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
   ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
   ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 2,
   ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 3,
};
typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;

enum anv_pipe_bits {
   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
   ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
   ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
   ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
   ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
   ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),

   /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
    * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
    * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
    */
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
   ANV_PIPE_PSS_STALL_SYNC_BIT               = (1 << 15),
   ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
   ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),

   /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
    * a flush has happened but not a CS stall.  The next time we do any sort
    * of invalidation we need to insert a CS stall at that time.  Otherwise,
    * we would have to CS stall on every flush which could be bad.
    */
   ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),

   /* This bit does not exist directly in PIPE_CONTROL. It means that render
    * target operations related to transfer commands with VkBuffer as
    * destination are ongoing. Some operations like copies on the command
    * streamer might need to be aware of this to trigger the appropriate stall
    * before they can proceed with the copy.
    */
   ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),

   /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
    * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
    * done by writing the AUX-TT register.
    */
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),

   /* This bit does not exist directly in PIPE_CONTROL. It means that a
    * PIPE_CONTROL with a post-sync operation will follow. This is used to
    * implement a workaround for Gfx9.
    */
   ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
};

#define ANV_PIPE_FLUSH_BITS ( \
   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
   ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
   ANV_PIPE_TILE_CACHE_FLUSH_BIT)

#define ANV_PIPE_STALL_BITS ( \
   ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
   ANV_PIPE_DEPTH_STALL_BIT | \
   ANV_PIPE_CS_STALL_BIT)

#define ANV_PIPE_INVALIDATE_BITS ( \
   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)

enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);

static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
                                     VkAccessFlags2 flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit64(b, flags) {
      switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
      case VK_ACCESS_2_SHADER_WRITE_BIT:
      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as write
          * destination through the data port. To make its content available
          * to future operations, flush the HDC pipeline.
          */
         pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as render
          * target. To make its content available to future operations, flush
          * the render target cache.
          */
         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as depth
          * buffer. To make its content available to future operations, flush
          * the depth cache.
          */
         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_TRANSFER_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a
          * transfer write destination. Generic write operations include color
          * & depth operations as well as buffer operations like:
          *     - vkCmdClearColorImage()
          *     - vkCmdClearDepthStencilImage()
          *     - vkCmdBlitImage()
          *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
          *
          * Most of these operations are implemented using Blorp which writes
          * through the render target, so flush that cache to make it visible
          * to future operations. And for depth related operations we also
          * need to flush the depth cache.
          */
         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_MEMORY_WRITE_BIT:
         /* We're transitioning a buffer for generic write operations. Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_2_HOST_WRITE_BIT:
         /* We're transitioning a buffer for access by the CPU. Invalidate
          * all the caches. Since data and tile caches don't have invalidate,
          * we are forced to flush those as well.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
         /* We're transitioning a buffer written either from the VS stage or
          * from the command streamer (see CmdEndTransformFeedbackEXT); we
          * just need to stall the CS.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}

static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
                                          VkAccessFlags2 flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit64(b, flags) {
      switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
         /* Indirect draw commands take a buffer as input that we're going to
          * read from the command streamer to load some of the HW registers
          * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
          * command streamer stall so that all the cache flushes have
          * completed before the command streamer loads from memory.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
          * through a vertex buffer, so invalidate that cache.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
          * UBO from the buffer, so we need to invalidate the constant cache.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         /* A tile cache flush is also needed for CmdDispatchIndirect since
          * the command streamer and vertex fetch aren't L3 coherent.
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_INDEX_READ_BIT:
      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
         /* We're transitioning a buffer to be used as input for vkCmdDraw*
          * commands, so we invalidate the VF cache to make sure there is no
          * stale data when we start rendering.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_2_UNIFORM_READ_BIT:
         /* We're transitioning a buffer to be used as uniform data. Because
          * uniforms are accessed through the data port & sampler, we need to
          * invalidate the texture cache (sampler) & constant cache (data
          * port) to avoid stale data.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         if (device->physical->compiler->indirect_ubos_use_sampler)
            pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         else
            pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_SHADER_READ_BIT:
      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
      case VK_ACCESS_2_TRANSFER_READ_BIT:
         /* We're transitioning a buffer to be read through the sampler, so
          * invalidate the texture cache; we don't want any stale data.
          */
         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_2_MEMORY_READ_BIT:
         /* We're transitioning a buffer for generic read; invalidate all the
          * caches.
          */
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      case VK_ACCESS_2_MEMORY_WRITE_BIT:
         /* Generic write, make sure all previously written things land in
          * memory.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
         /* We're transitioning a buffer for conditional rendering or
          * transform feedback. We'll load the content of this buffer into HW
          * registers using the command streamer, so we need to stall the
          * command streamer to make sure any in-flight flush operations have
          * completed.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_2_HOST_READ_BIT:
         /* We're transitioning a buffer that was written by the CPU.  Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
         /* We're transitioning a buffer to be written by the streamout fixed
          * function. This one is apparently not L3 coherent, so we need a
          * tile cache flush to make sure any previous write is not going to
          * create WaW hazards.
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}
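
/* Illustrative barrier sketch: a memory dependency maps source accesses to
 * flushes and destination accesses to invalidations, accumulated into the
 * command buffer's pending bits (field declared later in this header):
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       anv_pipe_flush_bits_for_access_flags(device, src_access) |
 *       anv_pipe_invalidate_bits_for_access_flags(device, dst_access);
 */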

#define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
   VK_IMAGE_ASPECT_COLOR_BIT | \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)
#define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)

struct anv_vertex_binding {
   struct anv_buffer *                          buffer;
   VkDeviceSize                                 offset;
   VkDeviceSize                                 size;
};

struct anv_xfb_binding {
   struct anv_buffer *                          buffer;
   VkDeviceSize                                 offset;
   VkDeviceSize                                 size;
};

struct anv_push_constants {
   /** Push constant data provided by the client through vkPushConstants */
   uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];

   /** Dynamic offsets for dynamic UBOs and SSBOs */
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];

   /* Robust access pushed registers. */
   uint64_t push_reg_mask[MESA_SHADER_STAGES];

   /** Ray query globals (RT_DISPATCH_GLOBALS) */
   uint64_t ray_query_globals;

   /* Base addresses for descriptor sets */
   uint64_t desc_sets[MAX_SETS];

   struct {
      /** Base workgroup ID
       *
       * Used for vkCmdDispatchBase.
       */
      uint32_t base_work_group_id[3];

      /** Subgroup ID
       *
       * This is never set by software but is implicitly filled out when
       * uploading the push constants for compute shaders.
       */
      uint32_t subgroup_id;
   } cs;
};

struct anv_surface_state {
   struct anv_state state;
   /** Address of the surface referred to by this state
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address address;
   /* Address of the aux surface, if any
    *
    * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
    *
    * With the exception of gfx8, the bottom 12 bits of this address' offset
    * include extra aux information.
    */
   struct anv_address aux_address;
   /* Address of the clear color, if any
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address clear_address;
};

struct anv_attachment {
   VkFormat vk_format;
   const struct anv_image_view *iview;
   VkImageLayout layout;
   enum isl_aux_usage aux_usage;
   struct anv_surface_state surface_state;

   VkResolveModeFlagBits resolve_mode;
   const struct anv_image_view *resolve_iview;
   VkImageLayout resolve_layout;
};

/** State tracking for vertex buffer flushes
 *
 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
 * addresses.  If you happen to have two vertex buffers which get placed
 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
 * collisions.  In order to solve this problem, we track vertex address ranges
 * which are live in the cache and invalidate the cache if one ever exceeds 32
 * bits.
 */
struct anv_vb_cache_range {
   /* Virtual address at which the live vertex buffer cache range starts for
    * this vertex buffer index.
    */
   uint64_t start;

   /* Virtual address of the byte after where the vertex buffer cache range
    * ends.  This is exclusive such that end - start is the size of the range.
    */
   uint64_t end;
};

/* Check whether we need to apply the Gfx8-9 vertex buffer workaround */
static inline bool
anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
                                           struct anv_vb_cache_range *dirty,
                                           struct anv_address vb_address,
                                           uint32_t vb_size)
{
   if (vb_size == 0) {
      bound->start = 0;
      bound->end = 0;
      return false;
   }

   assert(vb_address.bo && anv_bo_is_pinned(vb_address.bo));
   bound->start = intel_48b_address(anv_address_physical(vb_address));
   bound->end = bound->start + vb_size;
   assert(bound->end > bound->start); /* No overflow */

   /* Align everything to a cache line */
   bound->start &= ~(64ull - 1ull);
   bound->end = align_u64(bound->end, 64);

   /* Compute the dirty range */
   dirty->start = MIN2(dirty->start, bound->start);
   dirty->end = MAX2(dirty->end, bound->end);

   /* If our range is larger than 32 bits, we have to flush */
   assert(bound->end - bound->start <= (1ull << 32));
   return (dirty->end - dirty->start) > (1ull << 32);
}
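
/* Illustrative use (sketch; the actual bookkeeping lives in the genX code):
 * when binding a vertex buffer, the result feeds a VF cache invalidation,
 *
 *    if (anv_gfx8_9_vb_cache_range_needs_workaround(&bound, &dirty,
 *                                                   vb_address, vb_size))
 *       pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT |
 *                            ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 */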

/** State tracking for a particular pipeline bind point
 *
 * This struct is the base struct for anv_cmd_graphics_state and
 * anv_cmd_compute_state.  These are used to track state which is bound to a
 * particular type of pipeline.  Generic state that applies per-stage such as
 * binding table offsets and push constants is tracked generically with a
 * per-stage array in anv_cmd_state.
 */
struct anv_cmd_pipeline_state {
   struct anv_descriptor_set *descriptors[MAX_SETS];
   struct anv_push_descriptor_set *push_descriptors[MAX_SETS];

   struct anv_push_constants push_constants;

   /* Push constant state allocated when flushing push constants. */
   struct anv_state          push_constants_state;
};

/** State tracking for the graphics pipeline
 *
 * This has anv_cmd_pipeline_state as a base struct to track things which get
 * bound to a graphics pipeline.  Along with general pipeline bind point state
 * which is in the anv_cmd_pipeline_state base struct, it also contains other
 * state which is graphics-specific.
 */
struct anv_cmd_graphics_state {
   struct anv_cmd_pipeline_state base;

   struct anv_graphics_pipeline *pipeline;

   VkRenderingFlags rendering_flags;
   VkRect2D render_area;
   uint32_t layer_count;
   uint32_t samples;
   uint32_t view_mask;
   uint32_t color_att_count;
   struct anv_state att_states;
   struct anv_attachment color_att[MAX_RTS];
   struct anv_attachment depth_att;
   struct anv_attachment stencil_att;
   struct anv_state null_surface_state;

   anv_cmd_dirty_mask_t dirty;
   uint32_t vb_dirty;

   struct anv_vb_cache_range ib_bound_range;
   struct anv_vb_cache_range ib_dirty_range;
   struct anv_vb_cache_range vb_bound_ranges[33];
   struct anv_vb_cache_range vb_dirty_ranges[33];

   uint32_t restart_index;

   VkShaderStageFlags push_constant_stages;

   uint32_t primitive_topology;

   struct anv_buffer *index_buffer;
   uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
   uint32_t index_offset;

   struct vk_sample_locations_state sample_locations;
};

enum anv_depth_reg_mode {
   ANV_DEPTH_REG_MODE_UNKNOWN = 0,
   ANV_DEPTH_REG_MODE_HW_DEFAULT,
   ANV_DEPTH_REG_MODE_D16_1X_MSAA,
};

/** State tracking for the compute pipeline
 *
 * This has anv_cmd_pipeline_state as a base struct to track things which get
 * bound to a compute pipeline.  Along with general pipeline bind point state
 * which is in the anv_cmd_pipeline_state base struct, it also contains other
 * state which is compute-specific.
 */
struct anv_cmd_compute_state {
   struct anv_cmd_pipeline_state base;

   struct anv_compute_pipeline *pipeline;

   bool pipeline_dirty;

   struct anv_state push_data;

   struct anv_address num_workgroups;
};
2706 
2707 struct anv_cmd_ray_tracing_state {
2708    struct anv_cmd_pipeline_state base;
2709 
2710    struct anv_ray_tracing_pipeline *pipeline;
2711 
2712    bool pipeline_dirty;
2713 
2714    struct {
2715       struct anv_bo *bo;
2716       struct brw_rt_scratch_layout layout;
2717    } scratch;
2718 };
2719 
2720 /** State required while building cmd buffer */
2721 struct anv_cmd_state {
2722    /* PIPELINE_SELECT.PipelineSelection */
2723    uint32_t                                     current_pipeline;
2724    const struct intel_l3_config *               current_l3_config;
2725    uint32_t                                     last_aux_map_state;
2726 
2727    struct anv_cmd_graphics_state                gfx;
2728    struct anv_cmd_compute_state                 compute;
2729    struct anv_cmd_ray_tracing_state             rt;
2730 
2731    enum anv_pipe_bits                           pending_pipe_bits;
2732    VkShaderStageFlags                           descriptors_dirty;
2733    VkShaderStageFlags                           push_constants_dirty;
2734 
2735    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
2736    bool                                         xfb_enabled;
2737    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
2738    struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
2739    struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
2740 
2741    unsigned char                                sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2742    unsigned char                                surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2743    unsigned char                                push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2744 
   /**
    * Whether or not the gfx8 PMA fix is enabled.  We ensure that, at the top
    * of any command buffer, it is disabled by disabling it in
    * EndCommandBuffer and before invoking the secondary in ExecuteCommands.
    */
   bool                                         pma_fix_enabled;

   /**
    * Whether or not we know for certain that HiZ is enabled for the current
    * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
    * enabled or not, this will be false.
    */
   bool                                         hiz_enabled;

   /* We ensure the registers for the gfx12 D16 fix are initialized at the
    * first non-NULL depth stencil packet emission of every command buffer.
    * For secondary command buffer execution, we transfer the state from the
    * last command buffer to the primary (if known).
    */
   enum anv_depth_reg_mode                      depth_reg_mode;

   /**
    * Whether RHWO optimization is enabled (Wa_1508744258).
    */
   bool                                         rhwo_optimization_enabled;

   /**
    * Pending state of the RHWO optimization, to be applied at the next
    * genX(cmd_buffer_apply_pipe_flushes).
    */
   bool                                         pending_rhwo_optimization_enabled;

   bool                                         conditional_render_enabled;

   /**
    * Last rendering scale argument provided to
    * genX(cmd_buffer_emit_hashing_mode)().
    */
   unsigned                                     current_hash_scale;

   /**
    * A buffer used for spill/fill of ray queries.
    */
   struct anv_bo *                              ray_query_shadow_bo;
};

#define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
#define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)

enum anv_cmd_buffer_exec_mode {
   ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
   ANV_CMD_BUFFER_EXEC_MODE_EMIT,
   ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
   ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
   ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
   ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
};

struct anv_measure_batch;

struct anv_cmd_buffer {
   struct vk_command_buffer                     vk;

   struct anv_device *                          device;
   struct anv_queue_family *                    queue_family;

   struct anv_batch                             batch;

   /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
    * recorded upon calling vkEndCommandBuffer().  This is useful if we need
    * to rewrite the end to chain multiple batches together at
    * vkQueueSubmit().
    */
   void *                                       batch_end;

   /* Fields required for the actual chain of anv_batch_bo's.
    *
    * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
    */
   struct list_head                             batch_bos;
   enum anv_cmd_buffer_exec_mode                exec_mode;

   /* A vector of anv_batch_bo pointers for every batch or surface buffer
    * referenced by this command buffer
    *
    * initialized by anv_cmd_buffer_init_batch_bo_chain()
    */
   struct u_vector                              seen_bbos;

   /* A vector of int32_t's for every block of binding tables.
    *
    * initialized by anv_cmd_buffer_init_batch_bo_chain()
    */
   struct u_vector                              bt_block_states;
   struct anv_state                             bt_next;

   struct anv_reloc_list                        surface_relocs;
   /** Last seen surface state block pool center bo offset */
   uint32_t                                     last_ss_pool_center;

   /* Serial for tracking buffer completion */
   uint32_t                                     serial;

   /* Stream objects for storing temporary data */
   struct anv_state_stream                      surface_state_stream;
   struct anv_state_stream                      dynamic_state_stream;
   struct anv_state_stream                      general_state_stream;

   VkCommandBufferUsageFlags                    usage_flags;

   struct anv_query_pool                       *perf_query_pool;

   struct anv_cmd_state                         state;

   struct anv_address                           return_addr;

   /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
   uint64_t                                     intel_perf_marker;

   struct anv_measure_batch *measure;

   /**
    * KHR_performance_query requires self-modifying command buffers.  This
    * array holds the locations of the commands that modify the query begin
    * and end instructions which store performance counters.  The array
    * length is anv_physical_device::n_perf_query_commands.
    */
   struct mi_address_token                     *self_mod_locations;

   /**
    * Index tracking which of the self_mod_locations items have already been
    * used.
    */
   uint32_t                                     perf_reloc_idx;

   /**
    * Sum of all the anv_batch_bo sizes allocated for this command buffer.
    * Used to increase allocation size for long command buffers.
    */
   uint32_t                                     total_batch_size;

   /**
    * u_trace tracing context for this command buffer (see
    * util/perf/u_trace.h).
    */
   struct u_trace                               trace;
};

/* Determine whether we can chain a given cmd_buffer to another one.  We need
 * softpin and we also need to make sure that we can edit the end of the batch
 * to point to the next one, which requires the command buffer to not be used
 * simultaneously.
 */
static inline bool
anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
{
   return !anv_use_relocations(cmd_buffer->device->physical) &&
      !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
}

VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                                  struct anv_cmd_buffer *secondary);
void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
                                struct anv_cmd_buffer *cmd_buffer,
                                const VkSemaphore *in_semaphores,
                                const uint64_t *in_wait_values,
                                uint32_t num_in_semaphores,
                                const VkSemaphore *out_semaphores,
                                const uint64_t *out_signal_values,
                                uint32_t num_out_semaphores,
                                VkFence fence,
                                int perf_query_pass);

VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);

struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                                             const void *data, uint32_t size, uint32_t alignment);
struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                                              uint32_t *a, uint32_t *b,
                                              uint32_t dwords, uint32_t alignment);

struct anv_address
anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t entries, uint32_t *state_offset);
struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t size, uint32_t alignment);

VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);

void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);

struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state);

void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);

void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);

enum anv_bo_sync_state {
   /** Indicates that this is a new (or newly reset) fence */
   ANV_BO_SYNC_STATE_RESET,

   /** Indicates that this fence has been submitted to the GPU but is still
    * (as far as we know) in use by the GPU.
    */
   ANV_BO_SYNC_STATE_SUBMITTED,

   ANV_BO_SYNC_STATE_SIGNALED,
};

struct anv_bo_sync {
   struct vk_sync sync;

   enum anv_bo_sync_state state;
   struct anv_bo *bo;
};

extern const struct vk_sync_type anv_bo_sync_type;

static inline bool
vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
{
   return sync->type == &anv_bo_sync_type;
}

VkResult anv_create_sync_for_memory(struct vk_device *device,
                                    VkDeviceMemory memory,
                                    bool signal_memory,
                                    struct vk_sync **sync_out);

struct anv_event {
   struct vk_object_base                        base;
   uint64_t                                     semaphore;
   struct anv_state                             state;
};

#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

#define anv_foreach_stage(stage, stage_bits)                         \
   for (gl_shader_stage stage,                                       \
        __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
        stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
        __tmp &= ~(1 << (stage)))
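
/* Usage sketch for anv_foreach_stage (illustrative only, not driver code):
 * the classic Vulkan stage flag bits line up with gl_shader_stage bit
 * positions, so iterating a VkShaderStageFlags mask yields gl_shader_stage
 * values.
 *
 *    VkShaderStageFlags bits = VK_SHADER_STAGE_VERTEX_BIT |
 *                              VK_SHADER_STAGE_FRAGMENT_BIT;
 *    anv_foreach_stage(s, bits) {
 *       // First iteration:  s == MESA_SHADER_VERTEX   (bit 0)
 *       // Second iteration: s == MESA_SHADER_FRAGMENT (bit 4)
 *    }
 */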

struct anv_pipeline_bind_map {
   unsigned char                                surface_sha1[20];
   unsigned char                                sampler_sha1[20];
   unsigned char                                push_sha1[20];

   uint32_t surface_count;
   uint32_t sampler_count;

   struct anv_pipeline_binding *                surface_to_descriptor;
   struct anv_pipeline_binding *                sampler_to_descriptor;

   struct anv_push_range                        push_ranges[4];
};

struct anv_shader_bin {
   struct vk_pipeline_cache_object base;

   gl_shader_stage stage;

   struct anv_state kernel;
   uint32_t kernel_size;

   const struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

   struct brw_compile_stats stats[3];
   uint32_t num_stats;

   struct nir_xfb_info *xfb_info;

   struct anv_pipeline_bind_map bind_map;
};

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      gl_shader_stage stage,
                      const void *key, uint32_t key_size,
                      const void *kernel, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size,
                      const struct brw_compile_stats *stats, uint32_t num_stats,
                      const struct nir_xfb_info *xfb_info,
                      const struct anv_pipeline_bind_map *bind_map);

static inline void
anv_shader_bin_ref(struct anv_shader_bin *shader)
{
   vk_pipeline_cache_object_ref(&shader->base);
}

static inline void
anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
{
   vk_pipeline_cache_object_unref(&shader->base);
}

#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({             \
   assert((local_arg_offset) % 8 == 0);                              \
   const struct brw_bs_prog_data *prog_data =                        \
      brw_bs_prog_data_const(bin->prog_data);                        \
   assert(prog_data->simd_size == 8 || prog_data->simd_size == 16);  \
                                                                     \
   (struct GFX_BINDLESS_SHADER_RECORD) {                             \
      .OffsetToLocalArguments = (local_arg_offset) / 8,              \
      .BindlessShaderDispatchMode =                                  \
         prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8,          \
      .KernelStartPointer = bin->kernel.offset,                      \
   };                                                                \
})
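
/* Hypothetical usage sketch for anv_shader_bin_get_bsr (illustrative, not
 * part of the driver): given a bindless shader binary `bin`, build the
 * dispatch record with local arguments starting 8 bytes into the record.
 *
 *    struct GFX_BINDLESS_SHADER_RECORD bsr = anv_shader_bin_get_bsr(bin, 8);
 */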

struct anv_pipeline_executable {
   gl_shader_stage stage;

   struct brw_compile_stats stats;

   char *nir;
   char *disasm;
};

enum anv_pipeline_type {
   ANV_PIPELINE_GRAPHICS,
   ANV_PIPELINE_COMPUTE,
   ANV_PIPELINE_RAY_TRACING,
};

struct anv_pipeline {
   struct vk_object_base                        base;

   struct anv_device *                          device;

   struct anv_batch                             batch;
   struct anv_reloc_list                        batch_relocs;

   void *                                       mem_ctx;

   enum anv_pipeline_type                       type;
   VkPipelineCreateFlags                        flags;

   uint32_t                                     ray_queries;

   struct util_dynarray                         executables;

   const struct intel_l3_config *               l3_config;
};

struct anv_graphics_pipeline {
   struct anv_pipeline                          base;

   /* Shaders */
   struct anv_shader_bin *                      shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];

   VkShaderStageFlags                           active_stages;

   struct vk_sample_locations_state             sample_locations;
   struct vk_dynamic_graphics_state             dynamic_state;

   /* These fields are required with dynamic primitive topology;
    * rasterization_samples is used only with gen < 8.
    */
   VkLineRasterizationModeEXT                   line_mode;
   VkPolygonMode                                polygon_mode;
   uint32_t                                     patch_control_points;
   uint32_t                                     rasterization_samples;

   VkColorComponentFlags                        color_comp_writes[MAX_RTS];

   uint32_t                                     view_mask;
   uint32_t                                     instance_multiplier;

   bool                                         depth_clamp_enable;
   bool                                         depth_clip_enable;
   bool                                         kill_pixel;
   bool                                         force_fragment_thread_dispatch;
   bool                                         negative_one_to_one;

   /* When primitive replication is used, subpass->view_mask will describe
    * which views to replicate.
    */
   bool                                         use_primitive_replication;

   uint32_t                                     vb_used;
   struct anv_pipeline_vertex_binding {
      uint32_t                                  stride;
      bool                                      instanced;
      uint32_t                                  instance_divisor;
   } vb[MAX_VBS];

   /* Precomputed CS instructions that can directly be copied into
    * anv_cmd_buffer.
    */
   uint32_t                                     batch_data[512];

   /* Pre-packed CS instructions & structures that need to be merged later
    * with dynamic state.
    */
   struct {
      uint32_t                                  sf[7];
      uint32_t                                  clip[4];
      uint32_t                                  xfb_bo_pitch[4];
      uint32_t                                  wm[3];
      uint32_t                                  blend_state[MAX_RTS * 2];
      uint32_t                                  streamout_state[3];
   } gfx7;

   struct {
      uint32_t                                  sf[4];
      uint32_t                                  raster[5];
      uint32_t                                  wm[2];
      uint32_t                                  ps_blend[2];
      uint32_t                                  blend_state[1 + MAX_RTS * 2];
      uint32_t                                  streamout_state[5];
   } gfx8;
};

struct anv_compute_pipeline {
   struct anv_pipeline                          base;

   struct anv_shader_bin *                      cs;
   uint32_t                                     batch_data[9];
   uint32_t                                     interface_descriptor_data[8];
};

struct anv_rt_shader_group {
   VkRayTracingShaderGroupTypeKHR type;

   struct anv_shader_bin *general;
   struct anv_shader_bin *closest_hit;
   struct anv_shader_bin *any_hit;
   struct anv_shader_bin *intersection;

   /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
   uint32_t handle[8];
};

struct anv_ray_tracing_pipeline {
   struct anv_pipeline                          base;

   /* All shaders in the pipeline */
   struct util_dynarray                         shaders;

   uint32_t                                     group_count;
   struct anv_rt_shader_group *                 groups;

   /* If non-zero, this is the default computed stack size as per the stack
    * size computation in the Vulkan spec.  If zero, that indicates that the
    * client has requested a dynamic stack size.
    */
   uint32_t                                     stack_size;
};

#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
   static inline struct anv_##pipe_type##_pipeline *                 \
   anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)        \
   {                                                                 \
      assert(pipeline->type == pipe_enum);                           \
      return (struct anv_##pipe_type##_pipeline *) pipeline;         \
   }

ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
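
/* Illustrative sketch (not driver code): each ANV_DECL_PIPELINE_DOWNCAST()
 * instantiation above defines a checked downcast, e.g.:
 *
 *    struct anv_graphics_pipeline *gfx =
 *       anv_pipeline_to_graphics(pipeline);
 *    // asserts pipeline->type == ANV_PIPELINE_GRAPHICS before casting
 */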

static inline bool
anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
                       gl_shader_stage stage)
{
   return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}

static inline bool
anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
{
   return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
}

static inline bool
anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
{
   return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
}

static inline bool
anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
   const struct vk_dynamic_graphics_state *dyn =
      &cmd_buffer->vk.dynamic_graphics_state;
   uint8_t color_writes = dyn->cb.color_write_enables;

   /* All writes disabled through vkCmdSetColorWriteEnableEXT */
   if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
      return true;

   /* Or all write masks are empty */
   for (uint32_t i = 0; i < state->color_att_count; i++) {
      if (state->pipeline->color_comp_writes[i] != 0)
         return false;
   }

   return true;
}

#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
static inline const struct brw_##prefix##_prog_data *                   \
get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
{                                                                       \
   if (anv_pipeline_has_stage(pipeline, stage)) {                       \
      return (const struct brw_##prefix##_prog_data *)                  \
             pipeline->shaders[stage]->prog_data;                       \
   } else {                                                             \
      return NULL;                                                      \
   }                                                                    \
}

ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
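
/* Illustrative sketch (not driver code): each instantiation above defines a
 * getter that returns NULL when the stage is absent, so callers can branch
 * on the result:
 *
 *    const struct brw_gs_prog_data *gs = get_gs_prog_data(pipeline);
 *    if (gs != NULL) {
 *       // the pipeline has a geometry shader
 *    }
 */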

static inline const struct brw_cs_prog_data *
get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
{
   assert(pipeline->cs);
   return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
}

static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      return &get_gs_prog_data(pipeline)->base;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      return &get_tes_prog_data(pipeline)->base;
   else
      return &get_vs_prog_data(pipeline)->base;
}

VkResult
anv_device_init_rt_shaders(struct anv_device *device);

void
anv_device_finish_rt_shaders(struct anv_device *device);

VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator);

void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator);

struct anv_format_plane {
   enum isl_format isl_format:16;
   struct isl_swizzle swizzle;

   /* Whether this plane contains chroma channels */
   bool has_chroma;

   /* For downscaling of YUV planes */
   uint8_t denominator_scales[2];

   /* How to map sampled ycbcr planes to a single 4 component element. */
   struct isl_swizzle ycbcr_swizzle;

   /* What aspect is associated to this plane */
   VkImageAspectFlags aspect;
};


struct anv_format {
   struct anv_format_plane planes[3];
   VkFormat vk_format;
   uint8_t n_planes;
   bool can_ycbcr;
};

static inline void
anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
{
   if (util_bitcount(aspects) == 1) {
      assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
                        VK_IMAGE_ASPECT_DEPTH_BIT |
                        VK_IMAGE_ASPECT_STENCIL_BIT |
                        VK_IMAGE_ASPECT_PLANE_0_BIT |
                        VK_IMAGE_ASPECT_PLANE_1_BIT |
                        VK_IMAGE_ASPECT_PLANE_2_BIT));
   } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
      assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
             aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
                         VK_IMAGE_ASPECT_PLANE_1_BIT) ||
             aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
                         VK_IMAGE_ASPECT_PLANE_1_BIT |
                         VK_IMAGE_ASPECT_PLANE_2_BIT));
   } else {
      assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
                         VK_IMAGE_ASPECT_STENCIL_BIT));
   }
}

/**
 * Return the aspect's plane relative to all_aspects.  For an image, for
 * instance, all_aspects would be the set of aspects in the image.  For
 * an image view, all_aspects would be the subset of aspects represented
 * by that particular view.
 */
static inline uint32_t
anv_aspect_to_plane(VkImageAspectFlags all_aspects,
                    VkImageAspectFlagBits aspect)
{
   anv_assert_valid_aspect_set(all_aspects);
   assert(util_bitcount(aspect) == 1);
   assert(!(aspect & ~all_aspects));

   /* Because we always put image and view planes in aspect-bit-order, the
    * plane index is the number of bits in all_aspects before aspect.
    */
   return util_bitcount(all_aspects & (aspect - 1));
}
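
/* Worked example (bit values from the Vulkan headers, shown for
 * illustration): with all_aspects = PLANE_0 | PLANE_1 | PLANE_2
 * (0x10 | 0x20 | 0x40) and aspect = VK_IMAGE_ASPECT_PLANE_2_BIT (0x40),
 * aspect - 1 == 0x3f, so all_aspects & 0x3f == 0x30, which has two bits
 * set: plane index 2.
 */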

#define anv_foreach_image_aspect_bit(b, image, aspects) \
   u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))

const struct anv_format *
anv_get_format(VkFormat format);

static inline uint32_t
anv_get_format_planes(VkFormat vk_format)
{
   const struct anv_format *format = anv_get_format(vk_format);

   return format != NULL ? format->n_planes : 0;
}

struct anv_format_plane
anv_get_format_plane(const struct intel_device_info *devinfo,
                     VkFormat vk_format, uint32_t plane,
                     VkImageTiling tiling);

struct anv_format_plane
anv_get_format_aspect(const struct intel_device_info *devinfo,
                      VkFormat vk_format,
                      VkImageAspectFlagBits aspect, VkImageTiling tiling);

static inline enum isl_format
anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
                   VkImageAspectFlags aspect, VkImageTiling tiling)
{
   return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
}

bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
                                  VkImageCreateFlags create_flags,
                                  VkFormat vk_format, VkImageTiling vk_tiling,
                                  VkImageUsageFlags vk_usage,
                                  const VkImageFormatListCreateInfo *fmt_list);

extern VkFormat
vk_format_from_android(unsigned android_format, unsigned android_usage);

static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)
{
   /* Sometimes the swizzle will have alpha map to one.  We do this to fake
    * RGB as RGBA for texturing.
    */
   assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
          swizzle.a == ISL_CHANNEL_SELECT_ALPHA);

   /* But it doesn't matter what we render to that channel. */
   swizzle.a = ISL_CHANNEL_SELECT_ALPHA;

   return swizzle;
}

void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);

/**
 * Describes how each part of anv_image will be bound to memory.
 */
struct anv_image_memory_range {
   /**
    * Disjoint bindings into which each portion of the image will be bound.
    *
    * Binding images to memory can be complicated and involve binding
    * different portions of the image to different memory objects or regions.
    * For most images, everything lives in the MAIN binding and gets bound by
    * vkBindImageMemory.  For disjoint multi-planar images, each plane has
    * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
    * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which
    * are implicit or driver-managed and live in special-case bindings.
    */
   enum anv_image_memory_binding {
      /**
       * Used if and only if image is not multi-planar disjoint. Bound by
       * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
       */
      ANV_IMAGE_MEMORY_BINDING_MAIN,

      /**
       * Used if and only if image is multi-planar disjoint.  Bound by
       * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
       */
      ANV_IMAGE_MEMORY_BINDING_PLANE_0,
      ANV_IMAGE_MEMORY_BINDING_PLANE_1,
      ANV_IMAGE_MEMORY_BINDING_PLANE_2,

      /**
       * Driver-private bo. In special cases we may store the aux surface
       * and/or aux state in this binding.
       */
      ANV_IMAGE_MEMORY_BINDING_PRIVATE,

      /** Sentinel */
      ANV_IMAGE_MEMORY_BINDING_END,
   } binding;

   /**
    * Offset is relative to the start of the binding created by
    * vkBindImageMemory, not to the start of the bo.
    */
   uint64_t offset;

   uint64_t size;
   uint32_t alignment;
};

/**
 * Subsurface of an anv_image.
 */
struct anv_surface {
   struct isl_surf isl;
   struct anv_image_memory_range memory_range;
};

static inline bool MUST_CHECK
anv_surface_is_valid(const struct anv_surface *surface)
{
   return surface->isl.size_B > 0 && surface->memory_range.size > 0;
}

struct anv_image {
   struct vk_image vk;

   uint32_t n_planes;

   /**
    * The image has a multi-planar format and was created with
    * VK_IMAGE_CREATE_DISJOINT_BIT.
    */
   bool disjoint;

   /**
    * The image was imported from a struct AHardwareBuffer.  We have to
    * delay final image creation until bind time.
    */
   bool from_ahb;

   /**
    * The image was imported from gralloc with VkNativeBufferANDROID.  The
    * gralloc bo must be released when the image is destroyed.
    */
   bool from_gralloc;

   /**
    * The memory bindings created by vkCreateImage and vkBindImageMemory.
    *
    * For details on the image's memory layout, see check_memory_bindings().
    *
    * vkCreateImage constructs the `memory_range` for each
    * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
    * and only if `memory_range::size > 0`.
    *
    * vkBindImageMemory binds each valid `memory_range` to an `address`.
    * Usually, the app will provide the address via the parameters of
    * vkBindImageMemory.  However, special-case bindings may be bound to
    * driver-private memory.
    */
   struct anv_image_binding {
      struct anv_image_memory_range memory_range;
      struct anv_address address;
   } bindings[ANV_IMAGE_MEMORY_BINDING_END];

   /**
    * Image subsurfaces
    *
    * For each plane x, anv_image::planes[x].primary_surface is valid if and
    * only if anv_image::vk.aspects contains the corresponding aspect.  Refer
    * to anv_image_aspect_to_plane() to determine the plane number for a
    * given aspect.
    *
    * The hardware requires that the depth buffer and stencil buffer be
    * separate surfaces.  From Vulkan's perspective, though, depth and stencil
    * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
    * allocate the depth and stencil buffers as separate surfaces in the same
    * bo.
    */
   struct anv_image_plane {
      struct anv_surface primary_surface;

      /**
       * A surface which shadows the main surface and may have different
       * tiling. This is used for sampling using a tiling that isn't supported
       * for other operations.
       */
      struct anv_surface shadow_surface;

      /**
       * The base aux usage for this image.  For color images, this can be
       * either CCS_E or CCS_D depending on whether or not we can reliably
       * leave CCS on all the time.
       */
      enum isl_aux_usage aux_usage;

      struct anv_surface aux_surface;

      /** Location of the fast clear state.  */
      struct anv_image_memory_range fast_clear_memory_range;

      /**
       * Whether this image can be fast cleared with non-zero clear colors.
       * This can happen with mutable images when formats of different bit
       * sizes per component are used.
       *
       * On Gfx9+, because the clear colors are stored as four 32-bit
       * components, we can clear in R16G16_UNORM (storing two 16-bit values
       * in components 0 & 1 of the clear color) and then draw in R32_UINT,
       * which would interpret the clear color as a single-component value,
       * using only the first 16-bit component of the previously written
       * clear color.
       *
       * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported; this
       * boolean will prevent the usage of CC_ONE.
       */
      bool can_non_zero_fast_clear;
   } planes[3];
};

static inline bool
anv_image_is_externally_shared(const struct anv_image *image)
{
   return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
          image->vk.external_handle_types != 0;
}

static inline bool
anv_image_has_private_binding(const struct anv_image *image)
{
   const struct anv_image_binding private_binding =
      image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
   return private_binding.memory_range.size != 0;
}

/* The ordering of this enum is important */
enum anv_fast_clear_type {
   /** Image does not have/support any fast-clear blocks */
   ANV_FAST_CLEAR_NONE = 0,
   /** Image has/supports fast-clear but only to the default value */
   ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
   /** Image has/supports fast-clear with an arbitrary fast-clear value */
   ANV_FAST_CLEAR_ANY = 2,
};

/**
 * Return the aspect's _format_ plane, not its _memory_ plane (using the
 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
 */
static inline uint32_t
anv_image_aspect_to_plane(const struct anv_image *image,
                          VkImageAspectFlagBits aspect)
{
   return anv_aspect_to_plane(image->vk.aspects, aspect);
}

/* Returns the number of auxiliary buffer levels attached to an image. */
static inline uint8_t
anv_image_aux_levels(const struct anv_image * const image,
                     VkImageAspectFlagBits aspect)
{
   uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
      return 0;

   return image->vk.mip_levels;
}

/* Returns the number of auxiliary buffer layers attached to an image. */
static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,
                     VkImageAspectFlagBits aspect,
                     const uint8_t miplevel)
{
   assert(image);

   /* The miplevel must exist in the main buffer. */
   assert(miplevel < image->vk.mip_levels);

   if (miplevel >= anv_image_aux_levels(image, aspect)) {
      /* There are no layers with auxiliary data because the miplevel has no
       * auxiliary data.
       */
      return 0;
   }

   return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
}
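
/* Worked example (illustrative): for a 3D image with vk.extent.depth == 64
 * and vk.array_layers == 1 (3D images always have one array layer), miplevel
 * 2 has MAX2(1, 64 >> 2) == 16 layers of auxiliary data, assuming that
 * miplevel has auxiliary data at all.
 */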

static inline struct anv_address MUST_CHECK
anv_image_address(const struct anv_image *image,
                  const struct anv_image_memory_range *mem_range)
{
   const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
   assert(binding->memory_range.offset == 0);

   if (mem_range->size == 0)
      return ANV_NULL_ADDRESS;

   return anv_address_add(binding->address, mem_range->offset);
}

static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
                               const struct anv_image *image,
                               VkImageAspectFlagBits aspect)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
                               VK_IMAGE_ASPECT_DEPTH_BIT));

   uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   const struct anv_image_memory_range *mem_range =
      &image->planes[plane].fast_clear_memory_range;

   return anv_image_address(image, mem_range);
}

static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device *device,
                                   const struct anv_image *image,
                                   VkImageAspectFlagBits aspect)
{
   struct anv_address addr =
      anv_image_get_clear_color_addr(device, image, aspect);

   const unsigned clear_color_state_size = device->info.ver >= 10 ?
      device->isl_dev.ss.clear_color_state_size :
      device->isl_dev.ss.clear_value_size;
   return anv_address_add(addr, clear_color_state_size);
}

static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device *device,
                                     const struct anv_image *image,
                                     VkImageAspectFlagBits aspect,
                                     uint32_t level, uint32_t array_layer)
{
   assert(level < anv_image_aux_levels(image, aspect));
   assert(array_layer < anv_image_aux_layers(image, aspect, level));
   UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);

   /* Relative to start of the plane's fast clear memory range */
   uint32_t offset;

   offset = 4; /* Go past the fast clear type */

   if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
      for (uint32_t l = 0; l < level; l++)
         offset += anv_minify(image->vk.extent.depth, l) * 4;
   } else {
      offset += level * image->vk.array_layers * 4;
   }

   offset += array_layer * 4;

   assert(offset < image->planes[plane].fast_clear_memory_range.size);

   return anv_address_add(
      anv_image_get_fast_clear_type_addr(device, image, aspect),
      offset);
}
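
/* Illustrative layout sketch of the per-plane fast-clear memory range, as
 * implied by the address helpers above: the clear color comes first, then a
 * 4-byte fast-clear type, then one 4-byte compression-state dword per
 * (level, layer) pair.  Worked example for a 2D image with
 * vk.array_layers == 8, level 1, array_layer 3:
 *
 *    offset = 4 + 1 * 8 * 4 + 3 * 4 = 48 bytes past the fast-clear-type
 *    address.
 */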

/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
static inline bool
anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
                        const struct anv_image *image)
{
   if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
      return false;

   /* For Gfx8-11, there are some restrictions around sampling from HiZ.
    * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
    * say:
    *
    *    "If this field is set to AUX_HIZ, Number of Multisamples must
    *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
    */
   if (image->vk.image_type == VK_IMAGE_TYPE_3D)
      return false;

   /* Allow this feature on BDW even though it is disabled in the BDW devinfo
    * struct. There's documentation which suggests that this feature actually
    * reduces performance on BDW, but it has only been observed to help so
    * far. Sampling fast-cleared blocks on BDW must also be handled with care
    * (see depth_stencil_attachment_compute_aux_usage() for more info).
    */
   if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
      return false;

   return image->vk.samples == 1;
}

/* Returns true if an MCS-enabled buffer can be sampled from. */
static inline bool
anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
                              const struct anv_image *image)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   const uint32_t plane =
      anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);

   assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));

   const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;

   /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
    * See HSD 1707282275, wa_14013111325. Due to the use of
    * format-reinterpretation, a simplified workaround is implemented.
    */
   if (devinfo->ver >= 12 &&
       isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
      return false;
   }

   return true;
}

static inline bool
anv_image_plane_uses_aux_map(const struct anv_device *device,
                             const struct anv_image *image,
                             uint32_t plane)
{
   return device->info.has_aux_map &&
      isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
}

void
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
                                  const struct anv_image *image,
                                  VkImageAspectFlagBits aspect,
                                  enum isl_aux_usage aux_usage,
                                  uint32_t level,
                                  uint32_t base_layer,
                                  uint32_t layer_count);

void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color);
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value);
void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter);
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op);
void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value);
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate);
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate);

void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count);

enum isl_aux_state ATTRIBUTE_PURE
anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
                        const struct anv_image *image,
                        const VkImageAspectFlagBits aspect,
                        const VkImageLayout layout);

enum isl_aux_usage ATTRIBUTE_PURE
anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
                        const struct anv_image *image,
                        const VkImageAspectFlagBits aspect,
                        const VkImageUsageFlagBits usage,
                        const VkImageLayout layout);

enum anv_fast_clear_type ATTRIBUTE_PURE
anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
                              const struct anv_image * const image,
                              const VkImageAspectFlagBits aspect,
                              const VkImageLayout layout);

static inline bool
anv_image_aspects_compatible(VkImageAspectFlags aspects1,
                             VkImageAspectFlags aspects2)
{
   if (aspects1 == aspects2)
      return true;

   /* Otherwise, only color aspects are compatible, and only if both masks
    * contain the same number of aspects.
    */
   if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
       (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
       util_bitcount(aspects1) == util_bitcount(aspects2))
      return true;

   return false;
}

struct anv_image_view {
   struct vk_image_view vk;

   const struct anv_image *image; /**< VkImageViewCreateInfo::image */

   unsigned n_planes;
   struct {
      uint32_t image_plane;

      struct isl_view isl;

      /**
       * RENDER_SURFACE_STATE when using image as a sampler surface with an
       * image layout of SHADER_READ_ONLY_OPTIMAL or
       * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
       */
      struct anv_surface_state optimal_sampler_surface_state;

      /**
       * RENDER_SURFACE_STATE when using image as a sampler surface with an
       * image layout of GENERAL.
       */
      struct anv_surface_state general_sampler_surface_state;

      /**
       * RENDER_SURFACE_STATE when using image as a storage image. Separate
       * states for vanilla (with the original format) and one which has been
       * lowered to a format suitable for reading.  This may be a raw surface
       * in extreme cases or simply a surface with a different format where we
       * expect some conversion to be done in the shader.
       */
      struct anv_surface_state storage_surface_state;
      struct anv_surface_state lowered_storage_surface_state;

      struct brw_image_param lowered_storage_image_param;
   } planes[3];
};

enum anv_image_view_state_flags {
   ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED      = (1 << 0),
   ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
};

void anv_image_fill_surface_state(struct anv_device *device,
                                  const struct anv_image *image,
                                  VkImageAspectFlagBits aspect,
                                  const struct isl_view *view,
                                  isl_surf_usage_flags_t view_usage,
                                  enum isl_aux_usage aux_usage,
                                  const union isl_color_value *clear_color,
                                  enum anv_image_view_state_flags flags,
                                  struct anv_surface_state *state_inout,
                                  struct brw_image_param *image_param_out);

struct anv_image_create_info {
   const VkImageCreateInfo *vk_info;

   /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
   isl_tiling_flags_t isl_tiling_flags;

   /** These flags will be added to any derived from VkImageCreateInfo. */
   isl_surf_usage_flags_t isl_extra_usage_flags;
};

VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
                        const struct anv_image_create_info *create_info);

void anv_image_finish(struct anv_image *image);

void anv_image_get_memory_requirements(struct anv_device *device,
                                       struct anv_image *image,
                                       VkImageAspectFlags aspects,
                                       VkMemoryRequirements2 *pMemoryRequirements);

enum isl_format
anv_isl_format_for_descriptor_type(const struct anv_device *device,
                                   VkDescriptorType type);

static inline uint32_t
anv_rasterization_aa_mode(VkPolygonMode raster_mode,
                          VkLineRasterizationModeEXT line_mode)
{
   if (raster_mode == VK_POLYGON_MODE_LINE &&
       line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
      return true;
   return false;
}

VkFormatFeatureFlags2
anv_get_image_format_features2(const struct intel_device_info *devinfo,
                               VkFormat vk_format,
                               const struct anv_format *anv_format,
                               VkImageTiling vk_tiling,
                               const struct isl_drm_modifier_info *isl_mod_info);

void anv_fill_buffer_surface_state(struct anv_device *device,
                                   struct anv_state state,
                                   enum isl_format format,
                                   struct isl_swizzle swizzle,
                                   isl_surf_usage_flags_t usage,
                                   struct anv_address address,
                                   uint32_t range, uint32_t stride);


/* Haswell border color is a bit of a disaster.  Float and unorm formats use a
 * straightforward 32-bit float color in the first 64 bytes.  Instead of using
 * a nice float/integer union like Gfx8+, Haswell specifies the integer border
 * color as a separate entry /after/ the float color.  The layout of this
 * entry also depends on the format's bpp (with extra hacks for RG32), and the
 * layouts for different bpps overlap one another.
 *
 * Since we don't know the format/bpp, we can't make any of the border colors
 * containing '1' work for all formats, as it would be in the wrong place for
 * some of them.  We opt to make 32-bit integers work as this seems like the
 * most common option.  Fortunately, transparent black works regardless, as
 * all zeroes is the same in every bit-size.
 */
struct hsw_border_color {
   float float32[4];
   uint32_t _pad0[12];
   uint32_t uint32[4];
   uint32_t _pad1[108];
};

struct gfx8_border_color {
   union {
      float float32[4];
      uint32_t uint32[4];
   };
   /* Pad out to 64 bytes */
   uint32_t _pad[12];
};
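
/* Editorial sanity checks (not in the original header): the padding above is
 * sized so the Haswell entry spans 512 bytes (float color, pad, integer
 * color, pad) and the Gfx8+ entry spans the usual 64 bytes.  Assumes C11,
 * where <assert.h> maps static_assert to _Static_assert.
 */
static_assert(sizeof(struct hsw_border_color) == 512,
              "hsw_border_color must match the fixed HW layout");
static_assert(sizeof(struct gfx8_border_color) == 64,
              "gfx8_border_color must be padded to 64 bytes");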

struct anv_ycbcr_conversion {
   struct vk_object_base base;

   const struct anv_format *        format;
   VkSamplerYcbcrModelConversion    ycbcr_model;
   VkSamplerYcbcrRange              ycbcr_range;
   VkComponentSwizzle               mapping[4];
   VkChromaLocation                 chroma_offsets[2];
   VkFilter                         chroma_filter;
   bool                             chroma_reconstruction;
};

struct anv_sampler {
   struct vk_object_base        base;

   uint32_t                     state[3][4];
   uint32_t                     n_planes;
   struct anv_ycbcr_conversion *conversion;

   /* Blob of sampler state data which is guaranteed to be 32-byte aligned
    * and with a 32-byte stride for use as bindless samplers.
    */
   struct anv_state             bindless_state;

   struct anv_state             custom_border_color;
};
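
/* Editorial note: given the 32-byte alignment and stride guaranteed above,
 * the bindless state for plane `p` of a sampler sits at a fixed offset,
 * i.e. bindless_state.offset + p * 32 with p < n_planes (illustrative
 * arithmetic, not a driver helper).
 */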

#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
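/* Editorial note: 0x7ff masks the low 11 bits, one per core
 * VkQueryPipelineStatisticFlagBits value (input assembly
 * vertices/primitives, VS/GS/FS/TES/CS invocations, GS primitives,
 * clipping invocations/primitives, and tessellation control patches).
 */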

struct anv_query_pool {
   struct vk_object_base                        base;

   VkQueryType                                  type;
   VkQueryPipelineStatisticFlags                pipeline_statistics;
   /** Stride between slots, in bytes */
   uint32_t                                     stride;
   /** Number of slots in this query pool */
   uint32_t                                     slots;
   struct anv_bo *                              bo;

   /* KHR perf queries: */
   uint32_t                                     pass_size;
   uint32_t                                     data_offset;
   uint32_t                                     snapshot_size;
   uint32_t                                     n_counters;
   struct intel_perf_counter_pass *             counter_pass;
   uint32_t                                     n_passes;
   struct intel_perf_query_info **              pass_query;
};

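/* Editorial note: each pass occupies pass_size bytes; the first 8 bytes of a
 * pass appear to be reserved (presumably an availability/marker slot), hence
 * the +8 below.
 */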
static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
                                                      uint32_t pass)
{
   return pool->pass_size * pass + 8;
}

struct anv_acceleration_structure {
   struct vk_object_base                        base;

   VkDeviceSize                                 size;
   struct anv_address                           address;
};

int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
int anv_get_physical_device_entrypoint_index(const char *name);

const char *anv_get_instance_entry_name(int index);
const char *anv_get_physical_device_entry_name(int index);
const char *anv_get_device_entry_name(int index);

bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
                                   const struct vk_instance_extension_table *instance);
bool
anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                          const struct vk_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                 const struct vk_instance_extension_table *instance,
                                 const struct vk_device_extension_table *device);

const struct vk_device_dispatch_table *
anv_get_device_dispatch_table(const struct intel_device_info *devinfo);

void
anv_dump_pipe_bits(enum anv_pipe_bits bits);

static inline void
anv_add_pending_pipe_bits(struct anv_cmd_buffer *cmd_buffer,
                          enum anv_pipe_bits bits,
                          const char *reason)
{
   cmd_buffer->state.pending_pipe_bits |= bits;
   if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) {
      fputs("pc: add ", stderr);
      anv_dump_pipe_bits(bits);
      fprintf(stderr, "reason: %s\n", reason);
   }
}
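
/* Typical call shape (editorial example; ANV_PIPE_CS_STALL_BIT is one of the
 * anv_pipe_bits flags defined earlier in this header):
 *
 *    anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT,
 *                              "flush before compute dispatch");
 */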

struct anv_performance_configuration_intel {
   struct vk_object_base      base;

   struct intel_perf_registers *register_config;

   uint64_t                   config_id;
};

void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
void anv_device_perf_init(struct anv_device *device);
void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                 struct anv_query_pool *pool, uint32_t pass,
                                 const struct intel_perf_query_result *accumulated_results,
                                 union VkPerformanceCounterResultKHR *results);

/* Used to emit a series of memcpy operations */
struct anv_memcpy_state {
   struct anv_device *device;
   struct anv_batch *batch;

   struct anv_vb_cache_range vb_bound;
   struct anv_vb_cache_range vb_dirty;
};

struct anv_utrace_flush_copy {
   /* Needs to be the first field */
   struct intel_ds_flush_data ds;

   /* Batch data used to implement a copy of the timestamps recorded in
    * another buffer.
    */
   struct anv_reloc_list relocs;
   struct anv_batch batch;
   struct anv_bo *batch_bo;

   /* Buffer of 64-bit timestamps */
   struct anv_bo *trace_bo;

   /* Syncobj to be signaled when the batch completes */
   struct vk_sync *sync;

   /* Queue on which all the recorded traces are submitted */
   struct anv_queue *queue;

   struct anv_memcpy_state memcpy_state;
};

void anv_device_utrace_init(struct anv_device *device);
void anv_device_utrace_finish(struct anv_device *device);
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                    uint32_t cmd_buffer_count,
                                    struct anv_cmd_buffer **cmd_buffers,
                                    struct anv_utrace_flush_copy **out_flush_data);

#ifdef HAVE_PERFETTO
void anv_perfetto_init(void);
uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
                             uint64_t start_ts);
#else
static inline void anv_perfetto_init(void)
{
}
static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
{
   return 0;
}
static inline void anv_perfetto_end_submit(struct anv_queue *queue,
                                           uint32_t submission_id,
                                           uint64_t start_ts)
{}
#endif


#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
   VK_FROM_HANDLE(__anv_type, __name, __handle)
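
/* Illustrative use (editorial example): entry points unwrap their
 * dispatchable or non-dispatchable handles with these casts, e.g.
 *
 *    ANV_FROM_HANDLE(anv_device, device, _device);
 *    ANV_FROM_HANDLE(anv_image, image, _image);
 */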

VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
                               VkAccelerationStructureKHR,
                               VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
                               VkSamplerYcbcrConversion,
                               VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
                               VkPerformanceConfigurationINTEL,
                               VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)

#define anv_genX(devinfo, thing) ({             \
   __typeof(&gfx9_##thing) genX_thing;          \
   switch ((devinfo)->verx10) {                 \
   case 70:                                     \
      genX_thing = &gfx7_##thing;               \
      break;                                    \
   case 75:                                     \
      genX_thing = &gfx75_##thing;              \
      break;                                    \
   case 80:                                     \
      genX_thing = &gfx8_##thing;               \
      break;                                    \
   case 90:                                     \
      genX_thing = &gfx9_##thing;               \
      break;                                    \
   case 110:                                    \
      genX_thing = &gfx11_##thing;              \
      break;                                    \
   case 120:                                    \
      genX_thing = &gfx12_##thing;              \
      break;                                    \
   case 125:                                    \
      genX_thing = &gfx125_##thing;             \
      break;                                    \
   default:                                     \
      unreachable("Unknown hardware generation"); \
   }                                            \
   genX_thing;                                  \
})
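
/* Illustrative use (editorial example; `init_device_state` stands in for any
 * genX-prefixed entry point declared in anv_genX.h, and `devinfo` is a
 * `const struct intel_device_info *`):
 *
 *    anv_genX(devinfo, init_device_state)(device);
 */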

/* Gen-specific function declarations */
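/* When genX is not already defined (i.e. outside the per-generation compile
 * units), declare every generation's entry points by re-including anv_genX.h
 * once per gfxN prefix.
 */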
#ifdef genX
#  include "anv_genX.h"
#else
#  define genX(x) gfx7_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx9_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx11_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx12_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx125_##x
#  include "anv_genX.h"
#  undef genX
#endif

#endif /* ANV_PRIVATE_H */