• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/drm_fourcc.h"
34 
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #else
40 #define VG(x) ((void)0)
41 #endif
42 
43 #include "common/intel_aux_map.h"
44 #include "common/intel_bind_timeline.h"
45 #include "common/intel_engine.h"
46 #include "common/intel_gem.h"
47 #include "common/intel_l3_config.h"
48 #include "common/intel_measure.h"
49 #include "common/intel_mem.h"
50 #include "common/intel_sample_positions.h"
51 #include "decoder/intel_decoder.h"
52 #include "dev/intel_device_info.h"
53 #include "blorp/blorp.h"
54 #include "compiler/brw_compiler.h"
55 #include "compiler/brw_kernel.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/detect_os.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/perf/u_trace.h"
65 #include "util/set.h"
66 #include "util/sparse_array.h"
67 #include "util/u_atomic.h"
68 #if DETECT_OS_ANDROID
69 #include "util/u_gralloc/u_gralloc.h"
70 #endif
71 #include "util/u_vector.h"
72 #include "util/u_math.h"
73 #include "util/vma.h"
74 #include "util/xmlconfig.h"
75 #include "vk_acceleration_structure.h"
76 #include "vk_alloc.h"
77 #include "vk_buffer.h"
78 #include "vk_buffer_view.h"
79 #include "vk_command_buffer.h"
80 #include "vk_command_pool.h"
81 #include "vk_debug_report.h"
82 #include "vk_descriptor_update_template.h"
83 #include "vk_device.h"
84 #include "vk_device_memory.h"
85 #include "vk_drm_syncobj.h"
86 #include "vk_enum_defines.h"
87 #include "vk_format.h"
88 #include "vk_framebuffer.h"
89 #include "vk_graphics_state.h"
90 #include "vk_image.h"
91 #include "vk_instance.h"
92 #include "vk_pipeline_cache.h"
93 #include "vk_physical_device.h"
94 #include "vk_sampler.h"
95 #include "vk_shader_module.h"
96 #include "vk_sync.h"
97 #include "vk_sync_timeline.h"
98 #include "vk_texcompress_astc.h"
99 #include "vk_util.h"
100 #include "vk_query_pool.h"
101 #include "vk_queue.h"
102 #include "vk_log.h"
103 #include "vk_ycbcr_conversion.h"
104 #include "vk_video.h"
105 #include "vk_meta.h"
106 
107 #ifdef __cplusplus
108 extern "C" {
109 #endif
110 
111 /* Pre-declarations needed for WSI entrypoints */
112 struct wl_surface;
113 struct wl_display;
114 typedef struct xcb_connection_t xcb_connection_t;
115 typedef uint32_t xcb_visualid_t;
116 typedef uint32_t xcb_window_t;
117 
118 struct anv_batch;
119 struct anv_buffer;
120 struct anv_buffer_view;
121 struct anv_image_view;
122 struct anv_instance;
123 
124 struct intel_aux_map_context;
125 struct intel_perf_config;
126 struct intel_perf_counter_pass;
127 struct intel_perf_query_result;
128 
129 #include <vulkan/vulkan.h>
130 #include <vulkan/vk_icd.h>
131 
132 #include "anv_android.h"
133 #include "anv_entrypoints.h"
134 #include "anv_kmd_backend.h"
135 #include "anv_rmv.h"
136 #include "isl/isl.h"
137 
138 #include "dev/intel_debug.h"
139 #undef MESA_LOG_TAG
140 #define MESA_LOG_TAG "MESA-INTEL"
141 #include "util/log.h"
142 #include "wsi_common.h"
143 
144 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
145 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
146 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
147 #endif
148 
149 #define NSEC_PER_SEC 1000000000ull
150 
151 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
152 
153 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
154 #define MAX_VBS         (33 - 2)
155 
156 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
157  * only supports the push model of VS inputs, and we only have 128 GRFs,
158  * minus the g0 and g1 payload, which gives us a maximum of 31 VEs.  Plus,
159  * we use two of them for SGVs.
160  */
161 #define MAX_VES         (31 - 2)
162 
163 #define MAX_XFB_BUFFERS  4
164 #define MAX_XFB_STREAMS  4
165 #define MAX_SETS         8
166 #define MAX_RTS          8
167 #define MAX_VIEWPORTS   16
168 #define MAX_SCISSORS    16
169 #define MAX_PUSH_CONSTANTS_SIZE 256  /* Minimum requirement as of Vulkan 1.4 */
170 #define MAX_DYNAMIC_BUFFERS 16
171 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
172 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
173 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
174 #define MAX_EMBEDDED_SAMPLERS 2048
175 #define MAX_CUSTOM_BORDER_COLORS 4096
176 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
177  * use 64 here to avoid cache issues. This could most likely bring it back to
178  * 32 if we had different virtual addresses for the different views on a given
179  * GEM object.
180  */
181 #define ANV_UBO_ALIGNMENT 64
182 #define ANV_SSBO_ALIGNMENT 4
183 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
184 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
185 #define MAX_SAMPLE_LOCATIONS 16
186 
187 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
188  * and we can't put anything else there we use 64b.
189  */
190 #define ANV_SURFACE_STATE_SIZE (64)
191 
192 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
193  *
194  *    "The surface state model is used when a Binding Table Index (specified
195  *    in the message descriptor) of less than 240 is specified. In this model,
196  *    the Binding Table Index is used to index into the binding table, and the
197  *    binding table entry contains a pointer to the SURFACE_STATE."
198  *
199  * Binding table values above 240 are used for various things in the hardware
200  * such as stateless, stateless with incoherent cache, SLM, and bindless.
201  */
202 #define MAX_BINDING_TABLE_SIZE 240
203 
204 #define ANV_SVGS_VB_INDEX    MAX_VBS
205 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
206 
207 /* We reserve this MI ALU register for the purpose of handling predication.
208  * Other code which uses the MI ALU should leave it alone.
209  */
210 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
211 
212 /* We reserve this MI ALU register to pass around an offset computed from
213  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
214  * Other code which uses the MI ALU should leave it alone.
215  */
216 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
217 
218 /* We reserve this MI ALU register to hold the last programmed bindless
219  * surface state base address so that we can predicate STATE_BASE_ADDRESS
220  * emissions if the address doesn't change.
221  */
222 #define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
223 
224 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
225 
226 #define ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET (8)
227 
228 /* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
229  * and we can't put anything else there we use 64b.
230  */
231 #define ANV_SURFACE_STATE_SIZE (64)
232 #define ANV_SAMPLER_STATE_SIZE (32)
233 
234 /* For gfx12 we set the streamout buffers using 4 separate commands
235  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
236  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
237  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
238  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
239  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
240  * 3DSTATE_SO_BUFFER_INDEX_0.
241  */
242 #define SO_BUFFER_INDEX_0_CMD 0x60
243 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
244 
245 /* The TR-TT L1 page table entries may contain these values instead of actual
246  * pointers to indicate the regions are either NULL or invalid. We program
247  * these values to TR-TT registers, so we could change them, but it's super
248  * convenient to have the NULL value be 0 because everything is
249  * zero-initialized when allocated.
250  *
251  * Since we reserve these values for NULL/INVALID, then we can't use them as
252  * destinations for TR-TT address translation. Both values are shifted by 16
253  * bits, wich results in graphic addresses 0 and 64k. On Anv the first vma
254  * starts at 2MB, so we already don't use 0 and 64k for anything, so there's
255  * nothing really to reserve. We could instead just reserve random 64kb
256  * ranges from any of the non-TR-TT vmas and use their addresses.
257  */
258 #define ANV_TRTT_L1_NULL_TILE_VAL 0
259 #define ANV_TRTT_L1_INVALID_TILE_VAL 1
260 
261 #define ANV_COLOR_OUTPUT_DISABLED (0xff)
262 #define ANV_COLOR_OUTPUT_UNUSED   (0xfe)
263 
264 static inline uint32_t
align_down_npot_u32(uint32_t v,uint32_t a)265 align_down_npot_u32(uint32_t v, uint32_t a)
266 {
267    return v - (v % a);
268 }
269 
270 /** Alignment must be a power of 2. */
271 static inline bool
anv_is_aligned(uintmax_t n,uintmax_t a)272 anv_is_aligned(uintmax_t n, uintmax_t a)
273 {
274    assert(a == (a & -a));
275    return (n & (a - 1)) == 0;
276 }
277 
278 static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)279 vk_to_isl_color(VkClearColorValue color)
280 {
281    return (union isl_color_value) {
282       .u32 = {
283          color.uint32[0],
284          color.uint32[1],
285          color.uint32[2],
286          color.uint32[3],
287       },
288    };
289 }
290 
291 static inline union isl_color_value
vk_to_isl_color_with_format(VkClearColorValue color,enum isl_format format)292 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
293 {
294    const struct isl_format_layout *fmtl = isl_format_get_layout(format);
295    union isl_color_value isl_color = { .u32 = {0, } };
296 
297 #define COPY_COLOR_CHANNEL(c, i) \
298    if (fmtl->channels.c.bits) \
299       isl_color.u32[i] = color.uint32[i]
300 
301    COPY_COLOR_CHANNEL(r, 0);
302    COPY_COLOR_CHANNEL(g, 1);
303    COPY_COLOR_CHANNEL(b, 2);
304    COPY_COLOR_CHANNEL(a, 3);
305 
306 #undef COPY_COLOR_CHANNEL
307 
308    return isl_color;
309 }
310 
311 void __anv_perf_warn(struct anv_device *device,
312                      const struct vk_object_base *object,
313                      const char *file, int line, const char *format, ...)
314    anv_printflike(5, 6);
315 
316 /**
317  * Print a FINISHME message, including its source location.
318  */
319 #define anv_finishme(format, ...) \
320    do { \
321       static bool reported = false; \
322       if (!reported) { \
323          mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
324                     ##__VA_ARGS__); \
325          reported = true; \
326       } \
327    } while (0)
328 
329 /**
330  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
331  */
332 #define anv_perf_warn(objects_macro, format, ...)   \
333    do { \
334       static bool reported = false; \
335       if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
336          __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
337                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
338                   objects_macro, __FILE__, __LINE__,                    \
339                   format, ## __VA_ARGS__);                              \
340          reported = true; \
341       } \
342    } while (0)
343 
344 /* A non-fatal assert.  Useful for debugging. */
345 #if MESA_DEBUG
346 #define anv_assert(x) ({ \
347    if (unlikely(!(x))) \
348       mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
349 })
350 #else
351 #define anv_assert(x)
352 #endif
353 
354 enum anv_bo_alloc_flags {
355    /** Specifies that the BO must have a 32-bit address
356     *
357     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
358     */
359    ANV_BO_ALLOC_32BIT_ADDRESS =           (1 << 0),
360 
361    /** Specifies that the BO may be shared externally */
362    ANV_BO_ALLOC_EXTERNAL =                (1 << 1),
363 
364    /** Specifies that the BO should be mapped */
365    ANV_BO_ALLOC_MAPPED =                  (1 << 2),
366 
367    /** Specifies that the BO should be coherent.
368     *
369     * Note: In platforms with LLC where HOST_CACHED + HOST_COHERENT is free,
370     * bo can get upgraded to HOST_CACHED_COHERENT
371     */
372    ANV_BO_ALLOC_HOST_COHERENT =           (1 << 3),
373 
374    /** Specifies that the BO should be captured in error states */
375    ANV_BO_ALLOC_CAPTURE =                 (1 << 4),
376 
377    /** Specifies that the BO will have an address assigned by the caller
378     *
379     * Such BOs do not exist in any VMA heap.
380     */
381    ANV_BO_ALLOC_FIXED_ADDRESS =           (1 << 5),
382 
383    /** Enables implicit synchronization on the BO
384     *
385     * This is the opposite of EXEC_OBJECT_ASYNC.
386     */
387    ANV_BO_ALLOC_IMPLICIT_SYNC =           (1 << 6),
388 
389    /** Enables implicit synchronization on the BO
390     *
391     * This is equivalent to EXEC_OBJECT_WRITE.
392     */
393    ANV_BO_ALLOC_IMPLICIT_WRITE =          (1 << 7),
394 
395    /** Has an address which is visible to the client */
396    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS =  (1 << 8),
397 
398    /** Align the BO's virtual address to match AUX-TT requirements */
399    ANV_BO_ALLOC_AUX_TT_ALIGNED =          (1 << 9),
400 
401    /** This buffer is allocated from local memory and should be cpu visible */
402    ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE =   (1 << 10),
403 
404    /** For non device local allocations */
405    ANV_BO_ALLOC_NO_LOCAL_MEM =            (1 << 11),
406 
407    /** This buffer will be scanout to display */
408    ANV_BO_ALLOC_SCANOUT =                 (1 << 12),
409 
410    /** For descriptor pools */
411    ANV_BO_ALLOC_DESCRIPTOR_POOL =         (1 << 13),
412 
413    /** For buffers that will be bound using TR-TT.
414     *
415     * Not for buffers used as the TR-TT page tables.
416     */
417    ANV_BO_ALLOC_TRTT =                    (1 << 14),
418 
419    /** Protected buffer */
420    ANV_BO_ALLOC_PROTECTED =               (1 << 15),
421 
422    /** Specifies that the BO should be cached and incoherent. */
423    ANV_BO_ALLOC_HOST_CACHED =             (1 << 16),
424 
425    /** For buffer addressable from the dynamic state heap */
426    ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL =    (1 << 17),
427 
428    /** Specifies that the BO is imported.
429     *
430     * Imported BOs must also be marked as ANV_BO_ALLOC_EXTERNAL
431     */
432    ANV_BO_ALLOC_IMPORTED =                (1 << 18),
433 
434    /** Specify whether this BO is internal to the driver */
435    ANV_BO_ALLOC_INTERNAL =                (1 << 19),
436 
437    /** Allocate with CCS AUX requirements
438     *
439     * This pads the BO include CCS data mapppable through the AUX-TT and
440     * aligned to the AUX-TT requirements.
441     */
442    ANV_BO_ALLOC_AUX_CCS =                 (1 << 20),
443 
444    /** Compressed buffer, only supported in Xe2+ */
445    ANV_BO_ALLOC_COMPRESSED =              (1 << 21),
446 };
447 
448 /** Specifies that the BO should be cached and coherent. */
449 #define ANV_BO_ALLOC_HOST_CACHED_COHERENT (ANV_BO_ALLOC_HOST_COHERENT | \
450                                            ANV_BO_ALLOC_HOST_CACHED)
451 
452 
453 struct anv_bo {
454    const char *name;
455 
456    /* The VMA heap in anv_device from which this BO takes its offset.
457     *
458     * This can only be NULL when has_fixed_address is true.
459     */
460    struct util_vma_heap *vma_heap;
461 
462    /* All userptr bos in Xe KMD has gem_handle set to workaround_bo->gem_handle */
463    uint32_t gem_handle;
464 
465    uint32_t refcount;
466 
467    /* Index into the current validation list.  This is used by the
468     * validation list building algorithm to track which buffers are already
469     * in the validation list so that we can ensure uniqueness.
470     */
471    uint32_t exec_obj_index;
472 
473    /* Index for use with util_sparse_array_free_list */
474    uint32_t free_index;
475 
476    /* Last known offset.  This value is provided by the kernel when we
477     * execbuf and is used as the presumed offset for the next bunch of
478     * relocations, in canonical address format.
479     */
480    uint64_t offset;
481 
482    /** Size of the buffer */
483    uint64_t size;
484 
485    /** Offset at which the CCS data is stored */
486    uint64_t ccs_offset;
487 
488    /* Map for internally mapped BOs.
489     *
490     * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
491     * BO.
492     */
493    void *map;
494 
495    /* The actual size of bo allocated by kmd, basically:
496     * align(size, mem_alignment)
497     */
498    uint64_t actual_size;
499 
500    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
501    uint32_t flags;
502 
503    enum anv_bo_alloc_flags alloc_flags;
504 
505    /** True if this BO wraps a host pointer */
506    bool from_host_ptr:1;
507 
508    /** True if this BO is mapped in the GTT (only used for RMV) */
509    bool gtt_mapped:1;
510 };
511 
512 static inline bool
anv_bo_is_external(const struct anv_bo * bo)513 anv_bo_is_external(const struct anv_bo *bo)
514 {
515    return bo->alloc_flags & ANV_BO_ALLOC_EXTERNAL;
516 }
517 
518 static inline bool
anv_bo_is_vram_only(const struct anv_bo * bo)519 anv_bo_is_vram_only(const struct anv_bo *bo)
520 {
521    return !(bo->alloc_flags & (ANV_BO_ALLOC_NO_LOCAL_MEM |
522                                ANV_BO_ALLOC_MAPPED |
523                                ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
524                                ANV_BO_ALLOC_IMPORTED));
525 }
526 
527 static inline struct anv_bo *
anv_bo_ref(struct anv_bo * bo)528 anv_bo_ref(struct anv_bo *bo)
529 {
530    p_atomic_inc(&bo->refcount);
531    return bo;
532 }
533 
534 enum intel_device_info_mmap_mode
535 anv_bo_get_mmap_mode(struct anv_device *device, struct anv_bo *bo);
536 
537 static inline bool
anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)538 anv_bo_needs_host_cache_flush(enum anv_bo_alloc_flags alloc_flags)
539 {
540    return (alloc_flags & (ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT)) ==
541           ANV_BO_ALLOC_HOST_CACHED;
542 }
543 
544 struct anv_address {
545    struct anv_bo *bo;
546    int64_t offset;
547 };
548 
549 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
550 
551 static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)552 anv_address_from_u64(uint64_t addr_u64)
553 {
554    assert(addr_u64 == intel_canonical_address(addr_u64));
555    return (struct anv_address) {
556       .bo = NULL,
557       .offset = addr_u64,
558    };
559 }
560 
561 static inline bool
anv_address_is_null(struct anv_address addr)562 anv_address_is_null(struct anv_address addr)
563 {
564    return addr.bo == NULL && addr.offset == 0;
565 }
566 
567 static inline uint64_t
anv_address_physical(struct anv_address addr)568 anv_address_physical(struct anv_address addr)
569 {
570    uint64_t address = (addr.bo ? addr.bo->offset : 0ull) + addr.offset;
571    return intel_canonical_address(address);
572 }
573 
574 static inline struct u_trace_address
anv_address_utrace(struct anv_address addr)575 anv_address_utrace(struct anv_address addr)
576 {
577    return (struct u_trace_address) {
578       .bo = addr.bo,
579       .offset = addr.offset,
580    };
581 }
582 
583 static inline struct anv_address
anv_address_add(struct anv_address addr,uint64_t offset)584 anv_address_add(struct anv_address addr, uint64_t offset)
585 {
586    addr.offset += offset;
587    return addr;
588 }
589 
590 static inline struct anv_address
anv_address_add_aligned(struct anv_address addr,uint64_t offset,uint32_t alignment)591 anv_address_add_aligned(struct anv_address addr, uint64_t offset, uint32_t alignment)
592 {
593    addr.offset = align(addr.offset + offset, alignment);
594    return addr;
595 }
596 
597 static inline void *
anv_address_map(struct anv_address addr)598 anv_address_map(struct anv_address addr)
599 {
600    if (addr.bo == NULL)
601       return NULL;
602 
603    if (addr.bo->map == NULL)
604       return NULL;
605 
606    return addr.bo->map + addr.offset;
607 }
608 
609 /* Represent a virtual address range */
610 struct anv_va_range {
611    uint64_t addr;
612    uint64_t size;
613 };
614 
615 /* Represents a lock-free linked list of "free" things.  This is used by
616  * both the block pool and the state pools.  Unfortunately, in order to
617  * solve the ABA problem, we can't use a single uint32_t head.
618  */
619 union anv_free_list {
620    struct {
621       uint32_t offset;
622 
623       /* A simple count that is incremented every time the head changes. */
624       uint32_t count;
625    };
626    /* Make sure it's aligned to 64 bits. This will make atomic operations
627     * faster on 32 bit platforms.
628     */
629    alignas(8) uint64_t u64;
630 };
631 
632 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
633 
634 struct anv_block_state {
635    union {
636       struct {
637          uint32_t next;
638          uint32_t end;
639       };
640       /* Make sure it's aligned to 64 bits. This will make atomic operations
641        * faster on 32 bit platforms.
642        */
643       alignas(8) uint64_t u64;
644    };
645 };
646 
647 #define anv_block_pool_foreach_bo(bo, pool)  \
648    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
649         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
650         _pp_bo++)
651 
652 #define ANV_MAX_BLOCK_POOL_BOS 20
653 
654 struct anv_block_pool {
655    const char *name;
656 
657    struct anv_device *device;
658 
659    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
660    struct anv_bo *bo;
661    uint32_t nbos;
662 
663    /* Maximum size of the pool */
664    uint64_t max_size;
665 
666    /* Current size of the pool */
667    uint64_t size;
668 
669    /* The canonical address where the start of the pool is pinned. The various bos that
670     * are created as the pool grows will have addresses in the range
671     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
672     */
673    uint64_t start_address;
674 
675    /* The offset from the start of the bo to the "center" of the block
676     * pool.  Pointers to allocated blocks are given by
677     * bo.map + center_bo_offset + offsets.
678     */
679    uint32_t center_bo_offset;
680 
681    struct anv_block_state state;
682 
683    enum anv_bo_alloc_flags bo_alloc_flags;
684 };
685 
686 /* Block pools are backed by a fixed-size 1GB memfd */
687 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
688 
689 /* The center of the block pool is also the middle of the memfd.  This may
690  * change in the future if we decide differently for some reason.
691  */
692 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
693 
694 static inline uint32_t
anv_block_pool_size(struct anv_block_pool * pool)695 anv_block_pool_size(struct anv_block_pool *pool)
696 {
697    return pool->state.end;
698 }
699 
700 struct anv_state {
701    int64_t offset;
702    uint32_t alloc_size;
703    uint32_t idx;
704    void *map;
705 };
706 
707 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
708 
709 struct anv_fixed_size_state_pool {
710    union anv_free_list free_list;
711    struct anv_block_state block;
712 };
713 
714 #define ANV_MIN_STATE_SIZE_LOG2 6
715 #define ANV_MAX_STATE_SIZE_LOG2 24
716 
717 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
718 
719 struct anv_free_entry {
720    uint32_t next;
721    struct anv_state state;
722 };
723 
724 struct anv_state_table {
725    struct anv_device *device;
726    int fd;
727    struct anv_free_entry *map;
728    uint32_t size;
729    uint64_t max_size;
730    struct anv_block_state state;
731    struct u_vector cleanups;
732 };
733 
734 struct anv_state_pool {
735    struct anv_block_pool block_pool;
736 
737    /* Offset into the relevant state base address where the state pool starts
738     * allocating memory.
739     */
740    int64_t start_offset;
741 
742    struct anv_state_table table;
743 
744    /* The size of blocks which will be allocated from the block pool */
745    uint32_t block_size;
746 
747    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
748 };
749 
750 struct anv_state_reserved_pool {
751    struct anv_state_pool *pool;
752    union anv_free_list reserved_blocks;
753    uint32_t count;
754 };
755 
756 struct anv_state_reserved_array_pool {
757    struct anv_state_pool *pool;
758    simple_mtx_t mutex;
759    /* Bitfield of usable elements */
760    BITSET_WORD *states;
761    /* Backing store */
762    struct anv_state state;
763    /* Number of elements */
764    uint32_t count;
765    /* Stride between each element */
766    uint32_t stride;
767    /* Size of each element */
768    uint32_t size;
769 };
770 
771 struct anv_state_stream {
772    struct anv_state_pool *state_pool;
773 
774    /* The size of blocks to allocate from the state pool */
775    uint32_t block_size;
776 
777    /* Current block we're allocating from */
778    struct anv_state block;
779 
780    /* Offset into the current block at which to allocate the next state */
781    uint32_t next;
782 
783    /* Sum of all the blocks in all_blocks */
784    uint32_t total_size;
785 
786    /* List of all blocks allocated from this pool */
787    struct util_dynarray all_blocks;
788 };
789 
790 /* The block_pool functions exported for testing only.  The block pool should
791  * only be used via a state pool (see below).
792  */
793 VkResult anv_block_pool_init(struct anv_block_pool *pool,
794                              struct anv_device *device,
795                              const char *name,
796                              uint64_t start_address,
797                              uint32_t initial_size,
798                              uint32_t max_size);
799 void anv_block_pool_finish(struct anv_block_pool *pool);
800 VkResult anv_block_pool_alloc(struct anv_block_pool *pool,
801                               uint32_t block_size,
802                               int64_t *offset,
803                               uint32_t *padding);
804 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
805 size);
806 
807 struct anv_state_pool_params {
808    const char *name;
809    uint64_t    base_address;
810    int64_t     start_offset;
811    uint32_t    block_size;
812    uint32_t    max_size;
813 };
814 
815 VkResult anv_state_pool_init(struct anv_state_pool *pool,
816                              struct anv_device *device,
817                              const struct anv_state_pool_params *params);
818 void anv_state_pool_finish(struct anv_state_pool *pool);
819 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
820                                       uint32_t state_size, uint32_t alignment);
821 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
822 
823 static inline struct anv_address
anv_state_pool_state_address(struct anv_state_pool * pool,struct anv_state state)824 anv_state_pool_state_address(struct anv_state_pool *pool, struct anv_state state)
825 {
826    return (struct anv_address) {
827       .bo = pool->block_pool.bo,
828       .offset = state.offset - pool->start_offset,
829    };
830 }
831 
832 static inline struct anv_state
anv_state_pool_emit_data(struct anv_state_pool * pool,size_t size,size_t align,const void * p)833 anv_state_pool_emit_data(struct anv_state_pool *pool,
834                          size_t size, size_t align,
835                          const void *p)
836 {
837    struct anv_state state;
838 
839    state = anv_state_pool_alloc(pool, size, align);
840    memcpy(state.map, p, size);
841 
842    return state;
843 }
844 
845 void anv_state_stream_init(struct anv_state_stream *stream,
846                            struct anv_state_pool *state_pool,
847                            uint32_t block_size);
848 void anv_state_stream_finish(struct anv_state_stream *stream);
849 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
850                                         uint32_t size, uint32_t alignment);
851 
852 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
853                                       struct anv_state_pool *parent,
854                                       uint32_t count, uint32_t size,
855                                       uint32_t alignment);
856 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
857 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
858 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
859                                   struct anv_state state);
860 
861 VkResult anv_state_reserved_array_pool_init(struct anv_state_reserved_array_pool *pool,
862                                             struct anv_state_pool *parent,
863                                             uint32_t count, uint32_t size,
864                                             uint32_t alignment);
865 void anv_state_reserved_array_pool_finish(struct anv_state_reserved_array_pool *pool);
866 struct anv_state anv_state_reserved_array_pool_alloc(struct anv_state_reserved_array_pool *pool,
867                                                      bool alloc_back);
868 struct anv_state anv_state_reserved_array_pool_alloc_index(struct anv_state_reserved_array_pool *pool,
869                                                            unsigned idx);
870 uint32_t anv_state_reserved_array_pool_state_index(struct anv_state_reserved_array_pool *pool,
871                                                    struct anv_state state);
872 void anv_state_reserved_array_pool_free(struct anv_state_reserved_array_pool *pool,
873                                         struct anv_state state);
874 
875 VkResult anv_state_table_init(struct anv_state_table *table,
876                              struct anv_device *device,
877                              uint32_t initial_entries);
878 void anv_state_table_finish(struct anv_state_table *table);
879 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
880                              uint32_t count);
881 void anv_free_list_push(union anv_free_list *list,
882                         struct anv_state_table *table,
883                         uint32_t idx, uint32_t count);
884 struct anv_state* anv_free_list_pop(union anv_free_list *list,
885                                     struct anv_state_table *table);
886 
887 
888 static inline struct anv_state *
anv_state_table_get(struct anv_state_table * table,uint32_t idx)889 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
890 {
891    return &table->map[idx].state;
892 }
893 /**
894  * Implements a pool of re-usable BOs.  The interface is identical to that
895  * of block_pool except that each block is its own BO.
896  */
897 struct anv_bo_pool {
898    const char *name;
899 
900    struct anv_device *device;
901 
902    enum anv_bo_alloc_flags bo_alloc_flags;
903 
904    struct util_sparse_array_free_list free_list[16];
905 };
906 
907 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
908                       const char *name, enum anv_bo_alloc_flags alloc_flags);
909 void anv_bo_pool_finish(struct anv_bo_pool *pool);
910 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
911                            struct anv_bo **bo_out);
912 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
913 
914 struct anv_scratch_pool {
915    enum anv_bo_alloc_flags alloc_flags;
916    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
917    struct anv_bo *bos[16][MESA_SHADER_STAGES];
918    uint32_t surfs[16];
919    struct anv_state surf_states[16];
920 };
921 
922 void anv_scratch_pool_init(struct anv_device *device,
923                            struct anv_scratch_pool *pool,
924                            bool protected);
925 void anv_scratch_pool_finish(struct anv_device *device,
926                              struct anv_scratch_pool *pool);
927 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
928                                       struct anv_scratch_pool *pool,
929                                       gl_shader_stage stage,
930                                       unsigned per_thread_scratch);
931 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
932                                    struct anv_scratch_pool *pool,
933                                    unsigned per_thread_scratch);
934 
935 /* Note that on Gfx12HP we pass a scratch space surface state offset
936  * shifted by 2 relative to the value specified on the BSpec, since
937  * that allows the compiler to save a shift instruction while
938  * constructing the extended descriptor for SS addressing.  That
939  * worked because we limit the scratch surface state pool to 8 MB and
940  * because we relied on the legacy (ExBSO=0) encoding of the extended
941  * descriptor in order to save the shift, which is no longer supported
942  * for the UGM shared function on Xe2 platforms, so we no longer
943  * attempt to do that trick.
944  */
945 #define ANV_SCRATCH_SPACE_SHIFT(ver) ((ver) >= 20 ? 6 : 4)
946 
947 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
948 struct anv_bo_cache {
949    struct util_sparse_array bo_map;
950    pthread_mutex_t mutex;
951 };
952 
953 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
954                            struct anv_device *device);
955 void anv_bo_cache_finish(struct anv_bo_cache *cache);
956 
957 struct anv_queue_family {
958    /* Standard bits passed on to the client */
959    VkQueueFlags   queueFlags;
960    uint32_t       queueCount;
961 
962    enum intel_engine_class engine_class;
963    bool supports_perf;
964 };
965 
966 #define ANV_MAX_QUEUE_FAMILIES 5
967 
968 struct anv_memory_type {
969    /* Standard bits passed on to the client */
970    VkMemoryPropertyFlags   propertyFlags;
971    uint32_t                heapIndex;
972    /* Whether this is the dynamic visible memory type */
973    bool                    dynamic_visible;
974    bool                    compressed;
975 };
976 
977 struct anv_memory_heap {
978    /* Standard bits passed on to the client */
979    VkDeviceSize      size;
980    VkMemoryHeapFlags flags;
981 
982    /** Driver-internal book-keeping.
983     *
984     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
985     */
986    alignas(8) VkDeviceSize used;
987 
988    bool              is_local_mem;
989 };
990 
991 struct anv_memregion {
992    const struct intel_memory_class_instance *region;
993    uint64_t size;
994    uint64_t available;
995 };
996 
997 enum anv_timestamp_capture_type {
998     ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE,
999     ANV_TIMESTAMP_CAPTURE_END_OF_PIPE,
1000     ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
1001     ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
1002     ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
1003 };
1004 
1005 struct anv_physical_device {
1006     struct vk_physical_device                   vk;
1007 
1008     /* Link in anv_instance::physical_devices */
1009     struct list_head                            link;
1010 
1011     struct anv_instance *                       instance;
1012     char                                        path[20];
1013     struct intel_device_info                      info;
1014 
1015     bool                                        video_decode_enabled;
1016     bool                                        video_encode_enabled;
1017 
1018     struct brw_compiler *                       compiler;
1019     struct isl_device                           isl_dev;
1020     struct intel_perf_config *                    perf;
1021     /*
1022      * Number of commands required to implement a performance query begin +
1023      * end.
1024      */
1025     uint32_t                                    n_perf_query_commands;
1026     bool                                        has_exec_async;
1027     bool                                        has_exec_capture;
1028     VkQueueGlobalPriorityKHR                    max_context_priority;
1029     uint64_t                                    gtt_size;
1030 
1031     bool                                        always_use_bindless;
1032     bool                                        use_call_secondary;
1033 
1034     /** True if we can use timeline semaphores through execbuf */
1035     bool                                        has_exec_timeline;
1036 
1037     /** True if we can read the GPU timestamp register
1038      *
1039      * When running in a virtual context, the timestamp register is unreadable
1040      * on Gfx12+.
1041      */
1042     bool                                        has_reg_timestamp;
1043 
1044     /** True if we can create protected contexts. */
1045     bool                                        has_protected_contexts;
1046 
1047     /** Whether KMD has the ability to create VM objects */
1048     bool                                        has_vm_control;
1049 
1050     /** Whether the device is not able map all the device local memory on the
1051      * host
1052      */
1053     bool                                        has_small_bar;
1054 
1055     /** True if we have the means to do sparse binding (e.g., a Kernel driver
1056      * a vm_bind ioctl).
1057      */
1058     enum anv_sparse_type {
1059       ANV_SPARSE_TYPE_NOT_SUPPORTED = 0,
1060       ANV_SPARSE_TYPE_VM_BIND,
1061       ANV_SPARSE_TYPE_TRTT,
1062       ANV_SPARSE_TYPE_FAKE,
1063     } sparse_type;
1064 
1065     /** True if HW supports ASTC LDR */
1066     bool                                        has_astc_ldr;
1067     /** True if denorms in void extents should be flushed to zero */
1068     bool                                        flush_astc_ldr_void_extent_denorms;
1069     /** True if ASTC LDR is supported via emulation */
1070     bool                                        emu_astc_ldr;
1071     /* true if FCV optimization should be disabled. */
1072     bool                                        disable_fcv;
1073     /**/
1074     bool                                        uses_ex_bso;
1075 
1076     bool                                        always_flush_cache;
1077 
1078     /** True if application memory is allocated with extra AUX memory
1079      *
1080      * Applications quite often pool image allocations together in a single
1081      * VkDeviceMemory object. On platforms like MTL, the alignment of images
1082      * with compression mapped through the AUX translation tables is large :
1083      * 1MB. This can create a lot of wasted space in the application memory
1084      * objects.
1085      *
1086      * To workaround this problem, we allocate CCS data at the end of
1087      * VkDeviceMemory objects. This would not work well for TGL-like platforms
1088      * because the AUX translation tables also contain the format of the
1089      * images, but on MTL the HW ignore those values. So we can share the AUX
1090      * TT entries between different images without problem.
1091      *
1092      * This should be only true for platforms with AUX TT.
1093      */
1094     bool                                         alloc_aux_tt_mem;
1095 
1096     /**
1097      * True if the descriptors buffers are holding one of the following :
1098      *    - anv_sampled_image_descriptor
1099      *    - anv_storage_image_descriptor
1100      *    - anv_address_range_descriptor
1101      *
1102      * Accessing the descriptors in a bindless fashion from the shader
1103      * requires an indirection in the shader, first fetch one of the structure
1104      * listed above from the descriptor buffer, then emit the send message to
1105      * the fixed function (sampler, dataport, etc...) with the handle fetched
1106      * above.
1107      *
1108      * We need to do things this way prior to DG2 because the bindless surface
1109      * state space is limited to 64Mb and some application will allocate more
1110      * than what HW can support. On DG2+ we get 4Gb of bindless surface state
1111      * and so we can reference directly RENDER_SURFACE_STATE/SAMPLER_STATE
1112      * structures instead.
1113      */
1114     bool                                        indirect_descriptors;
1115 
1116     bool                                        uses_relocs;
1117 
1118     /** Can the platform support cooperative matrices and is it enabled? */
1119     bool                                        has_cooperative_matrix;
1120 
1121     struct {
1122       uint32_t                                  family_count;
1123       struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
1124     } queue;
1125 
1126     struct {
1127       uint32_t                                  type_count;
1128       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
1129       uint32_t                                  heap_count;
1130       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
1131 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
1132       bool                                      need_flush;
1133 #endif
1134       /** Mask of memory types of normal allocations */
1135       uint32_t                                  default_buffer_mem_types;
1136       /** Mask of memory types of data indexable from the dynamic heap */
1137       uint32_t                                  dynamic_visible_mem_types;
1138       /** Mask of memory types of protected buffers/images */
1139       uint32_t                                  protected_mem_types;
1140       /**
1141        * Mask of memory types of compressed buffers/images. This is generally
1142        * a win for images, but a loss for buffers.
1143        */
1144       uint32_t                                  compressed_mem_types;
1145     } memory;
1146 
1147     struct {
1148        /**
1149         * General state pool
1150         */
1151        struct anv_va_range                      general_state_pool;
1152        /**
1153         * Low 32bit heap
1154         */
1155        struct anv_va_range                      low_heap;
1156        /**
1157         * Binding table pool
1158         */
1159        struct anv_va_range                      binding_table_pool;
1160        /**
1161         * Internal surface states for blorp & push descriptors.
1162         */
1163        struct anv_va_range                      internal_surface_state_pool;
1164        /**
1165         * Scratch surfaces (overlaps with internal_surface_state_pool).
1166         */
1167        struct anv_va_range                      scratch_surface_state_pool;
1168        /**
1169         * Bindless surface states (indirectly referred to by indirect
1170         * descriptors or for direct descriptors)
1171         */
1172        struct anv_va_range                      bindless_surface_state_pool;
1173        /**
1174         * Dynamic state pool
1175         */
1176        struct anv_va_range                      dynamic_state_pool;
1177        /**
1178         * Buffer pool that can be index from the dynamic state heap
1179         */
1180        struct anv_va_range                      dynamic_visible_pool;
1181        /**
1182         * Indirect descriptor pool
1183         */
1184        struct anv_va_range                      indirect_descriptor_pool;
1185        /**
1186         * Indirect push descriptor pool
1187         */
1188        struct anv_va_range                      indirect_push_descriptor_pool;
1189        /**
1190         * Instruction state pool
1191         */
1192        struct anv_va_range                      instruction_state_pool;
1193        /**
1194         * Push descriptor with descriptor buffers
1195         */
1196        struct anv_va_range                      push_descriptor_buffer_pool;
1197        /**
1198         * AUX-TT
1199         */
1200        struct anv_va_range                      aux_tt_pool;
1201        /**
1202         * Client heap
1203         */
1204        struct anv_va_range                      high_heap;
1205        struct anv_va_range                      trtt;
1206     } va;
1207 
1208     /* Either we have a single vram region and it's all mappable, or we have
1209      * both mappable & non-mappable parts. System memory is always available.
1210      */
1211     struct anv_memregion                        vram_mappable;
1212     struct anv_memregion                        vram_non_mappable;
1213     struct anv_memregion                        sys;
1214     uint8_t                                     driver_build_sha1[20];
1215     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
1216     uint8_t                                     driver_uuid[VK_UUID_SIZE];
1217     uint8_t                                     device_uuid[VK_UUID_SIZE];
1218     uint8_t                                     rt_uuid[VK_UUID_SIZE];
1219 
1220     /* Maximum amount of scratch space used by all the GRL kernels */
1221     uint32_t                                    max_grl_scratch_size;
1222 
1223     struct vk_sync_type                         sync_syncobj_type;
1224     struct vk_sync_timeline_type                sync_timeline_type;
1225     const struct vk_sync_type *                 sync_types[4];
1226 
1227     struct wsi_device                       wsi_device;
1228     int                                         local_fd;
1229     bool                                        has_local;
1230     int64_t                                     local_major;
1231     int64_t                                     local_minor;
1232     int                                         master_fd;
1233     bool                                        has_master;
1234     int64_t                                     master_major;
1235     int64_t                                     master_minor;
1236     struct intel_query_engine_info *            engine_info;
1237 
1238     void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
1239                                enum anv_timestamp_capture_type, void *);
1240     void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
1241                              struct anv_address, struct anv_address,
1242                              uint32_t);
1243     struct intel_measure_device                 measure_device;
1244 
1245     /* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */
1246     uint32_t                                    gpgpu_pipeline_value;
1247 
1248     /** A pre packed VERTEX_ELEMENT_STATE feeding 0s to the VS stage
1249      *
1250      * For use when a pipeline has no VS input
1251      */
1252     uint32_t                                    empty_vs_input[2];
1253 };
1254 
1255 VkResult anv_physical_device_try_create(struct vk_instance *vk_instance,
1256                                         struct _drmDevice *drm_device,
1257                                         struct vk_physical_device **out);
1258 
1259 void anv_physical_device_destroy(struct vk_physical_device *vk_device);
1260 
1261 static inline uint32_t
anv_physical_device_bindless_heap_size(const struct anv_physical_device * device,bool descriptor_buffer)1262 anv_physical_device_bindless_heap_size(const struct anv_physical_device *device,
1263                                        bool descriptor_buffer)
1264 {
1265    /* Pre-Gfx12.5, the HW bindless surface heap is only 64MB. After it's 4GB,
1266     * but we have some workarounds that require 2 heaps to overlap, so the
1267     * size is dictated by our VA allocation.
1268     */
1269    return device->uses_ex_bso ?
1270       (descriptor_buffer ?
1271        device->va.dynamic_visible_pool.size :
1272        device->va.bindless_surface_state_pool.size) :
1273       64 * 1024 * 1024 /* 64 MiB */;
1274 }
1275 
1276 static inline bool
anv_physical_device_has_vram(const struct anv_physical_device * device)1277 anv_physical_device_has_vram(const struct anv_physical_device *device)
1278 {
1279    return device->vram_mappable.size > 0;
1280 }
1281 
1282 struct anv_instance {
1283     struct vk_instance                          vk;
1284 
1285     struct driOptionCache                       dri_options;
1286     struct driOptionCache                       available_dri_options;
1287 
1288     int                                         mesh_conv_prim_attrs_to_vert_attrs;
1289     bool                                        enable_tbimr;
1290     bool                                        external_memory_implicit_sync;
1291     bool                                        force_guc_low_latency;
1292 
1293     /**
1294      * Workarounds for game bugs.
1295      */
1296     uint8_t                                     assume_full_subgroups;
1297     bool                                        assume_full_subgroups_with_barrier;
1298     bool                                        limit_trig_input_range;
1299     bool                                        sample_mask_out_opengl_behaviour;
1300     bool                                        force_filter_addr_rounding;
1301     bool                                        fp64_workaround_enabled;
1302     float                                       lower_depth_range_rate;
1303     unsigned                                    generated_indirect_threshold;
1304     unsigned                                    generated_indirect_ring_threshold;
1305     unsigned                                    query_clear_with_blorp_threshold;
1306     unsigned                                    query_copy_with_shader_threshold;
1307     unsigned                                    force_vk_vendor;
1308     bool                                        has_fake_sparse;
1309     bool                                        disable_fcv;
1310     bool                                        enable_buffer_comp;
1311     bool                                        compression_control_enabled;
1312     bool                                        anv_fake_nonlocal_memory;
1313     bool                                        anv_upper_bound_descriptor_pool_sampler;
1314 
1315     /* HW workarounds */
1316     bool                                        no_16bit;
1317     bool                                        intel_enable_wa_14018912822;
1318 
1319     /**
1320      * Ray tracing configuration.
1321      */
1322     unsigned                                    stack_ids;
1323 };
1324 
1325 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1326 void anv_finish_wsi(struct anv_physical_device *physical_device);
1327 
1328 struct anv_queue {
1329    struct vk_queue                           vk;
1330 
1331    struct anv_device *                       device;
1332 
1333    const struct anv_queue_family *           family;
1334 
1335    struct intel_batch_decode_ctx *           decoder;
1336 
1337    union {
1338       uint32_t                               exec_flags; /* i915 */
1339       uint32_t                               context_id; /* i915 */
1340       uint32_t                               exec_queue_id; /* Xe */
1341    };
1342 
1343    uint32_t                                  bind_queue_id; /* Xe */
1344 
1345    /** Context/Engine id which executes companion RCS command buffer */
1346    uint32_t                                  companion_rcs_id;
1347 
1348    /** Synchronization object for debug purposes (DEBUG_SYNC) */
1349    struct vk_sync                           *sync;
1350 
1351    /** Companion synchronization object
1352     *
1353     * Vulkan command buffers can be destroyed as soon as their lifecycle moved
1354     * from the Pending state to the Invalid/Executable state. This transition
1355     * happens when the VkFence/VkSemaphore associated with the completion of
1356     * the command buffer work is signaled.
1357     *
1358     * When we're using a companion command buffer to execute part of another
1359     * command buffer, we need to tie the 2 work submissions together to ensure
1360     * when the associated VkFence/VkSemaphore is signaled, both command
1361     * buffers are actually unused by the HW. To do this, we run an empty batch
1362     * buffer that we use to signal after both submissions :
1363     *
1364     *   CCS -->    main   ---> empty_batch (with wait on companion) --> signal
1365     *   RCS --> companion -|
1366     *
1367     * When companion batch completes, it signals companion_sync and allow
1368     * empty_batch to execute. Since empty_batch is running on the main engine,
1369     * we're guaranteed that upon completion both main & companion command
1370     * buffers are not used by HW anymore.
1371     */
1372    struct vk_sync                           *companion_sync;
1373 
1374    struct intel_ds_queue                     ds;
1375 
1376    struct anv_async_submit                  *init_submit;
1377    struct anv_async_submit                  *init_companion_submit;
1378 };
1379 
1380 struct nir_xfb_info;
1381 struct anv_pipeline_bind_map;
1382 struct anv_pipeline_sets_layout;
1383 struct anv_push_descriptor_info;
1384 enum anv_dynamic_push_bits;
1385 
1386 void anv_device_init_embedded_samplers(struct anv_device *device);
1387 void anv_device_finish_embedded_samplers(struct anv_device *device);
1388 
1389 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1390 
1391 struct anv_shader_bin *
1392 anv_device_search_for_kernel(struct anv_device *device,
1393                              struct vk_pipeline_cache *cache,
1394                              const void *key_data, uint32_t key_size,
1395                              bool *user_cache_bit);
1396 
1397 struct anv_shader_upload_params;
1398 
1399 struct anv_shader_bin *
1400 anv_device_upload_kernel(struct anv_device *device,
1401                          struct vk_pipeline_cache *cache,
1402                          const struct anv_shader_upload_params *params);
1403 
1404 struct nir_shader;
1405 struct nir_shader_compiler_options;
1406 
1407 struct nir_shader *
1408 anv_device_search_for_nir(struct anv_device *device,
1409                           struct vk_pipeline_cache *cache,
1410                           const struct nir_shader_compiler_options *nir_options,
1411                           unsigned char sha1_key[20],
1412                           void *mem_ctx);
1413 
1414 void
1415 anv_device_upload_nir(struct anv_device *device,
1416                       struct vk_pipeline_cache *cache,
1417                       const struct nir_shader *nir,
1418                       unsigned char sha1_key[20]);
1419 
1420 void
1421 anv_load_fp64_shader(struct anv_device *device);
1422 
1423 /**
1424  * This enum tracks the various HW instructions that hold graphics state
1425  * needing to be reprogrammed. Some instructions are grouped together as they
1426  * pretty much need to be emitted together (like 3DSTATE_URB_*).
1427  *
1428  * Not all bits apply to all platforms. We build a dirty state based on
1429  * enabled extensions & generation on anv_device.
1430  */
1431 enum anv_gfx_state_bits {
1432    /* Pipeline states */
1433    ANV_GFX_STATE_URB, /* All legacy stages, including mesh */
1434    ANV_GFX_STATE_VF_STATISTICS,
1435    ANV_GFX_STATE_VF_SGVS,
1436    ANV_GFX_STATE_VF_SGVS_2,
1437    ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
1438    ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
1439    ANV_GFX_STATE_PRIMITIVE_REPLICATION,
1440    ANV_GFX_STATE_SBE,
1441    ANV_GFX_STATE_SBE_SWIZ,
1442    ANV_GFX_STATE_SO_DECL_LIST,
1443    ANV_GFX_STATE_VS,
1444    ANV_GFX_STATE_HS,
1445    ANV_GFX_STATE_DS,
1446    ANV_GFX_STATE_GS,
1447    ANV_GFX_STATE_PS,
1448    ANV_GFX_STATE_SBE_MESH,
1449    ANV_GFX_STATE_CLIP_MESH,
1450    ANV_GFX_STATE_MESH_CONTROL,
1451    ANV_GFX_STATE_MESH_SHADER,
1452    ANV_GFX_STATE_MESH_DISTRIB,
1453    ANV_GFX_STATE_TASK_CONTROL,
1454    ANV_GFX_STATE_TASK_SHADER,
1455    ANV_GFX_STATE_TASK_REDISTRIB,
1456    /* Dynamic states */
1457    ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */
1458    ANV_GFX_STATE_BLEND_STATE_PTR, /* The pointer to the dynamic state */
1459    ANV_GFX_STATE_CLIP,
1460    ANV_GFX_STATE_CC_STATE,
1461    ANV_GFX_STATE_CC_STATE_PTR,
1462    ANV_GFX_STATE_COARSE_PIXEL,
1463    ANV_GFX_STATE_CPS,
1464    ANV_GFX_STATE_DEPTH_BOUNDS,
1465    ANV_GFX_STATE_INDEX_BUFFER,
1466    ANV_GFX_STATE_LINE_STIPPLE,
1467    ANV_GFX_STATE_MULTISAMPLE,
1468    ANV_GFX_STATE_PS_BLEND,
1469    ANV_GFX_STATE_RASTER,
1470    ANV_GFX_STATE_SAMPLE_MASK,
1471    ANV_GFX_STATE_SAMPLE_PATTERN,
1472    ANV_GFX_STATE_SCISSOR,
1473    ANV_GFX_STATE_SF,
1474    ANV_GFX_STATE_STREAMOUT,
1475    ANV_GFX_STATE_TE,
1476    ANV_GFX_STATE_VERTEX_INPUT,
1477    ANV_GFX_STATE_VF,
1478    ANV_GFX_STATE_VF_TOPOLOGY,
1479    ANV_GFX_STATE_VFG,
1480    ANV_GFX_STATE_VIEWPORT_CC,
1481    ANV_GFX_STATE_VIEWPORT_CC_PTR,
1482    ANV_GFX_STATE_VIEWPORT_SF_CLIP,
1483    ANV_GFX_STATE_WM,
1484    ANV_GFX_STATE_WM_DEPTH_STENCIL,
1485    ANV_GFX_STATE_PS_EXTRA,
1486    ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
1487    ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
1488    ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */
1489    ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
1490    ANV_GFX_STATE_FS_MSAA_FLAGS,
1491    ANV_GFX_STATE_TCS_INPUT_VERTICES,
1492    ANV_GFX_STATE_COARSE_STATE,
1493 
1494    ANV_GFX_STATE_MAX,
1495 };
1496 
1497 const char *anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state);
1498 
1499 enum anv_coarse_pixel_state {
1500    ANV_COARSE_PIXEL_STATE_UNKNOWN,
1501    ANV_COARSE_PIXEL_STATE_DISABLED,
1502    ANV_COARSE_PIXEL_STATE_ENABLED,
1503 };
1504 
1505 /* This structure tracks the values to program in HW instructions for
1506  * corresponding to dynamic states of the Vulkan API. Only fields that need to
1507  * be reemitted outside of the VkPipeline object are tracked here.
1508  */
1509 struct anv_gfx_dynamic_state {
1510    /* 3DSTATE_BLEND_STATE_POINTERS */
1511    struct {
1512       bool AlphaToCoverageEnable;
1513       bool AlphaToOneEnable;
1514       bool IndependentAlphaBlendEnable;
1515       bool ColorDitherEnable;
1516       struct {
1517          bool     WriteDisableAlpha;
1518          bool     WriteDisableRed;
1519          bool     WriteDisableGreen;
1520          bool     WriteDisableBlue;
1521 
1522          uint32_t LogicOpFunction;
1523          bool     LogicOpEnable;
1524 
1525          bool     ColorBufferBlendEnable;
1526          uint32_t ColorClampRange;
1527          bool     PreBlendColorClampEnable;
1528          bool     PostBlendColorClampEnable;
1529          uint32_t SourceBlendFactor;
1530          uint32_t DestinationBlendFactor;
1531          uint32_t ColorBlendFunction;
1532          uint32_t SourceAlphaBlendFactor;
1533          uint32_t DestinationAlphaBlendFactor;
1534          uint32_t AlphaBlendFunction;
1535       } rts[MAX_RTS];
1536 
1537       struct anv_state state;
1538    } blend;
1539 
1540    /* 3DSTATE_CC_STATE_POINTERS */
1541    struct {
1542       float BlendConstantColorRed;
1543       float BlendConstantColorGreen;
1544       float BlendConstantColorBlue;
1545       float BlendConstantColorAlpha;
1546 
1547       struct anv_state state;
1548    } cc;
1549 
1550    /* 3DSTATE_CLIP */
1551    struct {
1552       uint32_t APIMode;
1553       uint32_t ViewportXYClipTestEnable;
1554       uint32_t MaximumVPIndex;
1555       uint32_t TriangleStripListProvokingVertexSelect;
1556       uint32_t LineStripListProvokingVertexSelect;
1557       uint32_t TriangleFanProvokingVertexSelect;
1558    } clip;
1559 
1560    /* 3DSTATE_COARSE_PIXEL */
1561    struct {
1562       uint32_t    CPSizeX;
1563       uint32_t    CPSizeY;
1564       uint32_t    CPSizeCombiner0Opcode;
1565       uint32_t    CPSizeCombiner1Opcode;
1566       bool        DisableCPSPointers;
1567    } coarse_pixel;
1568 
1569    /* 3DSTATE_CPS/3DSTATE_CPS_POINTERS */
1570    struct {
1571       /* Gfx11 */
1572       uint32_t CoarsePixelShadingMode;
1573       float    MinCPSizeX;
1574       float    MinCPSizeY;
1575       /* Gfx12+ */
1576       uint32_t CoarsePixelShadingStateArrayPointer;
1577    } cps;
1578 
1579    /* 3DSTATE_DEPTH_BOUNDS */
1580    struct {
1581       bool     DepthBoundsTestEnable;
1582       float    DepthBoundsTestMinValue;
1583       float    DepthBoundsTestMaxValue;
1584    } db;
1585 
1586    /* 3DSTATE_GS */
1587    struct {
1588       uint32_t ReorderMode;
1589    } gs;
1590 
1591    /* 3DSTATE_LINE_STIPPLE */
1592    struct {
1593       uint32_t LineStipplePattern;
1594       float    LineStippleInverseRepeatCount;
1595       uint32_t LineStippleRepeatCount;
1596    } ls;
1597 
1598    /* 3DSTATE_MULTISAMPLE */
1599    struct {
1600       uint32_t NumberofMultisamples;
1601    } ms;
1602 
1603    /* 3DSTATE_PS */
1604    struct {
1605       uint32_t PositionXYOffsetSelect;
1606 
1607       uint32_t KernelStartPointer0;
1608       uint32_t KernelStartPointer1;
1609       uint32_t KernelStartPointer2;
1610 
1611       uint32_t DispatchGRFStartRegisterForConstantSetupData0;
1612       uint32_t DispatchGRFStartRegisterForConstantSetupData1;
1613       uint32_t DispatchGRFStartRegisterForConstantSetupData2;
1614 
1615       /* Pre-Gfx20 only */
1616       bool     _8PixelDispatchEnable;
1617       bool     _16PixelDispatchEnable;
1618       bool     _32PixelDispatchEnable;
1619 
1620       /* Gfx20+ only */
1621       bool     Kernel0Enable;
1622       bool     Kernel1Enable;
1623       uint32_t Kernel0SIMDWidth;
1624       uint32_t Kernel1SIMDWidth;
1625       uint32_t Kernel0PolyPackingPolicy;
1626    } ps;
1627 
1628    /* 3DSTATE_PS_EXTRA */
1629    struct {
1630       bool PixelShaderHasUAV;
1631       bool PixelShaderIsPerSample;
1632       bool PixelShaderKillsPixel;
1633       bool PixelShaderIsPerCoarsePixel;
1634       bool EnablePSDependencyOnCPsizeChange;
1635    } ps_extra;
1636 
1637    /* 3DSTATE_PS_BLEND */
1638    struct {
1639       bool     HasWriteableRT;
1640       bool     ColorBufferBlendEnable;
1641       uint32_t SourceAlphaBlendFactor;
1642       uint32_t DestinationAlphaBlendFactor;
1643       uint32_t SourceBlendFactor;
1644       uint32_t DestinationBlendFactor;
1645       bool     AlphaTestEnable;
1646       bool     IndependentAlphaBlendEnable;
1647       bool     AlphaToCoverageEnable;
1648    } ps_blend;
1649 
1650    /* 3DSTATE_RASTER */
1651    struct {
1652       uint32_t APIMode;
1653       bool     DXMultisampleRasterizationEnable;
1654       bool     AntialiasingEnable;
1655       uint32_t CullMode;
1656       uint32_t FrontWinding;
1657       bool     GlobalDepthOffsetEnableSolid;
1658       bool     GlobalDepthOffsetEnableWireframe;
1659       bool     GlobalDepthOffsetEnablePoint;
1660       float    GlobalDepthOffsetConstant;
1661       float    GlobalDepthOffsetScale;
1662       float    GlobalDepthOffsetClamp;
1663       uint32_t FrontFaceFillMode;
1664       uint32_t BackFaceFillMode;
1665       bool     ViewportZFarClipTestEnable;
1666       bool     ViewportZNearClipTestEnable;
1667       bool     ConservativeRasterizationEnable;
1668    } raster;
1669 
1670    /* 3DSTATE_SCISSOR_STATE_POINTERS */
1671    struct {
1672       uint32_t count;
1673       struct {
1674          uint32_t ScissorRectangleYMin;
1675          uint32_t ScissorRectangleXMin;
1676          uint32_t ScissorRectangleYMax;
1677          uint32_t ScissorRectangleXMax;
1678       } elem[MAX_SCISSORS];
1679    } scissor;
1680 
1681    /* 3DSTATE_SF */
1682    struct {
1683       float    LineWidth;
1684       uint32_t TriangleStripListProvokingVertexSelect;
1685       uint32_t LineStripListProvokingVertexSelect;
1686       uint32_t TriangleFanProvokingVertexSelect;
1687       bool     LegacyGlobalDepthBiasEnable;
1688    } sf;
1689 
1690    /* 3DSTATE_STREAMOUT */
1691    struct {
1692       bool     RenderingDisable;
1693       uint32_t RenderStreamSelect;
1694       uint32_t ReorderMode;
1695       uint32_t ForceRendering;
1696    } so;
1697 
1698    /* 3DSTATE_SAMPLE_MASK */
1699    struct {
1700       uint32_t SampleMask;
1701    } sm;
1702 
1703    /* 3DSTATE_TE */
1704    struct {
1705       uint32_t OutputTopology;
1706    } te;
1707 
1708    /* 3DSTATE_VF */
1709    struct {
1710       bool     IndexedDrawCutIndexEnable;
1711       uint32_t CutIndex;
1712    } vf;
1713 
1714    /* 3DSTATE_VFG */
1715    struct {
1716       uint32_t DistributionMode;
1717       bool     ListCutIndexEnable;
1718    } vfg;
1719 
1720    /* 3DSTATE_VF_TOPOLOGY */
1721    struct {
1722       uint32_t PrimitiveTopologyType;
1723    } vft;
1724 
1725    /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
1726    struct {
1727       uint32_t count;
1728       struct {
1729          float MinimumDepth;
1730          float MaximumDepth;
1731       } elem[MAX_VIEWPORTS];
1732 
1733       struct anv_state state;
1734    } vp_cc;
1735 
1736    /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
1737    struct {
1738       uint32_t count;
1739       struct {
1740          float ViewportMatrixElementm00;
1741          float ViewportMatrixElementm11;
1742          float ViewportMatrixElementm22;
1743          float ViewportMatrixElementm30;
1744          float ViewportMatrixElementm31;
1745          float ViewportMatrixElementm32;
1746          float XMinClipGuardband;
1747          float XMaxClipGuardband;
1748          float YMinClipGuardband;
1749          float YMaxClipGuardband;
1750          float XMinViewPort;
1751          float XMaxViewPort;
1752          float YMinViewPort;
1753          float YMaxViewPort;
1754       } elem[MAX_VIEWPORTS];
1755    } vp_sf_clip;
1756 
1757    /* 3DSTATE_WM */
1758    struct {
1759       bool     LineStippleEnable;
1760       uint32_t BarycentricInterpolationMode;
1761    } wm;
1762 
1763    /* 3DSTATE_WM_DEPTH_STENCIL */
1764    struct {
1765       bool     DoubleSidedStencilEnable;
1766       uint32_t StencilTestMask;
1767       uint32_t StencilWriteMask;
1768       uint32_t BackfaceStencilTestMask;
1769       uint32_t BackfaceStencilWriteMask;
1770       uint32_t StencilReferenceValue;
1771       uint32_t BackfaceStencilReferenceValue;
1772       bool     DepthTestEnable;
1773       bool     DepthBufferWriteEnable;
1774       uint32_t DepthTestFunction;
1775       bool     StencilTestEnable;
1776       bool     StencilBufferWriteEnable;
1777       uint32_t StencilFailOp;
1778       uint32_t StencilPassDepthPassOp;
1779       uint32_t StencilPassDepthFailOp;
1780       uint32_t StencilTestFunction;
1781       uint32_t BackfaceStencilFailOp;
1782       uint32_t BackfaceStencilPassDepthPassOp;
1783       uint32_t BackfaceStencilPassDepthFailOp;
1784       uint32_t BackfaceStencilTestFunction;
1785    } ds;
1786 
1787    /* 3DSTATE_TBIMR_TILE_PASS_INFO */
1788    struct {
1789       unsigned TileRectangleHeight;
1790       unsigned TileRectangleWidth;
1791       unsigned VerticalTileCount;
1792       unsigned HorizontalTileCount;
1793       unsigned TBIMRBatchSize;
1794       unsigned TileBoxCheck;
1795    } tbimr;
1796    bool use_tbimr;
1797 
1798    /**
1799     * Dynamic msaa flags, this value can be different from
1800     * anv_push_constants::gfx::fs_msaa_flags, as the push constant value only
1801     * needs to be updated for fragment shaders dynamically checking the value.
1802     */
1803    enum intel_msaa_flags fs_msaa_flags;
1804 
1805    /**
1806     * Dynamic TCS input vertices, this value can be different from
1807     * anv_driver_constants::gfx::tcs_input_vertices, as the push constant
1808     * value only needs to be updated for tesselation control shaders
1809     * dynamically checking the value.
1810     */
1811    uint32_t tcs_input_vertices;
1812 
1813    bool pma_fix;
1814 
1815    /**
1816     * DEPTH and STENCIL attachment write state for Wa_18019816803.
1817     */
1818    bool ds_write_state;
1819 
1820    /**
1821     * Toggle tracking for Wa_14018283232.
1822     */
1823    bool wa_14018283232_toggle;
1824 
1825    /**
1826     * Coarse state tracking for Wa_18038825448.
1827     */
1828    enum anv_coarse_pixel_state coarse_state;
1829 
1830    BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX);
1831 };
1832 
1833 enum anv_internal_kernel_name {
1834    ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
1835    ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE,
1836    ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
1837    ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
1838 
1839    ANV_INTERNAL_KERNEL_COUNT,
1840 };
1841 
1842 enum anv_rt_bvh_build_method {
1843    ANV_BVH_BUILD_METHOD_TRIVIAL,
1844    ANV_BVH_BUILD_METHOD_NEW_SAH,
1845 };
1846 
1847 /* If serialization-breaking or algorithm-breaking changes are made,
1848  * increment the digits at the end
1849  */
1850 #define ANV_RT_UUID_MACRO             "ANV_RT_BVH_0001"
1851 
1852 enum bvh_dump_type {
1853    BVH_ANV,
1854    BVH_IR_HDR,
1855    BVH_IR_AS
1856 };
1857 
1858 struct anv_bvh_dump {
1859    struct anv_bo *bo;
1860    uint32_t bvh_id;
1861    uint64_t dump_size;
1862    VkGeometryTypeKHR geometry_type;
1863    enum bvh_dump_type dump_type;
1864 
1865    /* Link in the anv_device.bvh_dumps list */
1866    struct list_head link;
1867 };
1868 
1869 struct anv_device_astc_emu {
1870     struct vk_texcompress_astc_state           *texcompress;
1871 
1872     /* for flush_astc_ldr_void_extent_denorms */
1873     simple_mtx_t mutex;
1874     VkDescriptorSetLayout ds_layout;
1875     VkPipelineLayout pipeline_layout;
1876     VkPipeline pipeline;
1877 };
1878 
1879 struct anv_device {
1880     struct vk_device                            vk;
1881 
1882     struct anv_physical_device *                physical;
1883     const struct intel_device_info *            info;
1884     const struct anv_kmd_backend *              kmd_backend;
1885     struct isl_device                           isl_dev;
1886     union {
1887        uint32_t                                 context_id; /* i915 */
1888        uint32_t                                 vm_id; /* Xe */
1889     };
1890     int                                         fd;
1891 
1892     pthread_mutex_t                             vma_mutex;
1893     struct util_vma_heap                        vma_lo;
1894     struct util_vma_heap                        vma_hi;
1895     struct util_vma_heap                        vma_desc;
1896     struct util_vma_heap                        vma_dynamic_visible;
1897     struct util_vma_heap                        vma_trtt;
1898 
1899     /** List of all anv_device_memory objects */
1900     struct list_head                            memory_objects;
1901 
1902     /** List of anv_image objects with a private binding for implicit CCS */
1903     struct list_head                            image_private_objects;
1904 
1905     /** List of anv_bvh_dump objects that get dumped on cmd buf completion */
1906     struct list_head                            bvh_dumps;
1907 
1908     /** Memory pool for batch buffers */
1909     struct anv_bo_pool                          batch_bo_pool;
1910     /** Memory pool for utrace timestamp buffers */
1911     struct anv_bo_pool                          utrace_bo_pool;
1912     /**
1913      * Size of the timestamp captured for utrace.
1914      */
1915     uint32_t                                     utrace_timestamp_size;
1916     /** Memory pool for BVH build buffers */
1917     struct anv_bo_pool                          bvh_bo_pool;
1918 
1919     struct anv_bo_cache                         bo_cache;
1920 
1921     struct anv_state_pool                       general_state_pool;
1922     struct anv_state_pool                       aux_tt_pool;
1923     struct anv_state_pool                       dynamic_state_pool;
1924     struct anv_state_pool                       instruction_state_pool;
1925     struct anv_state_pool                       binding_table_pool;
1926     struct anv_state_pool                       scratch_surface_state_pool;
1927     struct anv_state_pool                       internal_surface_state_pool;
1928     struct anv_state_pool                       bindless_surface_state_pool;
1929     struct anv_state_pool                       indirect_push_descriptor_pool;
1930     struct anv_state_pool                       push_descriptor_buffer_pool;
1931 
1932     struct anv_state_reserved_array_pool        custom_border_colors;
1933 
1934     /** BO used for various workarounds
1935      *
1936      * There are a number of workarounds on our hardware which require writing
1937      * data somewhere and it doesn't really matter where.  For that, we use
1938      * this BO and just write to the first dword or so.
1939      *
1940      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1941      * For that, we use the high bytes (>= 1024) of the workaround BO.
1942      */
1943     struct anv_bo *                             workaround_bo;
1944     struct anv_address                          workaround_address;
1945 
1946     struct anv_bo *                             dummy_aux_bo;
1947     struct anv_bo *                             mem_fence_bo;
1948 
1949     /**
1950      * Workarounds for game bugs.
1951      */
1952     struct {
1953        struct set *                             doom64_images;
1954     } workarounds;
1955 
1956     struct anv_bo *                             trivial_batch_bo;
1957     struct anv_state                            null_surface_state;
1958 
1959     /**
1960      * NULL surface state copy stored in host memory for use as a fast
1961      * memcpy() source.
1962      */
1963     char                                        host_null_surface_state[ANV_SURFACE_STATE_SIZE];
1964 
1965     struct vk_pipeline_cache *                  internal_cache;
1966 
1967     struct {
1968        struct blorp_context                     context;
1969        struct anv_state                         dynamic_states[BLORP_DYNAMIC_STATE_COUNT];
1970     }                                           blorp;
1971 
1972     struct anv_state                            border_colors;
1973 
1974     struct anv_state                            slice_hash;
1975 
1976     /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1977      *
1978      * We need to emit CPS_STATE structures for each viewport accessible by a
1979      * pipeline. So rather than write many identical CPS_STATE structures
1980      * dynamically, we can enumerate all possible combinaisons and then just
1981      * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1982      * array.
1983      */
1984     struct anv_state                            cps_states;
1985 
1986     uint32_t                                    queue_count;
1987     struct anv_queue  *                         queues;
1988 
1989     struct anv_scratch_pool                     scratch_pool;
1990     struct anv_scratch_pool                     protected_scratch_pool;
1991     struct anv_bo                              *rt_scratch_bos[16];
1992     struct anv_bo                              *btd_fifo_bo;
1993     struct anv_address                          rt_uuid_addr;
1994 
1995     bool                                        robust_buffer_access;
1996 
1997     uint32_t                                    protected_session_id;
1998 
1999     /** Shadow ray query BO
2000      *
2001      * The ray_query_bo only holds the current ray being traced. When using
2002      * more than 1 ray query per thread, we cannot fit all the queries in
2003      * there, so we need a another buffer to hold query data that is not
2004      * currently being used by the HW for tracing, similar to a scratch space.
2005      *
2006      * The size of the shadow buffer depends on the number of queries per
2007      * shader.
2008      *
2009      * We might need a buffer per queue family due to Wa_14022863161.
2010      */
2011     struct anv_bo                              *ray_query_shadow_bos[2][16];
2012     /** Ray query buffer used to communicated with HW unit.
2013      */
2014     struct anv_bo                              *ray_query_bo[2];
2015 
2016     struct anv_shader_bin                      *rt_trampoline;
2017     struct anv_shader_bin                      *rt_trivial_return;
2018 
2019     enum anv_rt_bvh_build_method                bvh_build_method;
2020 
2021     /** Draw generation shader
2022      *
2023      * Generates direct draw calls out of indirect parameters. Used to
2024      * workaround slowness with indirect draw calls.
2025      */
2026     struct anv_shader_bin                      *internal_kernels[ANV_INTERNAL_KERNEL_COUNT];
2027     const struct intel_l3_config               *internal_kernels_l3_config;
2028 
2029     pthread_mutex_t                             mutex;
2030     pthread_cond_t                              queue_submit;
2031 
2032     struct intel_batch_decode_ctx               decoder[ANV_MAX_QUEUE_FAMILIES];
2033     /*
2034      * When decoding a anv_cmd_buffer, we might need to search for BOs through
2035      * the cmd_buffer's list.
2036      */
2037     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
2038 
2039     int                                         perf_fd; /* -1 if no opened */
2040     struct anv_queue                            *perf_queue;
2041     struct intel_bind_timeline                  perf_timeline;
2042 
2043     struct intel_aux_map_context                *aux_map_ctx;
2044 
2045     const struct intel_l3_config                *l3_config;
2046 
2047     struct intel_debug_block_frame              *debug_frame_desc;
2048 
2049     struct intel_ds_device                       ds;
2050 
2051     nir_shader                                  *fp64_nir;
2052 
2053     uint32_t                                    draw_call_count;
2054     struct anv_state                            breakpoint;
2055 #if DETECT_OS_ANDROID
2056     struct u_gralloc                            *u_gralloc;
2057 #endif
2058 
2059     /** Precompute all dirty graphics bits
2060      *
2061      * Depending on platforms, some of the dirty bits don't apply (for example
2062      * 3DSTATE_PRIMITIVE_REPLICATION is only Gfx12.0+). Disabling some
2063      * extensions like Mesh shaders also allow us to avoid emitting any
2064      * mesh/task related instructions (we only initialize them once at device
2065      * initialization).
2066      */
2067     BITSET_DECLARE(gfx_dirty_state, ANV_GFX_STATE_MAX);
2068 
2069     /*
2070      * Command pool for companion RCS command buffer.
2071      */
2072     VkCommandPool                               companion_rcs_cmd_pool;
2073 
2074     struct anv_trtt {
2075        simple_mtx_t mutex;
2076 
2077        /* Sometimes we need to run batches from places where we don't have a
2078         * queue coming from the API, so we use this.
2079         */
2080        struct anv_queue *queue;
2081 
2082        /* There's only one L3 table, so if l3_addr is zero that means we
2083         * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT
2084         * yet in this context).
2085         */
2086        uint64_t l3_addr;
2087 
2088        /* We don't want to access the page tables from the CPU, so just
2089         * maintain a mirror that we can use.
2090         */
2091        uint64_t *l3_mirror;
2092        uint64_t *l2_mirror;
2093 
2094        /* We keep a dynamic list of page table bos, and each bo can store
2095         * multiple page tables.
2096         */
2097        struct anv_bo **page_table_bos;
2098        int num_page_table_bos;
2099        int page_table_bos_capacity;
2100 
2101        /* These are used to keep track of space available for more page tables
2102         * within a bo.
2103         */
2104        struct anv_bo *cur_page_table_bo;
2105        uint64_t next_page_table_bo_offset;
2106 
2107        struct vk_sync *timeline;
2108        uint64_t timeline_val;
2109 
2110        /* List of struct anv_trtt_submission that are in flight and can be
2111         * freed once their vk_sync gets signaled.
2112         */
2113        struct list_head in_flight_batches;
2114     } trtt;
2115 
2116     /* Number of sparse resources that currently exist. This is used for a
2117      * workaround that makes every memoryBarrier flush more things than it
2118      * should. Some workloads create and then immediately destroy sparse
2119      * resources when they start, so just counting if a sparse resource was
2120      * ever created is not enough.
2121      */
2122     uint32_t num_sparse_resources;
2123 
2124     struct anv_device_astc_emu                   astc_emu;
2125 
2126     struct intel_bind_timeline bind_timeline; /* Xe only */
2127 
2128     struct {
2129        simple_mtx_t                              mutex;
2130        struct hash_table                        *map;
2131     }                                            embedded_samplers;
2132 
2133     struct {
2134        /**
2135         * Mutex for the printfs array
2136         */
2137        simple_mtx_t                              mutex;
2138        /**
2139         * Buffer in which the shader printfs are stored
2140         */
2141        struct anv_bo                            *bo;
2142        /**
2143         * Array of pointers to u_printf_info
2144         */
2145        struct util_dynarray                      prints;
2146     } printf;
2147 
2148     struct {
2149        simple_mtx_t  mutex;
2150        struct radix_sort_vk *radix_sort;
2151        struct vk_acceleration_structure_build_args build_args;
2152    } accel_struct_build;
2153 
2154    struct vk_meta_device meta_device;
2155 };
2156 
2157 static inline uint32_t
anv_printf_buffer_size(void)2158 anv_printf_buffer_size(void)
2159 {
2160    return debug_get_num_option("ANV_PRINTF_BUFFER_SIZE", 1024 * 1024);
2161 }
2162 
2163 static inline uint32_t
anv_get_first_render_queue_index(struct anv_physical_device * pdevice)2164 anv_get_first_render_queue_index(struct anv_physical_device *pdevice)
2165 {
2166    assert(pdevice != NULL);
2167 
2168    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2169       if (pdevice->queue.families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
2170          return i;
2171       }
2172    }
2173 
2174    unreachable("Graphics capable queue family not found");
2175 }
2176 
2177 static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device * device)2178 anv_binding_table_pool_alloc(struct anv_device *device)
2179 {
2180    return anv_state_pool_alloc(&device->binding_table_pool,
2181                                device->binding_table_pool.block_size, 0);
2182 }
2183 
2184 static inline void
anv_binding_table_pool_free(struct anv_device * device,struct anv_state state)2185 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
2186 {
2187    anv_state_pool_free(&device->binding_table_pool, state);
2188 }
2189 
2190 static inline struct anv_state
anv_null_surface_state_for_binding_table(struct anv_device * device)2191 anv_null_surface_state_for_binding_table(struct anv_device *device)
2192 {
2193    struct anv_state state = device->null_surface_state;
2194    if (device->physical->indirect_descriptors) {
2195       state.offset += device->physical->va.bindless_surface_state_pool.addr -
2196                       device->physical->va.internal_surface_state_pool.addr;
2197    }
2198    return state;
2199 }
2200 
2201 static inline struct anv_state
anv_bindless_state_for_binding_table(struct anv_device * device,struct anv_state state)2202 anv_bindless_state_for_binding_table(struct anv_device *device,
2203                                      struct anv_state state)
2204 {
2205    state.offset += device->physical->va.bindless_surface_state_pool.addr -
2206                    device->physical->va.internal_surface_state_pool.addr;
2207    return state;
2208 }
2209 
2210 static inline struct anv_state
anv_device_maybe_alloc_surface_state(struct anv_device * device,struct anv_state_stream * surface_state_stream)2211 anv_device_maybe_alloc_surface_state(struct anv_device *device,
2212                                      struct anv_state_stream *surface_state_stream)
2213 {
2214    if (device->physical->indirect_descriptors) {
2215       if (surface_state_stream)
2216          return anv_state_stream_alloc(surface_state_stream, 64, 64);
2217       return anv_state_pool_alloc(&device->bindless_surface_state_pool, 64, 64);
2218    } else {
2219       return ANV_STATE_NULL;
2220    }
2221 }
2222 
2223 static inline uint32_t
anv_mocs(const struct anv_device * device,const struct anv_bo * bo,isl_surf_usage_flags_t usage)2224 anv_mocs(const struct anv_device *device,
2225          const struct anv_bo *bo,
2226          isl_surf_usage_flags_t usage)
2227 {
2228    return isl_mocs(&device->isl_dev, usage, bo && anv_bo_is_external(bo));
2229 }
2230 
2231 static inline uint32_t
anv_mocs_for_address(const struct anv_device * device,const struct anv_address * addr)2232 anv_mocs_for_address(const struct anv_device *device,
2233                      const struct anv_address *addr)
2234 {
2235    return anv_mocs(device, addr->bo, 0);
2236 }
2237 
2238 void anv_device_init_blorp(struct anv_device *device);
2239 void anv_device_finish_blorp(struct anv_device *device);
2240 
2241 static inline void
anv_sanitize_map_params(struct anv_device * device,uint64_t in_offset,uint64_t in_size,uint64_t * out_offset,uint64_t * out_size)2242 anv_sanitize_map_params(struct anv_device *device,
2243                         uint64_t in_offset,
2244                         uint64_t in_size,
2245                         uint64_t *out_offset,
2246                         uint64_t *out_size)
2247 {
2248    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
2249    if (!device->physical->info.has_mmap_offset)
2250       *out_offset = in_offset & ~4095ull;
2251    else
2252       *out_offset = 0;
2253    assert(in_offset >= *out_offset);
2254    *out_size = (in_offset + in_size) - *out_offset;
2255 
2256    /* Let's map whole pages */
2257    *out_size = align64(*out_size, 4096);
2258 }
2259 
2260 
2261 VkResult anv_device_alloc_bo(struct anv_device *device,
2262                              const char *name, uint64_t size,
2263                              enum anv_bo_alloc_flags alloc_flags,
2264                              uint64_t explicit_address,
2265                              struct anv_bo **bo);
2266 VkResult anv_device_map_bo(struct anv_device *device,
2267                            struct anv_bo *bo,
2268                            uint64_t offset,
2269                            size_t size,
2270                            void *placed_addr,
2271                            void **map_out);
2272 VkResult anv_device_unmap_bo(struct anv_device *device,
2273                              struct anv_bo *bo,
2274                              void *map, size_t map_size,
2275                              bool replace);
2276 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
2277                                             void *host_ptr, uint32_t size,
2278                                             enum anv_bo_alloc_flags alloc_flags,
2279                                             uint64_t client_address,
2280                                             struct anv_bo **bo_out);
2281 VkResult anv_device_import_bo(struct anv_device *device, int fd,
2282                               enum anv_bo_alloc_flags alloc_flags,
2283                               uint64_t client_address,
2284                               struct anv_bo **bo);
2285 VkResult anv_device_export_bo(struct anv_device *device,
2286                               struct anv_bo *bo, int *fd_out);
2287 VkResult anv_device_get_bo_tiling(struct anv_device *device,
2288                                   struct anv_bo *bo,
2289                                   enum isl_tiling *tiling_out);
2290 VkResult anv_device_set_bo_tiling(struct anv_device *device,
2291                                   struct anv_bo *bo,
2292                                   uint32_t row_pitch_B,
2293                                   enum isl_tiling tiling);
2294 void anv_device_release_bo(struct anv_device *device,
2295                            struct anv_bo *bo);
2296 
anv_device_set_physical(struct anv_device * device,struct anv_physical_device * physical_device)2297 static inline void anv_device_set_physical(struct anv_device *device,
2298                                            struct anv_physical_device *physical_device)
2299 {
2300    device->physical = physical_device;
2301    device->info = &physical_device->info;
2302    device->isl_dev = physical_device->isl_dev;
2303 }
2304 
2305 static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device * device,uint32_t gem_handle)2306 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
2307 {
2308    return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
2309 }
2310 
2311 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2312                          int64_t timeout);
2313 
2314 VkResult anv_device_print_init(struct anv_device *device);
2315 void anv_device_print_fini(struct anv_device *device);
2316 void anv_device_print_shader_prints(struct anv_device *device);
2317 
2318 void anv_dump_bvh_to_files(struct anv_device *device);
2319 
2320 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
2321                         const VkDeviceQueueCreateInfo *pCreateInfo,
2322                         uint32_t index_in_family);
2323 void anv_queue_finish(struct anv_queue *queue);
2324 
2325 VkResult anv_queue_submit(struct vk_queue *queue,
2326                           struct vk_queue_submit *submit);
2327 
2328 void anv_queue_trace(struct anv_queue *queue, const char *label,
2329                      bool frame, bool begin);
2330 
2331 static inline VkResult
anv_queue_post_submit(struct anv_queue * queue,VkResult submit_result)2332 anv_queue_post_submit(struct anv_queue *queue, VkResult submit_result)
2333 {
2334    if (submit_result != VK_SUCCESS)
2335       return submit_result;
2336 
2337    VkResult result = VK_SUCCESS;
2338    if (queue->sync) {
2339       result = vk_sync_wait(&queue->device->vk, queue->sync, 0,
2340                             VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
2341       if (result != VK_SUCCESS)
2342          result = vk_queue_set_lost(&queue->vk, "sync wait failed");
2343    }
2344 
2345    if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
2346       anv_device_print_shader_prints(queue->device);
2347 
2348 #if ANV_SUPPORT_RT && !ANV_SUPPORT_RT_GRL
2349    /* The recorded bvh is dumped to files upon command buffer completion */
2350    if (INTEL_DEBUG(DEBUG_BVH_ANY))
2351       anv_dump_bvh_to_files(queue->device);
2352 #endif
2353 
2354    return result;
2355 }
2356 
2357 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
2358 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
2359                        uint32_t stride, uint32_t tiling);
2360 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
2361 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
2362 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
2363 int anv_gem_set_context_param(int fd, uint32_t context, uint32_t param,
2364                               uint64_t value);
2365 VkResult
2366 anv_gem_import_bo_alloc_flags_to_bo_flags(struct anv_device *device,
2367                                           struct anv_bo *bo,
2368                                           enum anv_bo_alloc_flags alloc_flags,
2369                                           uint32_t *bo_flags);
2370 const struct intel_device_info_pat_entry *
2371 anv_device_get_pat_entry(struct anv_device *device,
2372                          enum anv_bo_alloc_flags alloc_flags);
2373 
2374 uint64_t anv_vma_alloc(struct anv_device *device,
2375                        uint64_t size, uint64_t align,
2376                        enum anv_bo_alloc_flags alloc_flags,
2377                        uint64_t client_address,
2378                        struct util_vma_heap **out_vma_heap);
2379 void anv_vma_free(struct anv_device *device,
2380                   struct util_vma_heap *vma_heap,
2381                   uint64_t address, uint64_t size);
2382 
2383 struct anv_reloc_list {
2384    bool                                         uses_relocs;
2385    uint32_t                                     dep_words;
2386    BITSET_WORD *                                deps;
2387    const VkAllocationCallbacks                  *alloc;
2388 };
2389 
2390 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
2391                              const VkAllocationCallbacks *alloc,
2392                              bool uses_relocs);
2393 void anv_reloc_list_finish(struct anv_reloc_list *list);
2394 
2395 VkResult
2396 anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo);
2397 
2398 static inline VkResult
anv_reloc_list_add_bo(struct anv_reloc_list * list,struct anv_bo * target_bo)2399 anv_reloc_list_add_bo(struct anv_reloc_list *list, struct anv_bo *target_bo)
2400 {
2401    return list->uses_relocs ? anv_reloc_list_add_bo_impl(list, target_bo) : VK_SUCCESS;
2402 }
2403 
2404 VkResult anv_reloc_list_append(struct anv_reloc_list *list,
2405                                struct anv_reloc_list *other);
2406 
2407 struct anv_batch_bo {
2408    /* Link in the anv_cmd_buffer.owned_batch_bos list */
2409    struct list_head                             link;
2410 
2411    struct anv_bo *                              bo;
2412 
2413    /* Bytes actually consumed in this batch BO */
2414    uint32_t                                     length;
2415 
2416    /* When this batch BO is used as part of a primary batch buffer, this
2417     * tracked whether it is chained to another primary batch buffer.
2418     *
2419     * If this is the case, the relocation list's last entry points the
2420     * location of the MI_BATCH_BUFFER_START chaining to the next batch.
2421     */
2422    bool                                         chained;
2423 
2424    struct anv_reloc_list                        relocs;
2425 };
2426 
2427 struct anv_batch {
2428    const VkAllocationCallbacks *                alloc;
2429 
2430    /**
2431     * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2432     * Used to increase allocation size for long command buffers.
2433     */
2434    size_t                                       allocated_batch_size;
2435 
2436    struct anv_address                           start_addr;
2437 
2438    void *                                       start;
2439    void *                                       end;
2440    void *                                       next;
2441 
2442    struct anv_reloc_list *                      relocs;
2443 
2444    /* This callback is called (with the associated user data) in the event
2445     * that the batch runs out of space.
2446     */
2447    VkResult (*extend_cb)(struct anv_batch *, uint32_t, void *);
2448    void *                                       user_data;
2449 
2450    /**
2451     * Current error status of the command buffer. Used to track inconsistent
2452     * or incomplete command buffer states that are the consequence of run-time
2453     * errors such as out of memory scenarios. We want to track this in the
2454     * batch because the command buffer object is not visible to some parts
2455     * of the driver.
2456     */
2457    VkResult                                     status;
2458 
2459    enum intel_engine_class                      engine_class;
2460 
2461    /**
2462     * Write fencing status for mi_builder.
2463     */
2464    bool write_fence_status;
2465 
2466    /**
2467     * Number of 3DPRIMITIVE's emitted for WA 16014538804
2468     */
2469    uint8_t num_3d_primitives_emitted;
2470 
2471    struct u_trace * trace;
2472    const char * pc_reasons[4];
2473    uint32_t pc_reasons_count;
2474 
2475 };
2476 
2477 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
2478 VkResult anv_batch_emit_ensure_space(struct anv_batch *batch, uint32_t size);
2479 void anv_batch_advance(struct anv_batch *batch, uint32_t size);
2480 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
2481 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
2482 
2483 static inline struct anv_address
anv_batch_current_address(struct anv_batch * batch)2484 anv_batch_current_address(struct anv_batch *batch)
2485 {
2486    return anv_batch_address(batch, batch->next);
2487 }
2488 
2489 static inline void
anv_batch_set_storage(struct anv_batch * batch,struct anv_address addr,void * map,size_t size)2490 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
2491                       void *map, size_t size)
2492 {
2493    batch->start_addr = addr;
2494    batch->next = batch->start = map;
2495    batch->end = map + size;
2496 }
2497 
2498 static inline VkResult
anv_batch_set_error(struct anv_batch * batch,VkResult error)2499 anv_batch_set_error(struct anv_batch *batch, VkResult error)
2500 {
2501    assert(error != VK_SUCCESS);
2502    if (batch->status == VK_SUCCESS)
2503       batch->status = error;
2504    return batch->status;
2505 }
2506 
2507 static inline bool
anv_batch_has_error(struct anv_batch * batch)2508 anv_batch_has_error(struct anv_batch *batch)
2509 {
2510    return batch->status != VK_SUCCESS;
2511 }
2512 
2513 static inline uint64_t
_anv_combine_address(struct anv_batch * batch,void * location,const struct anv_address address,uint32_t delta)2514 _anv_combine_address(struct anv_batch *batch, void *location,
2515                      const struct anv_address address, uint32_t delta)
2516 {
2517    if (address.bo == NULL)
2518       return address.offset + delta;
2519 
2520    if (batch)
2521       anv_reloc_list_add_bo(batch->relocs, address.bo);
2522 
2523    return anv_address_physical(anv_address_add(address, delta));
2524 }
2525 
2526 #define __gen_address_type struct anv_address
2527 #define __gen_user_data struct anv_batch
2528 #define __gen_combine_address _anv_combine_address
2529 
2530 /* Wrapper macros needed to work around preprocessor argument issues.  In
2531  * particular, arguments don't get pre-evaluated if they are concatenated.
2532  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
2533  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
2534  * We can work around this easily enough with these helpers.
2535  */
2536 #define __anv_cmd_length(cmd) cmd ## _length
2537 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
2538 #define __anv_cmd_header(cmd) cmd ## _header
2539 #define __anv_cmd_pack(cmd) cmd ## _pack
2540 #define __anv_reg_num(reg) reg ## _num
2541 
2542 #define anv_pack_struct(dst, struc, ...) do {                              \
2543       struct struc __template = {                                          \
2544          __VA_ARGS__                                                       \
2545       };                                                                   \
2546       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
2547       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
2548    } while (0)
2549 
2550 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
2551       void *__dst = anv_batch_emit_dwords(batch, n);       \
2552       if (__dst) {                                         \
2553          struct cmd __template = {                         \
2554             __anv_cmd_header(cmd),                         \
2555            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
2556             __VA_ARGS__                                    \
2557          };                                                \
2558          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
2559       }                                                    \
2560       __dst;                                               \
2561    })
2562 
2563 #define anv_batch_emit_merge(batch, cmd, pipeline, state, name)         \
2564    for (struct cmd name = { 0 },                                        \
2565         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2566         __builtin_expect(_dst != NULL, 1);                              \
2567         ({ uint32_t _partial[__anv_cmd_length(cmd)];                    \
2568            assert((pipeline)->state.len == __anv_cmd_length(cmd));      \
2569            __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
2570            for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {       \
2571               assert((_partial[i] &                                     \
2572                       (pipeline)->batch_data[                           \
2573                          (pipeline)->state.offset + i]) == 0);          \
2574               ((uint32_t *)_dst)[i] = _partial[i] |                     \
2575                  (pipeline)->batch_data[(pipeline)->state.offset + i];  \
2576            }                                                            \
2577            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2578            _dst = NULL;                                                 \
2579          }))
2580 
2581 #define anv_batch_emit_merge_protected(batch, cmd, pipeline, state,     \
2582                                        name, protected)                 \
2583    for (struct cmd name = { 0 },                                        \
2584         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2585         __builtin_expect(_dst != NULL, 1);                              \
2586         ({ struct anv_gfx_state_ptr *_cmd_state = protected ?           \
2587               &(pipeline)->state##_protected :                          \
2588               &(pipeline)->state;                                       \
2589            uint32_t _partial[__anv_cmd_length(cmd)];                    \
2590            assert(_cmd_state->len == __anv_cmd_length(cmd));            \
2591            __anv_cmd_pack(cmd)(batch, _partial, &name);                 \
2592            for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {       \
2593               assert((_partial[i] &                                     \
2594                       (pipeline)->batch_data[                           \
2595                          (pipeline)->state.offset + i]) == 0);          \
2596               ((uint32_t *)_dst)[i] = _partial[i] |                     \
2597                  (pipeline)->batch_data[_cmd_state->offset + i];        \
2598            }                                                            \
2599            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2600            _dst = NULL;                                                 \
2601          }))
2602 
2603 #define anv_batch_emit(batch, cmd, name)                            \
2604    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
2605         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
2606         __builtin_expect(_dst != NULL, 1);                              \
2607         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
2608            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
2609            _dst = NULL;                                                 \
2610          }))
2611 
2612 #define anv_batch_write_reg(batch, reg, name)                           \
2613    for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
2614         ({                                                              \
2615             uint32_t _dw[__anv_cmd_length(reg)];                        \
2616             __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
2617             for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
2618                anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
2619                   lri.RegisterOffset   = __anv_reg_num(reg);            \
2620                   lri.DataDWord        = _dw[i];                        \
2621                }                                                        \
2622             }                                                           \
2623            _cont = NULL;                                                \
2624          }))
2625 
2626 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
2627 /* #define __gen_get_batch_address anv_batch_address */
2628 /* #define __gen_address_value anv_address_physical */
2629 /* #define __gen_address_offset anv_address_add */
2630 
2631 /* Base structure used to track a submission that needs some clean operations
2632  * upon completion. Should be embedded into a larger structure.
2633  */
2634 struct anv_async_submit {
2635    struct anv_queue *queue;
2636 
2637    struct anv_bo_pool *bo_pool;
2638 
2639    bool use_companion_rcs;
2640 
2641    bool owns_sync;
2642    struct vk_sync_signal signal;
2643 
2644    struct anv_reloc_list relocs;
2645    struct anv_batch batch;
2646    struct util_dynarray batch_bos;
2647 };
2648 
2649 VkResult
2650 anv_async_submit_init(struct anv_async_submit *submit,
2651                       struct anv_queue *queue,
2652                       struct anv_bo_pool *bo_pool,
2653                       bool use_companion_rcs,
2654                       bool create_signal_sync);
2655 
2656 void
2657 anv_async_submit_fini(struct anv_async_submit *submit);
2658 
2659 VkResult
2660 anv_async_submit_create(struct anv_queue *queue,
2661                         struct anv_bo_pool *bo_pool,
2662                         bool use_companion_rcs,
2663                         bool create_signal_sync,
2664                         struct anv_async_submit **out_submit);
2665 
2666 void
2667 anv_async_submit_destroy(struct anv_async_submit *submit);
2668 
2669 bool
2670 anv_async_submit_done(struct anv_async_submit *submit);
2671 
2672 bool
2673 anv_async_submit_wait(struct anv_async_submit *submit);
2674 
2675 struct anv_sparse_submission {
2676    struct anv_queue *queue;
2677 
2678    struct anv_vm_bind *binds;
2679    int binds_len;
2680    int binds_capacity;
2681 
2682    uint32_t wait_count;
2683    uint32_t signal_count;
2684 
2685    struct vk_sync_wait *waits;
2686    struct vk_sync_signal *signals;
2687 };
2688 
2689 struct anv_trtt_bind {
2690    uint64_t pte_addr;
2691    uint64_t entry_addr;
2692 };
2693 
2694 struct anv_trtt_submission {
2695    struct anv_async_submit base;
2696 
2697    struct anv_sparse_submission *sparse;
2698 
2699    struct list_head link;
2700 };
2701 
2702 struct anv_device_memory {
2703    struct vk_device_memory                      vk;
2704 
2705    struct list_head                             link;
2706 
2707    struct anv_bo *                              bo;
2708    const struct anv_memory_type *               type;
2709 
2710    void *                                       map;
2711    size_t                                       map_size;
2712 
2713    /* The map, from the user PoV is map + map_delta */
2714    uint64_t                                     map_delta;
2715 };
2716 
2717 /**
2718  * Header for Vertex URB Entry (VUE)
2719  */
2720 struct anv_vue_header {
2721    uint32_t Reserved;
2722    uint32_t RTAIndex; /* RenderTargetArrayIndex */
2723    uint32_t ViewportIndex;
2724    float PointWidth;
2725 };
2726 
2727 /** Struct representing a sampled image descriptor
2728  *
2729  * This descriptor layout is used for sampled images, bare sampler, and
2730  * combined image/sampler descriptors.
2731  */
2732 struct anv_sampled_image_descriptor {
2733    /** Bindless image handle
2734     *
2735     * This is expected to already be shifted such that the 20-bit
2736     * SURFACE_STATE table index is in the top 20 bits.
2737     */
2738    uint32_t image;
2739 
2740    /** Bindless sampler handle
2741     *
2742     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
2743     * to the dynamic state base address.
2744     */
2745    uint32_t sampler;
2746 };
2747 
2748 /** Struct representing a storage image descriptor */
2749 struct anv_storage_image_descriptor {
2750    /** Bindless image handles
2751     *
2752     * These are expected to already be shifted such that the 20-bit
2753     * SURFACE_STATE table index is in the top 20 bits.
2754     */
2755    uint32_t vanilla;
2756 
2757    /** Image depth
2758     *
2759     * By default the HW RESINFO message allows us to query the depth of an image :
2760     *
2761     * From the Kaby Lake docs for the RESINFO message:
2762     *
2763     *    "Surface Type | ... | Blue
2764     *    --------------+-----+----------------
2765     *    SURFTYPE_3D  | ... | (Depth+1)»LOD"
2766     *
2767     * With VK_EXT_sliced_view_of_3d, we have to support a slice of a 3D image,
2768     * meaning at a depth offset with a new depth value potentially reduced
2769     * from the original image. Unfortunately if we change the Depth value of
2770     * the image, we then run into issues with Yf/Ys tilings where the HW fetch
2771     * data at incorrect locations.
2772     *
2773     * To solve this, we put the slice depth in the descriptor and recompose
2774     * the vec3 (width, height, depth) using this field for z and xy using the
2775     * RESINFO result.
2776     */
2777    uint32_t image_depth;
2778 };
2779 
2780 /** Struct representing a address/range descriptor
2781  *
2782  * The fields of this struct correspond directly to the data layout of
2783  * nir_address_format_64bit_bounded_global addresses.  The last field is the
2784  * offset in the NIR address so it must be zero so that when you load the
2785  * descriptor you get a pointer to the start of the range.
2786  */
2787 struct anv_address_range_descriptor {
2788    uint64_t address;
2789    uint32_t range;
2790    uint32_t zero;
2791 };
2792 
2793 enum anv_descriptor_data {
2794    /** The descriptor contains a BTI reference to a surface state */
2795    ANV_DESCRIPTOR_BTI_SURFACE_STATE       = BITFIELD_BIT(0),
2796    /** The descriptor contains a BTI reference to a sampler state */
2797    ANV_DESCRIPTOR_BTI_SAMPLER_STATE       = BITFIELD_BIT(1),
2798    /** The descriptor contains an actual buffer view */
2799    ANV_DESCRIPTOR_BUFFER_VIEW             = BITFIELD_BIT(2),
2800    /** The descriptor contains inline uniform data */
2801    ANV_DESCRIPTOR_INLINE_UNIFORM          = BITFIELD_BIT(3),
2802    /** anv_address_range_descriptor with a buffer address and range */
2803    ANV_DESCRIPTOR_INDIRECT_ADDRESS_RANGE  = BITFIELD_BIT(4),
2804    /** Bindless surface handle (through anv_sampled_image_descriptor) */
2805    ANV_DESCRIPTOR_INDIRECT_SAMPLED_IMAGE  = BITFIELD_BIT(5),
2806    /** Storage image handles (through anv_storage_image_descriptor) */
2807    ANV_DESCRIPTOR_INDIRECT_STORAGE_IMAGE  = BITFIELD_BIT(6),
2808    /** The descriptor contains a single RENDER_SURFACE_STATE */
2809    ANV_DESCRIPTOR_SURFACE                 = BITFIELD_BIT(7),
2810    /** The descriptor contains a SAMPLER_STATE */
2811    ANV_DESCRIPTOR_SAMPLER                 = BITFIELD_BIT(8),
2812    /** A tuple of RENDER_SURFACE_STATE & SAMPLER_STATE */
2813    ANV_DESCRIPTOR_SURFACE_SAMPLER         = BITFIELD_BIT(9),
2814 };
2815 
2816 struct anv_descriptor_set_binding_layout {
2817    /* The type of the descriptors in this binding */
2818    VkDescriptorType type;
2819 
2820    /* Flags provided when this binding was created */
2821    VkDescriptorBindingFlags flags;
2822 
2823    /* Bitfield representing the type of data this descriptor contains */
2824    enum anv_descriptor_data data;
2825 
2826    /* Maximum number of YCbCr texture/sampler planes */
2827    uint8_t max_plane_count;
2828 
2829    /* Number of array elements in this binding (or size in bytes for inline
2830     * uniform data)
2831     */
2832    uint32_t array_size;
2833 
2834    /* Index into the flattened descriptor set */
2835    uint32_t descriptor_index;
2836 
2837    /* Index into the dynamic state array for a dynamic buffer, relative to the
2838     * set.
2839     */
2840    int16_t dynamic_offset_index;
2841 
2842    /* Computed surface size from data (for one plane) */
2843    uint16_t descriptor_data_surface_size;
2844 
2845    /* Computed sampler size from data (for one plane) */
2846    uint16_t descriptor_data_sampler_size;
2847 
2848    /* Index into the descriptor set buffer views */
2849    int32_t buffer_view_index;
2850 
2851    /* Offset into the descriptor buffer where the surface descriptor lives */
2852    uint32_t descriptor_surface_offset;
2853 
2854    /* Offset into the descriptor buffer where the sampler descriptor lives */
2855    uint16_t descriptor_sampler_offset;
2856 
2857    /* Pre computed surface stride (with multiplane descriptor, the descriptor
2858     * includes all the planes)
2859     */
2860    uint16_t descriptor_surface_stride;
2861 
2862    /* Pre computed sampler stride (with multiplane descriptor, the descriptor
2863     * includes all the planes)
2864     */
2865    uint16_t descriptor_sampler_stride;
2866 
2867    /* Immutable samplers (or NULL if no immutable samplers) */
2868    struct anv_sampler **immutable_samplers;
2869 };
2870 
2871 enum anv_descriptor_set_layout_type {
2872    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_UNKNOWN,
2873    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT,
2874    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT,
2875    ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER,
2876 };
2877 
2878 struct anv_descriptor_set_layout {
2879    struct vk_object_base base;
2880 
2881    VkDescriptorSetLayoutCreateFlags flags;
2882 
2883    /* Type of descriptor set layout */
2884    enum anv_descriptor_set_layout_type type;
2885 
2886    /* Descriptor set layouts can be destroyed at almost any time */
2887    uint32_t ref_cnt;
2888 
2889    /* Number of bindings in this descriptor set */
2890    uint32_t binding_count;
2891 
2892    /* Total number of descriptors */
2893    uint32_t descriptor_count;
2894 
2895    /* Shader stages affected by this descriptor set */
2896    uint16_t shader_stages;
2897 
2898    /* Number of buffer views in this descriptor set */
2899    uint32_t buffer_view_count;
2900 
2901    /* Number of dynamic offsets used by this descriptor set */
2902    uint16_t dynamic_offset_count;
2903 
2904    /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
2905     * this buffer
2906     */
2907    VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
2908 
2909    /* Size of the descriptor buffer dedicated to surface states for this
2910     * descriptor set
2911     */
2912    uint32_t descriptor_buffer_surface_size;
2913 
2914    /* Size of the descriptor buffer dedicated to sampler states for this
2915     * descriptor set
2916     */
2917    uint32_t descriptor_buffer_sampler_size;
2918 
2919    /* Number of embedded sampler count */
2920    uint32_t embedded_sampler_count;
2921 
2922    /* Bindings in this descriptor set */
2923    struct anv_descriptor_set_binding_layout binding[0];
2924 };
2925 
2926 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
2927                                       const struct anv_descriptor_set_layout *set,
2928                                       const struct anv_descriptor_set_binding_layout *binding);
2929 
2930 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
2931                                       const struct anv_descriptor_set_layout *set,
2932                                       const struct anv_descriptor_set_binding_layout *binding);
2933 
2934 void anv_descriptor_set_layout_destroy(struct anv_device *device,
2935                                        struct anv_descriptor_set_layout *layout);
2936 
2937 void anv_descriptor_set_layout_print(const struct anv_descriptor_set_layout *layout);
2938 
2939 static inline struct anv_descriptor_set_layout *
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout * layout)2940 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
2941 {
2942    assert(layout && layout->ref_cnt >= 1);
2943    p_atomic_inc(&layout->ref_cnt);
2944 
2945    return layout;
2946 }
2947 
2948 static inline void
anv_descriptor_set_layout_unref(struct anv_device * device,struct anv_descriptor_set_layout * layout)2949 anv_descriptor_set_layout_unref(struct anv_device *device,
2950                                 struct anv_descriptor_set_layout *layout)
2951 {
2952    assert(layout && layout->ref_cnt >= 1);
2953    if (p_atomic_dec_zero(&layout->ref_cnt))
2954       anv_descriptor_set_layout_destroy(device, layout);
2955 }
2956 
2957 struct anv_descriptor {
2958    VkDescriptorType type;
2959 
2960    union {
2961       struct {
2962          VkImageLayout layout;
2963          struct anv_image_view *image_view;
2964          struct anv_sampler *sampler;
2965       };
2966 
2967       struct {
2968          struct anv_buffer_view *set_buffer_view;
2969          struct anv_buffer *buffer;
2970          uint64_t offset;
2971          uint64_t range;
2972          uint64_t bind_range;
2973       };
2974 
2975       struct anv_buffer_view *buffer_view;
2976 
2977       struct vk_acceleration_structure *accel_struct;
2978    };
2979 };
2980 
2981 struct anv_descriptor_set {
2982    struct vk_object_base base;
2983 
2984    struct anv_descriptor_pool *pool;
2985    struct anv_descriptor_set_layout *layout;
2986 
2987    /* Amount of space occupied in the the pool by this descriptor set. It can
2988     * be larger than the size of the descriptor set.
2989     */
2990    uint32_t size;
2991 
2992    /* Is this descriptor set a push descriptor */
2993    bool is_push;
2994 
2995    /* Bitfield of descriptors for which we need to generate surface states.
2996     * Only valid for push descriptors
2997     */
2998    uint32_t generate_surface_states;
2999 
3000    /* State relative to anv_descriptor_pool::surface_bo */
3001    struct anv_state desc_surface_mem;
3002    /* State relative to anv_descriptor_pool::sampler_bo */
3003    struct anv_state desc_sampler_mem;
3004    /* Surface state for the descriptor buffer */
3005    struct anv_state desc_surface_state;
3006 
3007    /* Descriptor set address pointing to desc_surface_mem (we don't need one
3008     * for sampler because they're never accessed other than by the HW through
3009     * the shader sampler handle).
3010     */
3011    struct anv_address desc_surface_addr;
3012 
3013    struct anv_address desc_sampler_addr;
3014 
3015    /* Descriptor offset from the
3016     * device->va.internal_surface_state_pool.addr
3017     *
3018     * It just needs to be added to the binding table offset to be put into the
3019     * HW BTI entry.
3020     */
3021    uint32_t desc_offset;
3022 
3023    uint32_t buffer_view_count;
3024    struct anv_buffer_view *buffer_views;
3025 
3026    /* Link to descriptor pool's desc_sets list . */
3027    struct list_head pool_link;
3028 
3029    uint32_t descriptor_count;
3030    struct anv_descriptor descriptors[0];
3031 };
3032 
3033 static inline bool
anv_descriptor_set_is_push(struct anv_descriptor_set * set)3034 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
3035 {
3036    return set->pool == NULL;
3037 }
3038 
3039 struct anv_surface_state_data {
3040    uint8_t data[ANV_SURFACE_STATE_SIZE];
3041 };
3042 
3043 struct anv_buffer_state {
3044    /** Surface state allocated from the bindless heap
3045     *
3046     * Only valid if anv_physical_device::indirect_descriptors is true
3047     */
3048    struct anv_state state;
3049 
3050    /** Surface state after genxml packing
3051     *
3052     * Only valid if anv_physical_device::indirect_descriptors is false
3053     */
3054    struct anv_surface_state_data state_data;
3055 };
3056 
3057 struct anv_buffer_view {
3058    struct vk_buffer_view vk;
3059 
3060    struct anv_address address;
3061 
3062    struct anv_buffer_state general;
3063    struct anv_buffer_state storage;
3064 };
3065 
3066 struct anv_push_descriptor_set {
3067    struct anv_descriptor_set set;
3068 
3069    /* Put this field right behind anv_descriptor_set so it fills up the
3070     * descriptors[0] field. */
3071    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
3072 
3073    /** True if the descriptor set buffer has been referenced by a draw or
3074     * dispatch command.
3075     */
3076    bool set_used_on_gpu;
3077 
3078    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
3079 };
3080 
3081 static inline struct anv_address
anv_descriptor_set_address(struct anv_descriptor_set * set)3082 anv_descriptor_set_address(struct anv_descriptor_set *set)
3083 {
3084    if (anv_descriptor_set_is_push(set)) {
3085       /* We have to flag push descriptor set as used on the GPU
3086        * so that the next time we push descriptors, we grab a new memory.
3087        */
3088       struct anv_push_descriptor_set *push_set =
3089          (struct anv_push_descriptor_set *)set;
3090       push_set->set_used_on_gpu = true;
3091    }
3092 
3093    return set->desc_surface_addr;
3094 }
3095 
3096 struct anv_descriptor_pool_heap {
3097    /* BO allocated to back the pool (unused for host pools) */
3098    struct anv_bo        *bo;
3099 
3100    /* Host memory allocated to back a host pool */
3101    void                 *host_mem;
3102 
3103    /* Heap tracking allocations in bo/host_mem */
3104    struct util_vma_heap  heap;
3105 
3106    /* Size of the heap */
3107    uint32_t              size;
3108 
3109    /* Allocated size in the heap */
3110    uint32_t              alloc_size;
3111 };
3112 
3113 struct anv_descriptor_pool {
3114    struct vk_object_base base;
3115 
3116    struct anv_descriptor_pool_heap surfaces;
3117    struct anv_descriptor_pool_heap samplers;
3118 
3119    struct anv_state_stream surface_state_stream;
3120    void *surface_state_free_list;
3121 
3122    /** List of anv_descriptor_set. */
3123    struct list_head desc_sets;
3124 
3125    /** Heap over host_mem */
3126    struct util_vma_heap host_heap;
3127 
3128    /** Allocated size of host_mem */
3129    uint32_t host_mem_size;
3130 
3131    /**
3132     * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT. If set, then
3133     * surface_state_stream is unused.
3134     */
3135    bool host_only;
3136 
3137    alignas(8) char host_mem[0];
3138 };
3139 
3140 bool
3141 anv_push_descriptor_set_init(struct anv_cmd_buffer *cmd_buffer,
3142                              struct anv_push_descriptor_set *push_set,
3143                              struct anv_descriptor_set_layout *layout);
3144 
3145 void
3146 anv_push_descriptor_set_finish(struct anv_push_descriptor_set *push_set);
3147 
3148 void
3149 anv_descriptor_set_write_image_view(struct anv_device *device,
3150                                     struct anv_descriptor_set *set,
3151                                     const VkDescriptorImageInfo * const info,
3152                                     VkDescriptorType type,
3153                                     uint32_t binding,
3154                                     uint32_t element);
3155 
3156 void
3157 anv_descriptor_set_write_buffer_view(struct anv_device *device,
3158                                      struct anv_descriptor_set *set,
3159                                      VkDescriptorType type,
3160                                      struct anv_buffer_view *buffer_view,
3161                                      uint32_t binding,
3162                                      uint32_t element);
3163 
3164 void
3165 anv_descriptor_set_write_buffer(struct anv_device *device,
3166                                 struct anv_descriptor_set *set,
3167                                 VkDescriptorType type,
3168                                 struct anv_buffer *buffer,
3169                                 uint32_t binding,
3170                                 uint32_t element,
3171                                 VkDeviceSize offset,
3172                                 VkDeviceSize range);
3173 
3174 void
3175 anv_descriptor_write_surface_state(struct anv_device *device,
3176                                    struct anv_descriptor *desc,
3177                                    struct anv_state surface_state);
3178 
3179 void
3180 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
3181                                                 struct anv_descriptor_set *set,
3182                                                 struct vk_acceleration_structure *accel,
3183                                                 uint32_t binding,
3184                                                 uint32_t element);
3185 
3186 void
3187 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
3188                                              struct anv_descriptor_set *set,
3189                                              uint32_t binding,
3190                                              const void *data,
3191                                              size_t offset,
3192                                              size_t size);
3193 
3194 void
3195 anv_descriptor_set_write(struct anv_device *device,
3196                          struct anv_descriptor_set *set_override,
3197                          uint32_t write_count,
3198                          const VkWriteDescriptorSet *writes);
3199 
3200 void
3201 anv_descriptor_set_write_template(struct anv_device *device,
3202                                   struct anv_descriptor_set *set,
3203                                   const struct vk_descriptor_update_template *template,
3204                                   const void *data);
3205 
3206 #define ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER (UINT8_MAX - 4)
3207 #define ANV_DESCRIPTOR_SET_NULL               (UINT8_MAX - 3)
3208 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS     (UINT8_MAX - 2)
3209 #define ANV_DESCRIPTOR_SET_DESCRIPTORS        (UINT8_MAX - 1)
3210 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS   UINT8_MAX
3211 
3212 struct anv_pipeline_binding {
3213    /** Index in the descriptor set
3214     *
3215     * This is a flattened index; the descriptor set layout is already taken
3216     * into account.
3217     */
3218    uint32_t index;
3219 
3220    /** Binding in the descriptor set. Not valid for any of the
3221     * ANV_DESCRIPTOR_SET_*
3222     */
3223    uint32_t binding;
3224 
3225    /** Offset in the descriptor buffer
3226     *
3227     * Relative to anv_descriptor_set::desc_addr. This is useful for
3228     * ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT, to generate the binding
3229     * table entry.
3230     */
3231    uint32_t set_offset;
3232 
3233    /** The descriptor set this surface corresponds to.
3234     *
3235     * The special ANV_DESCRIPTOR_SET_* values above indicates that this
3236     * binding is not a normal descriptor set but something else.
3237     */
3238    uint8_t set;
3239 
3240    union {
3241       /** Plane in the binding index for images */
3242       uint8_t plane;
3243 
3244       /** Input attachment index (relative to the subpass) */
3245       uint8_t input_attachment_index;
3246 
3247       /** Dynamic offset index
3248        *
3249        * For dynamic UBOs and SSBOs, relative to set.
3250        */
3251       uint8_t dynamic_offset_index;
3252    };
3253 };
3254 
3255 struct anv_embedded_sampler_key {
3256    /** No need to track binding elements for embedded samplers as :
3257     *
3258     *    VUID-VkDescriptorSetLayoutBinding-flags-08006:
3259     *
3260     *       "If VkDescriptorSetLayoutCreateInfo:flags contains
3261     *        VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT,
3262     *        descriptorCount must: less than or equal to 1"
3263     *
3264     * The following struct can be safely hash as it doesn't include in
3265     * address/offset.
3266     */
3267    uint32_t sampler[4];
3268    uint32_t color[4];
3269 };
3270 
3271 struct anv_pipeline_embedded_sampler_binding {
3272    /** The descriptor set this sampler belongs to */
3273    uint8_t set;
3274 
3275    /** The binding in the set this sampler belongs to */
3276    uint32_t binding;
3277 
3278    /** The data configuring the sampler */
3279    struct anv_embedded_sampler_key key;
3280 };
3281 
3282 struct anv_push_range {
3283    /** Index in the descriptor set */
3284    uint32_t index;
3285 
3286    /** Descriptor set index */
3287    uint8_t set;
3288 
3289    /** Dynamic offset index (for dynamic UBOs), relative to set. */
3290    uint8_t dynamic_offset_index;
3291 
3292    /** Start offset in units of 32B */
3293    uint8_t start;
3294 
3295    /** Range in units of 32B */
3296    uint8_t length;
3297 };
3298 
3299 struct anv_pipeline_sets_layout {
3300    struct anv_device *device;
3301 
3302    struct {
3303       struct anv_descriptor_set_layout *layout;
3304       uint32_t dynamic_offset_start;
3305    } set[MAX_SETS];
3306 
3307    enum anv_descriptor_set_layout_type type;
3308 
3309    uint32_t num_sets;
3310    uint32_t num_dynamic_buffers;
3311    int push_descriptor_set_index;
3312 
3313    bool independent_sets;
3314 
3315    unsigned char sha1[20];
3316 };
3317 
3318 void anv_pipeline_sets_layout_init(struct anv_pipeline_sets_layout *layout,
3319                                    struct anv_device *device,
3320                                    bool independent_sets);
3321 
3322 void anv_pipeline_sets_layout_fini(struct anv_pipeline_sets_layout *layout);
3323 
3324 void anv_pipeline_sets_layout_add(struct anv_pipeline_sets_layout *layout,
3325                                   uint32_t set_idx,
3326                                   struct anv_descriptor_set_layout *set_layout);
3327 
3328 uint32_t
3329 anv_pipeline_sets_layout_embedded_sampler_count(const struct anv_pipeline_sets_layout *layout);
3330 
3331 void anv_pipeline_sets_layout_hash(struct anv_pipeline_sets_layout *layout);
3332 
3333 void anv_pipeline_sets_layout_print(const struct anv_pipeline_sets_layout *layout);
3334 
3335 struct anv_pipeline_layout {
3336    struct vk_object_base base;
3337 
3338    struct anv_pipeline_sets_layout sets_layout;
3339 };
3340 
3341 const struct anv_descriptor_set_layout *
3342 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
3343                                  uint8_t *desc_idx);
3344 
3345 struct anv_sparse_binding_data {
3346    uint64_t address;
3347    uint64_t size;
3348 
3349    /* This is kept only because it's given to us by vma_alloc() and need to be
3350     * passed back to vma_free(), we have no other particular use for it
3351     */
3352    struct util_vma_heap *vma_heap;
3353 };
3354 
3355 #define ANV_SPARSE_BLOCK_SIZE (64 * 1024)
3356 
3357 static inline bool
anv_sparse_binding_is_enabled(struct anv_device * device)3358 anv_sparse_binding_is_enabled(struct anv_device *device)
3359 {
3360    return device->vk.enabled_features.sparseBinding;
3361 }
3362 
3363 static inline bool
anv_sparse_residency_is_enabled(struct anv_device * device)3364 anv_sparse_residency_is_enabled(struct anv_device *device)
3365 {
3366    return device->vk.enabled_features.sparseResidencyBuffer ||
3367           device->vk.enabled_features.sparseResidencyImage2D ||
3368           device->vk.enabled_features.sparseResidencyImage3D ||
3369           device->vk.enabled_features.sparseResidency2Samples ||
3370           device->vk.enabled_features.sparseResidency4Samples ||
3371           device->vk.enabled_features.sparseResidency8Samples ||
3372           device->vk.enabled_features.sparseResidency16Samples ||
3373           device->vk.enabled_features.sparseResidencyAliased;
3374 }
3375 
3376 VkResult anv_init_sparse_bindings(struct anv_device *device,
3377                                   uint64_t size,
3378                                   struct anv_sparse_binding_data *sparse,
3379                                   enum anv_bo_alloc_flags alloc_flags,
3380                                   uint64_t client_address,
3381                                   struct anv_address *out_address);
3382 void anv_free_sparse_bindings(struct anv_device *device,
3383                               struct anv_sparse_binding_data *sparse);
3384 VkResult anv_sparse_bind_buffer(struct anv_device *device,
3385                                 struct anv_buffer *buffer,
3386                                 const VkSparseMemoryBind *vk_bind,
3387                                 struct anv_sparse_submission *submit);
3388 VkResult anv_sparse_bind_image_opaque(struct anv_device *device,
3389                                       struct anv_image *image,
3390                                       const VkSparseMemoryBind *vk_bind,
3391                                       struct anv_sparse_submission *submit);
3392 VkResult anv_sparse_bind_image_memory(struct anv_queue *queue,
3393                                       struct anv_image *image,
3394                                       const VkSparseImageMemoryBind *bind,
3395                                       struct anv_sparse_submission *submit);
3396 VkResult anv_sparse_bind(struct anv_device *device,
3397                          struct anv_sparse_submission *sparse_submit);
3398 
3399 VkResult anv_sparse_trtt_garbage_collect_batches(struct anv_device *device,
3400                                                  bool wait_completion);
3401 
3402 VkSparseImageFormatProperties
3403 anv_sparse_calc_image_format_properties(struct anv_physical_device *pdevice,
3404                                         VkImageAspectFlags aspect,
3405                                         VkImageType vk_image_type,
3406                                         VkSampleCountFlagBits vk_samples,
3407                                         struct isl_surf *surf);
3408 void anv_sparse_calc_miptail_properties(struct anv_device *device,
3409                                         struct anv_image *image,
3410                                         VkImageAspectFlags vk_aspect,
3411                                         uint32_t *imageMipTailFirstLod,
3412                                         VkDeviceSize *imageMipTailSize,
3413                                         VkDeviceSize *imageMipTailOffset,
3414                                         VkDeviceSize *imageMipTailStride);
3415 VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice,
3416                                         VkImageCreateFlags flags,
3417                                         VkImageTiling tiling,
3418                                         VkSampleCountFlagBits samples,
3419                                         VkImageType type,
3420                                         VkFormat format);
3421 
3422 struct anv_buffer {
3423    struct vk_buffer vk;
3424 
3425    /* Set when bound */
3426    struct anv_address address;
3427 
3428    struct anv_sparse_binding_data sparse_data;
3429 };
3430 
3431 static inline bool
anv_buffer_is_protected(const struct anv_buffer * buffer)3432 anv_buffer_is_protected(const struct anv_buffer *buffer)
3433 {
3434    return buffer->vk.create_flags & VK_BUFFER_CREATE_PROTECTED_BIT;
3435 }
3436 
3437 static inline bool
anv_buffer_is_sparse(const struct anv_buffer * buffer)3438 anv_buffer_is_sparse(const struct anv_buffer *buffer)
3439 {
3440    return buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
3441 }
3442 
3443 enum anv_cmd_dirty_bits {
3444    ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
3445    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
3446    ANV_CMD_DIRTY_RENDER_AREA                         = 1 << 2,
3447    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 3,
3448    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 4,
3449    ANV_CMD_DIRTY_RESTART_INDEX                       = 1 << 5,
3450    ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE              = 1 << 6,
3451    ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE                = 1 << 7,
3452 };
3453 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
3454 
3455 enum anv_pipe_bits {
3456    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
3457    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
3458    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
3459    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
3460    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
3461    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
3462    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
3463    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
3464    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
3465    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
3466    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
3467 
3468    /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
3469     * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
3470     * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
3471     */
3472    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
3473    ANV_PIPE_PSS_STALL_SYNC_BIT               = (1 << 15),
3474 
3475    /*
3476     * This bit flush data-port's Untyped L1 data cache (LSC L1).
3477     */
3478    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT = (1 << 16),
3479 
3480    /* This bit controls the flushing of the engine (Render, Compute) specific
3481     * entries from the compression cache.
3482     */
3483    ANV_PIPE_CCS_CACHE_FLUSH_BIT              = (1 << 17),
3484 
3485    ANV_PIPE_TLB_INVALIDATE_BIT               = (1 << 18),
3486 
3487    /* L3 Fabric Flush */
3488    ANV_PIPE_L3_FABRIC_FLUSH_BIT              = (1 << 19),
3489 
3490    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
3491    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
3492 
3493    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
3494     * a flush has happened but not a CS stall.  The next time we do any sort
3495     * of invalidation we need to insert a CS stall at that time.  Otherwise,
3496     * we would have to CS stall on every flush which could be bad.
3497     */
3498    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
3499 
3500    /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
3501     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
3502     * done by writing the AUX-TT register.
3503     */
3504    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 23),
3505 
3506    /* This bit does not exist directly in PIPE_CONTROL. It means that a
3507     * PIPE_CONTROL with a post-sync operation will follow. This is used to
3508     * implement a workaround for Gfx9.
3509     */
3510    ANV_PIPE_POST_SYNC_BIT                    = (1 << 24),
3511 
3512 };
3513 
3514 /* These bits track the state of buffer writes for queries. They get cleared
3515  * based on PIPE_CONTROL emissions.
3516  */
3517 enum anv_query_bits {
3518    ANV_QUERY_WRITES_RT_FLUSH      = (1 << 0),
3519 
3520    ANV_QUERY_WRITES_TILE_FLUSH    = (1 << 1),
3521 
3522    ANV_QUERY_WRITES_CS_STALL      = (1 << 2),
3523 
3524    ANV_QUERY_WRITES_DATA_FLUSH    = (1 << 3),
3525 };
3526 
3527 /* It's not clear why DG2 doesn't have issues with L3/CS coherency. But it's
3528  * likely related to performance workaround 14015868140.
3529  *
3530  * For now we enable this only on DG2 and platform prior to Gfx12 where there
3531  * is no tile cache.
3532  */
3533 #define ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) \
3534    (intel_device_info_is_dg2(devinfo))
3535 
3536 /* Things we need to flush before accessing query data using the command
3537  * streamer.
3538  *
3539  * Prior to DG2 experiments show that the command streamer is not coherent
3540  * with the tile cache so we need to flush it to make any data visible to CS.
3541  *
3542  * Otherwise we want to flush the RT cache which is where blorp writes, either
3543  * for clearing the query buffer or for clearing the destination buffer in
3544  * vkCopyQueryPoolResults().
3545  */
3546 #define ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(devinfo) \
3547    (((!ANV_DEVINFO_HAS_COHERENT_L3_CS(devinfo) && \
3548       (devinfo)->ver >= 12) ? \
3549      ANV_QUERY_WRITES_TILE_FLUSH : 0) | \
3550     ANV_QUERY_WRITES_RT_FLUSH | \
3551     ANV_QUERY_WRITES_CS_STALL)
3552 #define ANV_QUERY_COMPUTE_WRITES_PENDING_BITS \
3553    (ANV_QUERY_WRITES_DATA_FLUSH | \
3554     ANV_QUERY_WRITES_CS_STALL)
3555 
3556 #define ANV_PIPE_QUERY_BITS(pending_query_bits) ( \
3557    ((pending_query_bits & ANV_QUERY_WRITES_RT_FLUSH) ?   \
3558     ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT : 0) | \
3559    ((pending_query_bits & ANV_QUERY_WRITES_TILE_FLUSH) ?   \
3560     ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \
3561    ((pending_query_bits & ANV_QUERY_WRITES_CS_STALL) ?   \
3562     ANV_PIPE_CS_STALL_BIT : 0) | \
3563    ((pending_query_bits & ANV_QUERY_WRITES_DATA_FLUSH) ?  \
3564     (ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3565      ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3566      ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT) : 0))
3567 
3568 #define ANV_PIPE_FLUSH_BITS ( \
3569    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3570    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3571    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3572    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
3573    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3574    ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
3575    ANV_PIPE_L3_FABRIC_FLUSH_BIT)
3576 
3577 #define ANV_PIPE_BARRIER_FLUSH_BITS ( \
3578    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3579    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
3580    ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
3581    ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
3582    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3583    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
3584 
3585 #define ANV_PIPE_STALL_BITS ( \
3586    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3587    ANV_PIPE_DEPTH_STALL_BIT | \
3588    ANV_PIPE_CS_STALL_BIT | \
3589    ANV_PIPE_PSS_STALL_SYNC_BIT)
3590 
3591 #define ANV_PIPE_INVALIDATE_BITS ( \
3592    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
3593    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
3594    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
3595    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
3596    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
3597    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
3598 
3599 /* PIPE_CONTROL bits that should be set only in 3D RCS mode.
3600  * For more details see genX(emit_apply_pipe_flushes).
3601  */
3602 #define ANV_PIPE_GFX_BITS ( \
3603    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
3604    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
3605    ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
3606    ANV_PIPE_DEPTH_STALL_BIT | \
3607    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
3608    (GFX_VERx10 >= 125 ? ANV_PIPE_PSS_STALL_SYNC_BIT : 0) | \
3609    ANV_PIPE_VF_CACHE_INVALIDATE_BIT)
3610 
3611 /* PIPE_CONTROL bits that should be set only in Media/GPGPU RCS mode.
3612  * For more details see genX(emit_apply_pipe_flushes).
3613  *
3614  * Documentation says that untyped L1 dataport cache flush is controlled by
3615  * HDC pipeline flush in 3D mode according to HDC_CHICKEN0 register:
3616  *
3617  * BSpec 47112: PIPE_CONTROL::HDC Pipeline Flush:
3618  *
3619  *    "When the "Pipeline Select" mode in PIPELINE_SELECT command is set to
3620  *     "3D", HDC Pipeline Flush can also flush/invalidate the LSC Untyped L1
3621  *     cache based on the programming of HDC_Chicken0 register bits 13:11."
3622  *
3623  *    "When the 'Pipeline Select' mode is set to 'GPGPU', the LSC Untyped L1
3624  *     cache flush is controlled by 'Untyped Data-Port Cache Flush' bit in the
3625  *     PIPE_CONTROL command."
3626  *
3627  *    As part of Wa_22010960976 & Wa_14013347512, i915 is programming
3628  *    HDC_CHICKEN0[11:13] = 0 ("Untyped L1 is flushed, for both 3D Pipecontrol
3629  *    Dataport flush, and UAV coherency barrier event"). So there is no need
3630  *    to set "Untyped Data-Port Cache" in 3D mode.
3631  *
3632  * On MTL the HDC_CHICKEN0 default values changed to match what was programmed
3633  * by Wa_22010960976 & Wa_14013347512 on DG2, but experiments show that the
3634  * change runs a bit deeper. Even manually writing to the HDC_CHICKEN0
3635  * register to force L1 untyped flush with HDC pipeline flush has no effect on
3636  * MTL.
3637  *
3638  * It seems like the HW change completely disconnected L1 untyped flush from
3639  * HDC pipeline flush with no way to bring that behavior back. So leave the L1
3640  * untyped flush active in 3D mode on all platforms since it doesn't seems to
3641  * cause issues there too.
3642  *
3643  * Maybe we'll have some GPGPU only bits here at some point.
3644  */
3645 #define ANV_PIPE_GPGPU_BITS (0)
3646 
3647 enum intel_ds_stall_flag
3648 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
3649 
3650 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
3651    VK_IMAGE_ASPECT_PLANE_0_BIT | \
3652    VK_IMAGE_ASPECT_PLANE_1_BIT | \
3653    VK_IMAGE_ASPECT_PLANE_2_BIT)
3654 
3655 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
3656    VK_IMAGE_ASPECT_COLOR_BIT | \
3657    VK_IMAGE_ASPECT_PLANES_BITS_ANV)
3658 
3659 struct anv_vertex_binding {
3660    struct anv_buffer *                          buffer;
3661    VkDeviceSize                                 offset;
3662    VkDeviceSize                                 size;
3663 };
3664 
3665 struct anv_xfb_binding {
3666    struct anv_buffer *                          buffer;
3667    VkDeviceSize                                 offset;
3668    VkDeviceSize                                 size;
3669 };
3670 
3671 struct anv_push_constants {
3672    /** Push constant data provided by the client through vkPushConstants */
3673    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
3674 
3675 #define ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK ((uint32_t)ANV_UBO_ALIGNMENT - 1)
3676 #define ANV_DESCRIPTOR_SET_OFFSET_MASK        (~(uint32_t)(ANV_UBO_ALIGNMENT - 1))
3677 
3678    /**
3679     * Base offsets for descriptor sets from
3680     *
3681     * The offset has different meaning depending on a number of factors :
3682     *
3683     *    - with descriptor sets (direct or indirect), this relative
3684     *      pdevice->va.descriptor_pool
3685     *
3686     *    - with descriptor buffers on DG2+, relative
3687     *      device->va.descriptor_buffer_pool
3688     *
3689     *    - with descriptor buffers prior to DG2, relative the programmed value
3690     *      in STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress
3691     */
3692    uint32_t desc_surface_offsets[MAX_SETS];
3693 
3694    /**
3695     * Base offsets for descriptor sets from
3696     */
3697    uint32_t desc_sampler_offsets[MAX_SETS];
3698 
3699    /** Dynamic offsets for dynamic UBOs and SSBOs */
3700    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
3701 
3702    /** Surface buffer base offset
3703     *
3704     * Only used prior to DG2 with descriptor buffers.
3705     *
3706     * (surfaces_base_offset + desc_offsets[set_index]) is relative to
3707     * device->va.descriptor_buffer_pool and can be used to compute a 64bit
3708     * address to the descriptor buffer (using load_desc_set_address_intel).
3709     */
3710    uint32_t surfaces_base_offset;
3711 
3712    /* Robust access pushed registers. */
3713    uint64_t push_reg_mask[MESA_SHADER_STAGES];
3714 
3715    /** Ray query globals (RT_DISPATCH_GLOBALS) */
3716    uint64_t ray_query_globals;
3717 
3718    union {
3719       struct {
3720          /** Dynamic MSAA value */
3721          uint32_t fs_msaa_flags;
3722 
3723          /** Dynamic TCS input vertices */
3724          uint32_t tcs_input_vertices;
3725       } gfx;
3726 
3727       struct {
3728          /** Base workgroup ID
3729           *
3730           * Used for vkCmdDispatchBase.
3731           */
3732          uint32_t base_work_group_id[3];
3733 
3734          /** gl_NumWorkgroups */
3735          uint32_t num_work_groups[3];
3736 
3737          /** Subgroup ID
3738           *
3739           * This is never set by software but is implicitly filled out when
3740           * uploading the push constants for compute shaders.
3741           *
3742           * This *MUST* be the last field of the anv_push_constants structure.
3743           */
3744          uint32_t subgroup_id;
3745       } cs;
3746    };
3747 };
3748 
3749 struct anv_surface_state {
3750    /** Surface state allocated from the bindless heap
3751     *
3752     * Can be NULL if unused.
3753     */
3754    struct anv_state state;
3755 
3756    /** Surface state after genxml packing
3757     *
3758     * Same data as in state.
3759     */
3760    struct anv_surface_state_data state_data;
3761 
3762    /** Address of the surface referred to by this state
3763     *
3764     * This address is relative to the start of the BO.
3765     */
3766    struct anv_address address;
3767    /* Address of the aux surface, if any
3768     *
3769     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
3770     *
3771     * With the exception of gfx8, the bottom 12 bits of this address' offset
3772     * include extra aux information.
3773     */
3774    struct anv_address aux_address;
3775    /* Address of the clear color, if any
3776     *
3777     * This address is relative to the start of the BO.
3778     */
3779    struct anv_address clear_address;
3780 };
3781 
3782 struct anv_attachment {
3783    VkFormat vk_format;
3784    const struct anv_image_view *iview;
3785    VkImageLayout layout;
3786    enum isl_aux_usage aux_usage;
3787    struct anv_surface_state surface_state;
3788 
3789    VkResolveModeFlagBits resolve_mode;
3790    const struct anv_image_view *resolve_iview;
3791    VkImageLayout resolve_layout;
3792 };
3793 
3794 /** State tracking for vertex buffer flushes
3795  *
3796  * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
3797  * addresses.  If you happen to have two vertex buffers which get placed
3798  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
3799  * collisions.  In order to solve this problem, we track vertex address ranges
3800  * which are live in the cache and invalidate the cache if one ever exceeds 32
3801  * bits.
3802  */
3803 struct anv_vb_cache_range {
3804    /* Virtual address at which the live vertex buffer cache range starts for
3805     * this vertex buffer index.
3806     */
3807    uint64_t start;
3808 
3809    /* Virtual address of the byte after where vertex buffer cache range ends.
3810     * This is exclusive such that end - start is the size of the range.
3811     */
3812    uint64_t end;
3813 };
3814 
3815 static inline void
anv_merge_vb_cache_range(struct anv_vb_cache_range * dirty,const struct anv_vb_cache_range * bound)3816 anv_merge_vb_cache_range(struct anv_vb_cache_range *dirty,
3817                          const struct anv_vb_cache_range *bound)
3818 {
3819    if (dirty->start == dirty->end) {
3820       *dirty = *bound;
3821    } else if (bound->start != bound->end) {
3822       dirty->start = MIN2(dirty->start, bound->start);
3823       dirty->end = MAX2(dirty->end, bound->end);
3824    }
3825 }
3826 
3827 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround*/
3828 static inline bool
anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range * bound,struct anv_vb_cache_range * dirty,struct anv_address vb_address,uint32_t vb_size)3829 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
3830                                            struct anv_vb_cache_range *dirty,
3831                                            struct anv_address vb_address,
3832                                            uint32_t vb_size)
3833 {
3834    if (vb_size == 0) {
3835       bound->start = 0;
3836       bound->end = 0;
3837       return false;
3838    }
3839 
3840    bound->start = intel_48b_address(anv_address_physical(vb_address));
3841    bound->end = bound->start + vb_size;
3842    assert(bound->end > bound->start); /* No overflow */
3843 
3844    /* Align everything to a cache line */
3845    bound->start &= ~(64ull - 1ull);
3846    bound->end = align64(bound->end, 64);
3847 
3848    anv_merge_vb_cache_range(dirty, bound);
3849 
3850    /* If our range is larger than 32 bits, we have to flush */
3851    assert(bound->end - bound->start <= (1ull << 32));
3852    return (dirty->end - dirty->start) > (1ull << 32);
3853 }
3854 
3855 /**
3856  * State tracking for simple internal shaders
3857  */
3858 struct anv_simple_shader {
3859    /* The device associated with this emission */
3860    struct anv_device *device;
3861    /* The command buffer associated with this emission (can be NULL) */
3862    struct anv_cmd_buffer *cmd_buffer;
3863    /* State stream used for various internal allocations */
3864    struct anv_state_stream *dynamic_state_stream;
3865    struct anv_state_stream *general_state_stream;
3866    /* Where to emit the commands (can be different from cmd_buffer->batch) */
3867    struct anv_batch *batch;
3868    /* Shader to use */
3869    struct anv_shader_bin *kernel;
3870    /* L3 config used by the shader */
3871    const struct intel_l3_config *l3_config;
3872    /* Current URB config */
3873    const struct intel_urb_config *urb_cfg;
3874 
3875    /* Managed by the simpler shader helper*/
3876    struct anv_state bt_state;
3877 };
3878 
3879 /** State tracking for particular pipeline bind point
3880  *
3881  * This struct is the base struct for anv_cmd_graphics_state and
3882  * anv_cmd_compute_state.  These are used to track state which is bound to a
3883  * particular type of pipeline.  Generic state that applies per-stage such as
3884  * binding table offsets and push constants is tracked generically with a
3885  * per-stage array in anv_cmd_state.
3886  */
3887 struct anv_cmd_pipeline_state {
3888    struct anv_descriptor_set *descriptors[MAX_SETS];
3889    struct {
3890       bool             bound;
3891       /**
3892        * Buffer index used by this descriptor set.
3893        */
3894       int32_t          buffer_index; /* -1 means push descriptor */
3895       /**
3896        * Offset of the descriptor set in the descriptor buffer.
3897        */
3898       uint32_t         buffer_offset;
3899       /**
3900        * Final computed address to be emitted in the descriptor set surface
3901        * state.
3902        */
3903       uint64_t         address;
3904       /**
3905        * The descriptor set surface state.
3906        */
3907       struct anv_state state;
3908    } descriptor_buffers[MAX_SETS];
3909    struct anv_push_descriptor_set push_descriptor;
3910 
3911    struct anv_push_constants push_constants;
3912 
3913    /** Amount of data written to anv_push_constants::client_data */
3914    uint16_t push_constants_client_size;
3915 
3916    /** Tracks whether the push constant data has changed and need to be reemitted */
3917    bool                                         push_constants_data_dirty;
3918 
3919    /* Push constant state allocated when flushing push constants. */
3920    struct anv_state          push_constants_state;
3921 
3922    /**
3923     * Dynamic buffer offsets.
3924     *
3925     * We have a maximum of MAX_DYNAMIC_BUFFERS per pipeline, but with
3926     * independent sets we cannot know which how much in total is going to be
3927     * used. As a result we need to store the maximum possible number per set.
3928     *
3929     * Those values are written into anv_push_constants::dynamic_offsets at
3930     * flush time when have the pipeline with the final
3931     * anv_pipeline_sets_layout.
3932     */
3933    struct {
3934       uint32_t                                  offsets[MAX_DYNAMIC_BUFFERS];
3935    }                                            dynamic_offsets[MAX_SETS];
3936 
3937    /**
3938     * The current bound pipeline.
3939     */
3940    struct anv_pipeline      *pipeline;
3941 };
3942 
3943 enum anv_depth_reg_mode {
3944    ANV_DEPTH_REG_MODE_UNKNOWN = 0,
3945    ANV_DEPTH_REG_MODE_HW_DEFAULT,
3946    ANV_DEPTH_REG_MODE_D16_1X_MSAA,
3947 };
3948 
3949 /** State tracking for graphics pipeline
3950  *
3951  * This has anv_cmd_pipeline_state as a base struct to track things which get
3952  * bound to a graphics pipeline.  Along with general pipeline bind point state
3953  * which is in the anv_cmd_pipeline_state base struct, it also contains other
3954  * state which is graphics-specific.
3955  */
3956 struct anv_cmd_graphics_state {
3957    struct anv_cmd_pipeline_state base;
3958 
3959    VkRenderingFlags rendering_flags;
3960    VkRect2D render_area;
3961    uint32_t layer_count;
3962    uint32_t samples;
3963    uint32_t view_mask;
3964    uint32_t color_att_count;
3965    struct anv_state att_states;
3966    struct anv_attachment color_att[MAX_RTS];
3967    struct anv_attachment depth_att;
3968    struct anv_attachment stencil_att;
3969    struct anv_state null_surface_state;
3970 
3971    /* Map of color output from the last dispatched fragment shader to color
3972     * attachments in the render pass.
3973     */
3974    uint8_t color_output_mapping[MAX_RTS];
3975 
3976    anv_cmd_dirty_mask_t dirty;
3977    uint32_t vb_dirty;
3978 
3979    struct anv_vb_cache_range ib_bound_range;
3980    struct anv_vb_cache_range ib_dirty_range;
3981    struct anv_vb_cache_range vb_bound_ranges[33];
3982    struct anv_vb_cache_range vb_dirty_ranges[33];
3983 
3984    uint32_t restart_index;
3985 
3986    VkShaderStageFlags push_constant_stages;
3987 
3988    bool used_task_shader;
3989 
3990    struct anv_buffer *index_buffer;
3991    uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
3992    uint32_t index_offset;
3993    uint32_t index_size;
3994 
3995    uint32_t indirect_data_stride;
3996    bool indirect_data_stride_aligned;
3997 
3998    struct vk_vertex_input_state vertex_input;
3999    struct vk_sample_locations_state sample_locations;
4000 
4001    bool object_preemption;
4002    bool has_uint_rt;
4003 
4004    /* State tracking for Wa_14018912822. */
4005    bool color_blend_zero;
4006    bool alpha_blend_zero;
4007 
4008    /**
4009     * State tracking for Wa_18020335297.
4010     */
4011    bool                                         viewport_set;
4012 
4013    struct intel_urb_config urb_cfg;
4014 
4015    uint32_t n_occlusion_queries;
4016 
4017    /**
4018     * Whether or not the gfx8 PMA fix is enabled.  We ensure that, at the top
4019     * of any command buffer it is disabled by disabling it in EndCommandBuffer
4020     * and before invoking the secondary in ExecuteCommands.
4021     */
4022    bool                                         pma_fix_enabled;
4023 
4024    /**
4025     * Whether or not we know for certain that HiZ is enabled for the current
4026     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
4027     * enabled or not, this will be false.
4028     */
4029    bool                                         hiz_enabled;
4030 
4031    /**
4032     * We ensure the registers for the gfx12 D16 fix are initialized at the
4033     * first non-NULL depth stencil packet emission of every command buffer.
4034     * For secondary command buffer execution, we transfer the state from the
4035     * last command buffer to the primary (if known).
4036     */
4037    enum anv_depth_reg_mode                      depth_reg_mode;
4038 
4039    struct anv_gfx_dynamic_state dyn_state;
4040 };
4041 
4042 /** State tracking for compute pipeline
4043  *
4044  * This has anv_cmd_pipeline_state as a base struct to track things which get
4045  * bound to a compute pipeline.  Along with general pipeline bind point state
4046  * which is in the anv_cmd_pipeline_state base struct, it also contains other
4047  * state which is compute-specific.
4048  */
4049 struct anv_cmd_compute_state {
4050    struct anv_cmd_pipeline_state base;
4051 
4052    bool pipeline_dirty;
4053 
4054    uint32_t scratch_size;
4055 };
4056 
4057 struct anv_cmd_ray_tracing_state {
4058    struct anv_cmd_pipeline_state base;
4059 
4060    bool pipeline_dirty;
4061 
4062    struct {
4063       struct anv_bo *bo;
4064       struct brw_rt_scratch_layout layout;
4065    } scratch;
4066 
4067    uint32_t debug_marker_count;
4068    enum vk_acceleration_structure_build_step debug_markers[5];
4069 
4070    struct anv_address build_priv_mem_addr;
4071    size_t             build_priv_mem_size;
4072 };
4073 
4074 enum anv_cmd_descriptor_buffer_mode {
4075    ANV_CMD_DESCRIPTOR_BUFFER_MODE_UNKNOWN,
4076    ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY,
4077    ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER,
4078 };
4079 
4080 /** State required while building cmd buffer */
4081 struct anv_cmd_state {
4082    /* PIPELINE_SELECT.PipelineSelection */
4083    uint32_t                                     current_pipeline;
4084    const struct intel_l3_config *               current_l3_config;
4085    uint32_t                                     last_aux_map_state;
4086 
4087    struct anv_cmd_graphics_state                gfx;
4088    struct anv_cmd_compute_state                 compute;
4089    struct anv_cmd_ray_tracing_state             rt;
4090 
4091    enum anv_pipe_bits                           pending_pipe_bits;
4092 
4093    /**
4094     * Whether the last programmed STATE_BASE_ADDRESS references
4095     * anv_device::dynamic_state_pool or anv_device::dynamic_state_pool_db for
4096     * the dynamic state heap.
4097     */
4098    enum anv_cmd_descriptor_buffer_mode          current_db_mode;
4099 
4100    /**
4101     * Whether the command buffer has pending descriptor buffers bound it. This
4102     * variable changes before anv_device::current_db_mode.
4103     */
4104    enum anv_cmd_descriptor_buffer_mode          pending_db_mode;
4105 
4106    struct {
4107       /**
4108        * Tracks operations susceptible to interfere with queries in the
4109        * destination buffer of vkCmdCopyQueryResults, we need those operations to
4110        * have completed before we do the work of vkCmdCopyQueryResults.
4111        */
4112       enum anv_query_bits                          buffer_write_bits;
4113 
4114       /**
4115        * Tracks clear operations of query buffers that can interact with
4116        * vkCmdQueryBegin*, vkCmdWriteTimestamp*,
4117        * vkCmdWriteAccelerationStructuresPropertiesKHR, etc...
4118        *
4119        * We need the clearing of the buffer completed before with write data with
4120        * the command streamer or a shader.
4121        */
4122       enum anv_query_bits                          clear_bits;
4123    } queries;
4124 
4125    VkShaderStageFlags                           descriptors_dirty;
4126    VkShaderStageFlags                           push_descriptors_dirty;
4127    /** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */
4128    VkShaderStageFlags                           push_constants_dirty;
4129 
4130    struct {
4131       uint64_t                                  surfaces_address;
4132       uint64_t                                  samplers_address;
4133       bool                                      dirty;
4134       VkShaderStageFlags                        offsets_dirty;
4135       uint64_t                                  address[MAX_SETS];
4136    }                                            descriptor_buffers;
4137 
4138    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
4139    bool                                         xfb_enabled;
4140    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
4141    struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
4142    struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
4143 
4144    unsigned char                                sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
4145    unsigned char                                surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
4146    unsigned char                                push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
4147 
4148    /* The last auxiliary surface operation (or equivalent operation) provided
4149     * to genX(cmd_buffer_update_color_aux_op).
4150     */
4151    enum isl_aux_op                              color_aux_op;
4152 
4153    /**
4154     * Whether RHWO optimization is enabled (Wa_1508744258).
4155     */
4156    bool                                         rhwo_optimization_enabled;
4157 
4158    /**
4159     * Pending state of the RHWO optimization, to be applied at the next
4160     * genX(cmd_buffer_apply_pipe_flushes).
4161     */
4162    bool                                         pending_rhwo_optimization_enabled;
4163 
4164    bool                                         conditional_render_enabled;
4165 
4166    /**
4167     * Last rendering scale argument provided to
4168     * genX(cmd_buffer_emit_hashing_mode)().
4169     */
4170    unsigned                                     current_hash_scale;
4171 
4172    /**
4173     * A buffer used for spill/fill of ray queries.
4174     */
4175    struct anv_bo *                              ray_query_shadow_bo;
4176 
4177    /** Pointer to the last emitted COMPUTE_WALKER.
4178     *
4179     * This is used to edit the instruction post emission to replace the "Post
4180     * Sync" field for utrace timestamp emission.
4181     */
4182    void                                        *last_compute_walker;
4183 
4184    /** Pointer to the last emitted EXECUTE_INDIRECT_DISPATCH.
4185     *
4186     * This is used to edit the instruction post emission to replace the "Post
4187     * Sync" field for utrace timestamp emission.
4188     */
4189    void                                        *last_indirect_dispatch;
4190 };
4191 
4192 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
4193 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
4194 
4195 enum anv_cmd_buffer_exec_mode {
4196    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
4197    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
4198    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
4199    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
4200    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
4201    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
4202 };
4203 
4204 struct anv_measure_batch;
4205 
4206 struct anv_cmd_buffer {
4207    struct vk_command_buffer                     vk;
4208 
4209    struct anv_device *                          device;
4210    struct anv_queue_family *                    queue_family;
4211 
4212    /** Batch where the main commands live */
4213    struct anv_batch                             batch;
4214 
4215    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
4216     * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
4217     * rewrite the end to chain multiple batch together at vkQueueSubmit().
4218     */
4219    void *                                       batch_end;
4220 
4221    /* Fields required for the actual chain of anv_batch_bo's.
4222     *
4223     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
4224     */
4225    struct list_head                             batch_bos;
4226    enum anv_cmd_buffer_exec_mode                exec_mode;
4227 
4228    /* A vector of anv_batch_bo pointers for every batch or surface buffer
4229     * referenced by this command buffer
4230     *
4231     * initialized by anv_cmd_buffer_init_batch_bo_chain()
4232     */
4233    struct u_vector                            seen_bbos;
4234 
4235    /* A vector of int32_t's for every block of binding tables.
4236     *
4237     * initialized by anv_cmd_buffer_init_batch_bo_chain()
4238     */
4239    struct u_vector                              bt_block_states;
4240    struct anv_state                             bt_next;
4241 
4242    struct anv_reloc_list                        surface_relocs;
4243 
4244    /* Serial for tracking buffer completion */
4245    uint32_t                                     serial;
4246 
4247    /* Stream objects for storing temporary data */
4248    struct anv_state_stream                      surface_state_stream;
4249    struct anv_state_stream                      dynamic_state_stream;
4250    struct anv_state_stream                      general_state_stream;
4251    struct anv_state_stream                      indirect_push_descriptor_stream;
4252    struct anv_state_stream                      push_descriptor_buffer_stream;
4253 
4254    VkCommandBufferUsageFlags                    usage_flags;
4255 
4256    struct anv_query_pool                       *perf_query_pool;
4257 
4258    struct anv_cmd_state                         state;
4259 
4260    /* Fast-clear statistics. */
4261    uint64_t                                     num_dependent_clears;
4262    uint64_t                                     num_independent_clears;
4263 
4264    struct anv_address                           return_addr;
4265 
4266    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
4267    uint64_t                                     intel_perf_marker;
4268 
4269    struct anv_measure_batch *measure;
4270 
4271    /**
4272     * KHR_performance_query requires self modifying command buffers and this
4273     * array has the location of modifying commands to the query begin and end
4274     * instructions storing performance counters. The array length is
4275     * anv_physical_device::n_perf_query_commands.
4276     */
4277    struct mi_address_token                  *self_mod_locations;
4278 
4279    /**
4280     * Index tracking which of the self_mod_locations items have already been
4281     * used.
4282     */
4283    uint32_t                                      perf_reloc_idx;
4284 
4285    /**
4286     * Sum of all the anv_batch_bo written sizes for this command buffer
4287     * including any executed secondary command buffer.
4288     */
4289    uint32_t                                     total_batch_size;
4290 
4291    struct {
4292       /** Batch generating part of the anv_cmd_buffer::batch */
4293       struct anv_batch                          batch;
4294 
4295       /**
4296        * Location in anv_cmd_buffer::batch at which we left some space to
4297        * insert a MI_BATCH_BUFFER_START into the
4298        * anv_cmd_buffer::generation::batch if needed.
4299        */
4300       struct anv_address                        jump_addr;
4301 
4302       /**
4303        * Location in anv_cmd_buffer::batch at which the generation batch
4304        * should jump back to.
4305        */
4306       struct anv_address                        return_addr;
4307 
4308       /** List of anv_batch_bo used for generation
4309        *
4310        * We have to keep this separated of the anv_cmd_buffer::batch_bos that
4311        * is used for a chaining optimization.
4312        */
4313       struct list_head                          batch_bos;
4314 
4315       /** Ring buffer of generated commands
4316        *
4317        * When generating draws in ring mode, this buffer will hold generated
4318        * 3DPRIMITIVE commands.
4319        */
4320       struct anv_bo                            *ring_bo;
4321 
4322       /**
4323        * State tracking of the generation shader (only used for the non-ring
4324        * mode).
4325        */
4326       struct anv_simple_shader                  shader_state;
4327    } generation;
4328 
4329    /**
4330     * A vector of anv_bo pointers for chunks of memory used by the command
4331     * buffer that are too large to be allocated through dynamic_state_stream.
4332     * This is the case for large enough acceleration structures.
4333     *
4334     * initialized by anv_cmd_buffer_init_batch_bo_chain()
4335     */
4336    struct u_vector                              dynamic_bos;
4337 
4338    /**
4339     * Structure holding tracepoints recorded in the command buffer.
4340     */
4341    struct u_trace                               trace;
4342 
4343    struct {
4344       struct anv_video_session *vid;
4345       struct anv_video_session_params *params;
4346    } video;
4347 
4348    /**
4349     * Companion RCS command buffer to support the MSAA operations on compute
4350     * queue.
4351     */
4352    struct anv_cmd_buffer                        *companion_rcs_cmd_buffer;
4353 
4354    /**
4355     * Whether this command buffer is a companion command buffer of compute one.
4356     */
4357    bool                                         is_companion_rcs_cmd_buffer;
4358 
4359 };
4360 
4361 extern const struct vk_command_buffer_ops anv_cmd_buffer_ops;
4362 
4363 /* Determine whether we can chain a given cmd_buffer to another one. We need
4364  * to make sure that we can edit the end of the batch to point to next one,
4365  * which requires the command buffer to not be used simultaneously.
4366  *
4367  * We could in theory also implement chaining with companion command buffers,
4368  * but let's sparse ourselves some pain and misery. This optimization has no
4369  * benefit on the brand new Xe kernel driver.
4370  */
4371 static inline bool
anv_cmd_buffer_is_chainable(struct anv_cmd_buffer * cmd_buffer)4372 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
4373 {
4374    return !(cmd_buffer->usage_flags &
4375             VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) &&
4376           !(cmd_buffer->is_companion_rcs_cmd_buffer);
4377 }
4378 
4379 static inline bool
anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer * cmd_buffer)4380 anv_cmd_buffer_is_render_queue(const struct anv_cmd_buffer *cmd_buffer)
4381 {
4382    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4383    return (queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
4384 }
4385 
4386 static inline bool
anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer * cmd_buffer)4387 anv_cmd_buffer_is_video_queue(const struct anv_cmd_buffer *cmd_buffer)
4388 {
4389    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4390    return ((queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) |
4391            (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) != 0;
4392 }
4393 
4394 static inline bool
anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer * cmd_buffer)4395 anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
4396 {
4397    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4398    return queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
4399 }
4400 
4401 static inline bool
anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer * cmd_buffer)4402 anv_cmd_buffer_is_blitter_queue(const struct anv_cmd_buffer *cmd_buffer)
4403 {
4404    struct anv_queue_family *queue_family = cmd_buffer->queue_family;
4405    return queue_family->engine_class == INTEL_ENGINE_CLASS_COPY;
4406 }
4407 
4408 static inline bool
anv_cmd_buffer_is_render_or_compute_queue(const struct anv_cmd_buffer * cmd_buffer)4409 anv_cmd_buffer_is_render_or_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
4410 {
4411    return anv_cmd_buffer_is_render_queue(cmd_buffer) ||
4412           anv_cmd_buffer_is_compute_queue(cmd_buffer);
4413 }
4414 
4415 static inline uint8_t
anv_get_ray_query_bo_index(struct anv_cmd_buffer * cmd_buffer)4416 anv_get_ray_query_bo_index(struct anv_cmd_buffer *cmd_buffer)
4417 {
4418    if (intel_needs_workaround(cmd_buffer->device->isl_dev.info, 14022863161))
4419       return anv_cmd_buffer_is_compute_queue(cmd_buffer) ? 1 : 0;
4420    return 0;
4421 }
4422 
4423 static inline struct anv_address
anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer * cmd_buffer,struct anv_state state)4424 anv_cmd_buffer_dynamic_state_address(struct anv_cmd_buffer *cmd_buffer,
4425                                      struct anv_state state)
4426 {
4427    return anv_state_pool_state_address(
4428       &cmd_buffer->device->dynamic_state_pool, state);
4429 }
4430 
4431 static inline uint64_t
anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer * cmd_buffer,int32_t buffer_index)4432 anv_cmd_buffer_descriptor_buffer_address(struct anv_cmd_buffer *cmd_buffer,
4433                                          int32_t buffer_index)
4434 {
4435    if (buffer_index == -1)
4436       return cmd_buffer->device->physical->va.push_descriptor_buffer_pool.addr;
4437 
4438    return cmd_buffer->state.descriptor_buffers.address[buffer_index];
4439 }
4440 
4441 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4442 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4443 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
4444 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
4445 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
4446                                   struct anv_cmd_buffer *secondary);
4447 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
4448 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
4449                                 struct anv_cmd_buffer *cmd_buffer,
4450                                 const VkSemaphore *in_semaphores,
4451                                 const uint64_t *in_wait_values,
4452                                 uint32_t num_in_semaphores,
4453                                 const VkSemaphore *out_semaphores,
4454                                 const uint64_t *out_signal_values,
4455                                 uint32_t num_out_semaphores,
4456                                 VkFence fence,
4457                                 int perf_query_pass);
4458 
4459 void anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
4460                           UNUSED VkCommandBufferResetFlags flags);
4461 
4462 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
4463                                              const void *data, uint32_t size, uint32_t alignment);
4464 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
4465                                               uint32_t *a, uint32_t *b,
4466                                               uint32_t dwords, uint32_t alignment);
4467 
4468 struct anv_address
4469 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
4470 struct anv_state
4471 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
4472                                    uint32_t entries, uint32_t *state_offset);
4473 struct anv_state
4474 anv_cmd_buffer_alloc_surface_states(struct anv_cmd_buffer *cmd_buffer,
4475                                     uint32_t count);
4476 struct anv_state
4477 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
4478                                    uint32_t size, uint32_t alignment);
4479 struct anv_state
4480 anv_cmd_buffer_alloc_general_state(struct anv_cmd_buffer *cmd_buffer,
4481                                    uint32_t size, uint32_t alignment);
4482 static inline struct anv_state
anv_cmd_buffer_alloc_temporary_state(struct anv_cmd_buffer * cmd_buffer,uint32_t size,uint32_t alignment)4483 anv_cmd_buffer_alloc_temporary_state(struct anv_cmd_buffer *cmd_buffer,
4484                                      uint32_t size, uint32_t alignment)
4485 {
4486    struct anv_state state =
4487       anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
4488                              size, alignment);
4489    if (state.map == NULL)
4490       anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4491    return state;
4492 }
4493 static inline struct anv_address
anv_cmd_buffer_temporary_state_address(struct anv_cmd_buffer * cmd_buffer,struct anv_state state)4494 anv_cmd_buffer_temporary_state_address(struct anv_cmd_buffer *cmd_buffer,
4495                                        struct anv_state state)
4496 {
4497    return anv_state_pool_state_address(
4498       &cmd_buffer->device->dynamic_state_pool, state);
4499 }
4500 
4501 void
4502 anv_cmd_buffer_chain_command_buffers(struct anv_cmd_buffer **cmd_buffers,
4503                                      uint32_t num_cmd_buffers);
4504 void
4505 anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
4506                                 uint32_t cmd_buffer_count,
4507                                 struct anv_cmd_buffer **cmd_buffers,
4508                                 struct anv_query_pool *perf_query_pool,
4509                                 uint32_t perf_query_pass);
4510 void
4511 anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers,
4512                        uint32_t num_cmd_buffers);
4513 
4514 void
4515 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
4516                                          enum anv_pipe_bits flushed_bits);
4517 
4518 /**
4519  * A allocation tied to a command buffer.
4520  *
4521  * Don't use anv_cmd_alloc::address::map to write memory from userspace, use
4522  * anv_cmd_alloc::map instead.
4523  */
4524 struct anv_cmd_alloc {
4525    struct anv_address  address;
4526    void               *map;
4527    size_t              size;
4528 };
4529 
4530 #define ANV_EMPTY_ALLOC ((struct anv_cmd_alloc) { .map = NULL, .size = 0 })
4531 
4532 static inline bool
anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)4533 anv_cmd_alloc_is_empty(struct anv_cmd_alloc alloc)
4534 {
4535    return alloc.size == 0;
4536 }
4537 
4538 struct anv_cmd_alloc
4539 anv_cmd_buffer_alloc_space(struct anv_cmd_buffer *cmd_buffer,
4540                            size_t size, uint32_t alignment,
4541                            bool private);
4542 
4543 VkResult
4544 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
4545 
4546 void anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer);
4547 
4548 struct anv_state
4549 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
4550 struct anv_state
4551 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
4552 
4553 VkResult
4554 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
4555                                          uint32_t num_entries,
4556                                          uint32_t *state_offset,
4557                                          struct anv_state *bt_state);
4558 
4559 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
4560 
4561 static inline unsigned
anv_cmd_buffer_get_view_count(struct anv_cmd_buffer * cmd_buffer)4562 anv_cmd_buffer_get_view_count(struct anv_cmd_buffer *cmd_buffer)
4563 {
4564    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
4565    return MAX2(1, util_bitcount(gfx->view_mask));
4566 }
4567 
4568 /* Save/restore cmd buffer states for meta operations */
4569 enum anv_cmd_saved_state_flags {
4570    ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE         = BITFIELD_BIT(0),
4571    ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0         = BITFIELD_BIT(1),
4572    ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL       = BITFIELD_BIT(2),
4573    ANV_CMD_SAVED_STATE_PUSH_CONSTANTS           = BITFIELD_BIT(3),
4574 };
4575 
4576 struct anv_cmd_saved_state {
4577    uint32_t flags;
4578 
4579    struct anv_pipeline *pipeline;
4580    struct anv_descriptor_set *descriptor_set[MAX_SETS];
4581    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
4582 };
4583 
4584 void anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
4585                                uint32_t flags,
4586                                struct anv_cmd_saved_state *state);
4587 
4588 void anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
4589                                   struct anv_cmd_saved_state *state);
4590 
4591 enum anv_bo_sync_state {
4592    /** Indicates that this is a new (or newly reset fence) */
4593    ANV_BO_SYNC_STATE_RESET,
4594 
4595    /** Indicates that this fence has been submitted to the GPU but is still
4596     * (as far as we know) in use by the GPU.
4597     */
4598    ANV_BO_SYNC_STATE_SUBMITTED,
4599 
4600    ANV_BO_SYNC_STATE_SIGNALED,
4601 };
4602 
4603 struct anv_bo_sync {
4604    struct vk_sync sync;
4605 
4606    enum anv_bo_sync_state state;
4607    struct anv_bo *bo;
4608 };
4609 
4610 extern const struct vk_sync_type anv_bo_sync_type;
4611 
4612 static inline bool
vk_sync_is_anv_bo_sync(const struct vk_sync * sync)4613 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
4614 {
4615    return sync->type == &anv_bo_sync_type;
4616 }
4617 
4618 VkResult anv_create_sync_for_memory(struct vk_device *device,
4619                                     VkDeviceMemory memory,
4620                                     bool signal_memory,
4621                                     struct vk_sync **sync_out);
4622 
4623 struct anv_event {
4624    struct vk_object_base                        base;
4625    uint64_t                                     semaphore;
4626    struct anv_state                             state;
4627 };
4628 
4629 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
4630 
4631 #define anv_foreach_stage(stage, stage_bits)                         \
4632    for (gl_shader_stage stage,                                       \
4633         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
4634         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
4635         __tmp &= ~(1 << (stage)))
4636 
4637 struct anv_pipeline_bind_map {
4638    unsigned char                                surface_sha1[20];
4639    unsigned char                                sampler_sha1[20];
4640    unsigned char                                push_sha1[20];
4641 
4642    uint32_t surface_count;
4643    uint32_t sampler_count;
4644    uint32_t embedded_sampler_count;
4645    uint16_t kernel_args_size;
4646    uint16_t kernel_arg_count;
4647 
4648    struct anv_pipeline_binding *                surface_to_descriptor;
4649    struct anv_pipeline_binding *                sampler_to_descriptor;
4650    struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding;
4651    struct brw_kernel_arg_desc *                 kernel_args;
4652 
4653    struct anv_push_range                        push_ranges[4];
4654 };
4655 
4656 struct anv_push_descriptor_info {
4657    /* A bitfield of descriptors used. */
4658    uint32_t used_descriptors;
4659 
4660    /* A bitfield of UBOs bindings fully promoted to push constants. */
4661    uint32_t fully_promoted_ubo_descriptors;
4662 
4663    /* */
4664    uint8_t used_set_buffer;
4665 };
4666 
4667 /* A list of values we push to implement some of the dynamic states */
4668 enum anv_dynamic_push_bits {
4669    ANV_DYNAMIC_PUSH_INPUT_VERTICES = BITFIELD_BIT(0),
4670 };
4671 
4672 struct anv_shader_upload_params {
4673    gl_shader_stage stage;
4674 
4675    const void *key_data;
4676    uint32_t key_size;
4677 
4678    const void *kernel_data;
4679    uint32_t kernel_size;
4680 
4681    const struct brw_stage_prog_data *prog_data;
4682    uint32_t prog_data_size;
4683 
4684    const struct brw_compile_stats *stats;
4685    uint32_t num_stats;
4686 
4687    const struct nir_xfb_info *xfb_info;
4688 
4689    const struct anv_pipeline_bind_map *bind_map;
4690 
4691    const struct anv_push_descriptor_info *push_desc_info;
4692 
4693    enum anv_dynamic_push_bits dynamic_push_values;
4694 };
4695 
4696 struct anv_embedded_sampler {
4697    uint32_t ref_cnt;
4698 
4699    struct anv_embedded_sampler_key key;
4700 
4701    struct anv_state sampler_state;
4702    struct anv_state border_color_state;
4703 };
4704 
4705 struct anv_shader_bin {
4706    struct vk_pipeline_cache_object base;
4707 
4708    gl_shader_stage stage;
4709 
4710    struct anv_state kernel;
4711    uint32_t kernel_size;
4712 
4713    const struct brw_stage_prog_data *prog_data;
4714    uint32_t prog_data_size;
4715 
4716    struct brw_compile_stats stats[3];
4717    uint32_t num_stats;
4718 
4719    struct nir_xfb_info *xfb_info;
4720 
4721    struct anv_push_descriptor_info push_desc_info;
4722 
4723    struct anv_pipeline_bind_map bind_map;
4724 
4725    enum anv_dynamic_push_bits dynamic_push_values;
4726 
4727    /* Not saved in the pipeline cache.
4728     *
4729     * Array of pointers of length bind_map.embedded_sampler_count
4730     */
4731    struct anv_embedded_sampler **embedded_samplers;
4732 };
4733 
4734 static inline struct anv_shader_bin *
anv_shader_bin_ref(struct anv_shader_bin * shader)4735 anv_shader_bin_ref(struct anv_shader_bin *shader)
4736 {
4737    vk_pipeline_cache_object_ref(&shader->base);
4738 
4739    return shader;
4740 }
4741 
4742 static inline void
anv_shader_bin_unref(struct anv_device * device,struct anv_shader_bin * shader)4743 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
4744 {
4745    vk_pipeline_cache_object_unref(&device->vk, &shader->base);
4746 }
4747 
4748 struct anv_pipeline_executable {
4749    gl_shader_stage stage;
4750 
4751    struct brw_compile_stats stats;
4752 
4753    char *nir;
4754    char *disasm;
4755 };
4756 
4757 enum anv_pipeline_type {
4758    ANV_PIPELINE_GRAPHICS,
4759    ANV_PIPELINE_GRAPHICS_LIB,
4760    ANV_PIPELINE_COMPUTE,
4761    ANV_PIPELINE_RAY_TRACING,
4762 };
4763 
4764 struct anv_pipeline {
4765    struct vk_object_base                        base;
4766 
4767    struct anv_device *                          device;
4768 
4769    struct anv_batch                             batch;
4770    struct anv_reloc_list                        batch_relocs;
4771 
4772    void *                                       mem_ctx;
4773 
4774    enum anv_pipeline_type                       type;
4775    VkPipelineCreateFlags2KHR                    flags;
4776 
4777    VkShaderStageFlags                           active_stages;
4778 
4779    uint32_t                                     ray_queries;
4780 
4781    /**
4782     * Mask of stages that are accessing push descriptors.
4783     */
4784    VkShaderStageFlags                           use_push_descriptor;
4785 
4786    /**
4787     * Mask of stages that are accessing the push descriptors buffer.
4788     */
4789    VkShaderStageFlags                           use_push_descriptor_buffer;
4790 
4791    /**
4792     * Maximum scratch size for all shaders in this pipeline.
4793     */
4794    uint32_t                                     scratch_size;
4795 
4796    /* Layout of the sets used by the pipeline. */
4797    struct anv_pipeline_sets_layout              layout;
4798 
4799    struct util_dynarray                         executables;
4800 
4801    const struct intel_l3_config *               l3_config;
4802 };
4803 
4804 /* The base graphics pipeline object only hold shaders. */
4805 struct anv_graphics_base_pipeline {
4806    struct anv_pipeline                          base;
4807 
4808    struct vk_sample_locations_state             sample_locations;
4809 
4810    /* Shaders */
4811    struct anv_shader_bin *                      shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4812 
4813    /* A small hash based of shader_info::source_sha1 for identifying
4814     * shaders in renderdoc/shader-db.
4815     */
4816    uint32_t                                     source_hashes[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4817 
4818    /* Feedback index in
4819     * VkPipelineCreationFeedbackCreateInfo::pPipelineStageCreationFeedbacks
4820     *
4821     * For pipeline libraries, we need to remember the order at creation when
4822     * included into a linked pipeline.
4823     */
4824    uint32_t                                     feedback_index[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4825 
4826    /* Robustness flags used shaders
4827     */
4828    enum brw_robustness_flags                    robust_flags[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4829 
4830    /* True if at the time the fragment shader was compiled, it didn't have all
4831     * the information to avoid INTEL_MSAA_FLAG_ENABLE_DYNAMIC.
4832     */
4833    bool                                         fragment_dynamic;
4834 };
4835 
4836 /* The library graphics pipeline object has a partial graphic state and
4837  * possibly some shaders. If requested, shaders are also present in NIR early
4838  * form.
4839  */
4840 struct anv_graphics_lib_pipeline {
4841    struct anv_graphics_base_pipeline            base;
4842 
4843    VkGraphicsPipelineLibraryFlagsEXT            lib_flags;
4844 
4845    struct vk_graphics_pipeline_all_state        all_state;
4846    struct vk_graphics_pipeline_state            state;
4847 
4848    /* Retained shaders for link optimization. */
4849    struct {
4850       /* This hash is the same as computed in
4851        * anv_graphics_pipeline_gather_shaders().
4852        */
4853       unsigned char                             shader_sha1[20];
4854 
4855       enum gl_subgroup_size                     subgroup_size_type;
4856 
4857       /* Hold on the value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT
4858        * from library that introduces the stage, so it remains consistent.
4859        */
4860       bool                                      view_index_from_device_index;
4861 
4862       /* NIR captured in anv_pipeline_stage_get_nir(), includes specialization
4863        * constants.
4864        */
4865       nir_shader *                              nir;
4866    }                                            retained_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
4867 
4868    /* Whether the shaders have been retained */
4869    bool                                         retain_shaders;
4870 };
4871 
4872 struct anv_gfx_state_ptr {
4873    /* Both in dwords */
4874    uint16_t  offset;
4875    uint16_t  len;
4876 };
4877 
4878 /* The final graphics pipeline object has all the graphics state ready to be
4879  * programmed into HW packets (dynamic_state field) or fully baked in its
4880  * batch.
4881  */
4882 struct anv_graphics_pipeline {
4883    struct anv_graphics_base_pipeline            base;
4884 
4885    struct vk_vertex_input_state                 vertex_input;
4886    struct vk_sample_locations_state             sample_locations;
4887    struct vk_dynamic_graphics_state             dynamic_state;
4888 
4889    /* If true, the patch control points are passed through push constants
4890     * (anv_push_constants::gfx::tcs_input_vertices)
4891     */
4892    bool                                         dynamic_patch_control_points;
4893 
4894    uint32_t                                     view_mask;
4895    uint32_t                                     instance_multiplier;
4896 
4897    bool                                         rp_has_ds_self_dep;
4898 
4899    bool                                         kill_pixel;
4900    bool                                         uses_xfb;
4901    bool                                         sample_shading_enable;
4902    float                                        min_sample_shading;
4903 
4904    /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */
4905    uint32_t                                     vs_input_elements;
4906 
4907    /* Number of VERTEX_ELEMENT_STATE elements we need to implement some of the
4908     * draw parameters
4909     */
4910    uint32_t                                     svgs_count;
4911 
4912    /* Pre computed VERTEX_ELEMENT_STATE structures for the vertex input that
4913     * can be copied into the anv_cmd_buffer behind a 3DSTATE_VERTEX_BUFFER.
4914     *
4915     * When MESA_VK_DYNAMIC_VI is not dynamic
4916     *
4917     *     vertex_input_elems = vs_input_elements + svgs_count
4918     *
4919     * All the VERTEX_ELEMENT_STATE can be directly copied behind a
4920     * 3DSTATE_VERTEX_ELEMENTS instruction in the command buffer. Otherwise
4921     * this array only holds the svgs_count elements.
4922     */
4923    uint32_t                                     vertex_input_elems;
4924    uint32_t                                     vertex_input_data[2 * 31 /* MAX_VES + 2 internal */];
4925 
4926    /* Number of color outputs used by the fragment shader. */
4927    uint8_t                                      num_color_outputs;
4928    /* Map of color output of the fragment shader to color attachments in the
4929     * render pass.
4930     */
4931    uint8_t                                      color_output_mapping[MAX_RTS];
4932 
4933    /* Pre computed CS instructions that can directly be copied into
4934     * anv_cmd_buffer.
4935     */
4936    uint32_t                                     batch_data[480];
4937 
4938    /* Urb setup utilized by this pipeline. */
4939    struct intel_urb_config urb_cfg;
4940 
4941    /* Fully backed instructions, ready to be emitted in the anv_cmd_buffer */
4942    struct {
4943       struct anv_gfx_state_ptr                  urb;
4944       struct anv_gfx_state_ptr                  vf_sgvs;
4945       struct anv_gfx_state_ptr                  vf_sgvs_2;
4946       struct anv_gfx_state_ptr                  vf_sgvs_instancing;
4947       struct anv_gfx_state_ptr                  vf_instancing;
4948       struct anv_gfx_state_ptr                  primitive_replication;
4949       struct anv_gfx_state_ptr                  sbe;
4950       struct anv_gfx_state_ptr                  sbe_swiz;
4951       struct anv_gfx_state_ptr                  so_decl_list;
4952       struct anv_gfx_state_ptr                  vs;
4953       struct anv_gfx_state_ptr                  hs;
4954       struct anv_gfx_state_ptr                  ds;
4955       struct anv_gfx_state_ptr                  vs_protected;
4956       struct anv_gfx_state_ptr                  hs_protected;
4957       struct anv_gfx_state_ptr                  ds_protected;
4958 
4959       struct anv_gfx_state_ptr                  task_control;
4960       struct anv_gfx_state_ptr                  task_control_protected;
4961       struct anv_gfx_state_ptr                  task_shader;
4962       struct anv_gfx_state_ptr                  task_redistrib;
4963       struct anv_gfx_state_ptr                  clip_mesh;
4964       struct anv_gfx_state_ptr                  mesh_control;
4965       struct anv_gfx_state_ptr                  mesh_control_protected;
4966       struct anv_gfx_state_ptr                  mesh_shader;
4967       struct anv_gfx_state_ptr                  mesh_distrib;
4968       struct anv_gfx_state_ptr                  sbe_mesh;
4969    } final;
4970 
4971    /* Pre packed CS instructions & structures that need to be merged later
4972     * with dynamic state.
4973     */
4974    struct {
4975       struct anv_gfx_state_ptr                  clip;
4976       struct anv_gfx_state_ptr                  sf;
4977       struct anv_gfx_state_ptr                  ps_extra;
4978       struct anv_gfx_state_ptr                  wm;
4979       struct anv_gfx_state_ptr                  so;
4980       struct anv_gfx_state_ptr                  gs;
4981       struct anv_gfx_state_ptr                  gs_protected;
4982       struct anv_gfx_state_ptr                  te;
4983       struct anv_gfx_state_ptr                  ps;
4984       struct anv_gfx_state_ptr                  ps_protected;
4985       struct anv_gfx_state_ptr                  vfg;
4986    } partial;
4987 };
4988 
4989 #define anv_batch_emit_pipeline_state(batch, pipeline, state)           \
4990    do {                                                                 \
4991       if ((pipeline)->state.len == 0)                                   \
4992          break;                                                         \
4993       uint32_t *dw;                                                     \
4994       dw = anv_batch_emit_dwords((batch), (pipeline)->state.len);       \
4995       if (!dw)                                                          \
4996          break;                                                         \
4997       memcpy(dw, &(pipeline)->batch_data[(pipeline)->state.offset],     \
4998              4 * (pipeline)->state.len);                                \
4999    } while (0)
5000 
5001 #define anv_batch_emit_pipeline_state_protected(batch, pipeline,        \
5002                                                 state, protected)       \
5003    do {                                                                 \
5004       struct anv_gfx_state_ptr *_cmd_state = protected ?                \
5005          &(pipeline)->state##_protected : &(pipeline)->state;           \
5006       if (_cmd_state->len == 0)                                         \
5007          break;                                                         \
5008       uint32_t *dw;                                                     \
5009       dw = anv_batch_emit_dwords((batch), _cmd_state->len);             \
5010       if (!dw)                                                          \
5011          break;                                                         \
5012       memcpy(dw, &(pipeline)->batch_data[_cmd_state->offset],           \
5013              4 * _cmd_state->len);                                      \
5014    } while (0)
5015 
5016 
5017 struct anv_compute_pipeline {
5018    struct anv_pipeline                          base;
5019 
5020    struct anv_shader_bin *                      cs;
5021    uint32_t                                     batch_data[9];
5022    uint32_t                                     interface_descriptor_data[8];
5023 
5024    /* A small hash based of shader_info::source_sha1 for identifying shaders
5025     * in renderdoc/shader-db.
5026     */
5027    uint32_t                                     source_hash;
5028 };
5029 
5030 struct anv_rt_shader_group {
5031    VkRayTracingShaderGroupTypeKHR type;
5032 
5033    /* Whether this group was imported from another pipeline */
5034    bool imported;
5035 
5036    struct anv_shader_bin *general;
5037    struct anv_shader_bin *closest_hit;
5038    struct anv_shader_bin *any_hit;
5039    struct anv_shader_bin *intersection;
5040 
5041    /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
5042    uint32_t handle[8];
5043 };
5044 
5045 struct anv_ray_tracing_pipeline {
5046    struct anv_pipeline                          base;
5047 
5048    /* All shaders in the pipeline */
5049    struct util_dynarray                         shaders;
5050 
5051    uint32_t                                     group_count;
5052    struct anv_rt_shader_group *                 groups;
5053 
5054    /* If non-zero, this is the default computed stack size as per the stack
5055     * size computation in the Vulkan spec.  If zero, that indicates that the
5056     * client has requested a dynamic stack size.
5057     */
5058    uint32_t                                     stack_size;
5059 };
5060 
5061 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
5062    static inline struct anv_##pipe_type##_pipeline *                 \
5063    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
5064    {                                                                 \
5065       assert(pipeline->type == pipe_enum);                           \
5066       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
5067    }
5068 
ANV_DECL_PIPELINE_DOWNCAST(graphics,ANV_PIPELINE_GRAPHICS)5069 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
5070 ANV_DECL_PIPELINE_DOWNCAST(graphics_lib, ANV_PIPELINE_GRAPHICS_LIB)
5071 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
5072 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
5073 
5074 /* Can't use the macro because we need to handle both types. */
5075 static inline struct anv_graphics_base_pipeline *
5076 anv_pipeline_to_graphics_base(struct anv_pipeline *pipeline)
5077 {
5078    assert(pipeline->type == ANV_PIPELINE_GRAPHICS ||
5079           pipeline->type == ANV_PIPELINE_GRAPHICS_LIB);
5080    return (struct anv_graphics_base_pipeline *) pipeline;
5081 }
5082 
5083 static inline bool
anv_pipeline_has_stage(const struct anv_graphics_pipeline * pipeline,gl_shader_stage stage)5084 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
5085                        gl_shader_stage stage)
5086 {
5087    return (pipeline->base.base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
5088 }
5089 
5090 static inline bool
anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline * pipeline,gl_shader_stage stage)5091 anv_pipeline_base_has_stage(const struct anv_graphics_base_pipeline *pipeline,
5092                             gl_shader_stage stage)
5093 {
5094    return (pipeline->base.active_stages & mesa_to_vk_shader_stage(stage)) != 0;
5095 }
5096 
5097 static inline bool
anv_pipeline_is_primitive(const struct anv_graphics_pipeline * pipeline)5098 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
5099 {
5100    return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
5101 }
5102 
5103 static inline bool
anv_pipeline_is_mesh(const struct anv_graphics_pipeline * pipeline)5104 anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
5105 {
5106    return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
5107 }
5108 
5109 static inline bool
anv_gfx_all_color_write_masked(const struct anv_cmd_graphics_state * gfx,const struct vk_dynamic_graphics_state * dyn)5110 anv_gfx_all_color_write_masked(const struct anv_cmd_graphics_state *gfx,
5111                                const struct vk_dynamic_graphics_state *dyn)
5112 {
5113    uint8_t color_writes = dyn->cb.color_write_enables;
5114 
5115    /* All writes disabled through vkCmdSetColorWriteEnableEXT */
5116    if ((color_writes & ((1u << gfx->color_att_count) - 1)) == 0)
5117       return true;
5118 
5119    /* Or all write masks are empty */
5120    for (uint32_t i = 0; i < gfx->color_att_count; i++) {
5121       if (dyn->cb.attachments[i].write_mask != 0)
5122          return false;
5123    }
5124 
5125    return true;
5126 }
5127 
5128 static inline void
anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state * state)5129 anv_cmd_graphic_state_update_has_uint_rt(struct anv_cmd_graphics_state *state)
5130 {
5131    state->has_uint_rt = false;
5132    for (unsigned a = 0; a < state->color_att_count; a++) {
5133       if (vk_format_is_int(state->color_att[a].vk_format)) {
5134          state->has_uint_rt = true;
5135          break;
5136       }
5137    }
5138 }
5139 
5140 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
5141 static inline const struct brw_##prefix##_prog_data *                   \
5142 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
5143 {                                                                       \
5144    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
5145       return (const struct brw_##prefix##_prog_data *)                  \
5146          pipeline->base.shaders[stage]->prog_data;                      \
5147    } else {                                                             \
5148       return NULL;                                                      \
5149    }                                                                    \
5150 }
5151 
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs,MESA_SHADER_VERTEX)5152 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
5153 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
5154 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
5155 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
5156 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
5157 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
5158 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
5159 
5160 static inline const struct brw_cs_prog_data *
5161 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
5162 {
5163    assert(pipeline->cs);
5164    return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
5165 }
5166 
5167 static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline * pipeline)5168 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
5169 {
5170    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
5171       return &get_gs_prog_data(pipeline)->base;
5172    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
5173       return &get_tes_prog_data(pipeline)->base;
5174    else
5175       return &get_vs_prog_data(pipeline)->base;
5176 }
5177 
5178 VkResult
5179 anv_device_init_rt_shaders(struct anv_device *device);
5180 
5181 void
5182 anv_device_finish_rt_shaders(struct anv_device *device);
5183 
5184 struct anv_kernel_arg {
5185    bool is_ptr;
5186    uint16_t size;
5187 
5188    union {
5189       uint64_t u64;
5190       void *ptr;
5191    };
5192 };
5193 
5194 struct anv_kernel {
5195 #ifndef NDEBUG
5196    const char *name;
5197 #endif
5198    struct anv_shader_bin *bin;
5199    const struct intel_l3_config *l3_config;
5200 };
5201 
5202 struct anv_format_plane {
5203    enum isl_format isl_format:16;
5204    struct isl_swizzle swizzle;
5205 
5206    /* What aspect is associated to this plane */
5207    VkImageAspectFlags aspect;
5208 };
5209 
5210 struct anv_format {
5211    struct anv_format_plane planes[3];
5212    VkFormat vk_format;
5213    uint8_t n_planes;
5214    bool can_ycbcr;
5215    bool can_video;
5216 };
5217 
5218 static inline void
anv_assert_valid_aspect_set(VkImageAspectFlags aspects)5219 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
5220 {
5221    if (util_bitcount(aspects) == 1) {
5222       assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
5223                         VK_IMAGE_ASPECT_DEPTH_BIT |
5224                         VK_IMAGE_ASPECT_STENCIL_BIT |
5225                         VK_IMAGE_ASPECT_PLANE_0_BIT |
5226                         VK_IMAGE_ASPECT_PLANE_1_BIT |
5227                         VK_IMAGE_ASPECT_PLANE_2_BIT));
5228    } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
5229       assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
5230              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
5231                          VK_IMAGE_ASPECT_PLANE_1_BIT) ||
5232              aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
5233                          VK_IMAGE_ASPECT_PLANE_1_BIT |
5234                          VK_IMAGE_ASPECT_PLANE_2_BIT));
5235    } else {
5236       assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
5237                          VK_IMAGE_ASPECT_STENCIL_BIT));
5238    }
5239 }
5240 
5241 /**
5242  * Return the aspect's plane relative to all_aspects.  For an image, for
5243  * instance, all_aspects would be the set of aspects in the image.  For
5244  * an image view, all_aspects would be the subset of aspects represented
5245  * by that particular view.
5246  */
5247 static inline uint32_t
anv_aspect_to_plane(VkImageAspectFlags all_aspects,VkImageAspectFlagBits aspect)5248 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
5249                     VkImageAspectFlagBits aspect)
5250 {
5251    anv_assert_valid_aspect_set(all_aspects);
5252    assert(util_bitcount(aspect) == 1);
5253    assert(!(aspect & ~all_aspects));
5254 
5255    /* Because we always put image and view planes in aspect-bit-order, the
5256     * plane index is the number of bits in all_aspects before aspect.
5257     */
5258    return util_bitcount(all_aspects & (aspect - 1));
5259 }
5260 
5261 #define anv_foreach_image_aspect_bit(b, image, aspects) \
5262    u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
5263 
5264 const struct anv_format *
5265 anv_get_format(VkFormat format);
5266 
5267 static inline uint32_t
anv_get_format_planes(VkFormat vk_format)5268 anv_get_format_planes(VkFormat vk_format)
5269 {
5270    const struct anv_format *format = anv_get_format(vk_format);
5271 
5272    return format != NULL ? format->n_planes : 0;
5273 }
5274 
5275 struct anv_format_plane
5276 anv_get_format_plane(const struct intel_device_info *devinfo,
5277                      VkFormat vk_format, uint32_t plane,
5278                      VkImageTiling tiling);
5279 
5280 struct anv_format_plane
5281 anv_get_format_aspect(const struct intel_device_info *devinfo,
5282                       VkFormat vk_format,
5283                       VkImageAspectFlagBits aspect, VkImageTiling tiling);
5284 
5285 static inline enum isl_format
anv_get_isl_format(const struct intel_device_info * devinfo,VkFormat vk_format,VkImageAspectFlags aspect,VkImageTiling tiling)5286 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
5287                    VkImageAspectFlags aspect, VkImageTiling tiling)
5288 {
5289    return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
5290 }
5291 
5292 bool anv_format_supports_ccs_e(const struct intel_device_info *devinfo,
5293                                const enum isl_format format);
5294 
5295 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
5296                                   VkImageCreateFlags create_flags,
5297                                   VkFormat vk_format, VkImageTiling vk_tiling,
5298                                   VkImageUsageFlags vk_usage,
5299                                   const VkImageFormatListCreateInfo *fmt_list);
5300 
5301 extern VkFormat
5302 vk_format_from_android(unsigned android_format, unsigned android_usage);
5303 
5304 static inline VkFormat
anv_get_emulation_format(const struct anv_physical_device * pdevice,VkFormat format)5305 anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format)
5306 {
5307    if (pdevice->flush_astc_ldr_void_extent_denorms) {
5308       const struct util_format_description *desc =
5309          vk_format_description(format);
5310       if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC &&
5311           desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB)
5312          return format;
5313    }
5314 
5315    if (pdevice->emu_astc_ldr)
5316       return vk_texcompress_astc_emulation_format(format);
5317 
5318    return VK_FORMAT_UNDEFINED;
5319 }
5320 
5321 static inline bool
anv_is_format_emulated(const struct anv_physical_device * pdevice,VkFormat format)5322 anv_is_format_emulated(const struct anv_physical_device *pdevice, VkFormat format)
5323 {
5324    return anv_get_emulation_format(pdevice, format) != VK_FORMAT_UNDEFINED;
5325 }
5326 
5327 static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)5328 anv_swizzle_for_render(struct isl_swizzle swizzle)
5329 {
5330    /* Sometimes the swizzle will have alpha map to one.  We do this to fake
5331     * RGB as RGBA for texturing
5332     */
5333    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
5334           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
5335 
5336    /* But it doesn't matter what we render to that channel */
5337    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
5338 
5339    return swizzle;
5340 }
5341 
5342 void
5343 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
5344 
5345 /**
5346  * Describes how each part of anv_image will be bound to memory.
5347  */
5348 struct anv_image_memory_range {
5349    /**
5350     * Disjoint bindings into which each portion of the image will be bound.
5351     *
5352     * Binding images to memory can be complicated and invold binding different
5353     * portions of the image to different memory objects or regions.  For most
5354     * images, everything lives in the MAIN binding and gets bound by
5355     * vkBindImageMemory.  For disjoint multi-planar images, each plane has
5356     * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
5357     * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which are
5358     * implicit or driver-managed and live in special-case bindings.
5359     */
5360    enum anv_image_memory_binding {
5361       /**
5362        * Used if and only if image is not multi-planar disjoint. Bound by
5363        * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
5364        */
5365       ANV_IMAGE_MEMORY_BINDING_MAIN,
5366 
5367       /**
5368        * Used if and only if image is multi-planar disjoint.  Bound by
5369        * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
5370        */
5371       ANV_IMAGE_MEMORY_BINDING_PLANE_0,
5372       ANV_IMAGE_MEMORY_BINDING_PLANE_1,
5373       ANV_IMAGE_MEMORY_BINDING_PLANE_2,
5374 
5375       /**
5376        * Driver-private bo. In special cases we may store the aux surface and/or
5377        * aux state in this binding.
5378        */
5379       ANV_IMAGE_MEMORY_BINDING_PRIVATE,
5380 
5381       /** Sentinel */
5382       ANV_IMAGE_MEMORY_BINDING_END,
5383    } binding;
5384 
5385    uint32_t alignment;
5386    uint64_t size;
5387 
5388    /**
5389     * Offset is relative to the start of the binding created by
5390     * vkBindImageMemory, not to the start of the bo.
5391     */
5392    uint64_t offset;
5393 };
5394 
5395 /**
5396  * Subsurface of an anv_image.
5397  */
5398 struct anv_surface {
5399    struct isl_surf isl;
5400    struct anv_image_memory_range memory_range;
5401 };
5402 
5403 static inline bool MUST_CHECK
anv_surface_is_valid(const struct anv_surface * surface)5404 anv_surface_is_valid(const struct anv_surface *surface)
5405 {
5406    return surface->isl.size_B > 0 && surface->memory_range.size > 0;
5407 }
5408 
5409 struct anv_image {
5410    struct vk_image vk;
5411 
5412    uint32_t n_planes;
5413 
5414    /**
5415     * Image has multi-planar format and was created with
5416     * VK_IMAGE_CREATE_DISJOINT_BIT.
5417     */
5418    bool disjoint;
5419 
5420    /**
5421     * Image is a WSI image
5422     */
5423    bool from_wsi;
5424 
5425    /**
5426     * Image was imported from an struct AHardwareBuffer.  We have to delay
5427     * final image creation until bind time.
5428     */
5429    bool from_ahb;
5430 
5431    /**
5432     * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
5433     * must be released when the image is destroyed.
5434     */
5435    bool from_gralloc;
5436 
5437    /**
5438     * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC
5439     * LDR workaround or emulation.
5440     */
5441    VkFormat emu_plane_format;
5442 
5443    /**
5444     * The set of formats that will be used with the first plane of this image.
5445     *
5446     * Assuming all view formats have the same bits-per-channel, we support the
5447     * largest number of variations which may exist.
5448     */
5449    enum isl_format view_formats[5];
5450    unsigned num_view_formats;
5451 
5452    /**
5453     * The memory bindings created by vkCreateImage and vkBindImageMemory.
5454     *
5455     * For details on the image's memory layout, see check_memory_bindings().
5456     *
5457     * vkCreateImage constructs the `memory_range` for each
5458     * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
5459     * and only if `memory_range::size > 0`.
5460     *
5461     * vkBindImageMemory binds each valid `memory_range` to an `address`.
5462     * Usually, the app will provide the address via the parameters of
5463     * vkBindImageMemory.  However, special-case bindings may be bound to
5464     * driver-private memory.
5465     *
5466     * If needed a host pointer to the image is mapped for host image copies.
5467     */
5468    struct anv_image_binding {
5469       struct anv_image_memory_range memory_range;
5470       struct anv_address address;
5471       struct anv_sparse_binding_data sparse_data;
5472       void *host_map;
5473       uint64_t map_delta;
5474       uint64_t map_size;
5475    } bindings[ANV_IMAGE_MEMORY_BINDING_END];
5476 
5477    /**
5478     * Image subsurfaces
5479     *
5480     * For each foo, anv_image::planes[x].surface is valid if and only if
5481     * anv_image::aspects has a x aspect. Refer to anv_image_aspect_to_plane()
5482     * to figure the number associated with a given aspect.
5483     *
5484     * The hardware requires that the depth buffer and stencil buffer be
5485     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
5486     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
5487     * allocate the depth and stencil buffers as separate surfaces in the same
5488     * bo.
5489     */
5490    struct anv_image_plane {
5491       struct anv_surface primary_surface;
5492 
5493       /**
5494        * The base aux usage for this image.  For color images, this can be
5495        * either CCS_E or CCS_D depending on whether or not we can reliably
5496        * leave CCS on all the time.
5497        */
5498       enum isl_aux_usage aux_usage;
5499 
5500       struct anv_surface aux_surface;
5501 
5502       /** Location of the compression control surface.  */
5503       struct anv_image_memory_range compr_ctrl_memory_range;
5504 
5505       /** Location of the fast clear state.  */
5506       struct anv_image_memory_range fast_clear_memory_range;
5507 
5508       struct {
5509          /** Whether the image has CCS data mapped through AUX-TT. */
5510          bool mapped;
5511 
5512          /** Main address of the mapping. */
5513          uint64_t addr;
5514 
5515          /** Size of the mapping. */
5516          uint64_t size;
5517       } aux_tt;
5518    } planes[3];
5519 
5520    struct anv_image_memory_range vid_dmv_top_surface;
5521 
5522    /* Link in the anv_device.image_private_objects list */
5523    struct list_head link;
5524    struct anv_image_memory_range av1_cdf_table;
5525 };
5526 
5527 static inline bool
anv_image_is_protected(const struct anv_image * image)5528 anv_image_is_protected(const struct anv_image *image)
5529 {
5530    return image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT;
5531 }
5532 
5533 static inline bool
anv_image_is_sparse(const struct anv_image * image)5534 anv_image_is_sparse(const struct anv_image *image)
5535 {
5536    return image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
5537 }
5538 
5539 static inline bool
anv_image_is_externally_shared(const struct anv_image * image)5540 anv_image_is_externally_shared(const struct anv_image *image)
5541 {
5542    return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
5543           image->vk.external_handle_types != 0;
5544 }
5545 
5546 static inline bool
anv_image_has_private_binding(const struct anv_image * image)5547 anv_image_has_private_binding(const struct anv_image *image)
5548 {
5549    const struct anv_image_binding private_binding =
5550       image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
5551    return private_binding.memory_range.size != 0;
5552 }
5553 
5554 static inline bool
anv_image_format_is_d16_or_s8(const struct anv_image * image)5555 anv_image_format_is_d16_or_s8(const struct anv_image *image)
5556 {
5557    return image->vk.format == VK_FORMAT_D16_UNORM ||
5558       image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT ||
5559       image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
5560       image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
5561       image->vk.format == VK_FORMAT_S8_UINT;
5562 }
5563 
5564 static inline bool
anv_image_can_host_memcpy(const struct anv_image * image)5565 anv_image_can_host_memcpy(const struct anv_image *image)
5566 {
5567    const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
5568    struct isl_tile_info tile_info;
5569    isl_surf_get_tile_info(surf, &tile_info);
5570 
5571    const bool array_pitch_aligned_to_tile =
5572       surf->array_pitch_el_rows % tile_info.logical_extent_el.height == 0;
5573 
5574    return image->vk.tiling != VK_IMAGE_TILING_LINEAR &&
5575           image->n_planes == 1 &&
5576           array_pitch_aligned_to_tile &&
5577           image->vk.mip_levels == 1;
5578 }
5579 
5580 /* The ordering of this enum is important */
5581 enum anv_fast_clear_type {
5582    /** Image does not have/support any fast-clear blocks */
5583    ANV_FAST_CLEAR_NONE = 0,
5584    /** Image has/supports fast-clear but only to the default value */
5585    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
5586    /** Image has/supports fast-clear with an arbitrary fast-clear value */
5587    ANV_FAST_CLEAR_ANY = 2,
5588 };
5589 
5590 /**
5591  * Return the aspect's _format_ plane, not its _memory_ plane (using the
5592  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
5593  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
5594  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
5595  */
5596 static inline uint32_t
anv_image_aspect_to_plane(const struct anv_image * image,VkImageAspectFlagBits aspect)5597 anv_image_aspect_to_plane(const struct anv_image *image,
5598                           VkImageAspectFlagBits aspect)
5599 {
5600    return anv_aspect_to_plane(image->vk.aspects, aspect);
5601 }
5602 
5603 /* Returns the number of auxiliary buffer levels attached to an image. */
5604 static inline uint8_t
anv_image_aux_levels(const struct anv_image * const image,VkImageAspectFlagBits aspect)5605 anv_image_aux_levels(const struct anv_image * const image,
5606                      VkImageAspectFlagBits aspect)
5607 {
5608    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5609    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
5610       return 0;
5611 
5612    return image->vk.mip_levels;
5613 }
5614 
5615 /* Returns the number of auxiliary buffer layers attached to an image. */
5616 static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,VkImageAspectFlagBits aspect,const uint8_t miplevel)5617 anv_image_aux_layers(const struct anv_image * const image,
5618                      VkImageAspectFlagBits aspect,
5619                      const uint8_t miplevel)
5620 {
5621    assert(image);
5622 
5623    /* The miplevel must exist in the main buffer. */
5624    assert(miplevel < image->vk.mip_levels);
5625 
5626    if (miplevel >= anv_image_aux_levels(image, aspect)) {
5627       /* There are no layers with auxiliary data because the miplevel has no
5628        * auxiliary data.
5629        */
5630       return 0;
5631    }
5632 
5633    return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
5634 }
5635 
5636 static inline struct anv_address MUST_CHECK
anv_image_address(const struct anv_image * image,const struct anv_image_memory_range * mem_range)5637 anv_image_address(const struct anv_image *image,
5638                   const struct anv_image_memory_range *mem_range)
5639 {
5640    const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
5641    assert(binding->memory_range.offset == 0);
5642 
5643    if (mem_range->size == 0)
5644       return ANV_NULL_ADDRESS;
5645 
5646    return anv_address_add(binding->address, mem_range->offset);
5647 }
5648 
5649 bool
5650 anv_image_view_formats_incomplete(const struct anv_image *image);
5651 
5652 static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device * device,const struct anv_image * image,enum isl_format view_format,VkImageAspectFlagBits aspect,bool for_sampler)5653 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
5654                                const struct anv_image *image,
5655                                enum isl_format view_format,
5656                                VkImageAspectFlagBits aspect,
5657                                bool for_sampler)
5658 {
5659    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5660    const struct anv_image_memory_range *mem_range =
5661       &image->planes[plane].fast_clear_memory_range;
5662 
5663    const struct anv_address base_addr = anv_image_address(image, mem_range);
5664    if (anv_address_is_null(base_addr))
5665       return ANV_NULL_ADDRESS;
5666 
5667    if (view_format == ISL_FORMAT_UNSUPPORTED)
5668       view_format = image->planes[plane].primary_surface.isl.format;
5669 
5670    uint64_t access_offset = device->info->ver == 9 && for_sampler ? 16 : 0;
5671    const unsigned clear_state_size = device->info->ver >= 11 ? 64 : 32;
5672    for (int i = 0; i < image->num_view_formats; i++) {
5673       if (view_format == image->view_formats[i]) {
5674          uint64_t entry_offset = i * clear_state_size + access_offset;
5675          return anv_address_add(base_addr, entry_offset);
5676       }
5677    }
5678 
5679    assert(anv_image_view_formats_incomplete(image));
5680    return anv_address_add(base_addr, access_offset);
5681 }
5682 
5683 static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)5684 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
5685                                    const struct anv_image *image,
5686                                    VkImageAspectFlagBits aspect)
5687 {
5688    /* Xe2+ platforms don't need fast clear type. We shouldn't get here. */
5689    assert(device->info->ver < 20);
5690    struct anv_address addr =
5691       anv_image_get_clear_color_addr(device, image, ISL_FORMAT_UNSUPPORTED,
5692                                      aspect, false);
5693 
5694    /* Refer to add_aux_state_tracking_buffer(). */
5695    unsigned clear_color_state_size;
5696    if (device->info->ver >= 11) {
5697       assert(device->isl_dev.ss.clear_color_state_size == 32);
5698       clear_color_state_size = (image->num_view_formats - 1) * 64 + 32 - 8;
5699    } else {
5700       assert(device->isl_dev.ss.clear_value_size == 16);
5701       clear_color_state_size = image->num_view_formats * 16 * 2;
5702    }
5703 
5704    return anv_address_add(addr, clear_color_state_size);
5705 }
5706 
5707 static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect,uint32_t level,uint32_t array_layer)5708 anv_image_get_compression_state_addr(const struct anv_device *device,
5709                                      const struct anv_image *image,
5710                                      VkImageAspectFlagBits aspect,
5711                                      uint32_t level, uint32_t array_layer)
5712 {
5713    /* Xe2+ platforms don't use compression state. We shouldn't get here. */
5714    assert(device->info->ver < 20);
5715    assert(level < anv_image_aux_levels(image, aspect));
5716    assert(array_layer < anv_image_aux_layers(image, aspect, level));
5717    UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
5718    assert(isl_aux_usage_has_ccs_e(image->planes[plane].aux_usage));
5719 
5720    /* Relative to start of the plane's fast clear type */
5721    uint32_t offset;
5722 
5723    offset = 4; /* Go past the fast clear type */
5724 
5725    if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
5726       for (uint32_t l = 0; l < level; l++)
5727          offset += u_minify(image->vk.extent.depth, l) * 4;
5728    } else {
5729       offset += level * image->vk.array_layers * 4;
5730    }
5731 
5732    offset += array_layer * 4;
5733 
5734    assert(offset < image->planes[plane].fast_clear_memory_range.size);
5735 
5736    return anv_address_add(
5737       anv_image_get_fast_clear_type_addr(device, image, aspect),
5738       offset);
5739 }
5740 
5741 static inline const struct anv_image_memory_range *
anv_image_get_aux_memory_range(const struct anv_image * image,uint32_t plane)5742 anv_image_get_aux_memory_range(const struct anv_image *image,
5743                                uint32_t plane)
5744 {
5745    if (image->planes[plane].aux_surface.memory_range.size > 0)
5746      return &image->planes[plane].aux_surface.memory_range;
5747    else
5748      return &image->planes[plane].compr_ctrl_memory_range;
5749 }
5750 
5751 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
5752 static inline bool
anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,const struct anv_image * image)5753 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
5754                         const struct anv_image *image)
5755 {
5756    if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
5757       return false;
5758 
5759    /* For Gfx8-11, there are some restrictions around sampling from HiZ.
5760     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
5761     * say:
5762     *
5763     *    "If this field is set to AUX_HIZ, Number of Multisamples must
5764     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
5765     */
5766    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
5767       return false;
5768 
5769    if (!devinfo->has_sample_with_hiz)
5770       return false;
5771 
5772    return image->vk.samples == 1;
5773 }
5774 
5775 /* Returns true if an MCS-enabled buffer can be sampled from. */
5776 static inline bool
anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,const struct anv_image * image)5777 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
5778                               const struct anv_image *image)
5779 {
5780    assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
5781    const uint32_t plane =
5782       anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
5783 
5784    assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
5785 
5786    const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
5787 
5788    /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
5789     * See HSD 1707282275, wa_14013111325. Due to the use of
5790     * format-reinterpretation, a simplified workaround is implemented.
5791     */
5792    if (intel_needs_workaround(devinfo, 14013111325) &&
5793        isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
5794       return false;
5795    }
5796 
5797    return true;
5798 }
5799 
5800 static inline bool
anv_image_plane_uses_aux_map(const struct anv_device * device,const struct anv_image * image,uint32_t plane)5801 anv_image_plane_uses_aux_map(const struct anv_device *device,
5802                              const struct anv_image *image,
5803                              uint32_t plane)
5804 {
5805    return device->info->has_aux_map &&
5806       isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
5807 }
5808 
5809 static inline bool
anv_image_uses_aux_map(const struct anv_device * device,const struct anv_image * image)5810 anv_image_uses_aux_map(const struct anv_device *device,
5811                        const struct anv_image *image)
5812 {
5813    for (uint32_t p = 0; p < image->n_planes; ++p) {
5814       if (anv_image_plane_uses_aux_map(device, image, p))
5815          return true;
5816    }
5817 
5818    return false;
5819 }
5820 
5821 static inline bool
anv_bo_allows_aux_map(const struct anv_device * device,const struct anv_bo * bo)5822 anv_bo_allows_aux_map(const struct anv_device *device,
5823                       const struct anv_bo *bo)
5824 {
5825    if (device->aux_map_ctx == NULL)
5826       return false;
5827 
5828    return (bo->alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) != 0;
5829 }
5830 
5831 static inline bool
anv_address_allows_aux_map(const struct anv_device * device,struct anv_address addr)5832 anv_address_allows_aux_map(const struct anv_device *device,
5833                            struct anv_address addr)
5834 {
5835    if (device->aux_map_ctx == NULL)
5836       return false;
5837 
5838    /* Technically, we really only care about what offset the image is bound
5839     * into on the BO, but we don't have that information here. As a heuristic,
5840     * rely on the BO offset instead.
5841     */
5842    if (anv_address_physical(addr) %
5843        intel_aux_map_get_alignment(device->aux_map_ctx) != 0)
5844       return false;
5845 
5846    return true;
5847 }
5848 
5849 void
5850 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
5851                                   const struct anv_image *image,
5852                                   VkImageAspectFlagBits aspect,
5853                                   enum isl_aux_usage aux_usage,
5854                                   uint32_t level,
5855                                   uint32_t base_layer,
5856                                   uint32_t layer_count);
5857 
5858 void
5859 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
5860                                        const struct anv_image *image,
5861                                        const enum isl_format format,
5862                                        const struct isl_swizzle swizzle,
5863                                        union isl_color_value clear_color);
5864 
5865 void
5866 anv_cmd_buffer_load_clear_color(struct anv_cmd_buffer *cmd_buffer,
5867                                 struct anv_state state,
5868                                 const struct anv_image_view *iview);
5869 
5870 enum anv_image_memory_binding
5871 anv_image_aspect_to_binding(struct anv_image *image,
5872                             VkImageAspectFlags aspect);
5873 
5874 void
5875 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
5876                       const struct anv_image *image,
5877                       VkImageAspectFlagBits aspect,
5878                       enum isl_aux_usage aux_usage,
5879                       enum isl_format format, struct isl_swizzle swizzle,
5880                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
5881                       VkRect2D area, union isl_color_value clear_color);
5882 void
5883 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
5884                               const struct anv_image *image,
5885                               VkImageAspectFlags aspects,
5886                               enum isl_aux_usage depth_aux_usage,
5887                               uint32_t level,
5888                               uint32_t base_layer, uint32_t layer_count,
5889                               VkRect2D area,
5890                               const VkClearDepthStencilValue *clear_value);
5891 void
5892 anv_attachment_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
5893                             const struct anv_attachment *att,
5894                             VkImageLayout layout,
5895                             VkImageAspectFlagBits aspect);
5896 
5897 static inline union isl_color_value
anv_image_hiz_clear_value(const struct anv_image * image)5898 anv_image_hiz_clear_value(const struct anv_image *image)
5899 {
5900    /* The benchmarks we're tracking tend to prefer clearing depth buffers to
5901     * 0.0f when the depth buffers are part of images with multiple aspects.
5902     * Otherwise, they tend to prefer clearing depth buffers to 1.0f.
5903     */
5904    if (image->n_planes == 2)
5905       return (union isl_color_value) { .f32 = { 0.0f, } };
5906    else
5907       return (union isl_color_value) { .f32 = { 1.0f, } };
5908 }
5909 
5910 void
5911 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
5912                  const struct anv_image *image,
5913                  VkImageAspectFlagBits aspect, uint32_t level,
5914                  uint32_t base_layer, uint32_t layer_count,
5915                  enum isl_aux_op hiz_op);
5916 void
5917 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
5918                     const struct anv_image *image,
5919                     VkImageAspectFlags aspects,
5920                     uint32_t level,
5921                     uint32_t base_layer, uint32_t layer_count,
5922                     VkRect2D area,
5923                     const VkClearDepthStencilValue *clear_value);
5924 void
5925 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
5926                  const struct anv_image *image,
5927                  enum isl_format format, struct isl_swizzle swizzle,
5928                  VkImageAspectFlagBits aspect,
5929                  uint32_t base_layer, uint32_t layer_count,
5930                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
5931                  bool predicate);
5932 void
5933 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
5934                  const struct anv_image *image,
5935                  enum isl_format format, struct isl_swizzle swizzle,
5936                  VkImageAspectFlagBits aspect, uint32_t level,
5937                  uint32_t base_layer, uint32_t layer_count,
5938                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
5939                  bool predicate);
5940 
5941 isl_surf_usage_flags_t
5942 anv_image_choose_isl_surf_usage(struct anv_physical_device *device,
5943                                 VkImageCreateFlags vk_create_flags,
5944                                 VkImageUsageFlags vk_usage,
5945                                 isl_surf_usage_flags_t isl_extra_usage,
5946                                 VkImageAspectFlagBits aspect,
5947                                 VkImageCompressionFlagsEXT comp_flags);
5948 
5949 void
5950 anv_cmd_copy_addr(struct anv_cmd_buffer *cmd_buffer,
5951                   struct anv_address src_addr,
5952                   struct anv_address dst_addr,
5953                   uint64_t size);
5954 void
5955 anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
5956                          struct anv_address address,
5957                          VkDeviceSize size,
5958                          uint32_t data,
5959                          bool protected);
5960 void
5961 anv_cmd_fill_buffer_addr(VkCommandBuffer cmd_buffer,
5962                          VkDeviceAddress dstAddr,
5963                          VkDeviceSize size,
5964                          uint32_t data);
5965 void
5966 anv_cmd_buffer_update_addr(struct anv_cmd_buffer *cmd_buffer,
5967                            struct anv_address address,
5968                            VkDeviceSize dstOffset,
5969                            VkDeviceSize dataSize,
5970                            const void* pData,
5971                            bool is_protected);
5972 void
5973 anv_cmd_write_buffer_cp(VkCommandBuffer cmd_buffer,
5974                         VkDeviceAddress dstAddr,
5975                         void *data,
5976                         uint32_t size);
5977 void
5978 anv_cmd_dispatch_unaligned(VkCommandBuffer cmd_buffer,
5979                            uint32_t invocations_x,
5980                            uint32_t invocations_y,
5981                            uint32_t invocations_z);
5982 
5983 void
5984 anv_cmd_flush_buffer_write_cp(VkCommandBuffer cmd_buffer);
5985 
5986 VkResult
5987 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
5988 
5989 bool
5990 anv_can_hiz_clear_ds_view(struct anv_device *device,
5991                           const struct anv_image_view *iview,
5992                           VkImageLayout layout,
5993                           VkImageAspectFlags clear_aspects,
5994                           float depth_clear_value,
5995                           VkRect2D render_area,
5996                           const VkQueueFlagBits queue_flags);
5997 
5998 bool
5999 anv_can_fast_clear_color(const struct anv_cmd_buffer *cmd_buffer,
6000                          const struct anv_image *image,
6001                          unsigned level,
6002                          const struct VkClearRect *clear_rect,
6003                          VkImageLayout layout,
6004                          enum isl_format view_format,
6005                          union isl_color_value clear_color);
6006 
6007 enum isl_aux_state ATTRIBUTE_PURE
6008 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
6009                         const struct anv_image *image,
6010                         const VkImageAspectFlagBits aspect,
6011                         const VkImageLayout layout,
6012                         const VkQueueFlagBits queue_flags);
6013 
6014 enum isl_aux_usage ATTRIBUTE_PURE
6015 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
6016                         const struct anv_image *image,
6017                         const VkImageAspectFlagBits aspect,
6018                         const VkImageUsageFlagBits usage,
6019                         const VkImageLayout layout,
6020                         const VkQueueFlagBits queue_flags);
6021 
6022 enum anv_fast_clear_type ATTRIBUTE_PURE
6023 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
6024                               const struct anv_image * const image,
6025                               const VkImageAspectFlagBits aspect,
6026                               const VkImageLayout layout,
6027                               const VkQueueFlagBits queue_flags);
6028 
6029 bool ATTRIBUTE_PURE
6030 anv_layout_has_untracked_aux_writes(const struct intel_device_info * const devinfo,
6031                                     const struct anv_image * const image,
6032                                     const VkImageAspectFlagBits aspect,
6033                                     const VkImageLayout layout,
6034                                     const VkQueueFlagBits queue_flags);
6035 
6036 static inline bool
anv_image_aspects_compatible(VkImageAspectFlags aspects1,VkImageAspectFlags aspects2)6037 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
6038                              VkImageAspectFlags aspects2)
6039 {
6040    if (aspects1 == aspects2)
6041       return true;
6042 
6043    /* Only 1 color aspects are compatibles. */
6044    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
6045        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
6046        util_bitcount(aspects1) == util_bitcount(aspects2))
6047       return true;
6048 
6049    return false;
6050 }
6051 
6052 struct anv_image_view {
6053    struct vk_image_view vk;
6054 
6055    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
6056 
6057    unsigned n_planes;
6058 
6059    /**
6060     * True if the surface states (if any) are owned by some anv_state_stream
6061     * from internal_surface_state_pool.
6062     */
6063    bool use_surface_state_stream;
6064 
6065    struct {
6066       struct isl_view isl;
6067 
6068       /**
6069        * A version of the image view for storage usage (can apply 3D image
6070        * slicing).
6071        */
6072       struct isl_view isl_storage;
6073 
6074       /**
6075        * RENDER_SURFACE_STATE when using image as a sampler surface with an
6076        * image layout of SHADER_READ_ONLY_OPTIMAL or
6077        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
6078        */
6079       struct anv_surface_state optimal_sampler;
6080 
6081       /**
6082        * RENDER_SURFACE_STATE when using image as a sampler surface with an
6083        * image layout of GENERAL.
6084        */
6085       struct anv_surface_state general_sampler;
6086 
6087       /**
6088        * RENDER_SURFACE_STATE when using image as a storage image.
6089        */
6090       struct anv_surface_state storage;
6091    } planes[3];
6092 };
6093 
6094 enum anv_image_view_state_flags {
6095    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 0),
6096 };
6097 
6098 void anv_image_fill_surface_state(struct anv_device *device,
6099                                   const struct anv_image *image,
6100                                   VkImageAspectFlagBits aspect,
6101                                   const struct isl_view *view,
6102                                   isl_surf_usage_flags_t view_usage,
6103                                   enum isl_aux_usage aux_usage,
6104                                   const union isl_color_value *clear_color,
6105                                   enum anv_image_view_state_flags flags,
6106                                   struct anv_surface_state *state_inout);
6107 
6108 
6109 static inline const struct anv_surface_state *
anv_image_view_texture_surface_state(const struct anv_image_view * iview,uint32_t plane,VkImageLayout layout)6110 anv_image_view_texture_surface_state(const struct anv_image_view *iview,
6111                                      uint32_t plane, VkImageLayout layout)
6112 {
6113    return (layout == VK_IMAGE_LAYOUT_GENERAL ||
6114            layout == VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR) ?
6115           &iview->planes[plane].general_sampler :
6116           &iview->planes[plane].optimal_sampler;
6117 }
6118 
6119 static inline const struct anv_surface_state *
anv_image_view_storage_surface_state(const struct anv_image_view * iview)6120 anv_image_view_storage_surface_state(const struct anv_image_view *iview)
6121 {
6122    return &iview->planes[0].storage;
6123 }
6124 
6125 static inline bool
anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state * state,const struct anv_image * image)6126 anv_cmd_graphics_state_has_image_as_attachment(const struct anv_cmd_graphics_state *state,
6127                                                const struct anv_image *image)
6128 {
6129    for (unsigned a = 0; a < state->color_att_count; a++) {
6130       if (state->color_att[a].iview &&
6131           state->color_att[a].iview->image == image)
6132          return true;
6133    }
6134 
6135    if (state->depth_att.iview && state->depth_att.iview->image == image)
6136       return true;
6137    if (state->stencil_att.iview && state->stencil_att.iview->image == image)
6138       return true;
6139 
6140    return false;
6141 }
6142 
6143 struct anv_image_create_info {
6144    const VkImageCreateInfo *vk_info;
6145 
6146    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
6147    isl_tiling_flags_t isl_tiling_flags;
6148 
6149    /** These flags will be added to any derived from VkImageCreateInfo. */
6150    isl_surf_usage_flags_t isl_extra_usage_flags;
6151 
6152    /** An opt-in stride in pixels, should be 0 for implicit layouts */
6153    uint32_t stride;
6154 
6155    /** Whether to allocate private binding */
6156    bool no_private_binding_alloc;
6157 };
6158 
6159 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
6160                         const struct anv_image_create_info *create_info);
6161 
6162 void anv_image_finish(struct anv_image *image);
6163 
6164 void anv_image_get_memory_requirements(struct anv_device *device,
6165                                        struct anv_image *image,
6166                                        VkImageAspectFlags aspects,
6167                                        VkMemoryRequirements2 *pMemoryRequirements);
6168 
6169 void anv_image_view_init(struct anv_device *device,
6170                          struct anv_image_view *iview,
6171                          const VkImageViewCreateInfo *pCreateInfo,
6172                          struct anv_state_stream *state_stream);
6173 
6174 void anv_image_view_finish(struct anv_image_view *iview);
6175 
6176 enum isl_format
6177 anv_isl_format_for_descriptor_type(const struct anv_device *device,
6178                                    VkDescriptorType type);
6179 
6180 static inline isl_surf_usage_flags_t
anv_isl_usage_for_descriptor_type(const VkDescriptorType type)6181 anv_isl_usage_for_descriptor_type(const VkDescriptorType type)
6182 {
6183    switch(type) {
6184       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
6185       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
6186          return ISL_SURF_USAGE_CONSTANT_BUFFER_BIT;
6187       default:
6188          return ISL_SURF_USAGE_STORAGE_BIT;
6189    }
6190 }
6191 
6192 VkFormatFeatureFlags2
6193 anv_get_image_format_features2(const struct anv_physical_device *physical_device,
6194                                VkFormat vk_format,
6195                                const struct anv_format *anv_format,
6196                                VkImageTiling vk_tiling,
6197                                const struct isl_drm_modifier_info *isl_mod_info);
6198 
6199 void anv_fill_buffer_surface_state(struct anv_device *device,
6200                                    void *surface_state_ptr,
6201                                    enum isl_format format,
6202                                    struct isl_swizzle swizzle,
6203                                    isl_surf_usage_flags_t usage,
6204                                    struct anv_address address,
6205                                    uint32_t range, uint32_t stride);
6206 
6207 
6208 struct gfx8_border_color {
6209    union {
6210       float float32[4];
6211       uint32_t uint32[4];
6212    };
6213    /* Pad out to 64 bytes */
6214    uint32_t _pad[12];
6215 };
6216 
6217 struct anv_sampler {
6218    struct vk_sampler            vk;
6219 
6220    /* Hash of the sampler state + border color, useful for embedded samplers
6221     * and included in the descriptor layout hash.
6222     */
6223    unsigned char                sha1[20];
6224 
6225    uint32_t                     state[3][4];
6226    /* Packed SAMPLER_STATE without the border color pointer. */
6227    uint32_t                     state_no_bc[3][4];
6228    uint32_t                     n_planes;
6229 
6230    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
6231     * and with a 32-byte stride for use as bindless samplers.
6232     */
6233    struct anv_state             bindless_state;
6234 
6235    struct anv_state             custom_border_color;
6236 };
6237 
6238 
6239 struct anv_query_pool {
6240    struct vk_query_pool                         vk;
6241 
6242    /** Stride between queries, in bytes */
6243    uint32_t                                     stride;
6244    /** Number of slots in this query pool */
6245    struct anv_bo *                              bo;
6246 
6247    /** Location for the KHR_performance_query small batch updating
6248     *  ANV_PERF_QUERY_OFFSET_REG
6249     */
6250    uint32_t                                     khr_perf_preambles_offset;
6251 
6252    /** Size of each small batch */
6253    uint32_t                                     khr_perf_preamble_stride;
6254 
6255    /* KHR perf queries : */
6256    /** Query pass size in bytes(availability + padding + query data) */
6257    uint32_t                                     pass_size;
6258    /** Offset of the query data within a pass */
6259    uint32_t                                     data_offset;
6260    /** query data / 2 */
6261    uint32_t                                     snapshot_size;
6262    uint32_t                                     n_counters;
6263    struct intel_perf_counter_pass                *counter_pass;
6264    uint32_t                                     n_passes;
6265    struct intel_perf_query_info                 **pass_query;
6266 
6267    /* Video encoding queries */
6268    VkVideoCodecOperationFlagsKHR                codec;
6269 };
6270 
khr_perf_query_preamble_offset(const struct anv_query_pool * pool,uint32_t pass)6271 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
6272                                                       uint32_t pass)
6273 {
6274    return pool->khr_perf_preambles_offset +
6275           pool->khr_perf_preamble_stride * pass;
6276 }
6277 
6278 struct anv_vid_mem {
6279    struct anv_device_memory *mem;
6280    VkDeviceSize       offset;
6281    VkDeviceSize       size;
6282 };
6283 
6284 #define ANV_MB_WIDTH 16
6285 #define ANV_MB_HEIGHT 16
6286 #define ANV_VIDEO_H264_MAX_DPB_SLOTS 17
6287 #define ANV_VIDEO_H264_MAX_NUM_REF_FRAME 16
6288 #define ANV_VIDEO_H265_MAX_NUM_REF_FRAME 16
6289 #define ANV_VIDEO_H265_HCP_NUM_REF_FRAME 8
6290 #define ANV_MAX_H265_CTB_SIZE 64
6291 
6292 enum anv_vid_mem_h264_types {
6293    ANV_VID_MEM_H264_INTRA_ROW_STORE,
6294    ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE,
6295    ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH,
6296    ANV_VID_MEM_H264_MPR_ROW_SCRATCH,
6297    ANV_VID_MEM_H264_MAX,
6298 };
6299 
6300 enum anv_vid_mem_h265_types {
6301    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE,
6302    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE,
6303    ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN,
6304    ANV_VID_MEM_H265_METADATA_LINE,
6305    ANV_VID_MEM_H265_METADATA_TILE_LINE,
6306    ANV_VID_MEM_H265_METADATA_TILE_COLUMN,
6307    ANV_VID_MEM_H265_SAO_LINE,
6308    ANV_VID_MEM_H265_SAO_TILE_LINE,
6309    ANV_VID_MEM_H265_SAO_TILE_COLUMN,
6310    ANV_VID_MEM_H265_DEC_MAX,
6311    ANV_VID_MEM_H265_SSE_SRC_PIX_ROW_STORE = ANV_VID_MEM_H265_DEC_MAX,
6312    ANV_VID_MEM_H265_ENC_MAX,
6313 };
6314 
6315 enum anv_vid_mem_av1_types {
6316    ANV_VID_MEM_AV1_BITSTREAM_LINE_ROWSTORE,
6317    ANV_VID_MEM_AV1_BITSTREAM_TILE_LINE_ROWSTORE,
6318    ANV_VID_MEM_AV1_INTRA_PREDICTION_LINE_ROWSTORE,
6319    ANV_VID_MEM_AV1_INTRA_PREDICTION_TILE_LINE_ROWSTORE,
6320    ANV_VID_MEM_AV1_SPATIAL_MOTION_VECTOR_LINE,
6321    ANV_VID_MEM_AV1_SPATIAL_MOTION_VECTOR_TILE_LINE,
6322    ANV_VID_MEM_AV1_LOOP_RESTORATION_META_TILE_COLUMN,
6323    ANV_VID_MEM_AV1_LOOP_RESTORATION_FILTER_TILE_LINE_Y,
6324    ANV_VID_MEM_AV1_LOOP_RESTORATION_FILTER_TILE_LINE_U,
6325    ANV_VID_MEM_AV1_LOOP_RESTORATION_FILTER_TILE_LINE_V,
6326    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_LINE_Y,
6327    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_LINE_U,
6328    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_LINE_V,
6329    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_TILE_LINE_Y,
6330    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_TILE_LINE_U,
6331    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_TILE_LINE_V,
6332    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_TILE_COLUMN_Y,
6333    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_TILE_COLUMN_U,
6334    ANV_VID_MEM_AV1_DEBLOCKER_FILTER_TILE_COLUMN_V,
6335    ANV_VID_MEM_AV1_CDEF_FILTER_LINE,
6336    ANV_VID_MEM_AV1_CDEF_FILTER_TILE_LINE,
6337    ANV_VID_MEM_AV1_CDEF_FILTER_TILE_COLUMN,
6338    ANV_VID_MEM_AV1_CDEF_FILTER_META_TILE_LINE,
6339    ANV_VID_MEM_AV1_CDEF_FILTER_META_TILE_COLUMN,
6340    ANV_VID_MEM_AV1_CDEF_FILTER_TOP_LEFT_CORNER,
6341    ANV_VID_MEM_AV1_SUPER_RES_TILE_COLUMN_Y,
6342    ANV_VID_MEM_AV1_SUPER_RES_TILE_COLUMN_U,
6343    ANV_VID_MEM_AV1_SUPER_RES_TILE_COLUMN_V,
6344    ANV_VID_MEM_AV1_LOOP_RESTORATION_FILTER_TILE_COLUMN_Y,
6345    ANV_VID_MEM_AV1_LOOP_RESTORATION_FILTER_TILE_COLUMN_U,
6346    ANV_VID_MEM_AV1_LOOP_RESTORATION_FILTER_TILE_COLUMN_V,
6347    ANV_VID_MEM_AV1_CDF_DEFAULTS_0,
6348    ANV_VID_MEM_AV1_CDF_DEFAULTS_1,
6349    ANV_VID_MEM_AV1_CDF_DEFAULTS_2,
6350    ANV_VID_MEM_AV1_CDF_DEFAULTS_3,
6351    ANV_VID_MEM_AV1_DBD_BUFFER,
6352    ANV_VID_MEM_AV1_MAX,
6353 };
6354 
6355 struct anv_av1_video_refs_info {
6356    const struct anv_image *img;
6357    uint8_t default_cdf_index;
6358 };
6359 
6360 struct anv_video_session {
6361    struct vk_video_session vk;
6362 
6363    bool cdf_initialized;
6364    /* the decoder needs some private memory allocations */
6365    struct anv_vid_mem vid_mem[ANV_VID_MEM_AV1_MAX];
6366    struct anv_av1_video_refs_info prev_refs[STD_VIDEO_AV1_NUM_REF_FRAMES];
6367 };
6368 
6369 struct anv_video_session_params {
6370    struct vk_video_session_parameters vk;
6371    VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
6372 };
6373 
6374 void anv_init_av1_cdf_tables(struct anv_cmd_buffer *cmd,
6375                              struct anv_video_session *vid);
6376 
6377 uint32_t anv_video_get_image_mv_size(struct anv_device *device,
6378                                      struct anv_image *image,
6379                                      const struct VkVideoProfileListInfoKHR *profile_list);
6380 
6381 void
6382 anv_dump_pipe_bits(enum anv_pipe_bits bits, FILE *f);
6383 
6384 void
6385 anv_cmd_buffer_pending_pipe_debug(struct anv_cmd_buffer *cmd_buffer,
6386                                   enum anv_pipe_bits bits,
6387                                   const char* reason);
6388 
6389 static inline void
anv_add_pending_pipe_bits(struct anv_cmd_buffer * cmd_buffer,enum anv_pipe_bits bits,const char * reason)6390 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
6391                           enum anv_pipe_bits bits,
6392                           const char* reason)
6393 {
6394    cmd_buffer->state.pending_pipe_bits |= bits;
6395    if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) {
6396       anv_cmd_buffer_pending_pipe_debug(cmd_buffer, bits, reason);
6397    }
6398 }
6399 
6400 struct anv_performance_configuration_intel {
6401    struct vk_object_base      base;
6402 
6403    struct intel_perf_registers *register_config;
6404 
6405    uint64_t                   config_id;
6406 };
6407 
6408 void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
6409 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
6410 void anv_device_perf_init(struct anv_device *device);
6411 void anv_device_perf_close(struct anv_device *device);
6412 void anv_perf_write_pass_results(struct intel_perf_config *perf,
6413                                  struct anv_query_pool *pool, uint32_t pass,
6414                                  const struct intel_perf_query_result *accumulated_results,
6415                                  union VkPerformanceCounterResultKHR *results);
6416 
6417 void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
6418                                 struct nir_shader *fs_nir,
6419                                 struct anv_device *device,
6420                                 const VkGraphicsPipelineCreateInfo *info);
6421 
6422 /* Use to emit a series of memcpy operations */
6423 struct anv_memcpy_state {
6424    struct anv_device *device;
6425    struct anv_cmd_buffer *cmd_buffer;
6426    struct anv_batch *batch;
6427 
6428    /* Configuration programmed by the memcpy operation */
6429    struct intel_urb_config urb_cfg;
6430 
6431    struct anv_vb_cache_range vb_bound;
6432    struct anv_vb_cache_range vb_dirty;
6433 };
6434 
6435 VkResult anv_device_init_internal_kernels(struct anv_device *device);
6436 void anv_device_finish_internal_kernels(struct anv_device *device);
6437 VkResult anv_device_get_internal_shader(struct anv_device *device,
6438                                         enum anv_internal_kernel_name name,
6439                                         struct anv_shader_bin **out_bin);
6440 
6441 VkResult anv_device_init_astc_emu(struct anv_device *device);
6442 void anv_device_finish_astc_emu(struct anv_device *device);
6443 void anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
6444                           struct anv_image *image,
6445                           VkImageLayout layout,
6446                           const VkImageSubresourceLayers *subresource,
6447                           VkOffset3D block_offset,
6448                           VkExtent3D block_extent);
6449 
6450 /* This structure is used in 2 scenarios :
6451  *
6452  *    - copy utrace timestamps from command buffer so that command buffer can
6453  *      be resubmitted multiple times without the recorded timestamps being
6454  *      overwritten before they're read back
6455  *
6456  *    - emit trace points for queue debug tagging
6457  *      (vkQueueBeginDebugUtilsLabelEXT/vkQueueEndDebugUtilsLabelEXT)
6458  */
6459 struct anv_utrace_submit {
6460    struct anv_async_submit base;
6461 
6462    /* structure used by the perfetto glue */
6463    struct intel_ds_flush_data ds;
6464 
6465    /* Stream for temporary allocations */
6466    struct anv_state_stream dynamic_state_stream;
6467    struct anv_state_stream general_state_stream;
6468 
6469    /* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit
6470     * timestamps)
6471     */
6472    uint64_t last_full_timestamp;
6473 
6474    /* Memcpy state tracking (only used for timestamp copies on render engine) */
6475    struct anv_memcpy_state memcpy_state;
6476 
6477    /* Memcpy state tracking (only used for timestamp copies on compute engine) */
6478    struct anv_simple_shader simple_state;
6479 };
6480 
6481 void anv_device_utrace_init(struct anv_device *device);
6482 void anv_device_utrace_finish(struct anv_device *device);
6483 VkResult
6484 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
6485                                     uint32_t cmd_buffer_count,
6486                                     struct anv_cmd_buffer **cmd_buffers,
6487                                     struct anv_utrace_submit **out_submit);
6488 
6489 void
6490 anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
6491                                        void *cmdstream,
6492                                        void *ts_from, uint64_t from_offset_B,
6493                                        void *ts_to, uint64_t to_offset_B,
6494                                        uint64_t size_B);
6495 
6496 static bool
anv_has_cooperative_matrix(const struct anv_physical_device * device)6497 anv_has_cooperative_matrix(const struct anv_physical_device *device)
6498 {
6499    return device->has_cooperative_matrix;
6500 }
6501 
6502 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
6503    VK_FROM_HANDLE(__anv_type, __name, __handle)
6504 
6505 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
6506                        VK_OBJECT_TYPE_COMMAND_BUFFER)
6507 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
6508 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
6509 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
6510                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
6511 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
6512 
6513 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
6514                                VK_OBJECT_TYPE_BUFFER)
6515 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, vk.base, VkBufferView,
6516                                VK_OBJECT_TYPE_BUFFER_VIEW)
6517 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
6518                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
6519 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
6520                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
6521 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
6522                                VkDescriptorSetLayout,
6523                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
6524 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, vk.base, VkDeviceMemory,
6525                                VK_OBJECT_TYPE_DEVICE_MEMORY)
6526 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
6527 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
6528 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
6529                                VK_OBJECT_TYPE_IMAGE_VIEW);
6530 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
6531                                VK_OBJECT_TYPE_PIPELINE)
6532 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
6533                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
6534 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, vk.base, VkQueryPool,
6535                                VK_OBJECT_TYPE_QUERY_POOL)
6536 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, vk.base, VkSampler,
6537                                VK_OBJECT_TYPE_SAMPLER)
6538 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
6539                                VkPerformanceConfigurationINTEL,
6540                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
6541 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session, vk.base,
6542                                VkVideoSessionKHR,
6543                                VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
6544 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_video_session_params, vk.base,
6545                                VkVideoSessionParametersKHR,
6546                                VK_OBJECT_TYPE_VIDEO_SESSION_PARAMETERS_KHR)
6547 
6548 #define anv_genX(devinfo, thing) ({             \
6549    __typeof(&gfx9_##thing) genX_thing;          \
6550    switch ((devinfo)->verx10) {                 \
6551    case 90:                                     \
6552       genX_thing = &gfx9_##thing;               \
6553       break;                                    \
6554    case 110:                                    \
6555       genX_thing = &gfx11_##thing;              \
6556       break;                                    \
6557    case 120:                                    \
6558       genX_thing = &gfx12_##thing;              \
6559       break;                                    \
6560    case 125:                                    \
6561       genX_thing = &gfx125_##thing;             \
6562       break;                                    \
6563    case 200:                                    \
6564       genX_thing = &gfx20_##thing;              \
6565       break;                                    \
6566    case 300:                                    \
6567       genX_thing = &gfx30_##thing;              \
6568       break;                                    \
6569    default:                                     \
6570       unreachable("Unknown hardware generation"); \
6571    }                                            \
6572    genX_thing;                                  \
6573 })
6574 
6575 /* Gen-specific function declarations */
6576 #ifdef genX
6577 #  include "anv_genX.h"
6578 #else
6579 #  define genX(x) gfx9_##x
6580 #  include "anv_genX.h"
6581 #  undef genX
6582 #  define genX(x) gfx11_##x
6583 #  include "anv_genX.h"
6584 #  undef genX
6585 #  define genX(x) gfx12_##x
6586 #  include "anv_genX.h"
6587 #  undef genX
6588 #  define genX(x) gfx125_##x
6589 #  include "anv_genX.h"
6590 #  undef genX
6591 #  define genX(x) gfx20_##x
6592 #  include "anv_genX.h"
6593 #  undef genX
6594 #  define genX(x) gfx30_##x
6595 #  include "anv_genX.h"
6596 #  undef genX
6597 #endif
6598 
6599 #ifdef __cplusplus
6600 }
6601 #endif
6602 
6603 #endif /* ANV_PRIVATE_H */
6604