1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/i915_drm.h"
34
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #ifndef NDEBUG
40 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
41 #endif
42 #else
43 #define VG(x) ((void)0)
44 #endif
45
46 #include "common/intel_clflush.h"
47 #include "common/intel_decoder.h"
48 #include "common/intel_gem.h"
49 #include "common/intel_l3_config.h"
50 #include "common/intel_measure.h"
51 #include "dev/intel_device_info.h"
52 #include "blorp/blorp.h"
53 #include "compiler/brw_compiler.h"
54 #include "compiler/brw_rt.h"
55 #include "util/bitset.h"
56 #include "util/bitscan.h"
57 #include "util/macros.h"
58 #include "util/hash_table.h"
59 #include "util/list.h"
60 #include "util/sparse_array.h"
61 #include "util/u_atomic.h"
62 #include "util/u_vector.h"
63 #include "util/u_math.h"
64 #include "util/vma.h"
65 #include "util/xmlconfig.h"
66 #include "vk_alloc.h"
67 #include "vk_debug_report.h"
68 #include "vk_device.h"
69 #include "vk_enum_defines.h"
70 #include "vk_image.h"
71 #include "vk_instance.h"
72 #include "vk_physical_device.h"
73 #include "vk_shader_module.h"
74 #include "vk_util.h"
75 #include "vk_command_buffer.h"
76 #include "vk_queue.h"
77 #include "vk_log.h"
78
79 /* Pre-declarations needed for WSI entrypoints */
80 struct wl_surface;
81 struct wl_display;
82 typedef struct xcb_connection_t xcb_connection_t;
83 typedef uint32_t xcb_visualid_t;
84 typedef uint32_t xcb_window_t;
85
86 struct anv_batch;
87 struct anv_buffer;
88 struct anv_buffer_view;
89 struct anv_image_view;
90 struct anv_acceleration_structure;
91 struct anv_instance;
92
93 struct intel_aux_map_context;
94 struct intel_perf_config;
95 struct intel_perf_counter_pass;
96 struct intel_perf_query_result;
97
98 #include <vulkan/vulkan.h>
99 #include <vulkan/vk_icd.h>
100
101 #include "anv_android.h"
102 #include "anv_entrypoints.h"
103 #include "isl/isl.h"
104
105 #include "dev/intel_debug.h"
106 #undef MESA_LOG_TAG
107 #define MESA_LOG_TAG "MESA-INTEL"
108 #include "util/log.h"
109 #include "wsi_common.h"
110
111 #define NSEC_PER_SEC 1000000000ull
112
113 /* anv Virtual Memory Layout
114 * =========================
115 *
116 * When the anv driver is determining the virtual graphics addresses of memory
117 * objects itself using the softpin mechanism, the following memory ranges
118 * will be used.
119 *
120 * Three special considerations to notice:
121 *
122 * (1) the dynamic state pool is located within the same 4 GiB as the low
123 * heap. This is to work around a VF cache issue described in a comment in
124 * anv_physical_device_init_heaps.
125 *
126 * (2) the binding table pool is located at lower addresses than the surface
127 * state pool, within a 4 GiB range. This allows surface state base addresses
128 * to cover both binding tables (16 bit offsets) and surface states (32 bit
129 * offsets).
130 *
131 * (3) the last 4 GiB of the address space is withheld from the high
132 * heap. Various hardware units will read past the end of an object for
133 * various reasons. This healthy margin prevents reads from wrapping around
134 * 48-bit addresses.
135 */
136 #define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000010000ULL /* 64 KiB */
137 #define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL
138 #define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */
139 #define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL
140 #define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */
141 #define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL
142 #define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */
143 #define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
144 #define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
145 #define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL
146 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
147 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
148 #define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
149 #define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL
150 #define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */
151
152 #define GENERAL_STATE_POOL_SIZE \
153 (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
154 #define LOW_HEAP_SIZE \
155 (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
156 #define DYNAMIC_STATE_POOL_SIZE \
157 (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
158 #define BINDING_TABLE_POOL_SIZE \
159 (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
160 #define SURFACE_STATE_POOL_SIZE \
161 (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
162 #define INSTRUCTION_STATE_POOL_SIZE \
163 (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
164 #define CLIENT_VISIBLE_HEAP_SIZE \
165 (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
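
/* A couple of illustrative compile-time checks for considerations (1) and
 * (2) from the layout comment above.  This is a sketch only (it assumes a
 * C11 static_assert via <assert.h>); the driver does not depend on these
 * asserts being present.
 */
static_assert((LOW_HEAP_MIN_ADDRESS >> 32) ==
              (DYNAMIC_STATE_POOL_MAX_ADDRESS >> 32),
              "dynamic state pool must live in the same 4 GiB as the low heap");
static_assert(BINDING_TABLE_POOL_MIN_ADDRESS < SURFACE_STATE_POOL_MIN_ADDRESS &&
              SURFACE_STATE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS <
              (1ull << 32),
              "binding tables must sit below surface states in one 4 GiB range");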
166
167 /* Allowing different clear colors requires us to perform a depth resolve at
168 * the end of certain render passes. This is because while slow clears store
169 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
170 * See the PRMs for examples describing when additional resolves would be
171 * necessary. To enable fast clears without requiring extra resolves, we set
172 * the clear value to a globally-defined one. We could allow different values
173 * if the user doesn't expect coherent data during or after a render pass
174 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
175 * don't seem to exist yet. In almost all Vulkan applications tested thus far,
176 * 1.0f seems to be the only value used. The only application that doesn't set
177 * this value does so through the usage of a seemingly uninitialized clear
178 * value.
179 */
180 #define ANV_HZ_FC_VAL 1.0f
181
182 #define MAX_VBS 28
183 #define MAX_XFB_BUFFERS 4
184 #define MAX_XFB_STREAMS 4
185 #define MAX_SETS 8
186 #define MAX_RTS 8
187 #define MAX_VIEWPORTS 16
188 #define MAX_SCISSORS 16
189 #define MAX_PUSH_CONSTANTS_SIZE 128
190 #define MAX_DYNAMIC_BUFFERS 16
191 #define MAX_IMAGES 64
192 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
193 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
194 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
195 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
196 * use 64 here to avoid cache issues. We could most likely bring it back to
197 * 32 if we had different virtual addresses for the different views on a given
198 * GEM object.
199 */
200 #define ANV_UBO_ALIGNMENT 64
201 #define ANV_SSBO_ALIGNMENT 4
202 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
203 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
204 #define MAX_SAMPLE_LOCATIONS 16
205
206 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
207 *
208 * "The surface state model is used when a Binding Table Index (specified
209 * in the message descriptor) of less than 240 is specified. In this model,
210 * the Binding Table Index is used to index into the binding table, and the
211 * binding table entry contains a pointer to the SURFACE_STATE."
212 *
213 * Binding table values above 240 are used for various things in the hardware
214 * such as stateless, stateless with incoherent cache, SLM, and bindless.
215 */
216 #define MAX_BINDING_TABLE_SIZE 240
217
218 /* The kernel relocation API has a limitation of a 32-bit delta value
219 * applied to the address before it is written which, in spite of it being
220 * unsigned, is treated as signed. Because of the way that this maps to
221 * the Vulkan API, we cannot handle an offset into a buffer that does not
222 * fit into a signed 32-bit value. The only mechanism we have for dealing with
223 * this at the moment is to limit all VkDeviceMemory objects to a maximum
224 * of 2GB each. The Vulkan spec allows us to do this:
225 *
226 * "Some platforms may have a limit on the maximum size of a single
227 * allocation. For example, certain systems may fail to create
228 * allocations with a size greater than or equal to 4GB. Such a limit is
229 * implementation-dependent, and if such a failure occurs then the error
230 * VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
231 */
232 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
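
/* Illustrative check only (again assuming C11 static_assert): with the 2GB
 * cap above, any byte offset into a single allocation fits in the signed
 * 32-bit delta that the relocation API can apply.
 */
static_assert(MAX_MEMORY_ALLOCATION_SIZE - 1 <= INT32_MAX,
              "allocation limit must keep buffer offsets within signed 32 bits");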
233
234 #define ANV_SVGS_VB_INDEX MAX_VBS
235 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
236
237 /* We reserve this MI ALU register for the purpose of handling predication.
238 * Other code which uses the MI ALU should leave it alone.
239 */
240 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
241
242 /* We reserve this MI ALU register to pass around an offset computed from
243 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex (VK_KHR_performance_query).
244 * Other code which uses the MI ALU should leave it alone.
245 */
246 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
247
248 /* For gfx12 we set the streamout buffers using 4 separate commands
249 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
250 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
251 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
252 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
253 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
254 * 3DSTATE_SO_BUFFER_INDEX_0.
255 */
256 #define SO_BUFFER_INDEX_0_CMD 0x60
257 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
258
259 static inline uint32_t
260 align_down_npot_u32(uint32_t v, uint32_t a)
261 {
262 return v - (v % a);
263 }
264
265 static inline uint32_t
266 align_down_u32(uint32_t v, uint32_t a)
267 {
268 assert(a != 0 && a == (a & -a));
269 return v & ~(a - 1);
270 }
271
272 static inline uint32_t
273 align_u32(uint32_t v, uint32_t a)
274 {
275 assert(a != 0 && a == (a & -a));
276 return align_down_u32(v + a - 1, a);
277 }
278
279 static inline uint64_t
280 align_down_u64(uint64_t v, uint64_t a)
281 {
282 assert(a != 0 && a == (a & -a));
283 return v & ~(a - 1);
284 }
285
286 static inline uint64_t
287 align_u64(uint64_t v, uint64_t a)
288 {
289 return align_down_u64(v + a - 1, a);
290 }
291
292 static inline int32_t
293 align_i32(int32_t v, int32_t a)
294 {
295 assert(a != 0 && a == (a & -a));
296 return (v + a - 1) & ~(a - 1);
297 }
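
/* For reference: align_u32(17, 16) == 32, align_down_u32(17, 16) == 16, and
 * align_down_npot_u32(17, 12) == 12 (the only helper here that accepts a
 * non-power-of-two alignment).
 */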
298
299 /** Alignment must be a power of 2. */
300 static inline bool
301 anv_is_aligned(uintmax_t n, uintmax_t a)
302 {
303 assert(a == (a & -a));
304 return (n & (a - 1)) == 0;
305 }
306
307 static inline uint32_t
308 anv_minify(uint32_t n, uint32_t levels)
309 {
310 if (unlikely(n == 0))
311 return 0;
312 else
313 return MAX2(n >> levels, 1);
314 }
315
316 static inline float
317 anv_clamp_f(float f, float min, float max)
318 {
319 assert(min < max);
320
321 if (f > max)
322 return max;
323 else if (f < min)
324 return min;
325 else
326 return f;
327 }
328
329 static inline bool
330 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
331 {
332 if (*inout_mask & clear_mask) {
333 *inout_mask &= ~clear_mask;
334 return true;
335 } else {
336 return false;
337 }
338 }
339
340 static inline union isl_color_value
341 vk_to_isl_color(VkClearColorValue color)
342 {
343 return (union isl_color_value) {
344 .u32 = {
345 color.uint32[0],
346 color.uint32[1],
347 color.uint32[2],
348 color.uint32[3],
349 },
350 };
351 }
352
353 static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
354 {
355 uintptr_t mask = (1ull << bits) - 1;
356 *flags = ptr & mask;
357 return (void *) (ptr & ~mask);
358 }
359
360 static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
361 {
362 uintptr_t value = (uintptr_t) ptr;
363 uintptr_t mask = (1ull << bits) - 1;
364 return value | (mask & flags);
365 }
366
367 /**
368 * Warn on ignored extension structs.
369 *
370 * The Vulkan spec requires us to ignore unsupported or unknown structs in
371 * a pNext chain. In debug mode, emitting warnings for ignored structs may
372 * help us discover structs that we should not have ignored.
373 *
374 *
375 * From the Vulkan 1.0.38 spec:
376 *
377 * Any component of the implementation (the loader, any enabled layers,
378 * and drivers) must skip over, without processing (other than reading the
379 * sType and pNext members) any chained structures with sType values not
380 * defined by extensions supported by that component.
381 */
382 #define anv_debug_ignored_stype(sType) \
383 mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
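
/* Hypothetical usage sketch: when walking a pNext chain, unknown extension
 * structs are skipped and (in debug builds) logged:
 *
 *    vk_foreach_struct_const(ext, pCreateInfo->pNext) {
 *       switch (ext->sType) {
 *       case VK_STRUCTURE_TYPE_SOMETHING_WE_HANDLE:
 *          ...
 *          break;
 *       default:
 *          anv_debug_ignored_stype(ext->sType);
 *          break;
 *       }
 *    }
 */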
384
385 void __anv_perf_warn(struct anv_device *device,
386 const struct vk_object_base *object,
387 const char *file, int line, const char *format, ...)
388 anv_printflike(5, 6);
389
390 /**
391 * Print a FINISHME message, including its source location.
392 */
393 #define anv_finishme(format, ...) \
394 do { \
395 static bool reported = false; \
396 if (!reported) { \
397 mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
398 ##__VA_ARGS__); \
399 reported = true; \
400 } \
401 } while (0)
402
403 /**
404 * Print a perf warning message. Set INTEL_DEBUG=perf to see these.
405 */
406 #define anv_perf_warn(objects_macro, format, ...) \
407 do { \
408 static bool reported = false; \
409 if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
410 __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \
411 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \
412 objects_macro, __FILE__, __LINE__, \
413 format, ## __VA_ARGS__); \
414 reported = true; \
415 } \
416 } while (0)
417
418 /* A non-fatal assert. Useful for debugging. */
419 #ifdef DEBUG
420 #define anv_assert(x) ({ \
421 if (unlikely(!(x))) \
422 mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
423 })
424 #else
425 #define anv_assert(x)
426 #endif
427
428 struct anv_bo {
429 const char *name;
430
431 uint32_t gem_handle;
432
433 uint32_t refcount;
434
435 /* Index into the current validation list. This is used by the
436 * validation list building algorithm to track which buffers are already
437 * in the validation list so that we can ensure uniqueness.
438 */
439 uint32_t index;
440
441 /* Index for use with util_sparse_array_free_list */
442 uint32_t free_index;
443
444 /* Last known offset. This value is provided by the kernel when we
445 * execbuf and is used as the presumed offset for the next bunch of
446 * relocations.
447 */
448 uint64_t offset;
449
450 /** Size of the buffer not including implicit aux */
451 uint64_t size;
452
453 /* Map for internally mapped BOs.
454 *
455 * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
456 */
457 void *map;
458
459 /** Size of the implicit CCS range at the end of the buffer
460 *
461 * On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K
462 * page of main surface data maps to a 256B chunk of CCS data and that
463 * mapping is provided on TGL-LP by the AUX table which maps virtual memory
464 * addresses in the main surface to virtual memory addresses for CCS data.
465 *
466 * Because we can't change these maps around easily and because Vulkan
467 * allows two VkImages to be bound to overlapping memory regions (as long
468 * as the app is careful), it's not feasible to make this mapping part of
469 * the image. (On Gfx11 and earlier, the mapping was provided via
470 * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
471 * Instead, we attach the CCS data directly to the buffer object and set up
472 * the AUX table mapping at BO creation time.
473 *
474 * This field is for internal tracking use by the BO allocator only and
475 * should not be touched by other parts of the code. If something wants to
476 * know if a BO has implicit CCS data, it should instead look at the
477 * has_implicit_ccs boolean below.
478 *
479 * This data is not included in maps of this buffer.
480 */
481 uint32_t _ccs_size;
482
483 /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
484 uint32_t flags;
485
486 /** True if this BO may be shared with other processes */
487 bool is_external:1;
488
489 /** True if this BO is a wrapper
490 *
491 * When set to true, none of the fields in this BO are meaningful except
492 * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
493 * See also anv_bo_unwrap(). Wrapper BOs are not allowed when use_softpin
494 * is set in the physical device.
495 */
496 bool is_wrapper:1;
497
498 /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
499 bool has_fixed_address:1;
500
501 /** True if this BO wraps a host pointer */
502 bool from_host_ptr:1;
503
504 /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
505 bool has_client_visible_address:1;
506
507 /** True if this BO has implicit CCS data attached to it */
508 bool has_implicit_ccs:1;
509 };
510
511 static inline struct anv_bo *
512 anv_bo_ref(struct anv_bo *bo)
513 {
514 p_atomic_inc(&bo->refcount);
515 return bo;
516 }
517
518 static inline struct anv_bo *
519 anv_bo_unwrap(struct anv_bo *bo)
520 {
521 while (bo->is_wrapper)
522 bo = bo->map;
523 return bo;
524 }
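
/* Example (a sketch, not driver code): anv_queue_submit::fence_bos stores
 * anv_bo pointers with a "wait on this BO" flag packed into bit 0 via the
 * anv_(un)pack_ptr helpers defined earlier.  The helper names below are
 * hypothetical.
 */
static inline uintptr_t
anv_example_pack_fence_bo(struct anv_bo *bo, bool wait)
{
   /* BO pointers are well aligned, so bit 0 is free for the wait flag. */
   return anv_pack_ptr(bo, 1, wait ? 1 : 0);
}

static inline struct anv_bo *
anv_example_unpack_fence_bo(uintptr_t packed, bool *wait_out)
{
   int flags;
   struct anv_bo *bo = anv_unpack_ptr(packed, 1, &flags);
   *wait_out = flags & 1;
   return bo;
}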
525
526 /* Represents a lock-free linked list of "free" things. This is used by
527 * both the block pool and the state pools. Unfortunately, in order to
528 * solve the ABA problem, we can't use a single uint32_t head.
529 */
530 union anv_free_list {
531 struct {
532 uint32_t offset;
533
534 /* A simple count that is incremented every time the head changes. */
535 uint32_t count;
536 };
537 /* Make sure it's aligned to 64 bits. This will make atomic operations
538 * faster on 32 bit platforms.
539 */
540 uint64_t u64 __attribute__ ((aligned (8)));
541 };
542
543 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
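
/* Rough sketch of the lock-free pop this layout enables (illustrative only;
 * the real implementation lives in anv_allocator.c).  Because the offset and
 * the generation count are swapped together in one 64-bit CAS, a concurrent
 * pop/push of the same offset still bumps the count, so a stale CAS fails
 * instead of corrupting the list (the classic ABA hazard):
 *
 *    union anv_free_list current, old, new;
 *    current.u64 = list->u64;
 *    do {
 *       old = current;
 *       if (old.offset == UINT32_MAX)
 *          return NULL;                       // the list is empty
 *       new.offset = ...next offset read from the entry at old.offset...;
 *       new.count = old.count + 1;
 *       current.u64 = __sync_val_compare_and_swap(&list->u64, old.u64, new.u64);
 *    } while (current.u64 != old.u64);
 */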
544
545 struct anv_block_state {
546 union {
547 struct {
548 uint32_t next;
549 uint32_t end;
550 };
551 /* Make sure it's aligned to 64 bits. This will make atomic operations
552 * faster on 32 bit platforms.
553 */
554 uint64_t u64 __attribute__ ((aligned (8)));
555 };
556 };
557
558 #define anv_block_pool_foreach_bo(bo, pool) \
559 for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
560 _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
561 _pp_bo++)
562
563 #define ANV_MAX_BLOCK_POOL_BOS 20
564
565 struct anv_block_pool {
566 const char *name;
567
568 struct anv_device *device;
569 bool use_softpin;
570
571 /* Wrapper BO for use in relocation lists. This BO is simply a wrapper
572 * around the actual BO so that we can grow the pool even after the wrapper BO has
573 * been put in a relocation list. This is only used in the non-softpin
574 * case.
575 */
576 struct anv_bo wrapper_bo;
577
578 struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
579 struct anv_bo *bo;
580 uint32_t nbos;
581
582 uint64_t size;
583
584 /* The address where the start of the pool is pinned. The various bos that
585 * are created as the pool grows will have addresses in the range
586 * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
587 */
588 uint64_t start_address;
589
590 /* The offset from the start of the bo to the "center" of the block
591 * pool. Pointers to allocated blocks are given by
592 * bo.map + center_bo_offset + offsets.
593 */
594 uint32_t center_bo_offset;
595
596 /* Current memory map of the block pool. This pointer may or may not
597 * point to the actual beginning of the block pool memory. If
598 * anv_block_pool_alloc_back has ever been called, then this pointer
599 * will point to the "center" position of the buffer and all offsets
600 * (negative or positive) given out by the block pool alloc functions
601 * will be valid relative to this pointer.
602 *
603 * In particular, map == bo.map + center_bo_offset
604 *
605 * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
606 * since it will handle the softpin case as well, where this points to NULL.
607 */
608 void *map;
609 int fd;
610
611 /**
612 * Array of mmaps and gem handles owned by the block pool, reclaimed when
613 * the block pool is destroyed.
614 */
615 struct u_vector mmap_cleanups;
616
617 struct anv_block_state state;
618
619 struct anv_block_state back_state;
620 };
621
622 /* Block pools are backed by a fixed-size 1GB memfd */
623 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
624
625 /* The center of the block pool is also the middle of the memfd. This may
626 * change in the future if we decide differently for some reason.
627 */
628 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
629
630 static inline uint32_t
631 anv_block_pool_size(struct anv_block_pool *pool)
632 {
633 return pool->state.end + pool->back_state.end;
634 }
635
636 struct anv_state {
637 int32_t offset;
638 uint32_t alloc_size;
639 void *map;
640 uint32_t idx;
641 };
642
643 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
644
645 struct anv_fixed_size_state_pool {
646 union anv_free_list free_list;
647 struct anv_block_state block;
648 };
649
650 #define ANV_MIN_STATE_SIZE_LOG2 6
651 #define ANV_MAX_STATE_SIZE_LOG2 21
652
653 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
654
655 struct anv_free_entry {
656 uint32_t next;
657 struct anv_state state;
658 };
659
660 struct anv_state_table {
661 struct anv_device *device;
662 int fd;
663 struct anv_free_entry *map;
664 uint32_t size;
665 struct anv_block_state state;
666 struct u_vector cleanups;
667 };
668
669 struct anv_state_pool {
670 struct anv_block_pool block_pool;
671
672 /* Offset into the relevant state base address where the state pool starts
673 * allocating memory.
674 */
675 int32_t start_offset;
676
677 struct anv_state_table table;
678
679 /* The size of blocks which will be allocated from the block pool */
680 uint32_t block_size;
681
682 /** Free list for "back" allocations */
683 union anv_free_list back_alloc_free_list;
684
685 struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
686 };
687
688 struct anv_state_reserved_pool {
689 struct anv_state_pool *pool;
690 union anv_free_list reserved_blocks;
691 uint32_t count;
692 };
693
694 struct anv_state_stream {
695 struct anv_state_pool *state_pool;
696
697 /* The size of blocks to allocate from the state pool */
698 uint32_t block_size;
699
700 /* Current block we're allocating from */
701 struct anv_state block;
702
703 /* Offset into the current block at which to allocate the next state */
704 uint32_t next;
705
706 /* List of all blocks allocated from this pool */
707 struct util_dynarray all_blocks;
708 };
709
710 /* The block_pool functions are exported for testing only. The block pool should
711 * only be used via a state pool (see below).
712 */
713 VkResult anv_block_pool_init(struct anv_block_pool *pool,
714 struct anv_device *device,
715 const char *name,
716 uint64_t start_address,
717 uint32_t initial_size);
718 void anv_block_pool_finish(struct anv_block_pool *pool);
719 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
720 uint32_t block_size, uint32_t *padding);
721 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
722 uint32_t block_size);
723 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
724 size);
725
726 VkResult anv_state_pool_init(struct anv_state_pool *pool,
727 struct anv_device *device,
728 const char *name,
729 uint64_t base_address,
730 int32_t start_offset,
731 uint32_t block_size);
732 void anv_state_pool_finish(struct anv_state_pool *pool);
733 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
734 uint32_t state_size, uint32_t alignment);
735 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
736 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
737 void anv_state_stream_init(struct anv_state_stream *stream,
738 struct anv_state_pool *state_pool,
739 uint32_t block_size);
740 void anv_state_stream_finish(struct anv_state_stream *stream);
741 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
742 uint32_t size, uint32_t alignment);
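
/* Typical per-command-buffer usage of the state stream API declared above
 * (a sketch; everything other than the anv_state_stream_* entry points is
 * hypothetical):
 *
 *    struct anv_state_stream stream;
 *    anv_state_stream_init(&stream, &device->dynamic_state_pool, 16384);
 *
 *    struct anv_state s = anv_state_stream_alloc(&stream, 64, 32);
 *    memcpy(s.map, my_data, 64);          // CPU-visible mapping
 *    emit_offset_into_batch(s.offset);    // GPU-visible pool-relative offset
 *
 *    anv_state_stream_finish(&stream);    // releases every block at once
 */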
743
744 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
745 struct anv_state_pool *parent,
746 uint32_t count, uint32_t size,
747 uint32_t alignment);
748 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
749 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
750 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
751 struct anv_state state);
752
753 VkResult anv_state_table_init(struct anv_state_table *table,
754 struct anv_device *device,
755 uint32_t initial_entries);
756 void anv_state_table_finish(struct anv_state_table *table);
757 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
758 uint32_t count);
759 void anv_free_list_push(union anv_free_list *list,
760 struct anv_state_table *table,
761 uint32_t idx, uint32_t count);
762 struct anv_state* anv_free_list_pop(union anv_free_list *list,
763 struct anv_state_table *table);
764
765
766 static inline struct anv_state *
767 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
768 {
769 return &table->map[idx].state;
770 }
771 /**
772 * Implements a pool of re-usable BOs. The interface is identical to that
773 * of block_pool except that each block is its own BO.
774 */
775 struct anv_bo_pool {
776 const char *name;
777
778 struct anv_device *device;
779
780 struct util_sparse_array_free_list free_list[16];
781 };
782
783 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
784 const char *name);
785 void anv_bo_pool_finish(struct anv_bo_pool *pool);
786 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
787 struct anv_bo **bo_out);
788 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
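
/* Usage sketch (hypothetical call site, error handling elided): batch BOs
 * come from such a pool so they can be recycled cheaply:
 *
 *    struct anv_bo *bo;
 *    VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 16384, &bo);
 *    ...fill bo->map with commands...
 *    anv_bo_pool_free(&device->batch_bo_pool, bo);
 */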
789
790 struct anv_scratch_pool {
791 /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
792 struct anv_bo *bos[16][MESA_SHADER_STAGES];
793 uint32_t surfs[16];
794 struct anv_state surf_states[16];
795 };
796
797 void anv_scratch_pool_init(struct anv_device *device,
798 struct anv_scratch_pool *pool);
799 void anv_scratch_pool_finish(struct anv_device *device,
800 struct anv_scratch_pool *pool);
801 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
802 struct anv_scratch_pool *pool,
803 gl_shader_stage stage,
804 unsigned per_thread_scratch);
805 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
806 struct anv_scratch_pool *pool,
807 unsigned per_thread_scratch);
808
809 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
810 struct anv_bo_cache {
811 struct util_sparse_array bo_map;
812 pthread_mutex_t mutex;
813 };
814
815 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
816 struct anv_device *device);
817 void anv_bo_cache_finish(struct anv_bo_cache *cache);
818
819 struct anv_queue_family {
820 /* Standard bits passed on to the client */
821 VkQueueFlags queueFlags;
822 uint32_t queueCount;
823
824 /* Driver internal information */
825 enum drm_i915_gem_engine_class engine_class;
826 };
827
828 #define ANV_MAX_QUEUE_FAMILIES 3
829
830 struct anv_memory_type {
831 /* Standard bits passed on to the client */
832 VkMemoryPropertyFlags propertyFlags;
833 uint32_t heapIndex;
834 };
835
836 struct anv_memory_heap {
837 /* Standard bits passed on to the client */
838 VkDeviceSize size;
839 VkMemoryHeapFlags flags;
840
841 /** Driver-internal book-keeping.
842 *
843 * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
844 */
845 VkDeviceSize used __attribute__ ((aligned (8)));
846
847 bool is_local_mem;
848 };
849
850 struct anv_memregion {
851 struct drm_i915_gem_memory_class_instance region;
852 uint64_t size;
853 uint64_t available;
854 };
855
856 struct anv_physical_device {
857 struct vk_physical_device vk;
858
859 /* Link in anv_instance::physical_devices */
860 struct list_head link;
861
862 struct anv_instance * instance;
863 char path[20];
864 struct {
865 uint16_t domain;
866 uint8_t bus;
867 uint8_t device;
868 uint8_t function;
869 } pci_info;
870 struct intel_device_info info;
871 /** Amount of "GPU memory" we want to advertise
872 *
873 * Clearly, this value is bogus since Intel is a UMA architecture. On
874 * gfx7 platforms, we are limited by GTT size unless we want to implement
875 * fine-grained tracking and GTT splitting. On Broadwell and above we are
876 * practically unlimited. However, we will never report more than 3/4 of
877 * the total system RAM to try to avoid running out of RAM.
878 */
879 bool supports_48bit_addresses;
880 struct brw_compiler * compiler;
881 struct isl_device isl_dev;
882 struct intel_perf_config * perf;
883 /* True if hardware support is incomplete/alpha */
884 bool is_alpha;
885 /*
886 * Number of commands required to implement a performance query begin +
887 * end.
888 */
889 uint32_t n_perf_query_commands;
890 int cmd_parser_version;
891 bool has_exec_async;
892 bool has_exec_capture;
893 bool has_exec_fence;
894 bool has_syncobj_wait;
895 bool has_syncobj_wait_available;
896 bool has_context_priority;
897 bool has_context_isolation;
898 bool has_thread_submit;
899 bool has_mmap_offset;
900 bool has_userptr_probe;
901 uint64_t gtt_size;
902
903 bool use_softpin;
904 bool always_use_bindless;
905 bool use_call_secondary;
906
907 /** True if we can access buffers using A64 messages */
908 bool has_a64_buffer_access;
909 /** True if we can use bindless access for images */
910 bool has_bindless_images;
911 /** True if we can use bindless access for samplers */
912 bool has_bindless_samplers;
913 /** True if we can use timeline semaphores through execbuf */
914 bool has_exec_timeline;
915
916 /** True if we can read the GPU timestamp register
917 *
918 * When running in a virtual context, the timestamp register is unreadable
919 * on Gfx12+.
920 */
921 bool has_reg_timestamp;
922
923 /** True if this device has implicit AUX
924 *
925 * If true, CCS is handled as an implicit attachment to the BO rather than
926 * as an explicitly bound surface.
927 */
928 bool has_implicit_ccs;
929
930 bool always_flush_cache;
931
932 struct {
933 uint32_t family_count;
934 struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
935 } queue;
936
937 struct {
938 uint32_t type_count;
939 struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
940 uint32_t heap_count;
941 struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
942 bool need_clflush;
943 } memory;
944
945 struct anv_memregion vram;
946 struct anv_memregion sys;
947 uint8_t driver_build_sha1[20];
948 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
949 uint8_t driver_uuid[VK_UUID_SIZE];
950 uint8_t device_uuid[VK_UUID_SIZE];
951
952 struct disk_cache * disk_cache;
953
954 struct wsi_device wsi_device;
955 int local_fd;
956 bool has_local;
957 int64_t local_major;
958 int64_t local_minor;
959 int master_fd;
960 bool has_master;
961 int64_t master_major;
962 int64_t master_minor;
963 struct drm_i915_query_engine_info * engine_info;
964
965 void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_bo *, uint32_t);
966 struct intel_measure_device measure_device;
967 };
968
969 struct anv_app_info {
970 const char* app_name;
971 uint32_t app_version;
972 const char* engine_name;
973 uint32_t engine_version;
974 uint32_t api_version;
975 };
976
977 struct anv_instance {
978 struct vk_instance vk;
979
980 bool physical_devices_enumerated;
981 struct list_head physical_devices;
982
983 bool pipeline_cache_enabled;
984
985 struct driOptionCache dri_options;
986 struct driOptionCache available_dri_options;
987 };
988
989 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
990 void anv_finish_wsi(struct anv_physical_device *physical_device);
991
992 struct anv_queue_submit {
993 struct anv_cmd_buffer ** cmd_buffers;
994 uint32_t cmd_buffer_count;
995 uint32_t cmd_buffer_array_length;
996
997 uint32_t fence_count;
998 uint32_t fence_array_length;
999 struct drm_i915_gem_exec_fence * fences;
1000 uint64_t * fence_values;
1001
1002 uint32_t temporary_semaphore_count;
1003 uint32_t temporary_semaphore_array_length;
1004 struct anv_semaphore_impl * temporary_semaphores;
1005
1006 /* Allocated only with non-shareable timelines. */
1007 union {
1008 struct anv_timeline ** wait_timelines;
1009 uint32_t * wait_timeline_syncobjs;
1010 };
1011 uint32_t wait_timeline_count;
1012 uint32_t wait_timeline_array_length;
1013 uint64_t * wait_timeline_values;
1014
1015 struct anv_timeline ** signal_timelines;
1016 uint32_t signal_timeline_count;
1017 uint32_t signal_timeline_array_length;
1018 uint64_t * signal_timeline_values;
1019
1020 int in_fence;
1021 bool need_out_fence;
1022 int out_fence;
1023
1024 uint32_t fence_bo_count;
1025 uint32_t fence_bo_array_length;
1026 /* An array of struct anv_bo pointers with lower bit used as a flag to
1027 * signal we will wait on that BO (see anv_(un)pack_ptr).
1028 */
1029 uintptr_t * fence_bos;
1030
1031 int perf_query_pass;
1032 struct anv_query_pool * perf_query_pool;
1033
1034 const VkAllocationCallbacks * alloc;
1035 VkSystemAllocationScope alloc_scope;
1036
1037 struct anv_bo * simple_bo;
1038 uint32_t simple_bo_size;
1039
1040 struct list_head link;
1041 };
1042
1043 struct anv_queue {
1044 struct vk_queue vk;
1045
1046 struct anv_device * device;
1047
1048 const struct anv_queue_family * family;
1049
1050 uint32_t exec_flags;
1051
1052 /* Set once from the device API calls. */
1053 bool lost_signaled;
1054
1055 /* Only set once atomically by the queue */
1056 int lost;
1057 int error_line;
1058 const char * error_file;
1059 char error_msg[80];
1060
1061 /*
1062 * This mutex protects the variables below.
1063 */
1064 pthread_mutex_t mutex;
1065
1066 pthread_t thread;
1067 pthread_cond_t cond;
1068
1069 /*
1070 * A list of struct anv_queue_submit to be submitted to i915.
1071 */
1072 struct list_head queued_submits;
1073
1074 /* Set to true to stop the submission thread */
1075 bool quit;
1076 };
1077
1078 struct anv_pipeline_cache {
1079 struct vk_object_base base;
1080 struct anv_device * device;
1081 pthread_mutex_t mutex;
1082
1083 struct hash_table * nir_cache;
1084
1085 struct hash_table * cache;
1086
1087 bool external_sync;
1088 };
1089
1090 struct nir_xfb_info;
1091 struct anv_pipeline_bind_map;
1092
1093 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
1094 struct anv_device *device,
1095 bool cache_enabled,
1096 bool external_sync);
1097 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
1098
1099 struct anv_shader_bin *
1100 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
1101 const void *key, uint32_t key_size);
1102 struct anv_shader_bin *
1103 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
1104 gl_shader_stage stage,
1105 const void *key_data, uint32_t key_size,
1106 const void *kernel_data, uint32_t kernel_size,
1107 const struct brw_stage_prog_data *prog_data,
1108 uint32_t prog_data_size,
1109 const struct brw_compile_stats *stats,
1110 uint32_t num_stats,
1111 const struct nir_xfb_info *xfb_info,
1112 const struct anv_pipeline_bind_map *bind_map);
1113
1114 struct anv_shader_bin *
1115 anv_device_search_for_kernel(struct anv_device *device,
1116 struct anv_pipeline_cache *cache,
1117 const void *key_data, uint32_t key_size,
1118 bool *user_cache_bit);
1119
1120 struct anv_shader_bin *
1121 anv_device_upload_kernel(struct anv_device *device,
1122 struct anv_pipeline_cache *cache,
1123 gl_shader_stage stage,
1124 const void *key_data, uint32_t key_size,
1125 const void *kernel_data, uint32_t kernel_size,
1126 const struct brw_stage_prog_data *prog_data,
1127 uint32_t prog_data_size,
1128 const struct brw_compile_stats *stats,
1129 uint32_t num_stats,
1130 const struct nir_xfb_info *xfb_info,
1131 const struct anv_pipeline_bind_map *bind_map);
1132
1133 struct nir_shader;
1134 struct nir_shader_compiler_options;
1135
1136 struct nir_shader *
1137 anv_device_search_for_nir(struct anv_device *device,
1138 struct anv_pipeline_cache *cache,
1139 const struct nir_shader_compiler_options *nir_options,
1140 unsigned char sha1_key[20],
1141 void *mem_ctx);
1142
1143 void
1144 anv_device_upload_nir(struct anv_device *device,
1145 struct anv_pipeline_cache *cache,
1146 const struct nir_shader *nir,
1147 unsigned char sha1_key[20]);
1148
1149 struct anv_address {
1150 struct anv_bo *bo;
1151 int64_t offset;
1152 };
1153
1154 struct anv_device {
1155 struct vk_device vk;
1156
1157 struct anv_physical_device * physical;
1158 struct intel_device_info info;
1159 struct isl_device isl_dev;
1160 int context_id;
1161 int fd;
1162 bool can_chain_batches;
1163 bool robust_buffer_access;
1164 bool has_thread_submit;
1165
1166 pthread_mutex_t vma_mutex;
1167 struct util_vma_heap vma_lo;
1168 struct util_vma_heap vma_cva;
1169 struct util_vma_heap vma_hi;
1170
1171 /** List of all anv_device_memory objects */
1172 struct list_head memory_objects;
1173
1174 struct anv_bo_pool batch_bo_pool;
1175
1176 struct anv_bo_cache bo_cache;
1177
1178 struct anv_state_pool general_state_pool;
1179 struct anv_state_pool dynamic_state_pool;
1180 struct anv_state_pool instruction_state_pool;
1181 struct anv_state_pool binding_table_pool;
1182 struct anv_state_pool surface_state_pool;
1183
1184 struct anv_state_reserved_pool custom_border_colors;
1185
1186 /** BO used for various workarounds
1187 *
1188 * There are a number of workarounds on our hardware which require writing
1189 * data somewhere and it doesn't really matter where. For that, we use
1190 * this BO and just write to the first dword or so.
1191 *
1192 * We also need to be able to handle NULL buffers bound as pushed UBOs.
1193 * For that, we use the high bytes (>= 1024) of the workaround BO.
1194 */
1195 struct anv_bo * workaround_bo;
1196 struct anv_address workaround_address;
1197
1198 struct anv_bo * trivial_batch_bo;
1199 struct anv_state null_surface_state;
1200
1201 struct anv_pipeline_cache default_pipeline_cache;
1202 struct blorp_context blorp;
1203
1204 struct anv_state border_colors;
1205
1206 struct anv_state slice_hash;
1207
1208 uint32_t queue_count;
1209 struct anv_queue * queues;
1210
1211 struct anv_scratch_pool scratch_pool;
1212 struct anv_bo *rt_scratch_bos[16];
1213
1214 struct anv_shader_bin *rt_trampoline;
1215 struct anv_shader_bin *rt_trivial_return;
1216
1217 pthread_mutex_t mutex;
1218 pthread_cond_t queue_submit;
1219 int _lost;
1220 int lost_reported;
1221
1222 struct intel_batch_decode_ctx decoder_ctx;
1223 /*
1224 * When decoding an anv_cmd_buffer, we might need to search for BOs through
1225 * the cmd_buffer's list.
1226 */
1227 struct anv_cmd_buffer *cmd_buffer_being_decoded;
1228
1229 int perf_fd; /* -1 if not opened */
1230 uint64_t perf_metric; /* 0 if unset */
1231
1232 struct intel_aux_map_context *aux_map_ctx;
1233
1234 const struct intel_l3_config *l3_config;
1235
1236 struct intel_debug_block_frame *debug_frame_desc;
1237 };
1238
1239 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1240 #define ANV_ALWAYS_SOFTPIN true
1241 #else
1242 #define ANV_ALWAYS_SOFTPIN false
1243 #endif
1244
1245 static inline bool
1246 anv_use_softpin(const struct anv_physical_device *pdevice)
1247 {
1248 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1249 /* Skylake and later always use softpin */
1250 assert(pdevice->use_softpin);
1251 return true;
1252 #elif defined(GFX_VERx10) && GFX_VERx10 < 80
1253 /* Haswell and earlier never use softpin */
1254 assert(!pdevice->use_softpin);
1255 return false;
1256 #else
1257 /* If we don't have a GFX_VERx10 #define, we need to look at the physical
1258 * device. Also, for GFX version 8, we need to look at the physical
1259 * device because Broadwell softpins but Cherryview doesn't.
1260 */
1261 return pdevice->use_softpin;
1262 #endif
1263 }
1264
1265 static inline struct anv_state_pool *
1266 anv_binding_table_pool(struct anv_device *device)
1267 {
1268 if (anv_use_softpin(device->physical))
1269 return &device->binding_table_pool;
1270 else
1271 return &device->surface_state_pool;
1272 }
1273
1274 static inline struct anv_state
1275 anv_binding_table_pool_alloc(struct anv_device *device)
1276 {
1277 if (anv_use_softpin(device->physical))
1278 return anv_state_pool_alloc(&device->binding_table_pool,
1279 device->binding_table_pool.block_size, 0);
1280 else
1281 return anv_state_pool_alloc_back(&device->surface_state_pool);
1282 }
1283
1284 static inline void
1285 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1286 anv_state_pool_free(anv_binding_table_pool(device), state);
1287 }
1288
1289 static inline uint32_t
1290 anv_mocs(const struct anv_device *device,
1291 const struct anv_bo *bo,
1292 isl_surf_usage_flags_t usage)
1293 {
1294 return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
1295 }
1296
1297 void anv_device_init_blorp(struct anv_device *device);
1298 void anv_device_finish_blorp(struct anv_device *device);
1299
1300 void _anv_device_report_lost(struct anv_device *device);
1301 VkResult _anv_device_set_lost(struct anv_device *device,
1302 const char *file, int line,
1303 const char *msg, ...)
1304 anv_printflike(4, 5);
1305 VkResult _anv_queue_set_lost(struct anv_queue *queue,
1306 const char *file, int line,
1307 const char *msg, ...)
1308 anv_printflike(4, 5);
1309 #define anv_device_set_lost(dev, ...) \
1310 _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
1311 #define anv_queue_set_lost(queue, ...) \
1312 (queue)->device->has_thread_submit ? \
1313 _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
1314 _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
1315
1316 static inline bool
1317 anv_device_is_lost(struct anv_device *device)
1318 {
1319 int lost = p_atomic_read(&device->_lost);
1320 if (unlikely(lost && !device->lost_reported))
1321 _anv_device_report_lost(device);
1322 return lost;
1323 }
1324
1325 VkResult anv_device_query_status(struct anv_device *device);
1326
1327
1328 enum anv_bo_alloc_flags {
1329 /** Specifies that the BO must have a 32-bit address
1330 *
1331 * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
1332 */
1333 ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
1334
1335 /** Specifies that the BO may be shared externally */
1336 ANV_BO_ALLOC_EXTERNAL = (1 << 1),
1337
1338 /** Specifies that the BO should be mapped */
1339 ANV_BO_ALLOC_MAPPED = (1 << 2),
1340
1341 /** Specifies that the BO should be snooped so we get coherency */
1342 ANV_BO_ALLOC_SNOOPED = (1 << 3),
1343
1344 /** Specifies that the BO should be captured in error states */
1345 ANV_BO_ALLOC_CAPTURE = (1 << 4),
1346
1347 /** Specifies that the BO will have an address assigned by the caller
1348 *
1349 * Such BOs do not exist in any VMA heap.
1350 */
1351 ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
1352
1353 /** Enables implicit synchronization on the BO
1354 *
1355 * This is the opposite of EXEC_OBJECT_ASYNC.
1356 */
1357 ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
1358
1359 /** Enables implicit synchronization on the BO
1360 *
1361 * This is equivalent to EXEC_OBJECT_WRITE.
1362 */
1363 ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
1364
1365 /** Has an address which is visible to the client */
1366 ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
1367
1368 /** This buffer has implicit CCS data attached to it */
1369 ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
1370
1371 /** This buffer is allocated from local memory */
1372 ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
1373 };
1374
1375 VkResult anv_device_alloc_bo(struct anv_device *device,
1376 const char *name, uint64_t size,
1377 enum anv_bo_alloc_flags alloc_flags,
1378 uint64_t explicit_address,
1379 struct anv_bo **bo);
1380 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1381 void *host_ptr, uint32_t size,
1382 enum anv_bo_alloc_flags alloc_flags,
1383 uint64_t client_address,
1384 struct anv_bo **bo_out);
1385 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1386 enum anv_bo_alloc_flags alloc_flags,
1387 uint64_t client_address,
1388 struct anv_bo **bo);
1389 VkResult anv_device_export_bo(struct anv_device *device,
1390 struct anv_bo *bo, int *fd_out);
1391 void anv_device_release_bo(struct anv_device *device,
1392 struct anv_bo *bo);
1393
1394 static inline struct anv_bo *
1395 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
1396 {
1397 return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
1398 }
1399
1400 VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
1401 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1402 int64_t timeout);
1403
1404 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
1405 uint32_t exec_flags,
1406 const VkDeviceQueueCreateInfo *pCreateInfo,
1407 uint32_t index_in_family);
1408 void anv_queue_finish(struct anv_queue *queue);
1409
1410 VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
1411 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
1412 struct anv_batch *batch);
1413
1414 uint64_t anv_gettime_ns(void);
1415 uint64_t anv_get_absolute_timeout(uint64_t timeout);
1416
1417 void* anv_gem_mmap(struct anv_device *device,
1418 uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
1419 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
1420 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
1421 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
1422 uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
1423 uint32_t num_regions,
1424 struct drm_i915_gem_memory_class_instance *regions);
1425 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
1426 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
1427 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
1428 int anv_gem_execbuffer(struct anv_device *device,
1429 struct drm_i915_gem_execbuffer2 *execbuf);
1430 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
1431 uint32_t stride, uint32_t tiling);
1432 int anv_gem_create_context(struct anv_device *device);
1433 int anv_gem_create_context_engines(struct anv_device *device,
1434 const struct drm_i915_query_engine_info *info,
1435 int num_engines,
1436 uint16_t *engine_classes);
1437 bool anv_gem_has_context_priority(int fd);
1438 int anv_gem_destroy_context(struct anv_device *device, int context);
1439 int anv_gem_set_context_param(int fd, int context, uint32_t param,
1440 uint64_t value);
1441 int anv_gem_get_context_param(int fd, int context, uint32_t param,
1442 uint64_t *value);
1443 int anv_gem_get_param(int fd, uint32_t param);
1444 uint64_t anv_gem_get_drm_cap(int fd, uint32_t capability);
1445 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
1446 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
1447 int anv_gem_context_get_reset_stats(int fd, int context,
1448 uint32_t *active, uint32_t *pending);
1449 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
1450 int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
1451 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
1452 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
1453 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
1454 uint32_t read_domains, uint32_t write_domain);
1455 int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);
1456 uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
1457 void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
1458 int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
1459 uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
1460 int anv_gem_syncobj_export_sync_file(struct anv_device *device,
1461 uint32_t handle);
1462 int anv_gem_syncobj_import_sync_file(struct anv_device *device,
1463 uint32_t handle, int fd);
1464 void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
1465 bool anv_gem_supports_syncobj_wait(int fd);
1466 int anv_gem_syncobj_wait(struct anv_device *device,
1467 const uint32_t *handles, uint32_t num_handles,
1468 int64_t abs_timeout_ns, bool wait_all);
1469 int anv_gem_syncobj_timeline_wait(struct anv_device *device,
1470 const uint32_t *handles, const uint64_t *points,
1471 uint32_t num_items, int64_t abs_timeout_ns,
1472 bool wait_all, bool wait_materialize);
1473 int anv_gem_syncobj_timeline_signal(struct anv_device *device,
1474 const uint32_t *handles, const uint64_t *points,
1475 uint32_t num_items);
1476 int anv_gem_syncobj_timeline_query(struct anv_device *device,
1477 const uint32_t *handles, uint64_t *points,
1478 uint32_t num_items);
1479 int anv_i915_query(int fd, uint64_t query_id, void *buffer,
1480 int32_t *buffer_len);
1481 struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);
1482 int anv_gem_count_engines(const struct drm_i915_query_engine_info *info,
1483 uint16_t engine_class);
1484
1485 uint64_t anv_vma_alloc(struct anv_device *device,
1486 uint64_t size, uint64_t align,
1487 enum anv_bo_alloc_flags alloc_flags,
1488 uint64_t client_address);
1489 void anv_vma_free(struct anv_device *device,
1490 uint64_t address, uint64_t size);
1491
1492 struct anv_reloc_list {
1493 uint32_t num_relocs;
1494 uint32_t array_length;
1495 struct drm_i915_gem_relocation_entry * relocs;
1496 struct anv_bo ** reloc_bos;
1497 uint32_t dep_words;
1498 BITSET_WORD * deps;
1499 };
1500
1501 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
1502 const VkAllocationCallbacks *alloc);
1503 void anv_reloc_list_finish(struct anv_reloc_list *list,
1504 const VkAllocationCallbacks *alloc);
1505
1506 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
1507 const VkAllocationCallbacks *alloc,
1508 uint32_t offset, struct anv_bo *target_bo,
1509 uint32_t delta, uint64_t *address_u64_out);
1510
1511 VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
1512 const VkAllocationCallbacks *alloc,
1513 struct anv_bo *target_bo);
1514
1515 struct anv_batch_bo {
1516 /* Link in the anv_cmd_buffer.owned_batch_bos list */
1517 struct list_head link;
1518
1519 struct anv_bo * bo;
1520
1521 /* Bytes actually consumed in this batch BO */
1522 uint32_t length;
1523
1524 /* When this batch BO is used as part of a primary batch buffer, this
1525 * tracks whether it is chained to another primary batch buffer.
1526 *
1527 * If this is the case, the relocation list's last entry points to the
1528 * location of the MI_BATCH_BUFFER_START chaining to the next batch.
1529 */
1530 bool chained;
1531
1532 struct anv_reloc_list relocs;
1533 };
1534
1535 struct anv_batch {
1536 const VkAllocationCallbacks * alloc;
1537
1538 struct anv_address start_addr;
1539
1540 void * start;
1541 void * end;
1542 void * next;
1543
1544 struct anv_reloc_list * relocs;
1545
1546 /* This callback is called (with the associated user data) in the event
1547 * that the batch runs out of space.
1548 */
1549 VkResult (*extend_cb)(struct anv_batch *, void *);
1550 void * user_data;
1551
1552 /**
1553 * Current error status of the command buffer. Used to track inconsistent
1554 * or incomplete command buffer states that are the consequence of run-time
1555 * errors such as out of memory scenarios. We want to track this in the
1556 * batch because the command buffer object is not visible to some parts
1557 * of the driver.
1558 */
1559 VkResult status;
1560 };
1561
1562 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1563 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1564 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1565
1566 static inline void
1567 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1568 void *map, size_t size)
1569 {
1570 batch->start_addr = addr;
1571 batch->next = batch->start = map;
1572 batch->end = map + size;
1573 }
1574
1575 static inline VkResult
1576 anv_batch_set_error(struct anv_batch *batch, VkResult error)
1577 {
1578 assert(error != VK_SUCCESS);
1579 if (batch->status == VK_SUCCESS)
1580 batch->status = error;
1581 return batch->status;
1582 }
1583
1584 static inline bool
1585 anv_batch_has_error(struct anv_batch *batch)
1586 {
1587 return batch->status != VK_SUCCESS;
1588 }
1589
1590 static inline uint64_t
1591 anv_batch_emit_reloc(struct anv_batch *batch,
1592 void *location, struct anv_bo *bo, uint32_t delta)
1593 {
1594 uint64_t address_u64 = 0;
1595 VkResult result;
1596
1597 if (ANV_ALWAYS_SOFTPIN) {
1598 address_u64 = bo->offset + delta;
1599 result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
1600 } else {
1601 result = anv_reloc_list_add(batch->relocs, batch->alloc,
1602 location - batch->start, bo, delta,
1603 &address_u64);
1604 }
1605 if (unlikely(result != VK_SUCCESS)) {
1606 anv_batch_set_error(batch, result);
1607 return 0;
1608 }
1609
1610 return address_u64;
1611 }
1612
1613
1614 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
1615
1616 static inline struct anv_address
1617 anv_address_from_u64(uint64_t addr_u64)
1618 {
1619 assert(addr_u64 == intel_canonical_address(addr_u64));
1620 return (struct anv_address) {
1621 .bo = NULL,
1622 .offset = addr_u64,
1623 };
1624 }
1625
1626 static inline bool
1627 anv_address_is_null(struct anv_address addr)
1628 {
1629 return addr.bo == NULL && addr.offset == 0;
1630 }
1631
1632 static inline uint64_t
1633 anv_address_physical(struct anv_address addr)
1634 {
1635 if (addr.bo && (ANV_ALWAYS_SOFTPIN ||
1636 (addr.bo->flags & EXEC_OBJECT_PINNED))) {
1637 assert(addr.bo->flags & EXEC_OBJECT_PINNED);
1638 return intel_canonical_address(addr.bo->offset + addr.offset);
1639 } else {
1640 return intel_canonical_address(addr.offset);
1641 }
1642 }
1643
1644 static inline struct anv_address
1645 anv_address_add(struct anv_address addr, uint64_t offset)
1646 {
1647 addr.offset += offset;
1648 return addr;
1649 }
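/* Example: computing the canonical GPU address 256 bytes into a pinned BO.
 * A sketch only; `bo` stands in for any softpinned anv_bo:
 *
 *    struct anv_address addr = { .bo = bo, .offset = 0 };
 *    uint64_t gpu_addr = anv_address_physical(anv_address_add(addr, 256));
 */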
1650
1651 static inline void
1652 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1653 {
1654 unsigned reloc_size = 0;
1655 if (device->info.ver >= 8) {
1656 reloc_size = sizeof(uint64_t);
1657 *(uint64_t *)p = intel_canonical_address(v);
1658 } else {
1659 reloc_size = sizeof(uint32_t);
1660 *(uint32_t *)p = v;
1661 }
1662
1663 if (flush && !device->info.has_llc)
1664 intel_flush_range(p, reloc_size);
1665 }
1666
1667 static inline uint64_t
1668 _anv_combine_address(struct anv_batch *batch, void *location,
1669 const struct anv_address address, uint32_t delta)
1670 {
1671 if (address.bo == NULL) {
1672 return address.offset + delta;
1673 } else if (batch == NULL) {
1674 assert(address.bo->flags & EXEC_OBJECT_PINNED);
1675 return anv_address_physical(anv_address_add(address, delta));
1676 } else {
1677 assert(batch->start <= location && location < batch->end);
1678 /* i915 relocations are signed. */
1679 assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
1680 return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1681 }
1682 }
1683
1684 #define __gen_address_type struct anv_address
1685 #define __gen_user_data struct anv_batch
1686 #define __gen_combine_address _anv_combine_address
1687
1688 /* Wrapper macros needed to work around preprocessor argument issues. In
1689 * particular, arguments don't get pre-evaluated if they are concatenated.
1690 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1691 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1692 * We can work around this easily enough with these helpers.
1693 */
1694 #define __anv_cmd_length(cmd) cmd ## _length
1695 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1696 #define __anv_cmd_header(cmd) cmd ## _header
1697 #define __anv_cmd_pack(cmd) cmd ## _pack
1698 #define __anv_reg_num(reg) reg ## _num
1699
1700 #define anv_pack_struct(dst, struc, ...) do { \
1701 struct struc __template = { \
1702 __VA_ARGS__ \
1703 }; \
1704 __anv_cmd_pack(struc)(NULL, dst, &__template); \
1705 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1706 } while (0)
1707
1708 #define anv_batch_emitn(batch, n, cmd, ...) ({ \
1709 void *__dst = anv_batch_emit_dwords(batch, n); \
1710 if (__dst) { \
1711 struct cmd __template = { \
1712 __anv_cmd_header(cmd), \
1713 .DWordLength = n - __anv_cmd_length_bias(cmd), \
1714 __VA_ARGS__ \
1715 }; \
1716 __anv_cmd_pack(cmd)(batch, __dst, &__template); \
1717 } \
1718 __dst; \
1719 })
1720
1721 #define anv_batch_emit_merge(batch, dwords0, dwords1) \
1722 do { \
1723 uint32_t *dw; \
1724 \
1725 STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
1726 dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \
1727 if (!dw) \
1728 break; \
1729 for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \
1730 dw[i] = (dwords0)[i] | (dwords1)[i]; \
1731 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1732 } while (0)
1733
1734 #define anv_batch_emit(batch, cmd, name) \
1735 for (struct cmd name = { __anv_cmd_header(cmd) }, \
1736 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
1737 __builtin_expect(_dst != NULL, 1); \
1738 ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \
1739 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1740 _dst = NULL; \
1741 }))
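/* Example usage of anv_batch_emit(). Illustrative only: GENX() and the packet
 * name and field below come from the generated genxml headers, not from this
 * file:
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 *
 * The for-loop trick packs `pc` into the batch when the block exits, and the
 * body never runs if anv_batch_emit_dwords() fails and returns NULL.
 */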
1742
1743 #define anv_batch_write_reg(batch, reg, name) \
1744 for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \
1745 ({ \
1746 uint32_t _dw[__anv_cmd_length(reg)]; \
1747 __anv_cmd_pack(reg)(NULL, _dw, &name); \
1748 for (unsigned i = 0; i < __anv_cmd_length(reg); i++) { \
1749 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
1750 lri.RegisterOffset = __anv_reg_num(reg); \
1751 lri.DataDWord = _dw[i]; \
1752 } \
1753 } \
1754 _cont = NULL; \
1755 }))
1756
1757 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1758 /* #define __gen_get_batch_address anv_batch_address */
1759 /* #define __gen_address_value anv_address_physical */
1760 /* #define __gen_address_offset anv_address_add */
1761
1762 struct anv_device_memory {
1763 struct vk_object_base base;
1764
1765 struct list_head link;
1766
1767 struct anv_bo * bo;
1768 const struct anv_memory_type * type;
1769 VkDeviceSize map_size;
1770 void * map;
1771
1772 /* The map, from the user PoV is map + map_delta */
1773 uint32_t map_delta;
1774
1775 /* If set, we are holding reference to AHardwareBuffer
1776 * which we must release when memory is freed.
1777 */
1778 struct AHardwareBuffer * ahw;
1779
1780 /* If set, this memory comes from a host pointer. */
1781 void * host_ptr;
1782 };
1783
1784 /**
1785 * Header for Vertex URB Entry (VUE)
1786 */
1787 struct anv_vue_header {
1788 uint32_t Reserved;
1789 uint32_t RTAIndex; /* RenderTargetArrayIndex */
1790 uint32_t ViewportIndex;
1791 float PointWidth;
1792 };
1793
1794 /** Struct representing a sampled image descriptor
1795 *
1796  * This descriptor layout is used for sampled images, bare samplers, and
1797 * combined image/sampler descriptors.
1798 */
1799 struct anv_sampled_image_descriptor {
1800 /** Bindless image handle
1801 *
1802 * This is expected to already be shifted such that the 20-bit
1803 * SURFACE_STATE table index is in the top 20 bits.
1804 */
1805 uint32_t image;
1806
1807 /** Bindless sampler handle
1808 *
1809 * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1810 * to the dynamic state base address.
1811 */
1812 uint32_t sampler;
1813 };
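/* For example, a SURFACE_STATE table index `idx` stored in the `image` field
 * above would be written as (idx << 12) so that the 20-bit index occupies
 * bits [31:12]. Illustrative only; the actual packing is done by the
 * descriptor set code.
 */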
1814
1815 struct anv_texture_swizzle_descriptor {
1816 /** Texture swizzle
1817 *
1818 * See also nir_intrinsic_channel_select_intel
1819 */
1820 uint8_t swizzle[4];
1821
1822 /** Unused padding to ensure the struct is a multiple of 64 bits */
1823 uint32_t _pad;
1824 };
1825
1826 /** Struct representing a storage image descriptor */
1827 struct anv_storage_image_descriptor {
1828 /** Bindless image handles
1829 *
1830 * These are expected to already be shifted such that the 20-bit
1831 * SURFACE_STATE table index is in the top 20 bits.
1832 */
1833 uint32_t vanilla;
1834 uint32_t lowered;
1835 };
1836
1837 /** Struct representing an address/range descriptor
1838 *
1839 * The fields of this struct correspond directly to the data layout of
1840 * nir_address_format_64bit_bounded_global addresses. The last field is the
1841  * offset portion of the NIR address; it must be zero so that loading the
1842  * descriptor yields a pointer to the start of the range.
1843 */
1844 struct anv_address_range_descriptor {
1845 uint64_t address;
1846 uint32_t range;
1847 uint32_t zero;
1848 };
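/* As a sketch, describing a whole buffer with this layout would look like
 * (values illustrative):
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = anv_address_physical(buffer->address),
 *       .range   = buffer->size,
 *       .zero    = 0,
 *    };
 *
 * matching the (address, range, offset) layout the NIR lowering expects.
 */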
1849
1850 enum anv_descriptor_data {
1851 /** The descriptor contains a BTI reference to a surface state */
1852 ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
1853 /** The descriptor contains a BTI reference to a sampler state */
1854 ANV_DESCRIPTOR_SAMPLER_STATE = (1 << 1),
1855 /** The descriptor contains an actual buffer view */
1856 ANV_DESCRIPTOR_BUFFER_VIEW = (1 << 2),
1857 /** The descriptor contains auxiliary image layout data */
1858 ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
1859    /** The descriptor contains inline uniform data */
1860 ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1861 /** anv_address_range_descriptor with a buffer address and range */
1862 ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
1863 /** Bindless surface handle */
1864 ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
1865 /** Storage image handles */
1866 ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
1867    /** Texture swizzle data */
1868 ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
1869 };
1870
1871 struct anv_descriptor_set_binding_layout {
1872 /* The type of the descriptors in this binding */
1873 VkDescriptorType type;
1874
1875 /* Flags provided when this binding was created */
1876 VkDescriptorBindingFlagsEXT flags;
1877
1878 /* Bitfield representing the type of data this descriptor contains */
1879 enum anv_descriptor_data data;
1880
1881 /* Maximum number of YCbCr texture/sampler planes */
1882 uint8_t max_plane_count;
1883
1884 /* Number of array elements in this binding (or size in bytes for inline
1885 * uniform data)
1886 */
1887 uint32_t array_size;
1888
1889    /* Index into the flattened descriptor set */
1890 uint32_t descriptor_index;
1891
1892 /* Index into the dynamic state array for a dynamic buffer */
1893 int16_t dynamic_offset_index;
1894
1895 /* Index into the descriptor set buffer views */
1896 int32_t buffer_view_index;
1897
1898 /* Offset into the descriptor buffer where this descriptor lives */
1899 uint32_t descriptor_offset;
1900
1901 /* Immutable samplers (or NULL if no immutable samplers) */
1902 struct anv_sampler **immutable_samplers;
1903 };
1904
1905 unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);
1906
1907 unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
1908 VkDescriptorType type);
1909
1910 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1911 const struct anv_descriptor_set_binding_layout *binding,
1912 bool sampler);
1913
1914 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1915 const struct anv_descriptor_set_binding_layout *binding,
1916 bool sampler);
1917
1918 struct anv_descriptor_set_layout {
1919 struct vk_object_base base;
1920
1921 /* Descriptor set layouts can be destroyed at almost any time */
1922 uint32_t ref_cnt;
1923
1924 /* Number of bindings in this descriptor set */
1925 uint32_t binding_count;
1926
1927 /* Total number of descriptors */
1928 uint32_t descriptor_count;
1929
1930 /* Shader stages affected by this descriptor set */
1931 uint16_t shader_stages;
1932
1933 /* Number of buffer views in this descriptor set */
1934 uint32_t buffer_view_count;
1935
1936 /* Number of dynamic offsets used by this descriptor set */
1937 uint16_t dynamic_offset_count;
1938
1939 /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
1940 * this buffer
1941 */
1942 VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
1943
1944 /* Size of the descriptor buffer for this descriptor set */
1945 uint32_t descriptor_buffer_size;
1946
1947 /* Bindings in this descriptor set */
1948 struct anv_descriptor_set_binding_layout binding[0];
1949 };
1950
1951 void anv_descriptor_set_layout_destroy(struct anv_device *device,
1952 struct anv_descriptor_set_layout *layout);
1953
1954 static inline void
1955 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1956 {
1957 assert(layout && layout->ref_cnt >= 1);
1958 p_atomic_inc(&layout->ref_cnt);
1959 }
1960
1961 static inline void
1962 anv_descriptor_set_layout_unref(struct anv_device *device,
1963 struct anv_descriptor_set_layout *layout)
1964 {
1965 assert(layout && layout->ref_cnt >= 1);
1966 if (p_atomic_dec_zero(&layout->ref_cnt))
1967 anv_descriptor_set_layout_destroy(device, layout);
1968 }
1969
1970 struct anv_descriptor {
1971 VkDescriptorType type;
1972
1973 union {
1974 struct {
1975 VkImageLayout layout;
1976 struct anv_image_view *image_view;
1977 struct anv_sampler *sampler;
1978 };
1979
1980 struct {
1981 struct anv_buffer *buffer;
1982 uint64_t offset;
1983 uint64_t range;
1984 };
1985
1986 struct anv_buffer_view *buffer_view;
1987 };
1988 };
1989
1990 struct anv_descriptor_set {
1991 struct vk_object_base base;
1992
1993 struct anv_descriptor_pool *pool;
1994 struct anv_descriptor_set_layout *layout;
1995
1996    /* Amount of space occupied in the pool by this descriptor set. It can
1997 * be larger than the size of the descriptor set.
1998 */
1999 uint32_t size;
2000
2001 /* State relative to anv_descriptor_pool::bo */
2002 struct anv_state desc_mem;
2003 /* Surface state for the descriptor buffer */
2004 struct anv_state desc_surface_state;
2005
2006 /* Descriptor set address. */
2007 struct anv_address desc_addr;
2008
2009 uint32_t buffer_view_count;
2010 struct anv_buffer_view *buffer_views;
2011
2012    /* Link to the descriptor pool's desc_sets list. */
2013 struct list_head pool_link;
2014
2015 uint32_t descriptor_count;
2016 struct anv_descriptor descriptors[0];
2017 };
2018
2019 static inline bool
2020 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
2021 {
2022 return set->pool == NULL;
2023 }
2024
2025 struct anv_buffer_view {
2026 struct vk_object_base base;
2027
2028 enum isl_format format; /**< VkBufferViewCreateInfo::format */
2029 uint64_t range; /**< VkBufferViewCreateInfo::range */
2030
2031 struct anv_address address;
2032
2033 struct anv_state surface_state;
2034 struct anv_state storage_surface_state;
2035 struct anv_state lowered_storage_surface_state;
2036
2037 struct brw_image_param lowered_storage_image_param;
2038 };
2039
2040 struct anv_push_descriptor_set {
2041 struct anv_descriptor_set set;
2042
2043 /* Put this field right behind anv_descriptor_set so it fills up the
2044 * descriptors[0] field. */
2045 struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2046
2047 /** True if the descriptor set buffer has been referenced by a draw or
2048 * dispatch command.
2049 */
2050 bool set_used_on_gpu;
2051
2052 struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2053 };
2054
2055 static inline struct anv_address
2056 anv_descriptor_set_address(struct anv_descriptor_set *set)
2057 {
2058 if (anv_descriptor_set_is_push(set)) {
2059       /* We have to flag the push descriptor set as used on the GPU so
2060        * that the next time we push descriptors, we allocate new memory.
2061 */
2062 struct anv_push_descriptor_set *push_set =
2063 (struct anv_push_descriptor_set *)set;
2064 push_set->set_used_on_gpu = true;
2065 }
2066
2067 return set->desc_addr;
2068 }
2069
2070 struct anv_descriptor_pool {
2071 struct vk_object_base base;
2072
2073 uint32_t size;
2074 uint32_t next;
2075 uint32_t free_list;
2076
2077 struct anv_bo *bo;
2078 struct util_vma_heap bo_heap;
2079
2080 struct anv_state_stream surface_state_stream;
2081 void *surface_state_free_list;
2082
2083 struct list_head desc_sets;
2084
2085 char data[0];
2086 };
2087
2088 enum anv_descriptor_template_entry_type {
2089 ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
2090 ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
2091 ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
2092 };
2093
2094 struct anv_descriptor_template_entry {
2095 /* The type of descriptor in this entry */
2096 VkDescriptorType type;
2097
2098 /* Binding in the descriptor set */
2099 uint32_t binding;
2100
2101 /* Offset at which to write into the descriptor set binding */
2102 uint32_t array_element;
2103
2104 /* Number of elements to write into the descriptor set binding */
2105 uint32_t array_count;
2106
2107 /* Offset into the user provided data */
2108 size_t offset;
2109
2110 /* Stride between elements into the user provided data */
2111 size_t stride;
2112 };
2113
2114 struct anv_descriptor_update_template {
2115 struct vk_object_base base;
2116
2117 VkPipelineBindPoint bind_point;
2118
2119 /* The descriptor set this template corresponds to. This value is only
2120 * valid if the template was created with the templateType
2121 * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
2122 */
2123 uint8_t set;
2124
2125 /* Number of entries in this template */
2126 uint32_t entry_count;
2127
2128 /* Entries of the template */
2129 struct anv_descriptor_template_entry entries[0];
2130 };
2131
2132 size_t
2133 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
2134 uint32_t var_desc_count);
2135
2136 uint32_t
2137 anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
2138 uint32_t var_desc_count);
2139
2140 void
2141 anv_descriptor_set_write_image_view(struct anv_device *device,
2142 struct anv_descriptor_set *set,
2143 const VkDescriptorImageInfo * const info,
2144 VkDescriptorType type,
2145 uint32_t binding,
2146 uint32_t element);
2147
2148 void
2149 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2150 struct anv_descriptor_set *set,
2151 VkDescriptorType type,
2152 struct anv_buffer_view *buffer_view,
2153 uint32_t binding,
2154 uint32_t element);
2155
2156 void
2157 anv_descriptor_set_write_buffer(struct anv_device *device,
2158 struct anv_descriptor_set *set,
2159 struct anv_state_stream *alloc_stream,
2160 VkDescriptorType type,
2161 struct anv_buffer *buffer,
2162 uint32_t binding,
2163 uint32_t element,
2164 VkDeviceSize offset,
2165 VkDeviceSize range);
2166
2167 void
2168 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
2169 struct anv_descriptor_set *set,
2170 struct anv_acceleration_structure *accel,
2171 uint32_t binding,
2172 uint32_t element);
2173
2174 void
2175 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2176 struct anv_descriptor_set *set,
2177 uint32_t binding,
2178 const void *data,
2179 size_t offset,
2180 size_t size);
2181
2182 void
2183 anv_descriptor_set_write_template(struct anv_device *device,
2184 struct anv_descriptor_set *set,
2185 struct anv_state_stream *alloc_stream,
2186 const struct anv_descriptor_update_template *template,
2187 const void *data);
2188
2189 VkResult
2190 anv_descriptor_set_create(struct anv_device *device,
2191 struct anv_descriptor_pool *pool,
2192 struct anv_descriptor_set_layout *layout,
2193 uint32_t var_desc_count,
2194 struct anv_descriptor_set **out_set);
2195
2196 void
2197 anv_descriptor_set_destroy(struct anv_device *device,
2198 struct anv_descriptor_pool *pool,
2199 struct anv_descriptor_set *set);
2200
2201 #define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5)
2202 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4)
2203 #define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
2204 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
2205 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
2206 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2207
2208 struct anv_pipeline_binding {
2209 /** Index in the descriptor set
2210 *
2211 * This is a flattened index; the descriptor set layout is already taken
2212 * into account.
2213 */
2214 uint32_t index;
2215
2216 /** The descriptor set this surface corresponds to.
2217 *
2218    * The special ANV_DESCRIPTOR_SET_* values above indicate that this
2219 * binding is not a normal descriptor set but something else.
2220 */
2221 uint8_t set;
2222
2223 union {
2224 /** Plane in the binding index for images */
2225 uint8_t plane;
2226
2227 /** Input attachment index (relative to the subpass) */
2228 uint8_t input_attachment_index;
2229
2230 /** Dynamic offset index (for dynamic UBOs and SSBOs) */
2231 uint8_t dynamic_offset_index;
2232 };
2233
2234 /** For a storage image, whether it requires a lowered surface */
2235 uint8_t lowered_storage_surface;
2236
2237 /** Pad to 64 bits so that there are no holes and we can safely memcmp
2238 * assuming POD zero-initialization.
2239 */
2240 uint8_t pad;
2241 };
2242
2243 struct anv_push_range {
2244 /** Index in the descriptor set */
2245 uint32_t index;
2246
2247 /** Descriptor set index */
2248 uint8_t set;
2249
2250 /** Dynamic offset index (for dynamic UBOs) */
2251 uint8_t dynamic_offset_index;
2252
2253 /** Start offset in units of 32B */
2254 uint8_t start;
2255
2256 /** Range in units of 32B */
2257 uint8_t length;
2258 };
2259
2260 struct anv_pipeline_layout {
2261 struct vk_object_base base;
2262
2263 struct {
2264 struct anv_descriptor_set_layout *layout;
2265 uint32_t dynamic_offset_start;
2266 } set[MAX_SETS];
2267
2268 uint32_t num_sets;
2269
2270 unsigned char sha1[20];
2271 };
2272
2273 struct anv_buffer {
2274 struct vk_object_base base;
2275
2276 struct anv_device * device;
2277 VkDeviceSize size;
2278
2279 VkBufferCreateFlags create_flags;
2280 VkBufferUsageFlags usage;
2281
2282 /* Set when bound */
2283 struct anv_address address;
2284 };
2285
2286 static inline uint64_t
2287 anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
2288 {
2289 assert(offset <= buffer->size);
2290 if (range == VK_WHOLE_SIZE) {
2291 return buffer->size - offset;
2292 } else {
2293 assert(range + offset >= range);
2294 assert(range + offset <= buffer->size);
2295 return range;
2296 }
2297 }
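/* For example, with buffer->size == 1024 and offset == 256, a range of
 * VK_WHOLE_SIZE resolves to 768 bytes, while an explicit range of 512 is
 * returned unchanged (after the bounds asserts).
 */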
2298
2299 enum anv_cmd_dirty_bits {
2300 ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
2301 ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
2302 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
2303 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
2304 ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
2305 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
2306 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
2307 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
2308 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
2309 ANV_CMD_DIRTY_PIPELINE = 1 << 9,
2310 ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
2311 ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
2312 ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
2313 ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
2314 ANV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
2315 ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
2316 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
2317 ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
2318 ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
2319 ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
2320 ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
2321 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
2322 ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
2323 ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
2324 ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
2325 ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
2326 ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE = 1 << 26, /* VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR */
2327 ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1 << 27, /* VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT */
2328 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1 << 28, /* VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT */
2329 ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1 << 29, /* VK_DYNAMIC_STATE_LOGIC_OP_EXT */
2330 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1 << 30, /* VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT */
2331 };
2332 typedef uint32_t anv_cmd_dirty_mask_t;
2333
2334 #define ANV_CMD_DIRTY_DYNAMIC_ALL \
2335 (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT | \
2336 ANV_CMD_DIRTY_DYNAMIC_SCISSOR | \
2337 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | \
2338 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | \
2339 ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | \
2340 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | \
2341 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | \
2342 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | \
2343 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | \
2344 ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE | \
2345 ANV_CMD_DIRTY_DYNAMIC_CULL_MODE | \
2346 ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE | \
2347 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | \
2348 ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
2349 ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | \
2350 ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | \
2351 ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | \
2352 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | \
2353 ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | \
2354 ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP | \
2355 ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS | \
2356 ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | \
2357 ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE | \
2358 ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | \
2359 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE | \
2360 ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP | \
2361 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)
2362
2363 static inline enum anv_cmd_dirty_bits
2364 anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
2365 {
2366 switch (vk_state) {
2367 case VK_DYNAMIC_STATE_VIEWPORT:
2368 case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
2369 return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
2370 case VK_DYNAMIC_STATE_SCISSOR:
2371 case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
2372 return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
2373 case VK_DYNAMIC_STATE_LINE_WIDTH:
2374 return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2375 case VK_DYNAMIC_STATE_DEPTH_BIAS:
2376 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
2377 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2378 return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
2379 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2380 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
2381 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2382 return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2383 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2384 return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2385 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2386 return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2387 case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
2388 return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
2389 case VK_DYNAMIC_STATE_CULL_MODE_EXT:
2390 return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
2391 case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
2392 return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
2393 case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
2394 return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
2395 case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
2396 return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
2397 case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
2398 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
2399 case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
2400 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
2401 case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
2402 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
2403 case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
2404 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
2405 case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
2406 return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
2407 case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
2408 return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
2409 case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
2410 return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
2411 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2412 return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
2413 case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
2414 return ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
2415 case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT:
2416 return ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
2417 case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT:
2418 return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE;
2419 case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
2420 return ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
2421 case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT:
2422 return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
2423 default:
2424 assert(!"Unsupported dynamic state");
2425 return 0;
2426 }
2427 }
2428
2429
2430 enum anv_pipe_bits {
2431 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
2432 ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
2433 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
2434 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
2435 ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
2436 ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
2437 ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6),
2438 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
2439 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
2440 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
2441 ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
2442
2443 /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
2444 * cache work has completed. Available on Gfx12+. For earlier Gfx we
2445 * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
2446 */
2447 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14),
2448 ANV_PIPE_CS_STALL_BIT = (1 << 20),
2449 ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),
2450
2451 /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
2452 * a flush has happened but not a CS stall. The next time we do any sort
2453 * of invalidation we need to insert a CS stall at that time. Otherwise,
2454 * we would have to CS stall on every flush which could be bad.
2455 */
2456 ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22),
2457
2458 /* This bit does not exist directly in PIPE_CONTROL. It means that render
2459 * target operations related to transfer commands with VkBuffer as
2460 * destination are ongoing. Some operations like copies on the command
2461 * streamer might need to be aware of this to trigger the appropriate stall
2462 * before they can proceed with the copy.
2463 */
2464 ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23),
2465
2466 /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
2467 * AUX-TT data has changed and we need to invalidate AUX-TT data. This is
2468 * done by writing the AUX-TT register.
2469 */
2470 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 24),
2471
2472 /* This bit does not exist directly in PIPE_CONTROL. It means that a
2473 * PIPE_CONTROL with a post-sync operation will follow. This is used to
2474 * implement a workaround for Gfx9.
2475 */
2476 ANV_PIPE_POST_SYNC_BIT = (1 << 25),
2477 };
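/* A sketch of the reinterpretation described for
 * ANV_PIPE_HDC_PIPELINE_FLUSH_BIT above (illustrative only; the real
 * translation happens where the PIPE_CONTROLs are emitted):
 *
 *    if (device->info.ver < 12 && (bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT)) {
 *       bits &= ~ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
 *       bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
 *    }
 */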
2478
2479 #define ANV_PIPE_FLUSH_BITS ( \
2480 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2481 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2482 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2483 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2484 ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2485
2486 #define ANV_PIPE_STALL_BITS ( \
2487 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2488 ANV_PIPE_DEPTH_STALL_BIT | \
2489 ANV_PIPE_CS_STALL_BIT)
2490
2491 #define ANV_PIPE_INVALIDATE_BITS ( \
2492 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2493 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2494 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2495 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2496 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2497 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2498 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2499
2500 static inline enum anv_pipe_bits
2501 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
2502 VkAccessFlags2KHR flags)
2503 {
2504 enum anv_pipe_bits pipe_bits = 0;
2505
2506 u_foreach_bit64(b, flags) {
2507 switch ((VkAccessFlags2KHR)BITFIELD64_BIT(b)) {
2508 case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
2509 case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
2510          /* We're transitioning a buffer that was previously used as a write
2511           * destination through the data port. To make its content available
2512           * to future operations, flush the HDC pipeline.
2513 */
2514 pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2515 break;
2516 case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
2517          /* We're transitioning a buffer that was previously used as a render
2518 * target. To make its content available to future operations, flush
2519 * the render target cache.
2520 */
2521 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2522 break;
2523 case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
2524          /* We're transitioning a buffer that was previously used as a depth
2525 * buffer. To make its content available to future operations, flush
2526 * the depth cache.
2527 */
2528 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2529 break;
2530 case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
2531 /* We're transitioning a buffer that was previously used as a
2532 * transfer write destination. Generic write operations include color
2533           * & depth operations as well as buffer operations like:
2534 * - vkCmdClearColorImage()
2535 * - vkCmdClearDepthStencilImage()
2536 * - vkCmdBlitImage()
2537 * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2538 *
2539 * Most of these operations are implemented using Blorp which writes
2540 * through the render target, so flush that cache to make it visible
2541 * to future operations. And for depth related operations we also
2542 * need to flush the depth cache.
2543 */
2544 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2545 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2546 break;
2547 case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
2548 /* We're transitioning a buffer for generic write operations. Flush
2549 * all the caches.
2550 */
2551 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2552 break;
2553 case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
2554          /* We're transitioning a buffer for access by the CPU. Invalidate
2555           * all the caches. Since the data and tile caches don't have an
2556           * invalidate operation, we are forced to flush those as well.
2557 */
2558 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2559 pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2560 break;
2561 case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2562 case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
2563          /* We're transitioning a buffer written either from the VS stage or
2564           * from the command streamer (see CmdEndTransformFeedbackEXT); we
2565           * just need to stall the CS.
2566 */
2567 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2568 break;
2569 default:
2570 break; /* Nothing to do */
2571 }
2572 }
2573
2574 return pipe_bits;
2575 }
2576
2577 static inline enum anv_pipe_bits
2578 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
2579 VkAccessFlags2KHR flags)
2580 {
2581 enum anv_pipe_bits pipe_bits = 0;
2582
2583 u_foreach_bit64(b, flags) {
2584 switch ((VkAccessFlags2KHR)BITFIELD64_BIT(b)) {
2585 case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
2586 /* Indirect draw commands take a buffer as input that we're going to
2587 * read from the command streamer to load some of the HW registers
2588 * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2589 * command streamer stall so that all the cache flushes have
2590 * completed before the command streamer loads from memory.
2591 */
2592 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2593 /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2594 * through a vertex buffer, so invalidate that cache.
2595 */
2596 pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2597          /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
2598           * UBO from the buffer, so we need to invalidate the constant cache.
2599 */
2600 pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2601 pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2602          /* A tile cache flush is needed for CmdDispatchIndirect since the
2603           * command streamer and vertex fetch aren't L3 coherent.
2604 */
2605 pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2606 break;
2607 case VK_ACCESS_2_INDEX_READ_BIT_KHR:
2608 case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
2609          /* We're transitioning a buffer to be used as input for vkCmdDraw*
2610 * commands, so we invalidate the VF cache to make sure there is no
2611 * stale data when we start rendering.
2612 */
2613 pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2614 break;
2615 case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
2616          /* We're transitioning a buffer to be used as uniform data. Because
2617           * uniforms are accessed through the data port & sampler, we need to
2618 * invalidate the texture cache (sampler) & constant cache (data
2619 * port) to avoid stale data.
2620 */
2621 pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2622 if (device->physical->compiler->indirect_ubos_use_sampler)
2623 pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2624 else
2625 pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2626 break;
2627 case VK_ACCESS_2_SHADER_READ_BIT_KHR:
2628 case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
2629 case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
2630 /* Transitioning a buffer to be read through the sampler, so
2631           * invalidate the texture cache; we don't want any stale data.
2632 */
2633 pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2634 break;
2635 case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
2636 /* Transitioning a buffer for generic read, invalidate all the
2637 * caches.
2638 */
2639 pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2640 break;
2641 case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
2642 /* Generic write, make sure all previously written things land in
2643 * memory.
2644 */
2645 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2646 break;
2647 case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
2648 case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
2649 /* Transitioning a buffer for conditional rendering or transform
2650 * feedback. We'll load the content of this buffer into HW registers
2651 * using the command streamer, so we need to stall the command
2652           * streamer to make sure any in-flight flush operations have
2653           * completed.
2654 */
2655 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2656 pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2657 pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2658 break;
2659 case VK_ACCESS_2_HOST_READ_BIT_KHR:
2660          /* We're transitioning a buffer that was written by the CPU. Flush
2661 * all the caches.
2662 */
2663 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2664 break;
2665 default:
2666 break; /* Nothing to do */
2667 }
2668 }
2669
2670 return pipe_bits;
2671 }
2672
2673 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
2674 VK_IMAGE_ASPECT_COLOR_BIT | \
2675 VK_IMAGE_ASPECT_PLANE_0_BIT | \
2676 VK_IMAGE_ASPECT_PLANE_1_BIT | \
2677 VK_IMAGE_ASPECT_PLANE_2_BIT)
2678 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2679 VK_IMAGE_ASPECT_PLANE_0_BIT | \
2680 VK_IMAGE_ASPECT_PLANE_1_BIT | \
2681 VK_IMAGE_ASPECT_PLANE_2_BIT)
2682
2683 struct anv_vertex_binding {
2684 struct anv_buffer * buffer;
2685 VkDeviceSize offset;
2686 VkDeviceSize stride;
2687 VkDeviceSize size;
2688 };
2689
2690 struct anv_xfb_binding {
2691 struct anv_buffer * buffer;
2692 VkDeviceSize offset;
2693 VkDeviceSize size;
2694 };
2695
2696 struct anv_push_constants {
2697 /** Push constant data provided by the client through vkPushConstants */
2698 uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2699
2700 /** Dynamic offsets for dynamic UBOs and SSBOs */
2701 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2702
2703 /* Robust access pushed registers. */
2704 uint64_t push_reg_mask[MESA_SHADER_STAGES];
2705
2706 /** Pad out to a multiple of 32 bytes */
2707 uint32_t pad[2];
2708
2709 /* Base addresses for descriptor sets */
2710 uint64_t desc_sets[MAX_SETS];
2711
2712 struct {
2713 /** Base workgroup ID
2714 *
2715 * Used for vkCmdDispatchBase.
2716 */
2717 uint32_t base_work_group_id[3];
2718
2719 /** Subgroup ID
2720 *
2721 * This is never set by software but is implicitly filled out when
2722 * uploading the push constants for compute shaders.
2723 */
2724 uint32_t subgroup_id;
2725 } cs;
2726 };
2727
2728 struct anv_dynamic_state {
2729 struct {
2730 uint32_t count;
2731 VkViewport viewports[MAX_VIEWPORTS];
2732 } viewport;
2733
2734 struct {
2735 uint32_t count;
2736 VkRect2D scissors[MAX_SCISSORS];
2737 } scissor;
2738
2739 float line_width;
2740
2741 struct {
2742 float bias;
2743 float clamp;
2744 float slope;
2745 } depth_bias;
2746
2747 float blend_constants[4];
2748
2749 struct {
2750 float min;
2751 float max;
2752 } depth_bounds;
2753
2754 struct {
2755 uint32_t front;
2756 uint32_t back;
2757 } stencil_compare_mask;
2758
2759 struct {
2760 uint32_t front;
2761 uint32_t back;
2762 } stencil_write_mask;
2763
2764 struct {
2765 uint32_t front;
2766 uint32_t back;
2767 } stencil_reference;
2768
2769 struct {
2770 struct {
2771 VkStencilOp fail_op;
2772 VkStencilOp pass_op;
2773 VkStencilOp depth_fail_op;
2774 VkCompareOp compare_op;
2775 } front;
2776 struct {
2777 VkStencilOp fail_op;
2778 VkStencilOp pass_op;
2779 VkStencilOp depth_fail_op;
2780 VkCompareOp compare_op;
2781 } back;
2782 } stencil_op;
2783
2784 struct {
2785 uint32_t factor;
2786 uint16_t pattern;
2787 } line_stipple;
2788
2789 struct {
2790 uint32_t samples;
2791 VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
2792 } sample_locations;
2793
2794 VkExtent2D fragment_shading_rate;
2795
2796 VkCullModeFlags cull_mode;
2797 VkFrontFace front_face;
2798 VkPrimitiveTopology primitive_topology;
2799 bool depth_test_enable;
2800 bool depth_write_enable;
2801 VkCompareOp depth_compare_op;
2802 bool depth_bounds_test_enable;
2803 bool stencil_test_enable;
2804 bool raster_discard;
2805 bool depth_bias_enable;
2806 bool primitive_restart_enable;
2807 VkLogicOp logic_op;
2808 bool dyn_vbo_stride;
2809 bool dyn_vbo_size;
2810
2811 /* Bitfield, one bit per render target */
2812 uint8_t color_writes;
2813 };
2814
2815 extern const struct anv_dynamic_state default_dynamic_state;
2816
2817 uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
2818 const struct anv_dynamic_state *src,
2819 uint32_t copy_mask);
2820
2821 struct anv_surface_state {
2822 struct anv_state state;
2823 /** Address of the surface referred to by this state
2824 *
2825 * This address is relative to the start of the BO.
2826 */
2827 struct anv_address address;
2828 /* Address of the aux surface, if any
2829 *
2830 * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2831 *
2832 * With the exception of gfx8, the bottom 12 bits of this address' offset
2833 * include extra aux information.
2834 */
2835 struct anv_address aux_address;
2836 /* Address of the clear color, if any
2837 *
2838 * This address is relative to the start of the BO.
2839 */
2840 struct anv_address clear_address;
2841 };
2842
2843 /**
2844 * Attachment state when recording a renderpass instance.
2845 *
2846 * The clear value is valid only if there exists a pending clear.
2847 */
2848 struct anv_attachment_state {
2849 enum isl_aux_usage aux_usage;
2850 struct anv_surface_state color;
2851 struct anv_surface_state input;
2852
2853 VkImageLayout current_layout;
2854 VkImageLayout current_stencil_layout;
2855 VkImageAspectFlags pending_clear_aspects;
2856 VkImageAspectFlags pending_load_aspects;
2857 bool fast_clear;
2858 VkClearValue clear_value;
2859
2860 /* When multiview is active, attachments with a renderpass clear
2861 * operation have their respective layers cleared on the first
2862 * subpass that uses them, and only in that subpass. We keep track
2863 * of this using a bitfield to indicate which layers of an attachment
2864 * have not been cleared yet when multiview is active.
2865 */
2866 uint32_t pending_clear_views;
2867 struct anv_image_view * image_view;
2868 };
2869
2870 /** State tracking for vertex buffer flushes
2871 *
2872 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
2873 * addresses. If you happen to have two vertex buffers which get placed
2874 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2875 * collisions. In order to solve this problem, we track vertex address ranges
2876 * which are live in the cache and invalidate the cache if one ever exceeds 32
2877 * bits.
2878 */
2879 struct anv_vb_cache_range {
2880 /* Virtual address at which the live vertex buffer cache range starts for
2881 * this vertex buffer index.
2882 */
2883 uint64_t start;
2884
2885    /* Virtual address of the byte just past the end of the vertex buffer cache
2886     * range. This is exclusive, so end - start is the size of the range.
2887 */
2888 uint64_t end;
2889 };
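/* As an illustrative check (not necessarily the exact one the driver uses),
 * a live range needs a VF cache invalidation on Gfx8-9 whenever it crosses a
 * 4 GiB boundary:
 *
 *    bool crosses_4gib =
 *       range->end > range->start &&
 *       (range->start >> 32) != ((range->end - 1) >> 32);
 */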
2890
2891 /** State tracking for particular pipeline bind point
2892 *
2893 * This struct is the base struct for anv_cmd_graphics_state and
2894 * anv_cmd_compute_state. These are used to track state which is bound to a
2895 * particular type of pipeline. Generic state that applies per-stage such as
2896 * binding table offsets and push constants is tracked generically with a
2897 * per-stage array in anv_cmd_state.
2898 */
2899 struct anv_cmd_pipeline_state {
2900 struct anv_descriptor_set *descriptors[MAX_SETS];
2901 struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2902
2903 struct anv_push_constants push_constants;
2904
2905 /* Push constant state allocated when flushing push constants. */
2906 struct anv_state push_constants_state;
2907 };
2908
2909 /** State tracking for graphics pipeline
2910 *
2911 * This has anv_cmd_pipeline_state as a base struct to track things which get
2912 * bound to a graphics pipeline. Along with general pipeline bind point state
2913 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2914 * state which is graphics-specific.
2915 */
2916 struct anv_cmd_graphics_state {
2917 struct anv_cmd_pipeline_state base;
2918
2919 struct anv_graphics_pipeline *pipeline;
2920
2921 anv_cmd_dirty_mask_t dirty;
2922 uint32_t vb_dirty;
2923
2924 struct anv_vb_cache_range ib_bound_range;
2925 struct anv_vb_cache_range ib_dirty_range;
2926 struct anv_vb_cache_range vb_bound_ranges[33];
2927 struct anv_vb_cache_range vb_dirty_ranges[33];
2928
2929 VkShaderStageFlags push_constant_stages;
2930
2931 struct anv_dynamic_state dynamic;
2932
2933 uint32_t primitive_topology;
2934
2935 struct {
2936 struct anv_buffer *index_buffer;
2937 uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2938 uint32_t index_offset;
2939 } gfx7;
2940 };
2941
2942 enum anv_depth_reg_mode {
2943 ANV_DEPTH_REG_MODE_UNKNOWN = 0,
2944 ANV_DEPTH_REG_MODE_HW_DEFAULT,
2945 ANV_DEPTH_REG_MODE_D16,
2946 };
2947
2948 /** State tracking for compute pipeline
2949 *
2950 * This has anv_cmd_pipeline_state as a base struct to track things which get
2951 * bound to a compute pipeline. Along with general pipeline bind point state
2952 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2953 * state which is compute-specific.
2954 */
2955 struct anv_cmd_compute_state {
2956 struct anv_cmd_pipeline_state base;
2957
2958 struct anv_compute_pipeline *pipeline;
2959
2960 bool pipeline_dirty;
2961
2962 struct anv_state push_data;
2963
2964 struct anv_address num_workgroups;
2965 };
2966
2967 struct anv_cmd_ray_tracing_state {
2968 struct anv_cmd_pipeline_state base;
2969
2970 struct anv_ray_tracing_pipeline *pipeline;
2971
2972 bool pipeline_dirty;
2973
2974 struct {
2975 struct anv_bo *bo;
2976 struct brw_rt_scratch_layout layout;
2977 } scratch;
2978 };
2979
2980 /** State required while building cmd buffer */
2981 struct anv_cmd_state {
2982 /* PIPELINE_SELECT.PipelineSelection */
2983 uint32_t current_pipeline;
2984 const struct intel_l3_config * current_l3_config;
2985 uint32_t last_aux_map_state;
2986
2987 struct anv_cmd_graphics_state gfx;
2988 struct anv_cmd_compute_state compute;
2989 struct anv_cmd_ray_tracing_state rt;
2990
2991 enum anv_pipe_bits pending_pipe_bits;
2992 VkShaderStageFlags descriptors_dirty;
2993 VkShaderStageFlags push_constants_dirty;
2994
2995 struct anv_framebuffer * framebuffer;
2996 struct anv_render_pass * pass;
2997 struct anv_subpass * subpass;
2998 VkRect2D render_area;
2999 uint32_t restart_index;
3000 struct anv_vertex_binding vertex_bindings[MAX_VBS];
3001 bool xfb_enabled;
3002 struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
3003 struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
3004 struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];
3005
3006 unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
3007 unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
3008 unsigned char push_sha1s[MESA_SHADER_STAGES][20];
3009
3010 /**
3011    * Whether or not the gfx8 PMA fix is enabled. We ensure that it is
3012    * disabled at the top of any command buffer by disabling it in
3013    * EndCommandBuffer and before invoking a secondary in ExecuteCommands.
3014 */
3015 bool pma_fix_enabled;
3016
3017 /**
3018 * Whether or not we know for certain that HiZ is enabled for the current
3019 * subpass. If, for whatever reason, we are unsure as to whether HiZ is
3020 * enabled or not, this will be false.
3021 */
3022 bool hiz_enabled;
3023
3024    /* We ensure the registers for the gfx12 D16 fix are initialized at the
3025 * first non-NULL depth stencil packet emission of every command buffer.
3026 * For secondary command buffer execution, we transfer the state from the
3027 * last command buffer to the primary (if known).
3028 */
3029 enum anv_depth_reg_mode depth_reg_mode;
3030
3031 bool conditional_render_enabled;
3032
3033 /**
3034 * Last rendering scale argument provided to
3035 * genX(cmd_buffer_emit_hashing_mode)().
3036 */
3037 unsigned current_hash_scale;
3038
3039 /**
3040 * Array length is anv_cmd_state::pass::attachment_count. Array content is
3041 * valid only when recording a render pass instance.
3042 */
3043 struct anv_attachment_state * attachments;
3044
3045 /**
3046 * Surface states for color render targets. These are stored in a single
3047 * flat array. For depth-stencil attachments, the surface state is simply
3048 * left blank.
3049 */
3050 struct anv_state attachment_states;
3051
3052 /**
3053 * A null surface state of the right size to match the framebuffer. This
3054 * is one of the states in attachment_states.
3055 */
3056 struct anv_state null_surface_state;
3057 };
3058
3059 struct anv_cmd_pool {
3060 struct vk_object_base base;
3061 VkAllocationCallbacks alloc;
3062 struct list_head cmd_buffers;
3063
3064 VkCommandPoolCreateFlags flags;
3065 struct anv_queue_family * queue_family;
3066 };
3067
3068 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
3069 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
3070
3071 enum anv_cmd_buffer_exec_mode {
3072 ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
3073 ANV_CMD_BUFFER_EXEC_MODE_EMIT,
3074 ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
3075 ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
3076 ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
3077 ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
3078 };
3079
3080 struct anv_measure_batch;
3081
3082 struct anv_cmd_buffer {
3083 struct vk_command_buffer vk;
3084
3085 struct anv_device * device;
3086
3087 struct anv_cmd_pool * pool;
3088 struct list_head pool_link;
3089
3090 struct anv_batch batch;
3091
3092 /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
3093 * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
3094     * rewrite the end to chain multiple batches together at vkQueueSubmit().
3095 */
3096 void * batch_end;
3097
3098 /* Fields required for the actual chain of anv_batch_bo's.
3099 *
3100 * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
3101 */
3102 struct list_head batch_bos;
3103 enum anv_cmd_buffer_exec_mode exec_mode;
3104
3105 /* A vector of anv_batch_bo pointers for every batch or surface buffer
3106 * referenced by this command buffer
3107 *
3108 * initialized by anv_cmd_buffer_init_batch_bo_chain()
3109 */
3110 struct u_vector seen_bbos;
3111
3112 /* A vector of int32_t's for every block of binding tables.
3113 *
3114 * initialized by anv_cmd_buffer_init_batch_bo_chain()
3115 */
3116 struct u_vector bt_block_states;
3117 struct anv_state bt_next;
3118
3119 struct anv_reloc_list surface_relocs;
3120 /** Last seen surface state block pool center bo offset */
3121 uint32_t last_ss_pool_center;
3122
3123 /* Serial for tracking buffer completion */
3124 uint32_t serial;
3125
3126 /* Stream objects for storing temporary data */
3127 struct anv_state_stream surface_state_stream;
3128 struct anv_state_stream dynamic_state_stream;
3129 struct anv_state_stream general_state_stream;
3130
3131 VkCommandBufferUsageFlags usage_flags;
3132 VkCommandBufferLevel level;
3133
3134 struct anv_query_pool *perf_query_pool;
3135
3136 struct anv_cmd_state state;
3137
3138 struct anv_address return_addr;
3139
3140 /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
3141 uint64_t intel_perf_marker;
3142
3143 struct anv_measure_batch *measure;
3144
3145 /**
3146    * KHR_performance_query requires self-modifying command buffers; this
3147    * array holds the locations of the commands that modify the query begin
3148    * and end instructions storing performance counters. The array length is
3149 * anv_physical_device::n_perf_query_commands.
3150 */
3151 struct mi_address_token *self_mod_locations;
3152
3153 /**
3154 * Index tracking which of the self_mod_locations items have already been
3155 * used.
3156 */
3157 uint32_t perf_reloc_idx;
3158
3159 /**
3160 * Sum of all the anv_batch_bo sizes allocated for this command buffer.
3161 * Used to increase allocation size for long command buffers.
3162 */
3163 uint32_t total_batch_size;
3164 };
3165
3166 /* Determine whether we can chain a given cmd_buffer to another one. We need
3167 * softpin and we also need to make sure that we can edit the end of the batch
3168  * to point to the next one, which requires that the command buffer not be
3169  * used simultaneously.
3170 */
3171 static inline bool
3172 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
3173 {
3174 return anv_use_softpin(cmd_buffer->device->physical) &&
3175 !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
3176 }
3177
3178 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3179 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3180 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3181 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3182 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3183 struct anv_cmd_buffer *secondary);
3184 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3185 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3186 struct anv_cmd_buffer *cmd_buffer,
3187 const VkSemaphore *in_semaphores,
3188 const uint64_t *in_wait_values,
3189 uint32_t num_in_semaphores,
3190 const VkSemaphore *out_semaphores,
3191 const uint64_t *out_signal_values,
3192 uint32_t num_out_semaphores,
3193 VkFence fence,
3194 int perf_query_pass);
3195
3196 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
3197
3198 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3199 const void *data, uint32_t size, uint32_t alignment);
3200 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3201 uint32_t *a, uint32_t *b,
3202 uint32_t dwords, uint32_t alignment);
3203
3204 struct anv_address
3205 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3206 struct anv_state
3207 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3208 uint32_t entries, uint32_t *state_offset);
3209 struct anv_state
3210 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
3211 struct anv_state
3212 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3213 uint32_t size, uint32_t alignment);
3214
3215 VkResult
3216 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3217
3218 void gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
3219 void gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
3220 bool depth_clamp_enable);
3221 void gfx7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
3222
3223 void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
3224 struct anv_render_pass *pass,
3225 struct anv_framebuffer *framebuffer,
3226 const VkClearValue *clear_values);
3227
3228 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3229
3230 struct anv_state
3231 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
3232 struct anv_state
3233 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3234
3235 const struct anv_image_view *
3236 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
3237
3238 VkResult
3239 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3240 uint32_t num_entries,
3241 uint32_t *state_offset,
3242 struct anv_state *bt_state);
3243
3244 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
3245
3246 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3247
3248 enum anv_fence_type {
3249 ANV_FENCE_TYPE_NONE = 0,
3250 ANV_FENCE_TYPE_BO,
3251 ANV_FENCE_TYPE_WSI_BO,
3252 ANV_FENCE_TYPE_SYNCOBJ,
3253 ANV_FENCE_TYPE_WSI,
3254 };
3255
3256 enum anv_bo_fence_state {
3257 /** Indicates that this is a new (or newly reset fence) */
3258 ANV_BO_FENCE_STATE_RESET,
3259
3260 /** Indicates that this fence has been submitted to the GPU but is still
3261 * (as far as we know) in use by the GPU.
3262 */
3263 ANV_BO_FENCE_STATE_SUBMITTED,
3264
3265 ANV_BO_FENCE_STATE_SIGNALED,
3266 };
3267
3268 struct anv_fence_impl {
3269 enum anv_fence_type type;
3270
3271 union {
3272 /** Fence implementation for BO fences
3273 *
3274 * These fences use a BO and a set of CPU-tracked state flags. The BO
3275 * is added to the object list of the last execbuf call in a QueueSubmit
3276 * and is marked EXEC_WRITE. The state flags track when the BO has been
3277 * submitted to the kernel. We need to do this because Vulkan lets you
3278 * wait on a fence that has not yet been submitted and I915_GEM_BUSY
3279 * will say it's idle in this case.
3280 */
3281 struct {
3282 struct anv_bo *bo;
3283 enum anv_bo_fence_state state;
3284 } bo;
3285
3286 /** DRM syncobj handle for syncobj-based fences */
3287 uint32_t syncobj;
3288
3289 /** WSI fence */
3290 struct wsi_fence *fence_wsi;
3291 };
3292 };
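/* Example (sketch): code waiting on a BO fence consults the CPU-tracked
 * state before trusting the kernel, since I915_GEM_BUSY reports a
 * never-submitted BO as idle:
 *
 *    switch (impl->bo.state) {
 *    case ANV_BO_FENCE_STATE_RESET:     return VK_TIMEOUT;  // nothing submitted yet
 *    case ANV_BO_FENCE_STATE_SUBMITTED: break;              // wait on the BO itself
 *    case ANV_BO_FENCE_STATE_SIGNALED:  return VK_SUCCESS;
 *    }
 */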
3293
3294 struct anv_fence {
3295 struct vk_object_base base;
3296
3297 /* Permanent fence state. Every fence has some form of permanent state
3298 * (type != ANV_FENCE_TYPE_NONE). This may be a BO to fence on (for
3299 * cross-process fences) or it could just be a dummy for use internally.
3300 */
3301 struct anv_fence_impl permanent;
3302
3303 /* Temporary fence state. A fence *may* have temporary state. That state
3304 * is added to the fence by an import operation and is reset back to
3305 * ANV_FENCE_TYPE_NONE when the fence is reset. A fence with temporary
3306 * state cannot be signaled because the fence must already be signaled
3307 * before the temporary state can be exported from the fence in the other
3308 * process and imported here.
3309 */
3310 struct anv_fence_impl temporary;
3311 };
3312
3313 void anv_fence_reset_temporary(struct anv_device *device,
3314 struct anv_fence *fence);
3315
3316 struct anv_event {
3317 struct vk_object_base base;
3318 uint64_t semaphore;
3319 struct anv_state state;
3320 };
3321
3322 enum anv_semaphore_type {
3323 ANV_SEMAPHORE_TYPE_NONE = 0,
3324 ANV_SEMAPHORE_TYPE_DUMMY,
3325 ANV_SEMAPHORE_TYPE_WSI_BO,
3326 ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
3327 ANV_SEMAPHORE_TYPE_TIMELINE,
3328 ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
3329 };
3330
3331 struct anv_timeline_point {
3332 struct list_head link;
3333
3334 uint64_t serial;
3335
3336 /* Number of waiters on this point; while it is > 0 the point should not be
3337 * garbage collected.
3338 */
3339 int waiting;
3340
3341 /* BO used for synchronization. */
3342 struct anv_bo *bo;
3343 };
3344
3345 struct anv_timeline {
3346 pthread_mutex_t mutex;
3347 pthread_cond_t cond;
3348
3349 uint64_t highest_past;
3350 uint64_t highest_pending;
3351
3352 struct list_head points;
3353 struct list_head free_points;
3354 };
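/* Example (sketch, emulated-timeline path only): a CPU-side wait for a given
 * timeline value amounts to sleeping on the condition variable until
 * highest_past catches up:
 *
 *    pthread_mutex_lock(&timeline->mutex);
 *    while (timeline->highest_past < wait_value)
 *       pthread_cond_wait(&timeline->cond, &timeline->mutex);
 *    pthread_mutex_unlock(&timeline->mutex);
 */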
3355
3356 struct anv_semaphore_impl {
3357 enum anv_semaphore_type type;
3358
3359 union {
3360 /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
3361 * or type == ANV_SEMAPHORE_TYPE_WSI_BO. This BO will be added to the
3362 * object list on any execbuf2 calls for which this semaphore is used as
3363 * a wait or signal fence. When used as a signal fence or when type ==
3364 * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
3365 */
3366 struct anv_bo *bo;
3367
3368 /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
3369 * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
3370 * import so we don't need to bother with a userspace cache.
3371 */
3372 uint32_t syncobj;
3373
3374 /* Non-shareable timeline semaphore
3375 *
3376 * Used when the kernel doesn't support timeline semaphores.
3377 */
3378 struct anv_timeline timeline;
3379 };
3380 };
3381
3382 struct anv_semaphore {
3383 struct vk_object_base base;
3384
3385 /* Permanent semaphore state. Every semaphore has some form of permanent
3386 * state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
3387 * (for cross-process semaphores) or it could just be a dummy for use
3388 * internally.
3389 */
3390 struct anv_semaphore_impl permanent;
3391
3392 /* Temporary semaphore state. A semaphore *may* have temporary state.
3393 * That state is added to the semaphore by an import operation and is reset
3394 * back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on. A
3395 * semaphore with temporary state cannot be signaled because the semaphore
3396 * must already be signaled before the temporary state can be exported from
3397 * the semaphore in the other process and imported here.
3398 */
3399 struct anv_semaphore_impl temporary;
3400 };
3401
3402 void anv_semaphore_reset_temporary(struct anv_device *device,
3403 struct anv_semaphore *semaphore);
3404
3405 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
3406
3407 #define anv_foreach_stage(stage, stage_bits) \
3408 for (gl_shader_stage stage, \
3409 __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
3410 stage = __builtin_ffs(__tmp) - 1, __tmp; \
3411 __tmp &= ~(1 << (stage)))
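/* Example usage (sketch): walk the stages enabled in a VkShaderStageFlags
 * mask; flush_stage_descriptors() is a hypothetical per-stage helper.
 *
 *    anv_foreach_stage(s, pipeline->active_stages) {
 *       flush_stage_descriptors(cmd_buffer, s);
 *    }
 */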
3412
3413 struct anv_pipeline_bind_map {
3414 unsigned char surface_sha1[20];
3415 unsigned char sampler_sha1[20];
3416 unsigned char push_sha1[20];
3417
3418 uint32_t surface_count;
3419 uint32_t sampler_count;
3420
3421 struct anv_pipeline_binding * surface_to_descriptor;
3422 struct anv_pipeline_binding * sampler_to_descriptor;
3423
3424 struct anv_push_range push_ranges[4];
3425 };
3426
3427 struct anv_shader_bin_key {
3428 uint32_t size;
3429 uint8_t data[0];
3430 };
3431
3432 struct anv_shader_bin {
3433 uint32_t ref_cnt;
3434
3435 gl_shader_stage stage;
3436
3437 const struct anv_shader_bin_key *key;
3438
3439 struct anv_state kernel;
3440 uint32_t kernel_size;
3441
3442 const struct brw_stage_prog_data *prog_data;
3443 uint32_t prog_data_size;
3444
3445 struct brw_compile_stats stats[3];
3446 uint32_t num_stats;
3447
3448 struct nir_xfb_info *xfb_info;
3449
3450 struct anv_pipeline_bind_map bind_map;
3451 };
3452
3453 struct anv_shader_bin *
3454 anv_shader_bin_create(struct anv_device *device,
3455 gl_shader_stage stage,
3456 const void *key, uint32_t key_size,
3457 const void *kernel, uint32_t kernel_size,
3458 const struct brw_stage_prog_data *prog_data,
3459 uint32_t prog_data_size,
3460 const struct brw_compile_stats *stats, uint32_t num_stats,
3461 const struct nir_xfb_info *xfb_info,
3462 const struct anv_pipeline_bind_map *bind_map);
3463
3464 void
3465 anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
3466
3467 static inline void
3468 anv_shader_bin_ref(struct anv_shader_bin *shader)
3469 {
3470 assert(shader && shader->ref_cnt >= 1);
3471 p_atomic_inc(&shader->ref_cnt);
3472 }
3473
3474 static inline void
3475 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3476 {
3477 assert(shader && shader->ref_cnt >= 1);
3478 if (p_atomic_dec_zero(&shader->ref_cnt))
3479 anv_shader_bin_destroy(device, shader);
3480 }
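/* Example (sketch): the reference count follows the usual shared-ownership
 * pattern. A pipeline that keeps a pointer to a shader takes a reference and
 * drops it when the pipeline is destroyed:
 *
 *    pipeline->shaders[stage] = bin;
 *    anv_shader_bin_ref(bin);
 *    ...
 *    anv_shader_bin_unref(device, pipeline->shaders[stage]);
 */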
3481
3482 #define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
3483 assert((local_arg_offset) % 8 == 0); \
3484 const struct brw_bs_prog_data *prog_data = \
3485 brw_bs_prog_data_const(bin->prog_data); \
3486 assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \
3487 \
3488 (struct GFX_BINDLESS_SHADER_RECORD) { \
3489 .OffsetToLocalArguments = (local_arg_offset) / 8, \
3490 .BindlessShaderDispatchMode = prog_data->simd_size / 16, \
3491 .KernelStartPointer = bin->kernel.offset, \
3492 }; \
3493 })
3494
3495 struct anv_pipeline_executable {
3496 gl_shader_stage stage;
3497
3498 struct brw_compile_stats stats;
3499
3500 char *nir;
3501 char *disasm;
3502 };
3503
3504 enum anv_pipeline_type {
3505 ANV_PIPELINE_GRAPHICS,
3506 ANV_PIPELINE_COMPUTE,
3507 ANV_PIPELINE_RAY_TRACING,
3508 };
3509
3510 struct anv_pipeline {
3511 struct vk_object_base base;
3512
3513 struct anv_device * device;
3514
3515 struct anv_batch batch;
3516 struct anv_reloc_list batch_relocs;
3517
3518 void * mem_ctx;
3519
3520 enum anv_pipeline_type type;
3521 VkPipelineCreateFlags flags;
3522
3523 struct util_dynarray executables;
3524
3525 const struct intel_l3_config * l3_config;
3526 };
3527
3528 struct anv_graphics_pipeline {
3529 struct anv_pipeline base;
3530
3531 uint32_t batch_data[512];
3532
3533 /* States that are part of batch_data and should not be emitted
3534 * dynamically.
3535 */
3536 anv_cmd_dirty_mask_t static_state_mask;
3537
3538 /* States that need to be reemitted in cmd_buffer_flush_dynamic_state().
3539 * This might cover more than the dynamic states specified at pipeline
3540 * creation.
3541 */
3542 anv_cmd_dirty_mask_t dynamic_state_mask;
3543
3544 struct anv_dynamic_state dynamic_state;
3545
3546 /* States declared dynamic at pipeline creation. */
3547 anv_cmd_dirty_mask_t dynamic_states;
3548
3549 uint32_t topology;
3550
3551 /* These fields are required with dynamic primitive topology,
3552 * rasterization_samples used only with gen < 8.
3553 */
3554 VkLineRasterizationModeEXT line_mode;
3555 VkPolygonMode polygon_mode;
3556 uint32_t rasterization_samples;
3557
3558 struct anv_subpass * subpass;
3559
3560 struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
3561
3562 VkShaderStageFlags active_stages;
3563
3564 bool writes_depth;
3565 bool depth_test_enable;
3566 bool writes_stencil;
3567 bool stencil_test_enable;
3568 bool depth_clamp_enable;
3569 bool depth_clip_enable;
3570 bool sample_shading_enable;
3571 bool kill_pixel;
3572 bool depth_bounds_test_enable;
3573 bool force_fragment_thread_dispatch;
3574
3575 /* When primitive replication is used, subpass->view_mask will describe what
3576 * views to replicate.
3577 */
3578 bool use_primitive_replication;
3579
3580 struct anv_state blend_state;
3581
3582 struct anv_state cps_state;
3583
3584 uint32_t vb_used;
3585 struct anv_pipeline_vertex_binding {
3586 uint32_t stride;
3587 bool instanced;
3588 uint32_t instance_divisor;
3589 } vb[MAX_VBS];
3590
3591 struct {
3592 uint32_t sf[7];
3593 uint32_t depth_stencil_state[3];
3594 uint32_t clip[4];
3595 uint32_t xfb_bo_pitch[4];
3596 uint32_t wm[3];
3597 uint32_t blend_state[MAX_RTS * 2];
3598 uint32_t streamout_state[3];
3599 } gfx7;
3600
3601 struct {
3602 uint32_t sf[4];
3603 uint32_t raster[5];
3604 uint32_t wm_depth_stencil[3];
3605 uint32_t wm[2];
3606 uint32_t ps_blend[2];
3607 uint32_t blend_state[1 + MAX_RTS * 2];
3608 uint32_t streamout_state[5];
3609 } gfx8;
3610
3611 struct {
3612 uint32_t wm_depth_stencil[4];
3613 } gfx9;
3614 };
3615
3616 struct anv_compute_pipeline {
3617 struct anv_pipeline base;
3618
3619 struct anv_shader_bin * cs;
3620 uint32_t batch_data[9];
3621 uint32_t interface_descriptor_data[8];
3622 };
3623
3624 struct anv_rt_shader_group {
3625 VkRayTracingShaderGroupTypeKHR type;
3626
3627 struct anv_shader_bin *general;
3628 struct anv_shader_bin *closest_hit;
3629 struct anv_shader_bin *any_hit;
3630 struct anv_shader_bin *intersection;
3631
3632 /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
3633 uint32_t handle[8];
3634 };
3635
3636 struct anv_ray_tracing_pipeline {
3637 struct anv_pipeline base;
3638
3639 /* All shaders in the pipeline */
3640 struct util_dynarray shaders;
3641
3642 uint32_t group_count;
3643 struct anv_rt_shader_group * groups;
3644
3645 /* If non-zero, this is the default computed stack size as per the stack
3646 * size computation in the Vulkan spec. If zero, that indicates that the
3647 * client has requested a dynamic stack size.
3648 */
3649 uint32_t stack_size;
3650 };
3651
3652 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
3653 static inline struct anv_##pipe_type##_pipeline * \
3654 anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
3655 { \
3656 assert(pipeline->type == pipe_enum); \
3657 return (struct anv_##pipe_type##_pipeline *) pipeline; \
3658 }
3659
3660 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
3661 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
3662 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
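/* Example (sketch): downcast a generic pipeline once its type is known.
 *
 *    if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
 *       struct anv_graphics_pipeline *gfx_pipeline =
 *          anv_pipeline_to_graphics(pipeline);
 *       // ... use gfx_pipeline->shaders, dynamic_state, etc.
 *    }
 */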
3663
3664 static inline bool
3665 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3666 gl_shader_stage stage)
3667 {
3668 return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3669 }
3670
3671 static inline bool
3672 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
3673 {
3674 return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
3675 }
3676
3677 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
3678 static inline const struct brw_##prefix##_prog_data * \
3679 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
3680 { \
3681 if (anv_pipeline_has_stage(pipeline, stage)) { \
3682 return (const struct brw_##prefix##_prog_data *) \
3683 pipeline->shaders[stage]->prog_data; \
3684 } else { \
3685 return NULL; \
3686 } \
3687 }
3688
3689 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
3690 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
3691 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
3692 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
3693 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3694
3695 static inline const struct brw_cs_prog_data *
3696 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3697 {
3698 assert(pipeline->cs);
3699 return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3700 }
3701
3702 static inline const struct brw_vue_prog_data *
3703 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3704 {
3705 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3706 return &get_gs_prog_data(pipeline)->base;
3707 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3708 return &get_tes_prog_data(pipeline)->base;
3709 else
3710 return &get_vs_prog_data(pipeline)->base;
3711 }
3712
3713 VkResult
3714 anv_device_init_rt_shaders(struct anv_device *device);
3715
3716 void
3717 anv_device_finish_rt_shaders(struct anv_device *device);
3718
3719 VkResult
3720 anv_pipeline_init(struct anv_pipeline *pipeline,
3721 struct anv_device *device,
3722 enum anv_pipeline_type type,
3723 VkPipelineCreateFlags flags,
3724 const VkAllocationCallbacks *pAllocator);
3725
3726 void
3727 anv_pipeline_finish(struct anv_pipeline *pipeline,
3728 struct anv_device *device,
3729 const VkAllocationCallbacks *pAllocator);
3730
3731 VkResult
3732 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
3733 struct anv_pipeline_cache *cache,
3734 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3735 const VkAllocationCallbacks *alloc);
3736
3737 VkResult
3738 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
3739 struct anv_pipeline_cache *cache,
3740 const VkComputePipelineCreateInfo *info,
3741 const struct vk_shader_module *module,
3742 const char *entrypoint,
3743 const VkSpecializationInfo *spec_info);
3744
3745 VkResult
3746 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3747 struct anv_device *device,
3748 struct anv_pipeline_cache *cache,
3749 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3750 const VkAllocationCallbacks *alloc);
3751
3752 struct anv_format_plane {
3753 enum isl_format isl_format:16;
3754 struct isl_swizzle swizzle;
3755
3756 /* Whether this plane contains chroma channels */
3757 bool has_chroma;
3758
3759 /* For downscaling of YUV planes */
3760 uint8_t denominator_scales[2];
3761
3762 /* How to map sampled ycbcr planes to a single 4 component element. */
3763 struct isl_swizzle ycbcr_swizzle;
3764
3765 /* What aspect is associated to this plane */
3766 VkImageAspectFlags aspect;
3767 };
3768
3769
3770 struct anv_format {
3771 struct anv_format_plane planes[3];
3772 VkFormat vk_format;
3773 uint8_t n_planes;
3774 bool can_ycbcr;
3775 };
3776
3777 static inline void
3778 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
3779 {
3780 if (util_bitcount(aspects) == 1) {
3781 assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
3782 VK_IMAGE_ASPECT_DEPTH_BIT |
3783 VK_IMAGE_ASPECT_STENCIL_BIT |
3784 VK_IMAGE_ASPECT_PLANE_0_BIT |
3785 VK_IMAGE_ASPECT_PLANE_1_BIT |
3786 VK_IMAGE_ASPECT_PLANE_2_BIT));
3787 } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
3788 assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
3789 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3790 VK_IMAGE_ASPECT_PLANE_1_BIT) ||
3791 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3792 VK_IMAGE_ASPECT_PLANE_1_BIT |
3793 VK_IMAGE_ASPECT_PLANE_2_BIT));
3794 } else {
3795 assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
3796 VK_IMAGE_ASPECT_STENCIL_BIT));
3797 }
3798 }
3799
3800 /**
3801 * Return the aspect's plane relative to all_aspects. For an image, for
3802 * instance, all_aspects would be the set of aspects in the image. For
3803 * an image view, all_aspects would be the subset of aspects represented
3804 * by that particular view.
3805 */
3806 static inline uint32_t
3807 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
3808 VkImageAspectFlagBits aspect)
3809 {
3810 anv_assert_valid_aspect_set(all_aspects);
3811 assert(util_bitcount(aspect) == 1);
3812 assert(!(aspect & ~all_aspects));
3813
3814 /* Because we always put image and view planes in aspect-bit-order, the
3815 * plane index is the number of bits in all_aspects before aspect.
3816 */
3817 return util_bitcount(all_aspects & (aspect - 1));
3818 }
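/* Worked example: for a 3-plane YCbCr image, all_aspects is
 * PLANE_0 | PLANE_1 | PLANE_2. anv_aspect_to_plane(all_aspects,
 * VK_IMAGE_ASPECT_PLANE_2_BIT) counts the bits of all_aspects below PLANE_2
 * (PLANE_0 and PLANE_1), giving plane 2, while VK_IMAGE_ASPECT_PLANE_0_BIT
 * has no lower bits set and maps to plane 0.
 */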
3819
3820 #define anv_foreach_image_aspect_bit(b, image, aspects) \
3821 u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
3822
3823 const struct anv_format *
3824 anv_get_format(VkFormat format);
3825
3826 static inline uint32_t
3827 anv_get_format_planes(VkFormat vk_format)
3828 {
3829 const struct anv_format *format = anv_get_format(vk_format);
3830
3831 return format != NULL ? format->n_planes : 0;
3832 }
3833
3834 struct anv_format_plane
3835 anv_get_format_plane(const struct intel_device_info *devinfo,
3836 VkFormat vk_format, uint32_t plane,
3837 VkImageTiling tiling);
3838
3839 struct anv_format_plane
3840 anv_get_format_aspect(const struct intel_device_info *devinfo,
3841 VkFormat vk_format,
3842 VkImageAspectFlagBits aspect, VkImageTiling tiling);
3843
3844 static inline enum isl_format
3845 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
3846 VkImageAspectFlags aspect, VkImageTiling tiling)
3847 {
3848 return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
3849 }
3850
3851 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
3852 VkImageCreateFlags create_flags,
3853 VkFormat vk_format,
3854 VkImageTiling vk_tiling,
3855 const VkImageFormatListCreateInfoKHR *fmt_list);
3856
3857 extern VkFormat
3858 vk_format_from_android(unsigned android_format, unsigned android_usage);
3859
3860 static inline struct isl_swizzle
3861 anv_swizzle_for_render(struct isl_swizzle swizzle)
3862 {
3863 /* Sometimes the swizzle will have alpha map to one. We do this to fake
3864 * RGB as RGBA for texturing
3865 */
3866 assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3867 swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3868
3869 /* But it doesn't matter what we render to that channel */
3870 swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3871
3872 return swizzle;
3873 }
3874
3875 void
3876 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3877
3878 /**
3879 * Describes how each part of anv_image will be bound to memory.
3880 */
3881 struct anv_image_memory_range {
3882 /**
3883 * Disjoint bindings into which each portion of the image will be bound.
3884 *
3885 * Binding images to memory can be complicated and may involve binding different
3886 * portions of the image to different memory objects or regions. For most
3887 * images, everything lives in the MAIN binding and gets bound by
3888 * vkBindImageMemory. For disjoint multi-planar images, each plane has
3889 * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
3890 * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are
3891 * implicit or driver-managed and live in special-case bindings.
3892 */
3893 enum anv_image_memory_binding {
3894 /**
3895 * Used if and only if image is not multi-planar disjoint. Bound by
3896 * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
3897 */
3898 ANV_IMAGE_MEMORY_BINDING_MAIN,
3899
3900 /**
3901 * Used if and only if image is multi-planar disjoint. Bound by
3902 * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
3903 */
3904 ANV_IMAGE_MEMORY_BINDING_PLANE_0,
3905 ANV_IMAGE_MEMORY_BINDING_PLANE_1,
3906 ANV_IMAGE_MEMORY_BINDING_PLANE_2,
3907
3908 /**
3909 * Driver-private bo. In special cases we may store the aux surface and/or
3910 * aux state in this binding.
3911 */
3912 ANV_IMAGE_MEMORY_BINDING_PRIVATE,
3913
3914 /** Sentinel */
3915 ANV_IMAGE_MEMORY_BINDING_END,
3916 } binding;
3917
3918 /**
3919 * Offset is relative to the start of the binding created by
3920 * vkBindImageMemory, not to the start of the bo.
3921 */
3922 uint64_t offset;
3923
3924 uint64_t size;
3925 uint32_t alignment;
3926 };
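/* Example (sketch): the Vulkan-side calls that end up in the PLANE_n
 * bindings above for a disjoint multi-planar image. Each plane is bound
 * separately via VkBindImagePlaneMemoryInfo:
 *
 *    VkBindImagePlaneMemoryInfo plane_info = {
 *       .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO,
 *       .planeAspect = VK_IMAGE_ASPECT_PLANE_0_BIT,
 *    };
 *    VkBindImageMemoryInfo bind_info = {
 *       .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
 *       .pNext = &plane_info,
 *       .image = image,
 *       .memory = memory,
 *       .memoryOffset = plane0_offset,
 *    };
 *    vkBindImageMemory2(device, 1, &bind_info);
 */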
3927
3928 /**
3929 * Subsurface of an anv_image.
3930 */
3931 struct anv_surface {
3932 struct isl_surf isl;
3933 struct anv_image_memory_range memory_range;
3934 };
3935
3936 static inline bool MUST_CHECK
3937 anv_surface_is_valid(const struct anv_surface *surface)
3938 {
3939 return surface->isl.size_B > 0 && surface->memory_range.size > 0;
3940 }
3941
3942 struct anv_image {
3943 struct vk_image vk;
3944
3945 uint32_t n_planes;
3946
3947 /**
3948 * Image has multi-planar format and was created with
3949 * VK_IMAGE_CREATE_DISJOINT_BIT.
3950 */
3951 bool disjoint;
3952
3953 /**
3954 * Image was imported from a struct AHardwareBuffer. We have to delay
3955 * final image creation until bind time.
3956 */
3957 bool from_ahb;
3958
3959 /**
3960 * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
3961 * must be released when the image is destroyed.
3962 */
3963 bool from_gralloc;
3964
3965 /**
3966 * The memory bindings created by vkCreateImage and vkBindImageMemory.
3967 *
3968 * For details on the image's memory layout, see check_memory_bindings().
3969 *
3970 * vkCreateImage constructs the `memory_range` for each
3971 * anv_image_memory_binding. After vkCreateImage, each binding is valid if
3972 * and only if `memory_range::size > 0`.
3973 *
3974 * vkBindImageMemory binds each valid `memory_range` to an `address`.
3975 * Usually, the app will provide the address via the parameters of
3976 * vkBindImageMemory. However, special-case bindings may be bound to
3977 * driver-private memory.
3978 */
3979 struct anv_image_binding {
3980 struct anv_image_memory_range memory_range;
3981 struct anv_address address;
3982 } bindings[ANV_IMAGE_MEMORY_BINDING_END];
3983
3984 /**
3985 * Image subsurfaces
3986 *
3987 * For each plane x, anv_image::planes[x].surface is valid if and only if
3988 * anv_image::aspects contains the corresponding aspect. Refer to
3989 * anv_image_aspect_to_plane() to map a given aspect to its plane index.
3990 *
3991 * The hardware requires that the depth buffer and stencil buffer be
3992 * separate surfaces. From Vulkan's perspective, though, depth and stencil
3993 * reside in the same VkImage. To satisfy both the hardware and Vulkan, we
3994 * allocate the depth and stencil buffers as separate surfaces in the same
3995 * bo.
3996 */
3997 struct anv_image_plane {
3998 struct anv_surface primary_surface;
3999
4000 /**
4001 * A surface which shadows the main surface and may have different
4002 * tiling. This is used for sampling using a tiling that isn't supported
4003 * for other operations.
4004 */
4005 struct anv_surface shadow_surface;
4006
4007 /**
4008 * The base aux usage for this image. For color images, this can be
4009 * either CCS_E or CCS_D depending on whether or not we can reliably
4010 * leave CCS on all the time.
4011 */
4012 enum isl_aux_usage aux_usage;
4013
4014 struct anv_surface aux_surface;
4015
4016 /** Location of the fast clear state. */
4017 struct anv_image_memory_range fast_clear_memory_range;
4018 } planes[3];
4019 };
4020
4021 /* The ordering of this enum is important */
4022 enum anv_fast_clear_type {
4023 /** Image does not have/support any fast-clear blocks */
4024 ANV_FAST_CLEAR_NONE = 0,
4025 /** Image has/supports fast-clear but only to the default value */
4026 ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
4027 /** Image has/supports fast-clear with an arbitrary fast-clear value */
4028 ANV_FAST_CLEAR_ANY = 2,
4029 };
4030
4031 /**
4032 * Return the aspect's _format_ plane, not its _memory_ plane (using the
4033 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
4034 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
4035 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
4036 */
4037 static inline uint32_t
4038 anv_image_aspect_to_plane(const struct anv_image *image,
4039 VkImageAspectFlagBits aspect)
4040 {
4041 return anv_aspect_to_plane(image->vk.aspects, aspect);
4042 }
4043
4044 /* Returns the number of auxiliary buffer levels attached to an image. */
4045 static inline uint8_t
4046 anv_image_aux_levels(const struct anv_image * const image,
4047 VkImageAspectFlagBits aspect)
4048 {
4049 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4050 if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
4051 return 0;
4052
4053 return image->vk.mip_levels;
4054 }
4055
4056 /* Returns the number of auxiliary buffer layers attached to an image. */
4057 static inline uint32_t
4058 anv_image_aux_layers(const struct anv_image * const image,
4059 VkImageAspectFlagBits aspect,
4060 const uint8_t miplevel)
4061 {
4062 assert(image);
4063
4064 /* The miplevel must exist in the main buffer. */
4065 assert(miplevel < image->vk.mip_levels);
4066
4067 if (miplevel >= anv_image_aux_levels(image, aspect)) {
4068 /* There are no layers with auxiliary data because the miplevel has no
4069 * auxiliary data.
4070 */
4071 return 0;
4072 }
4073
4074 return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
4075 }
4076
4077 static inline struct anv_address MUST_CHECK
4078 anv_image_address(const struct anv_image *image,
4079 const struct anv_image_memory_range *mem_range)
4080 {
4081 const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
4082 assert(binding->memory_range.offset == 0);
4083
4084 if (mem_range->size == 0)
4085 return ANV_NULL_ADDRESS;
4086
4087 return anv_address_add(binding->address, mem_range->offset);
4088 }
4089
4090 static inline struct anv_address
4091 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
4092 const struct anv_image *image,
4093 VkImageAspectFlagBits aspect)
4094 {
4095 assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
4096 VK_IMAGE_ASPECT_DEPTH_BIT));
4097
4098 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4099 const struct anv_image_memory_range *mem_range =
4100 &image->planes[plane].fast_clear_memory_range;
4101
4102 return anv_image_address(image, mem_range);
4103 }
4104
4105 static inline struct anv_address
4106 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
4107 const struct anv_image *image,
4108 VkImageAspectFlagBits aspect)
4109 {
4110 struct anv_address addr =
4111 anv_image_get_clear_color_addr(device, image, aspect);
4112
4113 const unsigned clear_color_state_size = device->info.ver >= 10 ?
4114 device->isl_dev.ss.clear_color_state_size :
4115 device->isl_dev.ss.clear_value_size;
4116 return anv_address_add(addr, clear_color_state_size);
4117 }
4118
4119 static inline struct anv_address
4120 anv_image_get_compression_state_addr(const struct anv_device *device,
4121 const struct anv_image *image,
4122 VkImageAspectFlagBits aspect,
4123 uint32_t level, uint32_t array_layer)
4124 {
4125 assert(level < anv_image_aux_levels(image, aspect));
4126 assert(array_layer < anv_image_aux_layers(image, aspect, level));
4127 UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
4128 assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
4129
4130 /* Relative to start of the plane's fast clear memory range */
4131 uint32_t offset;
4132
4133 offset = 4; /* Go past the fast clear type */
4134
4135 if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
4136 for (uint32_t l = 0; l < level; l++)
4137 offset += anv_minify(image->vk.extent.depth, l) * 4;
4138 } else {
4139 offset += level * image->vk.array_layers * 4;
4140 }
4141
4142 offset += array_layer * 4;
4143
4144 assert(offset < image->planes[plane].fast_clear_memory_range.size);
4145
4146 return anv_address_add(
4147 anv_image_get_fast_clear_type_addr(device, image, aspect),
4148 offset);
4149 }
4150
4151 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
4152 static inline bool
4153 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
4154 const struct anv_image *image)
4155 {
4156 if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
4157 return false;
4158
4159 /* For Gfx8-11, there are some restrictions around sampling from HiZ.
4160 * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
4161 * say:
4162 *
4163 * "If this field is set to AUX_HIZ, Number of Multisamples must
4164 * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
4165 */
4166 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
4167 return false;
4168
4169 /* Allow this feature on BDW even though it is disabled in the BDW devinfo
4170 * struct. There's documentation which suggests that this feature actually
4171 * reduces performance on BDW, but it has only been observed to help so
4172 * far. Sampling fast-cleared blocks on BDW must also be handled with care
4173 * (see depth_stencil_attachment_compute_aux_usage() for more info).
4174 */
4175 if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
4176 return false;
4177
4178 return image->vk.samples == 1;
4179 }
4180
4181 /* Returns true if an MCS-enabled buffer can be sampled from. */
4182 static inline bool
4183 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
4184 const struct anv_image *image)
4185 {
4186 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
4187 const uint32_t plane =
4188 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
4189
4190 assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
4191
4192 const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
4193
4194 /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
4195 * See HSD 1707282275, wa_14013111325. Due to the use of
4196 * format-reinterpretation, a simplified workaround is implemented.
4197 */
4198 if (devinfo->ver >= 12 &&
4199 isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
4200 return false;
4201 }
4202
4203 return true;
4204 }
4205
4206 static inline bool
4207 anv_image_plane_uses_aux_map(const struct anv_device *device,
4208 const struct anv_image *image,
4209 uint32_t plane)
4210 {
4211 return device->info.has_aux_map &&
4212 isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
4213 }
4214
4215 void
4216 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
4217 const struct anv_image *image,
4218 VkImageAspectFlagBits aspect,
4219 enum isl_aux_usage aux_usage,
4220 uint32_t level,
4221 uint32_t base_layer,
4222 uint32_t layer_count);
4223
4224 void
4225 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
4226 const struct anv_image *image,
4227 VkImageAspectFlagBits aspect,
4228 enum isl_aux_usage aux_usage,
4229 enum isl_format format, struct isl_swizzle swizzle,
4230 uint32_t level, uint32_t base_layer, uint32_t layer_count,
4231 VkRect2D area, union isl_color_value clear_color);
4232 void
4233 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
4234 const struct anv_image *image,
4235 VkImageAspectFlags aspects,
4236 enum isl_aux_usage depth_aux_usage,
4237 uint32_t level,
4238 uint32_t base_layer, uint32_t layer_count,
4239 VkRect2D area,
4240 float depth_value, uint8_t stencil_value);
4241 void
4242 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
4243 const struct anv_image *src_image,
4244 enum isl_aux_usage src_aux_usage,
4245 uint32_t src_level, uint32_t src_base_layer,
4246 const struct anv_image *dst_image,
4247 enum isl_aux_usage dst_aux_usage,
4248 uint32_t dst_level, uint32_t dst_base_layer,
4249 VkImageAspectFlagBits aspect,
4250 uint32_t src_x, uint32_t src_y,
4251 uint32_t dst_x, uint32_t dst_y,
4252 uint32_t width, uint32_t height,
4253 uint32_t layer_count,
4254 enum blorp_filter filter);
4255 void
4256 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
4257 const struct anv_image *image,
4258 VkImageAspectFlagBits aspect, uint32_t level,
4259 uint32_t base_layer, uint32_t layer_count,
4260 enum isl_aux_op hiz_op);
4261 void
4262 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
4263 const struct anv_image *image,
4264 VkImageAspectFlags aspects,
4265 uint32_t level,
4266 uint32_t base_layer, uint32_t layer_count,
4267 VkRect2D area, uint8_t stencil_value);
4268 void
4269 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
4270 const struct anv_image *image,
4271 enum isl_format format, struct isl_swizzle swizzle,
4272 VkImageAspectFlagBits aspect,
4273 uint32_t base_layer, uint32_t layer_count,
4274 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
4275 bool predicate);
4276 void
4277 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
4278 const struct anv_image *image,
4279 enum isl_format format, struct isl_swizzle swizzle,
4280 VkImageAspectFlagBits aspect, uint32_t level,
4281 uint32_t base_layer, uint32_t layer_count,
4282 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
4283 bool predicate);
4284
4285 void
4286 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
4287 const struct anv_image *image,
4288 VkImageAspectFlagBits aspect,
4289 uint32_t base_level, uint32_t level_count,
4290 uint32_t base_layer, uint32_t layer_count);
4291
4292 enum isl_aux_state ATTRIBUTE_PURE
4293 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
4294 const struct anv_image *image,
4295 const VkImageAspectFlagBits aspect,
4296 const VkImageLayout layout);
4297
4298 enum isl_aux_usage ATTRIBUTE_PURE
4299 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
4300 const struct anv_image *image,
4301 const VkImageAspectFlagBits aspect,
4302 const VkImageUsageFlagBits usage,
4303 const VkImageLayout layout);
4304
4305 enum anv_fast_clear_type ATTRIBUTE_PURE
4306 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
4307 const struct anv_image * const image,
4308 const VkImageAspectFlagBits aspect,
4309 const VkImageLayout layout);
4310
4311 static inline bool
4312 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
4313 VkImageAspectFlags aspects2)
4314 {
4315 if (aspects1 == aspects2)
4316 return true;
4317
4318 /* Only color aspects with matching plane counts are compatible. */
4319 if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4320 (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4321 util_bitcount(aspects1) == util_bitcount(aspects2))
4322 return true;
4323
4324 return false;
4325 }
4326
4327 struct anv_image_view {
4328 struct vk_image_view vk;
4329
4330 const struct anv_image *image; /**< VkImageViewCreateInfo::image */
4331
4332 unsigned n_planes;
4333 struct {
4334 uint32_t image_plane;
4335
4336 struct isl_view isl;
4337
4338 /**
4339 * RENDER_SURFACE_STATE when using image as a sampler surface with an
4340 * image layout of SHADER_READ_ONLY_OPTIMAL or
4341 * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
4342 */
4343 struct anv_surface_state optimal_sampler_surface_state;
4344
4345 /**
4346 * RENDER_SURFACE_STATE when using image as a sampler surface with an
4347 * image layout of GENERAL.
4348 */
4349 struct anv_surface_state general_sampler_surface_state;
4350
4351 /**
4352 * RENDER_SURFACE_STATE when using image as a storage image. Separate
4353 * states for vanilla (with the original format) and one which has been
4354 * lowered to a format suitable for reading. This may be a raw surface
4355 * in extreme cases or simply a surface with a different format where we
4356 * expect some conversion to be done in the shader.
4357 */
4358 struct anv_surface_state storage_surface_state;
4359 struct anv_surface_state lowered_storage_surface_state;
4360
4361 struct brw_image_param lowered_storage_image_param;
4362 } planes[3];
4363 };
4364
4365 enum anv_image_view_state_flags {
4366 ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED = (1 << 0),
4367 ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 1),
4368 };
4369
4370 void anv_image_fill_surface_state(struct anv_device *device,
4371 const struct anv_image *image,
4372 VkImageAspectFlagBits aspect,
4373 const struct isl_view *view,
4374 isl_surf_usage_flags_t view_usage,
4375 enum isl_aux_usage aux_usage,
4376 const union isl_color_value *clear_color,
4377 enum anv_image_view_state_flags flags,
4378 struct anv_surface_state *state_inout,
4379 struct brw_image_param *image_param_out);
4380
4381 struct anv_image_create_info {
4382 const VkImageCreateInfo *vk_info;
4383
4384 /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
4385 isl_tiling_flags_t isl_tiling_flags;
4386
4387 /** These flags will be added to any derived from VkImageCreateInfo. */
4388 isl_surf_usage_flags_t isl_extra_usage_flags;
4389 };
4390
4391 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
4392 const struct anv_image_create_info *create_info);
4393
4394 void anv_image_finish(struct anv_image *image);
4395
4396 void anv_image_get_memory_requirements(struct anv_device *device,
4397 struct anv_image *image,
4398 VkImageAspectFlags aspects,
4399 VkMemoryRequirements2 *pMemoryRequirements);
4400
4401 enum isl_format
4402 anv_isl_format_for_descriptor_type(const struct anv_device *device,
4403 VkDescriptorType type);
4404
4405 static inline VkExtent3D
4406 anv_sanitize_image_extent(const VkImageType imageType,
4407 const VkExtent3D imageExtent)
4408 {
4409 switch (imageType) {
4410 case VK_IMAGE_TYPE_1D:
4411 return (VkExtent3D) { imageExtent.width, 1, 1 };
4412 case VK_IMAGE_TYPE_2D:
4413 return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
4414 case VK_IMAGE_TYPE_3D:
4415 return imageExtent;
4416 default:
4417 unreachable("invalid image type");
4418 }
4419 }
4420
4421 static inline VkOffset3D
4422 anv_sanitize_image_offset(const VkImageType imageType,
4423 const VkOffset3D imageOffset)
4424 {
4425 switch (imageType) {
4426 case VK_IMAGE_TYPE_1D:
4427 return (VkOffset3D) { imageOffset.x, 0, 0 };
4428 case VK_IMAGE_TYPE_2D:
4429 return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
4430 case VK_IMAGE_TYPE_3D:
4431 return imageOffset;
4432 default:
4433 unreachable("invalid image type");
4434 }
4435 }
4436
4437 static inline uint32_t
4438 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
4439 VkLineRasterizationModeEXT line_mode)
4440 {
4441 if (raster_mode == VK_POLYGON_MODE_LINE &&
4442 line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
4443 return true;
4444 return false;
4445 }
4446
4447 VkFormatFeatureFlags2KHR
4448 anv_get_image_format_features2(const struct intel_device_info *devinfo,
4449 VkFormat vk_format,
4450 const struct anv_format *anv_format,
4451 VkImageTiling vk_tiling,
4452 const struct isl_drm_modifier_info *isl_mod_info);
4453
4454 void anv_fill_buffer_surface_state(struct anv_device *device,
4455 struct anv_state state,
4456 enum isl_format format,
4457 isl_surf_usage_flags_t usage,
4458 struct anv_address address,
4459 uint32_t range, uint32_t stride);
4460
4461 static inline void
4462 anv_clear_color_from_att_state(union isl_color_value *clear_color,
4463 const struct anv_attachment_state *att_state,
4464 const struct anv_image_view *iview)
4465 {
4466 const struct isl_format_layout *view_fmtl =
4467 isl_format_get_layout(iview->planes[0].isl.format);
4468
4469 #define COPY_CLEAR_COLOR_CHANNEL(c, i) \
4470 if (view_fmtl->channels.c.bits) \
4471 clear_color->u32[i] = att_state->clear_value.color.uint32[i]
4472
4473 COPY_CLEAR_COLOR_CHANNEL(r, 0);
4474 COPY_CLEAR_COLOR_CHANNEL(g, 1);
4475 COPY_CLEAR_COLOR_CHANNEL(b, 2);
4476 COPY_CLEAR_COLOR_CHANNEL(a, 3);
4477
4478 #undef COPY_CLEAR_COLOR_CHANNEL
4479 }
4480
4481
4482 /* Haswell border color is a bit of a disaster. Float and unorm formats use a
4483 * straightforward 32-bit float color in the first 64 bytes. Instead of using
4484 * a nice float/integer union like Gfx8+, Haswell specifies the integer border
4485 * color as a separate entry /after/ the float color. The layout of this entry
4486 * also depends on the format's bpp (with extra hacks for RG32), and the entries for different bpps overlap.
4487 *
4488 * Since we don't know the format/bpp, we can't make any of the border colors
4489 * containing '1' work for all formats, as it would be in the wrong place for
4490 * some of them. We opt to make 32-bit integers work as this seems like the
4491 * most common option. Fortunately, transparent black works regardless, as
4492 * all zeroes is the same in every bit-size.
4493 */
4494 struct hsw_border_color {
4495 float float32[4];
4496 uint32_t _pad0[12];
4497 uint32_t uint32[4];
4498 uint32_t _pad1[108];
4499 };
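/* Example (sketch): with the layout above, integer opaque white
 * (VK_BORDER_COLOR_INT_OPAQUE_WHITE) needs ones in both the float and the
 * integer entries, while transparent black can leave the whole struct
 * zeroed:
 *
 *    struct hsw_border_color opaque_white = {
 *       .float32 = { 1.0f, 1.0f, 1.0f, 1.0f },
 *       .uint32  = { 1, 1, 1, 1 },
 *    };
 */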
4500
4501 struct gfx8_border_color {
4502 union {
4503 float float32[4];
4504 uint32_t uint32[4];
4505 };
4506 /* Pad out to 64 bytes */
4507 uint32_t _pad[12];
4508 };
4509
4510 struct anv_ycbcr_conversion {
4511 struct vk_object_base base;
4512
4513 const struct anv_format * format;
4514 VkSamplerYcbcrModelConversion ycbcr_model;
4515 VkSamplerYcbcrRange ycbcr_range;
4516 VkComponentSwizzle mapping[4];
4517 VkChromaLocation chroma_offsets[2];
4518 VkFilter chroma_filter;
4519 bool chroma_reconstruction;
4520 };
4521
4522 struct anv_sampler {
4523 struct vk_object_base base;
4524
4525 uint32_t state[3][4];
4526 uint32_t n_planes;
4527 struct anv_ycbcr_conversion *conversion;
4528
4529 /* Blob of sampler state data which is guaranteed to be 32-byte aligned
4530 * and with a 32-byte stride for use as bindless samplers.
4531 */
4532 struct anv_state bindless_state;
4533
4534 struct anv_state custom_border_color;
4535 };
4536
4537 struct anv_framebuffer {
4538 struct vk_object_base base;
4539
4540 uint32_t width;
4541 uint32_t height;
4542 uint32_t layers;
4543
4544 uint32_t attachment_count;
4545 struct anv_image_view * attachments[0];
4546 };
4547
4548 struct anv_subpass_attachment {
4549 VkImageUsageFlagBits usage;
4550 uint32_t attachment;
4551 VkImageLayout layout;
4552
4553 /* Used only with attachment containing stencil data. */
4554 VkImageLayout stencil_layout;
4555 };
4556
4557 struct anv_subpass {
4558 uint32_t attachment_count;
4559
4560 /**
4561 * A pointer to all attachment references used in this subpass.
4562 * Only valid if ::attachment_count > 0.
4563 */
4564 struct anv_subpass_attachment * attachments;
4565 uint32_t input_count;
4566 struct anv_subpass_attachment * input_attachments;
4567 uint32_t color_count;
4568 struct anv_subpass_attachment * color_attachments;
4569 struct anv_subpass_attachment * resolve_attachments;
4570
4571 struct anv_subpass_attachment * depth_stencil_attachment;
4572 struct anv_subpass_attachment * ds_resolve_attachment;
4573 VkResolveModeFlagBitsKHR depth_resolve_mode;
4574 VkResolveModeFlagBitsKHR stencil_resolve_mode;
4575
4576 uint32_t view_mask;
4577
4578 /** Subpass has a depth/stencil self-dependency */
4579 bool has_ds_self_dep;
4580
4581 /** Subpass has at least one color resolve attachment */
4582 bool has_color_resolve;
4583 };
4584
4585 static inline unsigned
4586 anv_subpass_view_count(const struct anv_subpass *subpass)
4587 {
4588 return MAX2(1, util_bitcount(subpass->view_mask));
4589 }
4590
4591 struct anv_render_pass_attachment {
4592 /* TODO: Consider using VkAttachmentDescription instead of storing each of
4593 * its members individually.
4594 */
4595 VkFormat format;
4596 uint32_t samples;
4597 VkImageUsageFlags usage;
4598 VkAttachmentLoadOp load_op;
4599 VkAttachmentStoreOp store_op;
4600 VkAttachmentLoadOp stencil_load_op;
4601 VkImageLayout initial_layout;
4602 VkImageLayout final_layout;
4603 VkImageLayout first_subpass_layout;
4604
4605 VkImageLayout stencil_initial_layout;
4606 VkImageLayout stencil_final_layout;
4607
4608 /* The subpass id in which the attachment will be used last. */
4609 uint32_t last_subpass_idx;
4610 };
4611
4612 struct anv_render_pass {
4613 struct vk_object_base base;
4614
4615 uint32_t attachment_count;
4616 uint32_t subpass_count;
4617 /* An array of subpass_count+1 flushes, one per subpass boundary */
4618 enum anv_pipe_bits * subpass_flushes;
4619 struct anv_render_pass_attachment * attachments;
4620 struct anv_subpass subpasses[0];
4621 };
4622
4623 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4624
4625 struct anv_query_pool {
4626 struct vk_object_base base;
4627
4628 VkQueryType type;
4629 VkQueryPipelineStatisticFlags pipeline_statistics;
4630 /** Stride between slots, in bytes */
4631 uint32_t stride;
4632 /** Number of slots in this query pool */
4633 uint32_t slots;
4634 struct anv_bo * bo;
4635
4636 /* KHR perf queries : */
4637 uint32_t pass_size;
4638 uint32_t data_offset;
4639 uint32_t snapshot_size;
4640 uint32_t n_counters;
4641 struct intel_perf_counter_pass *counter_pass;
4642 uint32_t n_passes;
4643 struct intel_perf_query_info **pass_query;
4644 };
4645
4646 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
4647 uint32_t pass)
4648 {
4649 return pool->pass_size * pass + 8;
4650 }
4651
4652 struct anv_acceleration_structure {
4653 struct vk_object_base base;
4654
4655 VkDeviceSize size;
4656 struct anv_address address;
4657 };
4658
4659 int anv_get_instance_entrypoint_index(const char *name);
4660 int anv_get_device_entrypoint_index(const char *name);
4661 int anv_get_physical_device_entrypoint_index(const char *name);
4662
4663 const char *anv_get_instance_entry_name(int index);
4664 const char *anv_get_physical_device_entry_name(int index);
4665 const char *anv_get_device_entry_name(int index);
4666
4667 bool
4668 anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
4669 const struct vk_instance_extension_table *instance);
4670 bool
4671 anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
4672 const struct vk_instance_extension_table *instance);
4673 bool
4674 anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
4675 const struct vk_instance_extension_table *instance,
4676 const struct vk_device_extension_table *device);
4677
4678 const struct vk_device_dispatch_table *
4679 anv_get_device_dispatch_table(const struct intel_device_info *devinfo);
4680
4681 void
4682 anv_dump_pipe_bits(enum anv_pipe_bits bits);
4683
4684 static inline void
4685 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
4686 enum anv_pipe_bits bits,
4687 const char* reason)
4688 {
4689 cmd_buffer->state.pending_pipe_bits |= bits;
4690 if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits)
4691 {
4692 fputs("pc: add ", stderr);
4693 anv_dump_pipe_bits(bits);
4694 fprintf(stderr, "reason: %s\n", reason);
4695 }
4696 }
4697
4698 static inline uint32_t
4699 anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
4700 {
4701 /* This function must be called from within a subpass. */
4702 assert(cmd_state->pass && cmd_state->subpass);
4703
4704 const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;
4705
4706 /* The id of this subpass shouldn't exceed the number of subpasses in this
4707 * render pass minus 1.
4708 */
4709 assert(subpass_id < cmd_state->pass->subpass_count);
4710 return subpass_id;
4711 }
4712
4713 struct anv_performance_configuration_intel {
4714 struct vk_object_base base;
4715
4716 struct intel_perf_registers *register_config;
4717
4718 uint64_t config_id;
4719 };
4720
4721 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
4722 void anv_device_perf_init(struct anv_device *device);
4723 void anv_perf_write_pass_results(struct intel_perf_config *perf,
4724 struct anv_query_pool *pool, uint32_t pass,
4725 const struct intel_perf_query_result *accumulated_results,
4726 union VkPerformanceCounterResultKHR *results);
4727
4728 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
4729 VK_FROM_HANDLE(__anv_type, __name, __handle)
4730
4731 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
4732 VK_OBJECT_TYPE_COMMAND_BUFFER)
4733 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
4734 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
4735 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
4736 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
4737 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
4738
4739 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
4740 VkAccelerationStructureKHR,
4741 VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
4742 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool,
4743 VK_OBJECT_TYPE_COMMAND_POOL)
4744 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer,
4745 VK_OBJECT_TYPE_BUFFER)
4746 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
4747 VK_OBJECT_TYPE_BUFFER_VIEW)
4748 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
4749 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
4750 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
4751 VK_OBJECT_TYPE_DESCRIPTOR_SET)
4752 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
4753 VkDescriptorSetLayout,
4754 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
4755 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
4756 VkDescriptorUpdateTemplate,
4757 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
4758 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
4759 VK_OBJECT_TYPE_DEVICE_MEMORY)
4760 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
4761 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
4762 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
4763 VK_OBJECT_TYPE_FRAMEBUFFER)
4764 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
4765 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
4766 VK_OBJECT_TYPE_IMAGE_VIEW);
4767 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
4768 VK_OBJECT_TYPE_PIPELINE_CACHE)
4769 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
4770 VK_OBJECT_TYPE_PIPELINE)
4771 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
4772 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
4773 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
4774 VK_OBJECT_TYPE_QUERY_POOL)
4775 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
4776 VK_OBJECT_TYPE_RENDER_PASS)
4777 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
4778 VK_OBJECT_TYPE_SAMPLER)
4779 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
4780 VK_OBJECT_TYPE_SEMAPHORE)
4781 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
4782 VkSamplerYcbcrConversion,
4783 VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
4784 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
4785 VkPerformanceConfigurationINTEL,
4786 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
4787
4788 #define anv_genX(devinfo, thing) ({ \
4789 __typeof(&gfx9_##thing) genX_thing; \
4790 switch ((devinfo)->verx10) { \
4791 case 70: \
4792 genX_thing = &gfx7_##thing; \
4793 break; \
4794 case 75: \
4795 genX_thing = &gfx75_##thing; \
4796 break; \
4797 case 80: \
4798 genX_thing = &gfx8_##thing; \
4799 break; \
4800 case 90: \
4801 genX_thing = &gfx9_##thing; \
4802 break; \
4803 case 110: \
4804 genX_thing = &gfx11_##thing; \
4805 break; \
4806 case 120: \
4807 genX_thing = &gfx12_##thing; \
4808 break; \
4809 case 125: \
4810 genX_thing = &gfx125_##thing; \
4811 break; \
4812 default: \
4813 unreachable("Unknown hardware generation"); \
4814 } \
4815 genX_thing; \
4816 })
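/* Example (sketch): dispatch to the per-generation implementation of an
 * entry point declared in anv_genX.h; `init_device_state` stands in for any
 * such genX function.
 *
 *    anv_genX(&device->info, init_device_state)(device);
 */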
4817
4818 /* Gen-specific function declarations */
4819 #ifdef genX
4820 # include "anv_genX.h"
4821 #else
4822 # define genX(x) gfx7_##x
4823 # include "anv_genX.h"
4824 # undef genX
4825 # define genX(x) gfx75_##x
4826 # include "anv_genX.h"
4827 # undef genX
4828 # define genX(x) gfx8_##x
4829 # include "anv_genX.h"
4830 # undef genX
4831 # define genX(x) gfx9_##x
4832 # include "anv_genX.h"
4833 # undef genX
4834 # define genX(x) gfx11_##x
4835 # include "anv_genX.h"
4836 # undef genX
4837 # define genX(x) gfx12_##x
4838 # include "anv_genX.h"
4839 # undef genX
4840 # define genX(x) gfx125_##x
4841 # include "anv_genX.h"
4842 # undef genX
4843 #endif
4844
4845 #endif /* ANV_PRIVATE_H */
4846