1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/i915_drm.h"
34 #include "drm-uapi/drm_fourcc.h"
35
36 #ifdef HAVE_VALGRIND
37 #include <valgrind.h>
38 #include <memcheck.h>
39 #define VG(x) x
40 #ifndef NDEBUG
41 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
42 #endif
43 #else
44 #define VG(x) ((void)0)
45 #endif
46
47 #include "common/intel_clflush.h"
48 #include "common/intel_decoder.h"
49 #include "common/intel_gem.h"
50 #include "common/intel_l3_config.h"
51 #include "common/intel_measure.h"
52 #include "common/intel_sample_positions.h"
53 #include "dev/intel_device_info.h"
54 #include "blorp/blorp.h"
55 #include "compiler/brw_compiler.h"
56 #include "compiler/brw_rt.h"
57 #include "ds/intel_driver_ds.h"
58 #include "util/bitset.h"
59 #include "util/bitscan.h"
60 #include "util/macros.h"
61 #include "util/hash_table.h"
62 #include "util/list.h"
63 #include "util/perf/u_trace.h"
64 #include "util/sparse_array.h"
65 #include "util/u_atomic.h"
66 #include "util/u_vector.h"
67 #include "util/u_math.h"
68 #include "util/vma.h"
69 #include "util/xmlconfig.h"
70 #include "vk_alloc.h"
71 #include "vk_buffer.h"
72 #include "vk_command_buffer.h"
73 #include "vk_command_pool.h"
74 #include "vk_debug_report.h"
75 #include "vk_device.h"
76 #include "vk_drm_syncobj.h"
77 #include "vk_enum_defines.h"
78 #include "vk_framebuffer.h"
79 #include "vk_graphics_state.h"
80 #include "vk_image.h"
81 #include "vk_instance.h"
82 #include "vk_pipeline_cache.h"
83 #include "vk_physical_device.h"
84 #include "vk_shader_module.h"
85 #include "vk_sync.h"
86 #include "vk_sync_timeline.h"
87 #include "vk_util.h"
88 #include "vk_queue.h"
89 #include "vk_log.h"
90
91 /* Pre-declarations needed for WSI entrypoints */
92 struct wl_surface;
93 struct wl_display;
94 typedef struct xcb_connection_t xcb_connection_t;
95 typedef uint32_t xcb_visualid_t;
96 typedef uint32_t xcb_window_t;
97
98 struct anv_batch;
99 struct anv_buffer;
100 struct anv_buffer_view;
101 struct anv_image_view;
102 struct anv_acceleration_structure;
103 struct anv_instance;
104
105 struct intel_aux_map_context;
106 struct intel_perf_config;
107 struct intel_perf_counter_pass;
108 struct intel_perf_query_result;
109
110 #include <vulkan/vulkan.h>
111 #include <vulkan/vk_icd.h>
112
113 #include "anv_android.h"
114 #include "anv_entrypoints.h"
115 #include "isl/isl.h"
116
117 #include "dev/intel_debug.h"
118 #undef MESA_LOG_TAG
119 #define MESA_LOG_TAG "MESA-INTEL"
120 #include "util/log.h"
121 #include "wsi_common.h"
122
123 #define NSEC_PER_SEC 1000000000ull
124
125 /* anv Virtual Memory Layout
126 * =========================
127 *
128 * When the anv driver is determining the virtual graphics addresses of memory
129 * objects itself using the softpin mechanism, the following memory ranges
130 * will be used.
131 *
132 * Three special considerations to notice:
133 *
134 * (1) the dynamic state pool is located within the same 4 GiB as the low
135 * heap. This is to work around a VF cache issue described in a comment in
136 * anv_physical_device_init_heaps.
137 *
138 * (2) the binding table pool is located at lower addresses than the surface
139 * state pool, within a 4 GiB range. This allows surface state base addresses
140 * to cover both binding tables (16 bit offsets) and surface states (32 bit
141 * offsets).
142 *
143 * (3) the last 4 GiB of the address space is withheld from the high
144 * heap. Various hardware units will read past the end of an object for
145 * various reasons. This healthy margin prevents reads from wrapping around
146 * 48-bit addresses.
147 */
148 #define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */
149 #define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL
150 #define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */
151 #define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL
152 #define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */
153 #define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL
154 #define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */
155 #define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
156 #define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
157 #define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL
158 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
159 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
160 #define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
161 #define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL
162 #define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */
163
164 #define GENERAL_STATE_POOL_SIZE \
165 (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
166 #define LOW_HEAP_SIZE \
167 (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
168 #define DYNAMIC_STATE_POOL_SIZE \
169 (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
170 #define BINDING_TABLE_POOL_SIZE \
171 (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
172 #define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
173 #define SURFACE_STATE_POOL_SIZE \
174 (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
175 #define INSTRUCTION_STATE_POOL_SIZE \
176 (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
177 #define CLIENT_VISIBLE_HEAP_SIZE \
178 (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
179
180 /* Allowing different clear colors requires us to perform a depth resolve at
181 * the end of certain render passes. This is because while slow clears store
182 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
183 * See the PRMs for examples describing when additional resolves would be
184 * necessary. To enable fast clears without requiring extra resolves, we set
185 * the clear value to a globally-defined one. We could allow different values
186 * if the user doesn't expect coherent data during or after a render passes
187 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
188 * don't seem to exist yet. In almost all Vulkan applications tested thus far,
189 * 1.0f seems to be the only value used. The only application that doesn't set
190 * this value does so through the usage of an seemingly uninitialized clear
191 * value.
192 */
193 #define ANV_HZ_FC_VAL 1.0f
194
195 /* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
196 #define MAX_VBS (33 - 2)
197
198 /* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
199 * only supports the push model of VS inputs, and we only have 128 GRFs,
200 * minus the g0 and g1 payload, which gives us a maximum of 31 VEs. Plus,
201 * we use two of them for SGVs.
202 */
203 #define MAX_VES (31 - 2)
204
205 #define MAX_XFB_BUFFERS 4
206 #define MAX_XFB_STREAMS 4
207 #define MAX_SETS 32
208 #define MAX_RTS 8
209 #define MAX_VIEWPORTS 16
210 #define MAX_SCISSORS 16
211 #define MAX_PUSH_CONSTANTS_SIZE 128
212 #define MAX_DYNAMIC_BUFFERS 16
213 #define MAX_IMAGES 64
214 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
215 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
216 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
217 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
218 * use 64 here to avoid cache issues. This could most likely bring it back to
219 * 32 if we had different virtual addresses for the different views on a given
220 * GEM object.
221 */
222 #define ANV_UBO_ALIGNMENT 64
223 #define ANV_SSBO_ALIGNMENT 4
224 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
225 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
226 #define MAX_SAMPLE_LOCATIONS 16
227
228 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
229 *
230 * "The surface state model is used when a Binding Table Index (specified
231 * in the message descriptor) of less than 240 is specified. In this model,
232 * the Binding Table Index is used to index into the binding table, and the
233 * binding table entry contains a pointer to the SURFACE_STATE."
234 *
235 * Binding table values above 240 are used for various things in the hardware
236 * such as stateless, stateless with incoherent cache, SLM, and bindless.
237 */
238 #define MAX_BINDING_TABLE_SIZE 240
239
240 /* The kernel relocation API has a limitation of a 32-bit delta value
241 * applied to the address before it is written which, in spite of it being
242 * unsigned, is treated as signed . Because of the way that this maps to
243 * the Vulkan API, we cannot handle an offset into a buffer that does not
244 * fit into a signed 32 bits. The only mechanism we have for dealing with
245 * this at the moment is to limit all VkDeviceMemory objects to a maximum
246 * of 2GB each. The Vulkan spec allows us to do this:
247 *
248 * "Some platforms may have a limit on the maximum size of a single
249 * allocation. For example, certain systems may fail to create
250 * allocations with a size greater than or equal to 4GB. Such a limit is
251 * implementation-dependent, and if such a failure occurs then the error
252 * VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
253 */
254 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
255
256 #define ANV_SVGS_VB_INDEX MAX_VBS
257 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
258
259 /* We reserve this MI ALU register for the purpose of handling predication.
260 * Other code which uses the MI ALU should leave it alone.
261 */
262 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
263
264 /* We reserve this MI ALU register to pass around an offset computed from
265 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
266 * Other code which uses the MI ALU should leave it alone.
267 */
268 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
269
270 #define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)
271
272 /* For gfx12 we set the streamout buffers using 4 separate commands
273 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
274 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
275 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
276 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
277 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
278 * 3DSTATE_SO_BUFFER_INDEX_0.
279 */
280 #define SO_BUFFER_INDEX_0_CMD 0x60
281 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
282
283 static inline uint32_t
align_down_npot_u32(uint32_t v,uint32_t a)284 align_down_npot_u32(uint32_t v, uint32_t a)
285 {
286 return v - (v % a);
287 }
288
289 static inline uint32_t
align_down_u32(uint32_t v,uint32_t a)290 align_down_u32(uint32_t v, uint32_t a)
291 {
292 assert(a != 0 && a == (a & -a));
293 return v & ~(a - 1);
294 }
295
296 static inline uint32_t
align_u32(uint32_t v,uint32_t a)297 align_u32(uint32_t v, uint32_t a)
298 {
299 assert(a != 0 && a == (a & -a));
300 return align_down_u32(v + a - 1, a);
301 }
302
303 static inline uint64_t
align_down_u64(uint64_t v,uint64_t a)304 align_down_u64(uint64_t v, uint64_t a)
305 {
306 assert(a != 0 && a == (a & -a));
307 return v & ~(a - 1);
308 }
309
310 static inline uint64_t
align_u64(uint64_t v,uint64_t a)311 align_u64(uint64_t v, uint64_t a)
312 {
313 return align_down_u64(v + a - 1, a);
314 }
315
316 static inline int32_t
align_i32(int32_t v,int32_t a)317 align_i32(int32_t v, int32_t a)
318 {
319 assert(a != 0 && a == (a & -a));
320 return (v + a - 1) & ~(a - 1);
321 }
322
323 /** Alignment must be a power of 2. */
324 static inline bool
anv_is_aligned(uintmax_t n,uintmax_t a)325 anv_is_aligned(uintmax_t n, uintmax_t a)
326 {
327 assert(a == (a & -a));
328 return (n & (a - 1)) == 0;
329 }
330
331 static inline uint32_t
anv_minify(uint32_t n,uint32_t levels)332 anv_minify(uint32_t n, uint32_t levels)
333 {
334 if (unlikely(n == 0))
335 return 0;
336 else
337 return MAX2(n >> levels, 1);
338 }
339
340 static inline float
anv_clamp_f(float f,float min,float max)341 anv_clamp_f(float f, float min, float max)
342 {
343 assert(min < max);
344
345 if (f > max)
346 return max;
347 else if (f < min)
348 return min;
349 else
350 return f;
351 }
352
353 static inline bool
anv_clear_mask(uint32_t * inout_mask,uint32_t clear_mask)354 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
355 {
356 if (*inout_mask & clear_mask) {
357 *inout_mask &= ~clear_mask;
358 return true;
359 } else {
360 return false;
361 }
362 }
363
364 static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)365 vk_to_isl_color(VkClearColorValue color)
366 {
367 return (union isl_color_value) {
368 .u32 = {
369 color.uint32[0],
370 color.uint32[1],
371 color.uint32[2],
372 color.uint32[3],
373 },
374 };
375 }
376
377 static inline union isl_color_value
vk_to_isl_color_with_format(VkClearColorValue color,enum isl_format format)378 vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
379 {
380 const struct isl_format_layout *fmtl = isl_format_get_layout(format);
381 union isl_color_value isl_color = { .u32 = {0, } };
382
383 #define COPY_COLOR_CHANNEL(c, i) \
384 if (fmtl->channels.c.bits) \
385 isl_color.u32[i] = color.uint32[i]
386
387 COPY_COLOR_CHANNEL(r, 0);
388 COPY_COLOR_CHANNEL(g, 1);
389 COPY_COLOR_CHANNEL(b, 2);
390 COPY_COLOR_CHANNEL(a, 3);
391
392 #undef COPY_COLOR_CHANNEL
393
394 return isl_color;
395 }
396
anv_unpack_ptr(uintptr_t ptr,int bits,int * flags)397 static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
398 {
399 uintptr_t mask = (1ull << bits) - 1;
400 *flags = ptr & mask;
401 return (void *) (ptr & ~mask);
402 }
403
anv_pack_ptr(void * ptr,int bits,int flags)404 static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
405 {
406 uintptr_t value = (uintptr_t) ptr;
407 uintptr_t mask = (1ull << bits) - 1;
408 return value | (mask & flags);
409 }
410
411 /**
412 * Warn on ignored extension structs.
413 *
414 * The Vulkan spec requires us to ignore unsupported or unknown structs in
415 * a pNext chain. In debug mode, emitting warnings for ignored structs may
416 * help us discover structs that we should not have ignored.
417 *
418 *
419 * From the Vulkan 1.0.38 spec:
420 *
421 * Any component of the implementation (the loader, any enabled layers,
422 * and drivers) must skip over, without processing (other than reading the
423 * sType and pNext members) any chained structures with sType values not
424 * defined by extensions supported by that component.
425 */
426 #define anv_debug_ignored_stype(sType) \
427 mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
428
429 void __anv_perf_warn(struct anv_device *device,
430 const struct vk_object_base *object,
431 const char *file, int line, const char *format, ...)
432 anv_printflike(5, 6);
433
434 /**
435 * Print a FINISHME message, including its source location.
436 */
437 #define anv_finishme(format, ...) \
438 do { \
439 static bool reported = false; \
440 if (!reported) { \
441 mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
442 ##__VA_ARGS__); \
443 reported = true; \
444 } \
445 } while (0)
446
447 /**
448 * Print a perf warning message. Set INTEL_DEBUG=perf to see these.
449 */
450 #define anv_perf_warn(objects_macro, format, ...) \
451 do { \
452 static bool reported = false; \
453 if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
454 __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT, \
455 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, \
456 objects_macro, __FILE__, __LINE__, \
457 format, ## __VA_ARGS__); \
458 reported = true; \
459 } \
460 } while (0)
461
462 /* A non-fatal assert. Useful for debugging. */
463 #ifdef DEBUG
464 #define anv_assert(x) ({ \
465 if (unlikely(!(x))) \
466 mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
467 })
468 #else
469 #define anv_assert(x)
470 #endif
471
472 struct anv_bo {
473 const char *name;
474
475 uint32_t gem_handle;
476
477 uint32_t refcount;
478
479 /* Index into the current validation list. This is used by the
480 * validation list building algorithm to track which buffers are already
481 * in the validation list so that we can ensure uniqueness.
482 */
483 uint32_t exec_obj_index;
484
485 /* Index for use with util_sparse_array_free_list */
486 uint32_t free_index;
487
488 /* Last known offset. This value is provided by the kernel when we
489 * execbuf and is used as the presumed offset for the next bunch of
490 * relocations.
491 */
492 uint64_t offset;
493
494 /** Size of the buffer not including implicit aux */
495 uint64_t size;
496
497 /* Map for internally mapped BOs.
498 *
499 * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
500 * BO. If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
501 */
502 void *map;
503
504 /** Size of the implicit CCS range at the end of the buffer
505 *
506 * On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K
507 * page of main surface data maps to a 256B chunk of CCS data and that
508 * mapping is provided on TGL-LP by the AUX table which maps virtual memory
509 * addresses in the main surface to virtual memory addresses for CCS data.
510 *
511 * Because we can't change these maps around easily and because Vulkan
512 * allows two VkImages to be bound to overlapping memory regions (as long
513 * as the app is careful), it's not feasible to make this mapping part of
514 * the image. (On Gfx11 and earlier, the mapping was provided via
515 * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
516 * Instead, we attach the CCS data directly to the buffer object and setup
517 * the AUX table mapping at BO creation time.
518 *
519 * This field is for internal tracking use by the BO allocator only and
520 * should not be touched by other parts of the code. If something wants to
521 * know if a BO has implicit CCS data, it should instead look at the
522 * has_implicit_ccs boolean below.
523 *
524 * This data is not included in maps of this buffer.
525 */
526 uint32_t _ccs_size;
527
528 /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
529 uint32_t flags;
530
531 /** True if this BO may be shared with other processes */
532 bool is_external:1;
533
534 /** True if this BO is a wrapper
535 *
536 * When set to true, none of the fields in this BO are meaningful except
537 * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
538 * See also anv_bo_unwrap(). Wrapper BOs are not allowed when use_softpin
539 * is set in the physical device.
540 */
541 bool is_wrapper:1;
542
543 /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
544 bool has_fixed_address:1;
545
546 /** True if this BO wraps a host pointer */
547 bool from_host_ptr:1;
548
549 /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
550 bool has_client_visible_address:1;
551
552 /** True if this BO has implicit CCS data attached to it */
553 bool has_implicit_ccs:1;
554 };
555
556 static inline struct anv_bo *
anv_bo_ref(struct anv_bo * bo)557 anv_bo_ref(struct anv_bo *bo)
558 {
559 p_atomic_inc(&bo->refcount);
560 return bo;
561 }
562
563 static inline struct anv_bo *
anv_bo_unwrap(struct anv_bo * bo)564 anv_bo_unwrap(struct anv_bo *bo)
565 {
566 while (bo->is_wrapper)
567 bo = bo->map;
568 return bo;
569 }
570
571 static inline bool
anv_bo_is_pinned(struct anv_bo * bo)572 anv_bo_is_pinned(struct anv_bo *bo)
573 {
574 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
575 /* Sky Lake and later always uses softpin */
576 assert(bo->flags & EXEC_OBJECT_PINNED);
577 return true;
578 #elif defined(GFX_VERx10) && GFX_VERx10 < 80
579 /* Haswell and earlier never use softpin */
580 assert(!(bo->flags & EXEC_OBJECT_PINNED));
581 assert(!bo->has_fixed_address);
582 return false;
583 #else
584 /* If we don't have a GFX_VERx10 #define, we need to look at the BO. Also,
585 * for GFX version 8, we need to look at the BO because Broadwell softpins
586 * but Cherryview doesn't.
587 */
588 assert((bo->flags & EXEC_OBJECT_PINNED) || !bo->has_fixed_address);
589 return (bo->flags & EXEC_OBJECT_PINNED) != 0;
590 #endif
591 }
592
593 struct anv_address {
594 struct anv_bo *bo;
595 int64_t offset;
596 };
597
598 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
599
600 static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)601 anv_address_from_u64(uint64_t addr_u64)
602 {
603 assert(addr_u64 == intel_canonical_address(addr_u64));
604 return (struct anv_address) {
605 .bo = NULL,
606 .offset = addr_u64,
607 };
608 }
609
610 static inline bool
anv_address_is_null(struct anv_address addr)611 anv_address_is_null(struct anv_address addr)
612 {
613 return addr.bo == NULL && addr.offset == 0;
614 }
615
616 static inline uint64_t
anv_address_physical(struct anv_address addr)617 anv_address_physical(struct anv_address addr)
618 {
619 if (addr.bo && anv_bo_is_pinned(addr.bo)) {
620 return intel_canonical_address(addr.bo->offset + addr.offset);
621 } else {
622 return intel_canonical_address(addr.offset);
623 }
624 }
625
626 static inline struct anv_address
anv_address_add(struct anv_address addr,uint64_t offset)627 anv_address_add(struct anv_address addr, uint64_t offset)
628 {
629 addr.offset += offset;
630 return addr;
631 }
632
633 /* Represents a lock-free linked list of "free" things. This is used by
634 * both the block pool and the state pools. Unfortunately, in order to
635 * solve the ABA problem, we can't use a single uint32_t head.
636 */
637 union anv_free_list {
638 struct {
639 uint32_t offset;
640
641 /* A simple count that is incremented every time the head changes. */
642 uint32_t count;
643 };
644 /* Make sure it's aligned to 64 bits. This will make atomic operations
645 * faster on 32 bit platforms.
646 */
647 uint64_t u64 __attribute__ ((aligned (8)));
648 };
649
650 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
651
652 struct anv_block_state {
653 union {
654 struct {
655 uint32_t next;
656 uint32_t end;
657 };
658 /* Make sure it's aligned to 64 bits. This will make atomic operations
659 * faster on 32 bit platforms.
660 */
661 uint64_t u64 __attribute__ ((aligned (8)));
662 };
663 };
664
665 #define anv_block_pool_foreach_bo(bo, pool) \
666 for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
667 _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
668 _pp_bo++)
669
670 #define ANV_MAX_BLOCK_POOL_BOS 20
671
672 struct anv_block_pool {
673 const char *name;
674
675 struct anv_device *device;
676 bool use_relocations;
677
678 /* Wrapper BO for use in relocation lists. This BO is simply a wrapper
679 * around the actual BO so that we grow the pool after the wrapper BO has
680 * been put in a relocation list. This is only used in the non-softpin
681 * case.
682 */
683 struct anv_bo wrapper_bo;
684
685 struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
686 struct anv_bo *bo;
687 uint32_t nbos;
688
689 uint64_t size;
690
691 /* The address where the start of the pool is pinned. The various bos that
692 * are created as the pool grows will have addresses in the range
693 * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
694 */
695 uint64_t start_address;
696
697 /* The offset from the start of the bo to the "center" of the block
698 * pool. Pointers to allocated blocks are given by
699 * bo.map + center_bo_offset + offsets.
700 */
701 uint32_t center_bo_offset;
702
703 /* Current memory map of the block pool. This pointer may or may not
704 * point to the actual beginning of the block pool memory. If
705 * anv_block_pool_alloc_back has ever been called, then this pointer
706 * will point to the "center" position of the buffer and all offsets
707 * (negative or positive) given out by the block pool alloc functions
708 * will be valid relative to this pointer.
709 *
710 * In particular, map == bo.map + center_offset
711 *
712 * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
713 * since it will handle the softpin case as well, where this points to NULL.
714 */
715 void *map;
716 int fd;
717
718 /**
719 * Array of mmaps and gem handles owned by the block pool, reclaimed when
720 * the block pool is destroyed.
721 */
722 struct u_vector mmap_cleanups;
723
724 struct anv_block_state state;
725
726 struct anv_block_state back_state;
727 };
728
729 /* Block pools are backed by a fixed-size 1GB memfd */
730 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
731
732 /* The center of the block pool is also the middle of the memfd. This may
733 * change in the future if we decide differently for some reason.
734 */
735 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
736
737 static inline uint32_t
anv_block_pool_size(struct anv_block_pool * pool)738 anv_block_pool_size(struct anv_block_pool *pool)
739 {
740 return pool->state.end + pool->back_state.end;
741 }
742
743 struct anv_state {
744 int32_t offset;
745 uint32_t alloc_size;
746 void *map;
747 uint32_t idx;
748 };
749
750 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
751
752 struct anv_fixed_size_state_pool {
753 union anv_free_list free_list;
754 struct anv_block_state block;
755 };
756
757 #define ANV_MIN_STATE_SIZE_LOG2 6
758 #define ANV_MAX_STATE_SIZE_LOG2 22
759
760 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
761
762 struct anv_free_entry {
763 uint32_t next;
764 struct anv_state state;
765 };
766
767 struct anv_state_table {
768 struct anv_device *device;
769 int fd;
770 struct anv_free_entry *map;
771 uint32_t size;
772 struct anv_block_state state;
773 struct u_vector cleanups;
774 };
775
776 struct anv_state_pool {
777 struct anv_block_pool block_pool;
778
779 /* Offset into the relevant state base address where the state pool starts
780 * allocating memory.
781 */
782 int32_t start_offset;
783
784 struct anv_state_table table;
785
786 /* The size of blocks which will be allocated from the block pool */
787 uint32_t block_size;
788
789 /** Free list for "back" allocations */
790 union anv_free_list back_alloc_free_list;
791
792 struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
793 };
794
795 struct anv_state_reserved_pool {
796 struct anv_state_pool *pool;
797 union anv_free_list reserved_blocks;
798 uint32_t count;
799 };
800
801 struct anv_state_stream {
802 struct anv_state_pool *state_pool;
803
804 /* The size of blocks to allocate from the state pool */
805 uint32_t block_size;
806
807 /* Current block we're allocating from */
808 struct anv_state block;
809
810 /* Offset into the current block at which to allocate the next state */
811 uint32_t next;
812
813 /* List of all blocks allocated from this pool */
814 struct util_dynarray all_blocks;
815 };
816
817 /* The block_pool functions exported for testing only. The block pool should
818 * only be used via a state pool (see below).
819 */
820 VkResult anv_block_pool_init(struct anv_block_pool *pool,
821 struct anv_device *device,
822 const char *name,
823 uint64_t start_address,
824 uint32_t initial_size);
825 void anv_block_pool_finish(struct anv_block_pool *pool);
826 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
827 uint32_t block_size, uint32_t *padding);
828 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
829 uint32_t block_size);
830 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
831 size);
832
833 VkResult anv_state_pool_init(struct anv_state_pool *pool,
834 struct anv_device *device,
835 const char *name,
836 uint64_t base_address,
837 int32_t start_offset,
838 uint32_t block_size);
839 void anv_state_pool_finish(struct anv_state_pool *pool);
840 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
841 uint32_t state_size, uint32_t alignment);
842 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
843 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
844 void anv_state_stream_init(struct anv_state_stream *stream,
845 struct anv_state_pool *state_pool,
846 uint32_t block_size);
847 void anv_state_stream_finish(struct anv_state_stream *stream);
848 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
849 uint32_t size, uint32_t alignment);
850
851 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
852 struct anv_state_pool *parent,
853 uint32_t count, uint32_t size,
854 uint32_t alignment);
855 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
856 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
857 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
858 struct anv_state state);
859
860 VkResult anv_state_table_init(struct anv_state_table *table,
861 struct anv_device *device,
862 uint32_t initial_entries);
863 void anv_state_table_finish(struct anv_state_table *table);
864 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
865 uint32_t count);
866 void anv_free_list_push(union anv_free_list *list,
867 struct anv_state_table *table,
868 uint32_t idx, uint32_t count);
869 struct anv_state* anv_free_list_pop(union anv_free_list *list,
870 struct anv_state_table *table);
871
872
873 static inline struct anv_state *
anv_state_table_get(struct anv_state_table * table,uint32_t idx)874 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
875 {
876 return &table->map[idx].state;
877 }
878 /**
879 * Implements a pool of re-usable BOs. The interface is identical to that
880 * of block_pool except that each block is its own BO.
881 */
882 struct anv_bo_pool {
883 const char *name;
884
885 struct anv_device *device;
886
887 struct util_sparse_array_free_list free_list[16];
888 };
889
890 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
891 const char *name);
892 void anv_bo_pool_finish(struct anv_bo_pool *pool);
893 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
894 struct anv_bo **bo_out);
895 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
896
897 struct anv_scratch_pool {
898 /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
899 struct anv_bo *bos[16][MESA_SHADER_STAGES];
900 uint32_t surfs[16];
901 struct anv_state surf_states[16];
902 };
903
904 void anv_scratch_pool_init(struct anv_device *device,
905 struct anv_scratch_pool *pool);
906 void anv_scratch_pool_finish(struct anv_device *device,
907 struct anv_scratch_pool *pool);
908 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
909 struct anv_scratch_pool *pool,
910 gl_shader_stage stage,
911 unsigned per_thread_scratch);
912 uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
913 struct anv_scratch_pool *pool,
914 unsigned per_thread_scratch);
915
916 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
917 struct anv_bo_cache {
918 struct util_sparse_array bo_map;
919 pthread_mutex_t mutex;
920 };
921
922 VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
923 struct anv_device *device);
924 void anv_bo_cache_finish(struct anv_bo_cache *cache);
925
926 struct anv_queue_family {
927 /* Standard bits passed on to the client */
928 VkQueueFlags queueFlags;
929 uint32_t queueCount;
930
931 /* Driver internal information */
932 enum drm_i915_gem_engine_class engine_class;
933 };
934
935 #define ANV_MAX_QUEUE_FAMILIES 3
936
937 struct anv_memory_type {
938 /* Standard bits passed on to the client */
939 VkMemoryPropertyFlags propertyFlags;
940 uint32_t heapIndex;
941 };
942
943 struct anv_memory_heap {
944 /* Standard bits passed on to the client */
945 VkDeviceSize size;
946 VkMemoryHeapFlags flags;
947
948 /** Driver-internal book-keeping.
949 *
950 * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
951 */
952 VkDeviceSize used __attribute__ ((aligned (8)));
953
954 bool is_local_mem;
955 };
956
957 struct anv_memregion {
958 struct drm_i915_gem_memory_class_instance region;
959 uint64_t size;
960 uint64_t available;
961 };
962
963 struct anv_physical_device {
964 struct vk_physical_device vk;
965
966 /* Link in anv_instance::physical_devices */
967 struct list_head link;
968
969 struct anv_instance * instance;
970 char path[20];
971 struct intel_device_info info;
972 /** Amount of "GPU memory" we want to advertise
973 *
974 * Clearly, this value is bogus since Intel is a UMA architecture. On
975 * gfx7 platforms, we are limited by GTT size unless we want to implement
976 * fine-grained tracking and GTT splitting. On Broadwell and above we are
977 * practically unlimited. However, we will never report more than 3/4 of
978 * the total system ram to try and avoid running out of RAM.
979 */
980 bool supports_48bit_addresses;
981 struct brw_compiler * compiler;
982 struct isl_device isl_dev;
983 struct intel_perf_config * perf;
984 /* True if hardware support is incomplete/alpha */
985 bool is_alpha;
986 /*
987 * Number of commands required to implement a performance query begin +
988 * end.
989 */
990 uint32_t n_perf_query_commands;
991 int cmd_parser_version;
992 bool has_exec_async;
993 bool has_exec_capture;
994 int max_context_priority;
995 bool has_context_isolation;
996 bool has_mmap_offset;
997 bool has_userptr_probe;
998 uint64_t gtt_size;
999
1000 bool use_relocations;
1001 bool use_softpin;
1002 bool always_use_bindless;
1003 bool use_call_secondary;
1004
1005 /** True if we can access buffers using A64 messages */
1006 bool has_a64_buffer_access;
1007 /** True if we can use bindless access for images */
1008 bool has_bindless_images;
1009 /** True if we can use bindless access for samplers */
1010 bool has_bindless_samplers;
1011 /** True if we can use timeline semaphores through execbuf */
1012 bool has_exec_timeline;
1013
1014 /** True if we can read the GPU timestamp register
1015 *
1016 * When running in a virtual context, the timestamp register is unreadable
1017 * on Gfx12+.
1018 */
1019 bool has_reg_timestamp;
1020
1021 /** True if this device has implicit AUX
1022 *
1023 * If true, CCS is handled as an implicit attachment to the BO rather than
1024 * as an explicitly bound surface.
1025 */
1026 bool has_implicit_ccs;
1027
1028 bool always_flush_cache;
1029
1030 struct {
1031 uint32_t family_count;
1032 struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
1033 } queue;
1034
1035 struct {
1036 uint32_t type_count;
1037 struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
1038 uint32_t heap_count;
1039 struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
1040 bool need_clflush;
1041 } memory;
1042
1043 /* Either we have a single vram region and it's all mappable, or we have
1044 * both mappable & non-mappable parts. System memory is always available.
1045 */
1046 struct anv_memregion vram_mappable;
1047 struct anv_memregion vram_non_mappable;
1048 struct anv_memregion sys;
1049 uint8_t driver_build_sha1[20];
1050 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
1051 uint8_t driver_uuid[VK_UUID_SIZE];
1052 uint8_t device_uuid[VK_UUID_SIZE];
1053
1054 struct vk_sync_type sync_syncobj_type;
1055 struct vk_sync_timeline_type sync_timeline_type;
1056 const struct vk_sync_type * sync_types[4];
1057
1058 struct wsi_device wsi_device;
1059 int local_fd;
1060 bool has_local;
1061 int64_t local_major;
1062 int64_t local_minor;
1063 int master_fd;
1064 bool has_master;
1065 int64_t master_major;
1066 int64_t master_minor;
1067 struct drm_i915_query_engine_info * engine_info;
1068
1069 void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, bool);
1070 struct intel_measure_device measure_device;
1071 };
1072
1073 static inline bool
anv_physical_device_has_vram(const struct anv_physical_device * device)1074 anv_physical_device_has_vram(const struct anv_physical_device *device)
1075 {
1076 return device->vram_mappable.size > 0;
1077 }
1078
1079 struct anv_app_info {
1080 const char* app_name;
1081 uint32_t app_version;
1082 const char* engine_name;
1083 uint32_t engine_version;
1084 uint32_t api_version;
1085 };
1086
1087 struct anv_instance {
1088 struct vk_instance vk;
1089
1090 bool physical_devices_enumerated;
1091 struct list_head physical_devices;
1092
1093 struct driOptionCache dri_options;
1094 struct driOptionCache available_dri_options;
1095
1096 /**
1097 * Workarounds for game bugs.
1098 */
1099 bool assume_full_subgroups;
1100 bool limit_trig_input_range;
1101 bool sample_mask_out_opengl_behaviour;
1102 };
1103
1104 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1105 void anv_finish_wsi(struct anv_physical_device *physical_device);
1106
1107 struct anv_queue {
1108 struct vk_queue vk;
1109
1110 struct anv_device * device;
1111
1112 const struct anv_queue_family * family;
1113
1114 uint32_t index_in_family;
1115
1116 uint32_t exec_flags;
1117
1118 /** Synchronization object for debug purposes (DEBUG_SYNC) */
1119 struct vk_sync *sync;
1120
1121 struct intel_ds_queue * ds;
1122 };
1123
1124 struct nir_xfb_info;
1125 struct anv_pipeline_bind_map;
1126
1127 extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
1128
1129 struct anv_shader_bin *
1130 anv_device_search_for_kernel(struct anv_device *device,
1131 struct vk_pipeline_cache *cache,
1132 const void *key_data, uint32_t key_size,
1133 bool *user_cache_bit);
1134
1135 struct anv_shader_bin *
1136 anv_device_upload_kernel(struct anv_device *device,
1137 struct vk_pipeline_cache *cache,
1138 gl_shader_stage stage,
1139 const void *key_data, uint32_t key_size,
1140 const void *kernel_data, uint32_t kernel_size,
1141 const struct brw_stage_prog_data *prog_data,
1142 uint32_t prog_data_size,
1143 const struct brw_compile_stats *stats,
1144 uint32_t num_stats,
1145 const struct nir_xfb_info *xfb_info,
1146 const struct anv_pipeline_bind_map *bind_map);
1147
1148 struct nir_shader;
1149 struct nir_shader_compiler_options;
1150
1151 struct nir_shader *
1152 anv_device_search_for_nir(struct anv_device *device,
1153 struct vk_pipeline_cache *cache,
1154 const struct nir_shader_compiler_options *nir_options,
1155 unsigned char sha1_key[20],
1156 void *mem_ctx);
1157
1158 void
1159 anv_device_upload_nir(struct anv_device *device,
1160 struct vk_pipeline_cache *cache,
1161 const struct nir_shader *nir,
1162 unsigned char sha1_key[20]);
1163
1164 struct anv_device {
1165 struct vk_device vk;
1166
1167 struct anv_physical_device * physical;
1168 struct intel_device_info info;
1169 struct isl_device isl_dev;
1170 int context_id;
1171 int fd;
1172 bool can_chain_batches;
1173 bool robust_buffer_access;
1174
1175 pthread_mutex_t vma_mutex;
1176 struct util_vma_heap vma_lo;
1177 struct util_vma_heap vma_cva;
1178 struct util_vma_heap vma_hi;
1179
1180 /** List of all anv_device_memory objects */
1181 struct list_head memory_objects;
1182
1183 struct anv_bo_pool batch_bo_pool;
1184 struct anv_bo_pool utrace_bo_pool;
1185
1186 struct anv_bo_cache bo_cache;
1187
1188 struct anv_state_pool general_state_pool;
1189 struct anv_state_pool dynamic_state_pool;
1190 struct anv_state_pool instruction_state_pool;
1191 struct anv_state_pool binding_table_pool;
1192 struct anv_state_pool surface_state_pool;
1193
1194 struct anv_state_reserved_pool custom_border_colors;
1195
1196 /** BO used for various workarounds
1197 *
1198 * There are a number of workarounds on our hardware which require writing
1199 * data somewhere and it doesn't really matter where. For that, we use
1200 * this BO and just write to the first dword or so.
1201 *
1202 * We also need to be able to handle NULL buffers bound as pushed UBOs.
1203 * For that, we use the high bytes (>= 1024) of the workaround BO.
1204 */
1205 struct anv_bo * workaround_bo;
1206 struct anv_address workaround_address;
1207
1208 struct anv_bo * trivial_batch_bo;
1209 struct anv_state null_surface_state;
1210
1211 struct vk_pipeline_cache * default_pipeline_cache;
1212 struct vk_pipeline_cache * internal_cache;
1213 struct blorp_context blorp;
1214
1215 struct anv_state border_colors;
1216
1217 struct anv_state slice_hash;
1218
1219 /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
1220 *
1221 * We need to emit CPS_STATE structures for each viewport accessible by a
1222 * pipeline. So rather than write many identical CPS_STATE structures
1223 * dynamically, we can enumerate all possible combinaisons and then just
1224 * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
1225 * array.
1226 */
1227 struct anv_state cps_states;
1228
1229 uint32_t queue_count;
1230 struct anv_queue * queues;
1231
1232 struct anv_scratch_pool scratch_pool;
1233 struct anv_bo *rt_scratch_bos[16];
1234
1235 /** Shadow ray query BO
1236 *
1237 * The ray_query_bo only holds the current ray being traced. When using
1238 * more than 1 ray query per thread, we cannot fit all the queries in
1239 * there, so we need a another buffer to hold query data that is not
1240 * currently being used by the HW for tracing, similar to a scratch space.
1241 *
1242 * The size of the shadow buffer depends on the number of queries per
1243 * shader.
1244 */
1245 struct anv_bo *ray_query_shadow_bos[16];
1246 /** Ray query buffer used to communicated with HW unit.
1247 */
1248 struct anv_bo *ray_query_bo;
1249
1250 struct anv_shader_bin *rt_trampoline;
1251 struct anv_shader_bin *rt_trivial_return;
1252
1253 pthread_mutex_t mutex;
1254 pthread_cond_t queue_submit;
1255
1256 struct intel_batch_decode_ctx decoder_ctx;
1257 /*
1258 * When decoding a anv_cmd_buffer, we might need to search for BOs through
1259 * the cmd_buffer's list.
1260 */
1261 struct anv_cmd_buffer *cmd_buffer_being_decoded;
1262
1263 int perf_fd; /* -1 if no opened */
1264 uint64_t perf_metric; /* 0 if unset */
1265
1266 struct intel_aux_map_context *aux_map_ctx;
1267
1268 const struct intel_l3_config *l3_config;
1269
1270 struct intel_debug_block_frame *debug_frame_desc;
1271
1272 struct intel_ds_device ds;
1273 };
1274
1275 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1276 #define ANV_ALWAYS_SOFTPIN true
1277 #else
1278 #define ANV_ALWAYS_SOFTPIN false
1279 #endif
1280
1281 static inline bool
anv_use_relocations(const struct anv_physical_device * pdevice)1282 anv_use_relocations(const struct anv_physical_device *pdevice)
1283 {
1284 #if defined(GFX_VERx10) && GFX_VERx10 >= 90
1285 /* Sky Lake and later always uses softpin */
1286 assert(!pdevice->use_relocations);
1287 return false;
1288 #elif defined(GFX_VERx10) && GFX_VERx10 < 80
1289 /* Haswell and earlier never use softpin */
1290 assert(pdevice->use_relocations);
1291 return true;
1292 #else
1293 /* If we don't have a GFX_VERx10 #define, we need to look at the physical
1294 * device. Also, for GFX version 8, we need to look at the physical
1295 * device because Broadwell softpins but Cherryview doesn't.
1296 */
1297 return pdevice->use_relocations;
1298 #endif
1299 }
1300
1301 static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device * device)1302 anv_binding_table_pool(struct anv_device *device)
1303 {
1304 if (anv_use_relocations(device->physical))
1305 return &device->surface_state_pool;
1306 else
1307 return &device->binding_table_pool;
1308 }
1309
1310 static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device * device)1311 anv_binding_table_pool_alloc(struct anv_device *device)
1312 {
1313 if (anv_use_relocations(device->physical))
1314 return anv_state_pool_alloc_back(&device->surface_state_pool);
1315 else
1316 return anv_state_pool_alloc(&device->binding_table_pool,
1317 device->binding_table_pool.block_size, 0);
1318 }
1319
1320 static inline void
anv_binding_table_pool_free(struct anv_device * device,struct anv_state state)1321 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1322 anv_state_pool_free(anv_binding_table_pool(device), state);
1323 }
1324
1325 static inline uint32_t
anv_mocs(const struct anv_device * device,const struct anv_bo * bo,isl_surf_usage_flags_t usage)1326 anv_mocs(const struct anv_device *device,
1327 const struct anv_bo *bo,
1328 isl_surf_usage_flags_t usage)
1329 {
1330 return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
1331 }
1332
1333 void anv_device_init_blorp(struct anv_device *device);
1334 void anv_device_finish_blorp(struct anv_device *device);
1335
1336 enum anv_bo_alloc_flags {
1337 /** Specifies that the BO must have a 32-bit address
1338 *
1339 * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
1340 */
1341 ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
1342
1343 /** Specifies that the BO may be shared externally */
1344 ANV_BO_ALLOC_EXTERNAL = (1 << 1),
1345
1346 /** Specifies that the BO should be mapped */
1347 ANV_BO_ALLOC_MAPPED = (1 << 2),
1348
1349 /** Specifies that the BO should be snooped so we get coherency */
1350 ANV_BO_ALLOC_SNOOPED = (1 << 3),
1351
1352 /** Specifies that the BO should be captured in error states */
1353 ANV_BO_ALLOC_CAPTURE = (1 << 4),
1354
1355 /** Specifies that the BO will have an address assigned by the caller
1356 *
1357 * Such BOs do not exist in any VMA heap.
1358 */
1359 ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
1360
1361 /** Enables implicit synchronization on the BO
1362 *
1363 * This is the opposite of EXEC_OBJECT_ASYNC.
1364 */
1365 ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
1366
1367 /** Enables implicit synchronization on the BO
1368 *
1369 * This is equivalent to EXEC_OBJECT_WRITE.
1370 */
1371 ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
1372
1373 /** Has an address which is visible to the client */
1374 ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
1375
1376 /** This buffer has implicit CCS data attached to it */
1377 ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
1378
1379 /** This buffer is allocated from local memory */
1380 ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
1381
1382 /** This buffer is allocated from local memory and should be cpu visible */
1383 ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11),
1384 };
1385
1386 VkResult anv_device_alloc_bo(struct anv_device *device,
1387 const char *name, uint64_t size,
1388 enum anv_bo_alloc_flags alloc_flags,
1389 uint64_t explicit_address,
1390 struct anv_bo **bo);
1391 VkResult anv_device_map_bo(struct anv_device *device,
1392 struct anv_bo *bo,
1393 uint64_t offset,
1394 size_t size,
1395 uint32_t gem_flags,
1396 void **map_out);
1397 void anv_device_unmap_bo(struct anv_device *device,
1398 struct anv_bo *bo,
1399 void *map, size_t map_size);
1400 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1401 void *host_ptr, uint32_t size,
1402 enum anv_bo_alloc_flags alloc_flags,
1403 uint64_t client_address,
1404 struct anv_bo **bo_out);
1405 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1406 enum anv_bo_alloc_flags alloc_flags,
1407 uint64_t client_address,
1408 struct anv_bo **bo);
1409 VkResult anv_device_export_bo(struct anv_device *device,
1410 struct anv_bo *bo, int *fd_out);
1411 VkResult anv_device_get_bo_tiling(struct anv_device *device,
1412 struct anv_bo *bo,
1413 enum isl_tiling *tiling_out);
1414 VkResult anv_device_set_bo_tiling(struct anv_device *device,
1415 struct anv_bo *bo,
1416 uint32_t row_pitch_B,
1417 enum isl_tiling tiling);
1418 void anv_device_release_bo(struct anv_device *device,
1419 struct anv_bo *bo);
1420
1421 static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device * device,uint32_t gem_handle)1422 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
1423 {
1424 return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
1425 }
1426
1427 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1428 int64_t timeout);
1429
1430 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
1431 uint32_t exec_flags,
1432 const VkDeviceQueueCreateInfo *pCreateInfo,
1433 uint32_t index_in_family);
1434 void anv_queue_finish(struct anv_queue *queue);
1435
1436 VkResult anv_queue_submit(struct vk_queue *queue,
1437 struct vk_queue_submit *submit);
1438 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
1439 struct anv_batch *batch);
1440
1441 void* anv_gem_mmap(struct anv_device *device,
1442 uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
1443 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
1444 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
1445 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
1446 uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
1447 uint32_t flags, uint32_t num_regions,
1448 struct drm_i915_gem_memory_class_instance *regions);
1449 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
1450 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
1451 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
1452 int anv_gem_execbuffer(struct anv_device *device,
1453 struct drm_i915_gem_execbuffer2 *execbuf);
1454 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
1455 uint32_t stride, uint32_t tiling);
1456 int anv_gem_create_context(struct anv_device *device);
1457 bool anv_gem_has_context_priority(int fd, int priority);
1458 int anv_gem_destroy_context(struct anv_device *device, int context);
1459 int anv_gem_set_context_param(int fd, int context, uint32_t param,
1460 uint64_t value);
1461 int anv_gem_get_param(int fd, uint32_t param);
1462 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
1463 int anv_gem_context_get_reset_stats(int fd, int context,
1464 uint32_t *active, uint32_t *pending);
1465 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
1466 int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
1467 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
1468 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
1469 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
1470 uint32_t read_domains, uint32_t write_domain);
1471 int anv_i915_query(int fd, uint64_t query_id, void *buffer,
1472 int32_t *buffer_len);
1473 struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);
1474
1475 uint64_t anv_vma_alloc(struct anv_device *device,
1476 uint64_t size, uint64_t align,
1477 enum anv_bo_alloc_flags alloc_flags,
1478 uint64_t client_address);
1479 void anv_vma_free(struct anv_device *device,
1480 uint64_t address, uint64_t size);
1481
1482 struct anv_reloc_list {
1483 uint32_t num_relocs;
1484 uint32_t array_length;
1485 struct drm_i915_gem_relocation_entry * relocs;
1486 struct anv_bo ** reloc_bos;
1487 uint32_t dep_words;
1488 BITSET_WORD * deps;
1489 };
1490
1491 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
1492 const VkAllocationCallbacks *alloc);
1493 void anv_reloc_list_finish(struct anv_reloc_list *list,
1494 const VkAllocationCallbacks *alloc);
1495
1496 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
1497 const VkAllocationCallbacks *alloc,
1498 uint32_t offset, struct anv_bo *target_bo,
1499 uint32_t delta, uint64_t *address_u64_out);
1500
1501 VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
1502 const VkAllocationCallbacks *alloc,
1503 struct anv_bo *target_bo);
1504
1505 struct anv_batch_bo {
1506 /* Link in the anv_cmd_buffer.owned_batch_bos list */
1507 struct list_head link;
1508
1509 struct anv_bo * bo;
1510
1511 /* Bytes actually consumed in this batch BO */
1512 uint32_t length;
1513
1514 /* When this batch BO is used as part of a primary batch buffer, this
1515 * tracked whether it is chained to another primary batch buffer.
1516 *
1517 * If this is the case, the relocation list's last entry points the
1518 * location of the MI_BATCH_BUFFER_START chaining to the next batch.
1519 */
1520 bool chained;
1521
1522 struct anv_reloc_list relocs;
1523 };
1524
1525 struct anv_batch {
1526 const VkAllocationCallbacks * alloc;
1527
1528 struct anv_address start_addr;
1529
1530 void * start;
1531 void * end;
1532 void * next;
1533
1534 struct anv_reloc_list * relocs;
1535
1536 /* This callback is called (with the associated user data) in the event
1537 * that the batch runs out of space.
1538 */
1539 VkResult (*extend_cb)(struct anv_batch *, void *);
1540 void * user_data;
1541
1542 /**
1543 * Current error status of the command buffer. Used to track inconsistent
1544 * or incomplete command buffer states that are the consequence of run-time
1545 * errors such as out of memory scenarios. We want to track this in the
1546 * batch because the command buffer object is not visible to some parts
1547 * of the driver.
1548 */
1549 VkResult status;
1550 };
1551
1552 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1553 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1554 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1555
1556 static inline void
anv_batch_set_storage(struct anv_batch * batch,struct anv_address addr,void * map,size_t size)1557 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1558 void *map, size_t size)
1559 {
1560 batch->start_addr = addr;
1561 batch->next = batch->start = map;
1562 batch->end = map + size;
1563 }
1564
1565 static inline VkResult
anv_batch_set_error(struct anv_batch * batch,VkResult error)1566 anv_batch_set_error(struct anv_batch *batch, VkResult error)
1567 {
1568 assert(error != VK_SUCCESS);
1569 if (batch->status == VK_SUCCESS)
1570 batch->status = error;
1571 return batch->status;
1572 }
1573
1574 static inline bool
anv_batch_has_error(struct anv_batch * batch)1575 anv_batch_has_error(struct anv_batch *batch)
1576 {
1577 return batch->status != VK_SUCCESS;
1578 }
1579
1580 static inline uint64_t
anv_batch_emit_reloc(struct anv_batch * batch,void * location,struct anv_bo * bo,uint32_t delta)1581 anv_batch_emit_reloc(struct anv_batch *batch,
1582 void *location, struct anv_bo *bo, uint32_t delta)
1583 {
1584 uint64_t address_u64 = 0;
1585 VkResult result;
1586
1587 if (ANV_ALWAYS_SOFTPIN) {
1588 address_u64 = bo->offset + delta;
1589 result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
1590 } else {
1591 result = anv_reloc_list_add(batch->relocs, batch->alloc,
1592 location - batch->start, bo, delta,
1593 &address_u64);
1594 }
1595 if (unlikely(result != VK_SUCCESS)) {
1596 anv_batch_set_error(batch, result);
1597 return 0;
1598 }
1599
1600 return address_u64;
1601 }
1602
1603 static inline void
write_reloc(const struct anv_device * device,void * p,uint64_t v,bool flush)1604 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1605 {
1606 unsigned reloc_size = 0;
1607 if (device->info.ver >= 8) {
1608 reloc_size = sizeof(uint64_t);
1609 *(uint64_t *)p = intel_canonical_address(v);
1610 } else {
1611 reloc_size = sizeof(uint32_t);
1612 *(uint32_t *)p = v;
1613 }
1614
1615 if (flush && device->physical->memory.need_clflush)
1616 intel_flush_range(p, reloc_size);
1617 }
1618
1619 static inline uint64_t
_anv_combine_address(struct anv_batch * batch,void * location,const struct anv_address address,uint32_t delta)1620 _anv_combine_address(struct anv_batch *batch, void *location,
1621 const struct anv_address address, uint32_t delta)
1622 {
1623 if (address.bo == NULL) {
1624 return address.offset + delta;
1625 } else if (batch == NULL) {
1626 assert(anv_bo_is_pinned(address.bo));
1627 return anv_address_physical(anv_address_add(address, delta));
1628 } else {
1629 assert(batch->start <= location && location < batch->end);
1630 /* i915 relocations are signed. */
1631 assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
1632 return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1633 }
1634 }
1635
1636 #define __gen_address_type struct anv_address
1637 #define __gen_user_data struct anv_batch
1638 #define __gen_combine_address _anv_combine_address
1639
1640 /* Wrapper macros needed to work around preprocessor argument issues. In
1641 * particular, arguments don't get pre-evaluated if they are concatenated.
1642 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1643 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1644 * We can work around this easily enough with these helpers.
1645 */
1646 #define __anv_cmd_length(cmd) cmd ## _length
1647 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1648 #define __anv_cmd_header(cmd) cmd ## _header
1649 #define __anv_cmd_pack(cmd) cmd ## _pack
1650 #define __anv_reg_num(reg) reg ## _num
1651
1652 #define anv_pack_struct(dst, struc, ...) do { \
1653 struct struc __template = { \
1654 __VA_ARGS__ \
1655 }; \
1656 __anv_cmd_pack(struc)(NULL, dst, &__template); \
1657 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1658 } while (0)
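/* Illustrative usage sketch (the GENX command and field are placeholders):
 * anv_pack_struct() packs a template into a plain dword array, which is how
 * pipelines pre-pack state that is later merged with dynamic state:
 *
 *    uint32_t dwords[GENX(3DSTATE_SF_length)];
 *    anv_pack_struct(dwords, GENX(3DSTATE_SF), .LineWidth = 1.0f);
 */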
1659
1660 #define anv_batch_emitn(batch, n, cmd, ...) ({ \
1661 void *__dst = anv_batch_emit_dwords(batch, n); \
1662 if (__dst) { \
1663 struct cmd __template = { \
1664 __anv_cmd_header(cmd), \
1665 .DWordLength = n - __anv_cmd_length_bias(cmd), \
1666 __VA_ARGS__ \
1667 }; \
1668 __anv_cmd_pack(cmd)(batch, __dst, &__template); \
1669 } \
1670 __dst; \
1671 })
1672
1673 #define anv_batch_emit_merge(batch, dwords0, dwords1) \
1674 do { \
1675 uint32_t *dw; \
1676 \
1677 STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
1678 dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \
1679 if (!dw) \
1680 break; \
1681 for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \
1682 dw[i] = (dwords0)[i] | (dwords1)[i]; \
1683 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1684 } while (0)
1685
1686 #define anv_batch_emit(batch, cmd, name) \
1687 for (struct cmd name = { __anv_cmd_header(cmd) }, \
1688 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
1689 __builtin_expect(_dst != NULL, 1); \
1690 ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \
1691 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1692 _dst = NULL; \
1693 }))
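/* Illustrative usage sketch: the emit macro behaves like a block scope whose
 * body fills in the template before it is packed into the batch:
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 *
 * If anv_batch_emit_dwords() fails (and sets the batch error), _dst is NULL
 * and the body is skipped entirely.
 */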
1694
1695 #define anv_batch_write_reg(batch, reg, name) \
1696 for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \
1697 ({ \
1698 uint32_t _dw[__anv_cmd_length(reg)]; \
1699 __anv_cmd_pack(reg)(NULL, _dw, &name); \
1700 for (unsigned i = 0; i < __anv_cmd_length(reg); i++) { \
1701 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
1702 lri.RegisterOffset = __anv_reg_num(reg); \
1703 lri.DataDWord = _dw[i]; \
1704 } \
1705 } \
1706 _cont = NULL; \
1707 }))
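/* Illustrative usage sketch (register and field names are placeholders for
 * whatever GENX() register is being programmed):
 *
 *    anv_batch_write_reg(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr) {
 *       l3cr.SLMEnable = true;
 *    }
 *
 * Each dword of the packed register value is emitted as its own
 * MI_LOAD_REGISTER_IMM.
 */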
1708
1709 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1710 /* #define __gen_get_batch_address anv_batch_address */
1711 /* #define __gen_address_value anv_address_physical */
1712 /* #define __gen_address_offset anv_address_add */
1713
1714 struct anv_device_memory {
1715 struct vk_object_base base;
1716
1717 struct list_head link;
1718
1719 struct anv_bo * bo;
1720 const struct anv_memory_type * type;
1721
1722 void * map;
1723 size_t map_size;
1724
1725    /* From the user's point of view, the mapping starts at map + map_delta. */
1726    uint64_t         map_delta;
1730
1731 /* If set, we are holding reference to AHardwareBuffer
1732 * which we must release when memory is freed.
1733 */
1734 struct AHardwareBuffer * ahw;
1735
1736 /* If set, this memory comes from a host pointer. */
1737 void * host_ptr;
1738 };
1739
1740 /**
1741 * Header for Vertex URB Entry (VUE)
1742 */
1743 struct anv_vue_header {
1744 uint32_t Reserved;
1745 uint32_t RTAIndex; /* RenderTargetArrayIndex */
1746 uint32_t ViewportIndex;
1747 float PointWidth;
1748 };
1749
1750 /** Struct representing a sampled image descriptor
1751 *
1752  * This descriptor layout is used for sampled images, bare samplers, and
1753 * combined image/sampler descriptors.
1754 */
1755 struct anv_sampled_image_descriptor {
1756 /** Bindless image handle
1757 *
1758 * This is expected to already be shifted such that the 20-bit
1759 * SURFACE_STATE table index is in the top 20 bits.
1760 */
1761 uint32_t image;
1762
1763 /** Bindless sampler handle
1764 *
1765 * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1766 * to the dynamic state base address.
1767 */
1768 uint32_t sampler;
1769 };
1770
1771 struct anv_texture_swizzle_descriptor {
1772 /** Texture swizzle
1773 *
1774 * See also nir_intrinsic_channel_select_intel
1775 */
1776 uint8_t swizzle[4];
1777
1778 /** Unused padding to ensure the struct is a multiple of 64 bits */
1779 uint32_t _pad;
1780 };
1781
1782 /** Struct representing a storage image descriptor */
1783 struct anv_storage_image_descriptor {
1784 /** Bindless image handles
1785 *
1786 * These are expected to already be shifted such that the 20-bit
1787 * SURFACE_STATE table index is in the top 20 bits.
1788 */
1789 uint32_t vanilla;
1790 uint32_t lowered;
1791 };
1792
1793 /** Struct representing an address/range descriptor
1794  *
1795  * The fields of this struct correspond directly to the data layout of
1796  * nir_address_format_64bit_bounded_global addresses. The last field is the
1797  * offset component of the NIR address, so it must be zero; that way, loading
1798  * the descriptor yields a pointer to the start of the range.
1799 */
1800 struct anv_address_range_descriptor {
1801 uint64_t address;
1802 uint32_t range;
1803 uint32_t zero;
1804 };
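/* Illustrative sketch with a made-up GPU address: a descriptor for a
 * 256-byte storage buffer would be written as
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = 0x0000800000001000ull,   // hypothetical buffer address
 *       .range   = 256,
 *       .zero    = 0,
 *    };
 *
 * which matches the (address, size, offset = 0) layout NIR expects for
 * nir_address_format_64bit_bounded_global loads.
 */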
1805
1806 enum anv_descriptor_data {
1807 /** The descriptor contains a BTI reference to a surface state */
1808 ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
1809 /** The descriptor contains a BTI reference to a sampler state */
1810 ANV_DESCRIPTOR_SAMPLER_STATE = (1 << 1),
1811 /** The descriptor contains an actual buffer view */
1812 ANV_DESCRIPTOR_BUFFER_VIEW = (1 << 2),
1813 /** The descriptor contains auxiliary image layout data */
1814 ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
1815    /** The descriptor contains inline uniform block data */
1816 ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1817 /** anv_address_range_descriptor with a buffer address and range */
1818 ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
1819 /** Bindless surface handle */
1820 ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
1821 /** Storage image handles */
1822 ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
1823    /** The descriptor contains a texture swizzle (anv_texture_swizzle_descriptor) */
1824 ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
1825 };
1826
1827 struct anv_descriptor_set_binding_layout {
1828 /* The type of the descriptors in this binding */
1829 VkDescriptorType type;
1830
1831 /* Flags provided when this binding was created */
1832 VkDescriptorBindingFlags flags;
1833
1834 /* Bitfield representing the type of data this descriptor contains */
1835 enum anv_descriptor_data data;
1836
1837 /* Maximum number of YCbCr texture/sampler planes */
1838 uint8_t max_plane_count;
1839
1840 /* Number of array elements in this binding (or size in bytes for inline
1841 * uniform data)
1842 */
1843 uint32_t array_size;
1844
1845 /* Index into the flattened descriptor set */
1846 uint32_t descriptor_index;
1847
1848 /* Index into the dynamic state array for a dynamic buffer */
1849 int16_t dynamic_offset_index;
1850
1851 /* Index into the descriptor set buffer views */
1852 int32_t buffer_view_index;
1853
1854 /* Offset into the descriptor buffer where this descriptor lives */
1855 uint32_t descriptor_offset;
1856
1857    /* Precomputed stride */
1858 unsigned descriptor_stride;
1859
1860 /* Immutable samplers (or NULL if no immutable samplers) */
1861 struct anv_sampler **immutable_samplers;
1862 };
1863
1864 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1865 const struct anv_descriptor_set_binding_layout *binding,
1866 bool sampler);
1867
1868 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1869 const struct anv_descriptor_set_binding_layout *binding,
1870 bool sampler);
1871
1872 struct anv_descriptor_set_layout {
1873 struct vk_object_base base;
1874
1875 /* Descriptor set layouts can be destroyed at almost any time */
1876 uint32_t ref_cnt;
1877
1878 /* Number of bindings in this descriptor set */
1879 uint32_t binding_count;
1880
1881 /* Total number of descriptors */
1882 uint32_t descriptor_count;
1883
1884 /* Shader stages affected by this descriptor set */
1885 uint16_t shader_stages;
1886
1887 /* Number of buffer views in this descriptor set */
1888 uint32_t buffer_view_count;
1889
1890 /* Number of dynamic offsets used by this descriptor set */
1891 uint16_t dynamic_offset_count;
1892
1893 /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
1894 * this buffer
1895 */
1896 VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
1897
1898 /* Size of the descriptor buffer for this descriptor set */
1899 uint32_t descriptor_buffer_size;
1900
1901 /* Bindings in this descriptor set */
1902 struct anv_descriptor_set_binding_layout binding[0];
1903 };
1904
1905 void anv_descriptor_set_layout_destroy(struct anv_device *device,
1906 struct anv_descriptor_set_layout *layout);
1907
1908 static inline void
1909 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1910 {
1911 assert(layout && layout->ref_cnt >= 1);
1912 p_atomic_inc(&layout->ref_cnt);
1913 }
1914
1915 static inline void
1916 anv_descriptor_set_layout_unref(struct anv_device *device,
1917 struct anv_descriptor_set_layout *layout)
1918 {
1919 assert(layout && layout->ref_cnt >= 1);
1920 if (p_atomic_dec_zero(&layout->ref_cnt))
1921 anv_descriptor_set_layout_destroy(device, layout);
1922 }
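/* Illustrative lifetime sketch: objects that keep using a set layout after
 * vkDestroyDescriptorSetLayout() (descriptor sets, pipeline layouts, ...)
 * hold their own reference:
 *
 *    anv_descriptor_set_layout_ref(layout);            // taken at creation
 *    ...
 *    anv_descriptor_set_layout_unref(device, layout);  // dropped at destruction
 */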
1923
1924 struct anv_descriptor {
1925 VkDescriptorType type;
1926
1927 union {
1928 struct {
1929 VkImageLayout layout;
1930 struct anv_image_view *image_view;
1931 struct anv_sampler *sampler;
1932 };
1933
1934 struct {
1935 struct anv_buffer_view *set_buffer_view;
1936 struct anv_buffer *buffer;
1937 uint64_t offset;
1938 uint64_t range;
1939 };
1940
1941 struct anv_buffer_view *buffer_view;
1942
1943 struct anv_acceleration_structure *accel_struct;
1944 };
1945 };
1946
1947 struct anv_descriptor_set {
1948 struct vk_object_base base;
1949
1950 struct anv_descriptor_pool *pool;
1951 struct anv_descriptor_set_layout *layout;
1952
1953    /* Amount of space occupied in the pool by this descriptor set. It can
1954 * be larger than the size of the descriptor set.
1955 */
1956 uint32_t size;
1957
1958 /* State relative to anv_descriptor_pool::bo */
1959 struct anv_state desc_mem;
1960 /* Surface state for the descriptor buffer */
1961 struct anv_state desc_surface_state;
1962
1963 /* Descriptor set address. */
1964 struct anv_address desc_addr;
1965
1966 uint32_t buffer_view_count;
1967 struct anv_buffer_view *buffer_views;
1968
1969    /* Link to the descriptor pool's desc_sets list. */
1970 struct list_head pool_link;
1971
1972 uint32_t descriptor_count;
1973 struct anv_descriptor descriptors[0];
1974 };
1975
1976 static inline bool
1977 anv_descriptor_set_is_push(struct anv_descriptor_set *set)
1978 {
1979 return set->pool == NULL;
1980 }
1981
1982 struct anv_buffer_view {
1983 struct vk_object_base base;
1984
1985 uint64_t range; /**< VkBufferViewCreateInfo::range */
1986
1987 struct anv_address address;
1988
1989 struct anv_state surface_state;
1990 struct anv_state storage_surface_state;
1991 struct anv_state lowered_storage_surface_state;
1992
1993 struct brw_image_param lowered_storage_image_param;
1994 };
1995
1996 struct anv_push_descriptor_set {
1997 struct anv_descriptor_set set;
1998
1999 /* Put this field right behind anv_descriptor_set so it fills up the
2000 * descriptors[0] field. */
2001 struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2002
2003 /** True if the descriptor set buffer has been referenced by a draw or
2004 * dispatch command.
2005 */
2006 bool set_used_on_gpu;
2007
2008 struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2009 };
2010
2011 static inline struct anv_address
2012 anv_descriptor_set_address(struct anv_descriptor_set *set)
2013 {
2014 if (anv_descriptor_set_is_push(set)) {
2015       /* We have to flag the push descriptor set as used on the GPU so that
2016        * the next time we push descriptors, we grab new memory.
2017 */
2018 struct anv_push_descriptor_set *push_set =
2019 (struct anv_push_descriptor_set *)set;
2020 push_set->set_used_on_gpu = true;
2021 }
2022
2023 return set->desc_addr;
2024 }
2025
2026 struct anv_descriptor_pool {
2027 struct vk_object_base base;
2028
2029 uint32_t size;
2030 uint32_t next;
2031 uint32_t free_list;
2032
2033 struct anv_bo *bo;
2034 struct util_vma_heap bo_heap;
2035
2036 struct anv_state_stream surface_state_stream;
2037 void *surface_state_free_list;
2038
2039 struct list_head desc_sets;
2040
2041 bool host_only;
2042
2043 char data[0];
2044 };
2045
2046 struct anv_descriptor_template_entry {
2047 /* The type of descriptor in this entry */
2048 VkDescriptorType type;
2049
2050 /* Binding in the descriptor set */
2051 uint32_t binding;
2052
2053 /* Offset at which to write into the descriptor set binding */
2054 uint32_t array_element;
2055
2056 /* Number of elements to write into the descriptor set binding */
2057 uint32_t array_count;
2058
2059 /* Offset into the user provided data */
2060 size_t offset;
2061
2062 /* Stride between elements into the user provided data */
2063 size_t stride;
2064 };
2065
2066 struct anv_descriptor_update_template {
2067 struct vk_object_base base;
2068
2069 VkPipelineBindPoint bind_point;
2070
2071 /* The descriptor set this template corresponds to. This value is only
2072 * valid if the template was created with the templateType
2073 * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
2074 */
2075 uint8_t set;
2076
2077 /* Number of entries in this template */
2078 uint32_t entry_count;
2079
2080 /* Entries of the template */
2081 struct anv_descriptor_template_entry entries[0];
2082 };
2083
2084 size_t
2085 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
2086 uint32_t var_desc_count);
2087
2088 uint32_t
2089 anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
2090 uint32_t var_desc_count);
2091
2092 void
2093 anv_descriptor_set_write_image_view(struct anv_device *device,
2094 struct anv_descriptor_set *set,
2095 const VkDescriptorImageInfo * const info,
2096 VkDescriptorType type,
2097 uint32_t binding,
2098 uint32_t element);
2099
2100 void
2101 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2102 struct anv_descriptor_set *set,
2103 VkDescriptorType type,
2104 struct anv_buffer_view *buffer_view,
2105 uint32_t binding,
2106 uint32_t element);
2107
2108 void
2109 anv_descriptor_set_write_buffer(struct anv_device *device,
2110 struct anv_descriptor_set *set,
2111 struct anv_state_stream *alloc_stream,
2112 VkDescriptorType type,
2113 struct anv_buffer *buffer,
2114 uint32_t binding,
2115 uint32_t element,
2116 VkDeviceSize offset,
2117 VkDeviceSize range);
2118
2119 void
2120 anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
2121 struct anv_descriptor_set *set,
2122 struct anv_acceleration_structure *accel,
2123 uint32_t binding,
2124 uint32_t element);
2125
2126 void
2127 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2128 struct anv_descriptor_set *set,
2129 uint32_t binding,
2130 const void *data,
2131 size_t offset,
2132 size_t size);
2133
2134 void
2135 anv_descriptor_set_write_template(struct anv_device *device,
2136 struct anv_descriptor_set *set,
2137 struct anv_state_stream *alloc_stream,
2138 const struct anv_descriptor_update_template *template,
2139 const void *data);
2140
2141 #define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5)
2142 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4)
2143 #define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
2144 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
2145 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
2146 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2147
2148 struct anv_pipeline_binding {
2149 /** Index in the descriptor set
2150 *
2151 * This is a flattened index; the descriptor set layout is already taken
2152 * into account.
2153 */
2154 uint32_t index;
2155
2156 /** The descriptor set this surface corresponds to.
2157 *
2158     * The special ANV_DESCRIPTOR_SET_* values above indicate that this
2159 * binding is not a normal descriptor set but something else.
2160 */
2161 uint8_t set;
2162
2163 union {
2164 /** Plane in the binding index for images */
2165 uint8_t plane;
2166
2167 /** Dynamic offset index (for dynamic UBOs and SSBOs) */
2168 uint8_t dynamic_offset_index;
2169 };
2170
2171 /** For a storage image, whether it requires a lowered surface */
2172 uint8_t lowered_storage_surface;
2173
2174 /** Pad to 64 bits so that there are no holes and we can safely memcmp
2175 * assuming POD zero-initialization.
2176 */
2177 uint8_t pad;
2178 };
2179
2180 struct anv_push_range {
2181 /** Index in the descriptor set */
2182 uint32_t index;
2183
2184 /** Descriptor set index */
2185 uint8_t set;
2186
2187 /** Dynamic offset index (for dynamic UBOs) */
2188 uint8_t dynamic_offset_index;
2189
2190 /** Start offset in units of 32B */
2191 uint8_t start;
2192
2193 /** Range in units of 32B */
2194 uint8_t length;
2195 };
2196
2197 struct anv_pipeline_layout {
2198 struct vk_object_base base;
2199
2200 struct {
2201 struct anv_descriptor_set_layout *layout;
2202 uint32_t dynamic_offset_start;
2203 } set[MAX_SETS];
2204
2205 uint32_t num_sets;
2206
2207 unsigned char sha1[20];
2208 };
2209
2210 struct anv_buffer {
2211 struct vk_buffer vk;
2212
2213 /* Set when bound */
2214 struct anv_address address;
2215 };
2216
2217 enum anv_cmd_dirty_bits {
2218 ANV_CMD_DIRTY_PIPELINE = 1 << 0,
2219 ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 1,
2220 ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 2,
2221 ANV_CMD_DIRTY_XFB_ENABLE = 1 << 3,
2222 };
2223 typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
2224
2225 enum anv_pipe_bits {
2226 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
2227 ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
2228 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
2229 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
2230 ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
2231 ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
2232 ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6),
2233 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
2234 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
2235 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
2236 ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
2237
2238 /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
2239 * cache work has completed. Available on Gfx12+. For earlier Gfx we
2240 * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
2241 */
2242 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14),
2243 ANV_PIPE_PSS_STALL_SYNC_BIT = (1 << 15),
2244 ANV_PIPE_CS_STALL_BIT = (1 << 20),
2245 ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),
2246
2247 /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
2248 * a flush has happened but not a CS stall. The next time we do any sort
2249 * of invalidation we need to insert a CS stall at that time. Otherwise,
2250 * we would have to CS stall on every flush which could be bad.
2251 */
2252 ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22),
2253
2254 /* This bit does not exist directly in PIPE_CONTROL. It means that render
2255 * target operations related to transfer commands with VkBuffer as
2256 * destination are ongoing. Some operations like copies on the command
2257 * streamer might need to be aware of this to trigger the appropriate stall
2258 * before they can proceed with the copy.
2259 */
2260 ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23),
2261
2262 /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
2263 * AUX-TT data has changed and we need to invalidate AUX-TT data. This is
2264 * done by writing the AUX-TT register.
2265 */
2266 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 24),
2267
2268 /* This bit does not exist directly in PIPE_CONTROL. It means that a
2269 * PIPE_CONTROL with a post-sync operation will follow. This is used to
2270 * implement a workaround for Gfx9.
2271 */
2272 ANV_PIPE_POST_SYNC_BIT = (1 << 25),
2273 };
2274
2275 #define ANV_PIPE_FLUSH_BITS ( \
2276 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2277 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2278 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2279 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2280 ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2281
2282 #define ANV_PIPE_STALL_BITS ( \
2283 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2284 ANV_PIPE_DEPTH_STALL_BIT | \
2285 ANV_PIPE_CS_STALL_BIT)
2286
2287 #define ANV_PIPE_INVALIDATE_BITS ( \
2288 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2289 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2290 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2291 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2292 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2293 ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2294
2295 enum intel_ds_stall_flag
2296 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
2297
2298 static inline enum anv_pipe_bits
2299 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
2300 VkAccessFlags2 flags)
2301 {
2302 enum anv_pipe_bits pipe_bits = 0;
2303
2304 u_foreach_bit64(b, flags) {
2305 switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
2306 case VK_ACCESS_2_SHADER_WRITE_BIT:
2307 case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
2308 /* We're transitioning a buffer that was previously used as write
2309 * destination through the data port. To make its content available
2310 * to future operations, flush the hdc pipeline.
2311 */
2312 pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2313 break;
2314 case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
2315 /* We're transitioning a buffer that was previously used as render
2316 * target. To make its content available to future operations, flush
2317 * the render target cache.
2318 */
2319 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2320 break;
2321 case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
2322 /* We're transitioning a buffer that was previously used as depth
2323 * buffer. To make its content available to future operations, flush
2324 * the depth cache.
2325 */
2326 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2327 break;
2328 case VK_ACCESS_2_TRANSFER_WRITE_BIT:
2329 /* We're transitioning a buffer that was previously used as a
2330 * transfer write destination. Generic write operations include color
2331        * & depth operations as well as buffer operations like:
2332 * - vkCmdClearColorImage()
2333 * - vkCmdClearDepthStencilImage()
2334 * - vkCmdBlitImage()
2335 * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2336 *
2337 * Most of these operations are implemented using Blorp which writes
2338 * through the render target, so flush that cache to make it visible
2339 * to future operations. And for depth related operations we also
2340 * need to flush the depth cache.
2341 */
2342 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2343 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2344 break;
2345 case VK_ACCESS_2_MEMORY_WRITE_BIT:
2346 /* We're transitioning a buffer for generic write operations. Flush
2347 * all the caches.
2348 */
2349 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2350 break;
2351 case VK_ACCESS_2_HOST_WRITE_BIT:
2352        /* We're transitioning a buffer for access by the CPU. Invalidate all
2353         * the caches. Since the data and tile caches have no invalidate
2354         * operation, we are forced to flush those as well.
2355 */
2356 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2357 pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2358 break;
2359 case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2360 case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
2361 /* We're transitioning a buffer written either from VS stage or from
2362         * the command streamer (see CmdEndTransformFeedbackEXT), so we just
2363         * need to stall the CS.
2364 */
2365 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2366 break;
2367 default:
2368 break; /* Nothing to do */
2369 }
2370 }
2371
2372 return pipe_bits;
2373 }
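/* Illustrative sketch: for a barrier whose srcAccessMask is
 * VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_2_TRANSFER_WRITE_BIT,
 * this returns ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
 * ANV_PIPE_DEPTH_CACHE_FLUSH_BIT, which the caller then ORs into the command
 * buffer's pending_pipe_bits.
 */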
2374
2375 static inline enum anv_pipe_bits
2376 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
2377 VkAccessFlags2 flags)
2378 {
2379 enum anv_pipe_bits pipe_bits = 0;
2380
2381 u_foreach_bit64(b, flags) {
2382 switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
2383 case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
2384 /* Indirect draw commands take a buffer as input that we're going to
2385 * read from the command streamer to load some of the HW registers
2386 * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2387 * command streamer stall so that all the cache flushes have
2388 * completed before the command streamer loads from memory.
2389 */
2390 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2391 /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2392 * through a vertex buffer, so invalidate that cache.
2393 */
2394 pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2395        /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
2396         * UBO from the buffer, so we need to invalidate the constant cache.
2397 */
2398 pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2399 pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2400        /* A tile cache flush is also needed for CmdDispatchIndirect since the
2401         * command streamer and vertex fetch aren't L3 coherent.
2402 */
2403 pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2404 break;
2405 case VK_ACCESS_2_INDEX_READ_BIT:
2406 case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
2407        /* We're transitioning a buffer to be used as input for vkCmdDraw*
2408 * commands, so we invalidate the VF cache to make sure there is no
2409 * stale data when we start rendering.
2410 */
2411 pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2412 break;
2413 case VK_ACCESS_2_UNIFORM_READ_BIT:
2414 /* We transitioning a buffer to be used as uniform data. Because
2415 * uniform is accessed through the data port & sampler, we need to
2416 * invalidate the texture cache (sampler) & constant cache (data
2417 * port) to avoid stale data.
2418 */
2419 pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2420 if (device->physical->compiler->indirect_ubos_use_sampler)
2421 pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2422 else
2423 pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2424 break;
2425 case VK_ACCESS_2_SHADER_READ_BIT:
2426 case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
2427 case VK_ACCESS_2_TRANSFER_READ_BIT:
2428 /* Transitioning a buffer to be read through the sampler, so
2429         * invalidate the texture cache; we don't want any stale data.
2430 */
2431 pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2432 break;
2433 case VK_ACCESS_2_MEMORY_READ_BIT:
2434 /* Transitioning a buffer for generic read, invalidate all the
2435 * caches.
2436 */
2437 pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2438 break;
2439 case VK_ACCESS_2_MEMORY_WRITE_BIT:
2440 /* Generic write, make sure all previously written things land in
2441 * memory.
2442 */
2443 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2444 break;
2445 case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
2446 case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
2447 /* Transitioning a buffer for conditional rendering or transform
2448 * feedback. We'll load the content of this buffer into HW registers
2449         * using the command streamer, so we need to stall the command
2450         * streamer to make sure any in-flight flush operations have
2451         * completed.
2452 */
2453 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2454 pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2455 pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2456 break;
2457 case VK_ACCESS_2_HOST_READ_BIT:
2458        /* We're transitioning a buffer that the host is going to read, so
2459         * flush all the caches to make prior GPU writes visible to the CPU.
2460 */
2461 pipe_bits |= ANV_PIPE_FLUSH_BITS;
2462 break;
2463 case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2464 /* We're transitioning a buffer to be written by the streamout fixed
2465 * function. This one is apparently not L3 coherent, so we need a
2466 * tile cache flush to make sure any previous write is not going to
2467 * create WaW hazards.
2468 */
2469 pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2470 break;
2471 default:
2472 break; /* Nothing to do */
2473 }
2474 }
2475
2476 return pipe_bits;
2477 }
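/* Illustrative sketch: for a barrier whose dstAccessMask is
 * VK_ACCESS_2_UNIFORM_READ_BIT, this returns
 * ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT plus either a texture cache
 * invalidate or an HDC flush depending on indirect_ubos_use_sampler. These
 * invalidate bits are accumulated alongside the flush bits derived from the
 * source access mask.
 */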
2478
2479 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
2480 VK_IMAGE_ASPECT_COLOR_BIT | \
2481 VK_IMAGE_ASPECT_PLANE_0_BIT | \
2482 VK_IMAGE_ASPECT_PLANE_1_BIT | \
2483 VK_IMAGE_ASPECT_PLANE_2_BIT)
2484 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2485 VK_IMAGE_ASPECT_PLANE_0_BIT | \
2486 VK_IMAGE_ASPECT_PLANE_1_BIT | \
2487 VK_IMAGE_ASPECT_PLANE_2_BIT)
2488
2489 struct anv_vertex_binding {
2490 struct anv_buffer * buffer;
2491 VkDeviceSize offset;
2492 VkDeviceSize size;
2493 };
2494
2495 struct anv_xfb_binding {
2496 struct anv_buffer * buffer;
2497 VkDeviceSize offset;
2498 VkDeviceSize size;
2499 };
2500
2501 struct anv_push_constants {
2502 /** Push constant data provided by the client through vkPushConstants */
2503 uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2504
2505 /** Dynamic offsets for dynamic UBOs and SSBOs */
2506 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2507
2508 /* Robust access pushed registers. */
2509 uint64_t push_reg_mask[MESA_SHADER_STAGES];
2510
2511 /** Ray query globals (RT_DISPATCH_GLOBALS) */
2512 uint64_t ray_query_globals;
2513
2514 /* Base addresses for descriptor sets */
2515 uint64_t desc_sets[MAX_SETS];
2516
2517 struct {
2518 /** Base workgroup ID
2519 *
2520 * Used for vkCmdDispatchBase.
2521 */
2522 uint32_t base_work_group_id[3];
2523
2524 /** Subgroup ID
2525 *
2526 * This is never set by software but is implicitly filled out when
2527 * uploading the push constants for compute shaders.
2528 */
2529 uint32_t subgroup_id;
2530 } cs;
2531 };
2532
2533 struct anv_surface_state {
2534 struct anv_state state;
2535 /** Address of the surface referred to by this state
2536 *
2537 * This address is relative to the start of the BO.
2538 */
2539 struct anv_address address;
2540 /* Address of the aux surface, if any
2541 *
2542 * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2543 *
2544 * With the exception of gfx8, the bottom 12 bits of this address' offset
2545 * include extra aux information.
2546 */
2547 struct anv_address aux_address;
2548 /* Address of the clear color, if any
2549 *
2550 * This address is relative to the start of the BO.
2551 */
2552 struct anv_address clear_address;
2553 };
2554
2555 struct anv_attachment {
2556 VkFormat vk_format;
2557 const struct anv_image_view *iview;
2558 VkImageLayout layout;
2559 enum isl_aux_usage aux_usage;
2560 struct anv_surface_state surface_state;
2561
2562 VkResolveModeFlagBits resolve_mode;
2563 const struct anv_image_view *resolve_iview;
2564 VkImageLayout resolve_layout;
2565 };
2566
2567 /** State tracking for vertex buffer flushes
2568 *
2569 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
2570 * addresses. If you happen to have two vertex buffers which get placed
2571 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2572 * collisions. In order to solve this problem, we track vertex address ranges
2573 * which are live in the cache and invalidate the cache if one ever exceeds 32
2574 * bits.
2575 */
2576 struct anv_vb_cache_range {
2577 /* Virtual address at which the live vertex buffer cache range starts for
2578 * this vertex buffer index.
2579 */
2580 uint64_t start;
2581
2582    /* Virtual address of the byte just past where the vertex buffer cache
2583     * range ends. The end is exclusive, so end - start is the size of the range.
2584 */
2585 uint64_t end;
2586 };
2587
2588 /* Check whether we need to apply the Gfx8-9 vertex buffer workaround */
2589 static inline bool
2590 anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
2591 struct anv_vb_cache_range *dirty,
2592 struct anv_address vb_address,
2593 uint32_t vb_size)
2594 {
2595 if (vb_size == 0) {
2596 bound->start = 0;
2597 bound->end = 0;
2598 return false;
2599 }
2600
2601 assert(vb_address.bo && anv_bo_is_pinned(vb_address.bo));
2602 bound->start = intel_48b_address(anv_address_physical(vb_address));
2603 bound->end = bound->start + vb_size;
2604 assert(bound->end > bound->start); /* No overflow */
2605
2606 /* Align everything to a cache line */
2607 bound->start &= ~(64ull - 1ull);
2608 bound->end = align_u64(bound->end, 64);
2609
2610 /* Compute the dirty range */
2611 dirty->start = MIN2(dirty->start, bound->start);
2612 dirty->end = MAX2(dirty->end, bound->end);
2613
2614 /* If our range is larger than 32 bits, we have to flush */
2615 assert(bound->end - bound->start <= (1ull << 32));
2616 return (dirty->end - dirty->start) > (1ull << 32);
2617 }
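/* Illustrative sketch with made-up addresses: binding a 4 KiB vertex buffer
 * at 0x100000000 and, in a later draw, another one at 0x200000000 grows the
 * dirty range beyond 1ull << 32 bytes, so the caller must invalidate the VF
 * cache and reset the dirty range before the next draw.
 */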
2618
2619 /** State tracking for particular pipeline bind point
2620 *
2621 * This struct is the base struct for anv_cmd_graphics_state and
2622 * anv_cmd_compute_state. These are used to track state which is bound to a
2623 * particular type of pipeline. Generic state that applies per-stage such as
2624 * binding table offsets and push constants is tracked generically with a
2625 * per-stage array in anv_cmd_state.
2626 */
2627 struct anv_cmd_pipeline_state {
2628 struct anv_descriptor_set *descriptors[MAX_SETS];
2629 struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2630
2631 struct anv_push_constants push_constants;
2632
2633 /* Push constant state allocated when flushing push constants. */
2634 struct anv_state push_constants_state;
2635 };
2636
2637 /** State tracking for graphics pipeline
2638 *
2639 * This has anv_cmd_pipeline_state as a base struct to track things which get
2640 * bound to a graphics pipeline. Along with general pipeline bind point state
2641 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2642 * state which is graphics-specific.
2643 */
2644 struct anv_cmd_graphics_state {
2645 struct anv_cmd_pipeline_state base;
2646
2647 struct anv_graphics_pipeline *pipeline;
2648
2649 VkRenderingFlags rendering_flags;
2650 VkRect2D render_area;
2651 uint32_t layer_count;
2652 uint32_t samples;
2653 uint32_t view_mask;
2654 uint32_t color_att_count;
2655 struct anv_state att_states;
2656 struct anv_attachment color_att[MAX_RTS];
2657 struct anv_attachment depth_att;
2658 struct anv_attachment stencil_att;
2659 struct anv_state null_surface_state;
2660
2661 anv_cmd_dirty_mask_t dirty;
2662 uint32_t vb_dirty;
2663
2664 struct anv_vb_cache_range ib_bound_range;
2665 struct anv_vb_cache_range ib_dirty_range;
2666 struct anv_vb_cache_range vb_bound_ranges[33];
2667 struct anv_vb_cache_range vb_dirty_ranges[33];
2668
2669 uint32_t restart_index;
2670
2671 VkShaderStageFlags push_constant_stages;
2672
2673 uint32_t primitive_topology;
2674
2675 struct anv_buffer *index_buffer;
2676 uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2677 uint32_t index_offset;
2678
2679 struct vk_sample_locations_state sample_locations;
2680 };
2681
2682 enum anv_depth_reg_mode {
2683 ANV_DEPTH_REG_MODE_UNKNOWN = 0,
2684 ANV_DEPTH_REG_MODE_HW_DEFAULT,
2685 ANV_DEPTH_REG_MODE_D16_1X_MSAA,
2686 };
2687
2688 /** State tracking for compute pipeline
2689 *
2690 * This has anv_cmd_pipeline_state as a base struct to track things which get
2691 * bound to a compute pipeline. Along with general pipeline bind point state
2692 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2693 * state which is compute-specific.
2694 */
2695 struct anv_cmd_compute_state {
2696 struct anv_cmd_pipeline_state base;
2697
2698 struct anv_compute_pipeline *pipeline;
2699
2700 bool pipeline_dirty;
2701
2702 struct anv_state push_data;
2703
2704 struct anv_address num_workgroups;
2705 };
2706
2707 struct anv_cmd_ray_tracing_state {
2708 struct anv_cmd_pipeline_state base;
2709
2710 struct anv_ray_tracing_pipeline *pipeline;
2711
2712 bool pipeline_dirty;
2713
2714 struct {
2715 struct anv_bo *bo;
2716 struct brw_rt_scratch_layout layout;
2717 } scratch;
2718 };
2719
2720 /** State required while building cmd buffer */
2721 struct anv_cmd_state {
2722 /* PIPELINE_SELECT.PipelineSelection */
2723 uint32_t current_pipeline;
2724 const struct intel_l3_config * current_l3_config;
2725 uint32_t last_aux_map_state;
2726
2727 struct anv_cmd_graphics_state gfx;
2728 struct anv_cmd_compute_state compute;
2729 struct anv_cmd_ray_tracing_state rt;
2730
2731 enum anv_pipe_bits pending_pipe_bits;
2732 VkShaderStageFlags descriptors_dirty;
2733 VkShaderStageFlags push_constants_dirty;
2734
2735 struct anv_vertex_binding vertex_bindings[MAX_VBS];
2736 bool xfb_enabled;
2737 struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
2738 struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
2739 struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];
2740
2741 unsigned char sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2742 unsigned char surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2743 unsigned char push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2744
2745 /**
2746    * Whether or not the gfx8 PMA fix is enabled. We ensure that it is disabled
2747    * at the top of any command buffer by disabling it in EndCommandBuffer and
2748    * before invoking a secondary command buffer in ExecuteCommands.
2749 */
2750 bool pma_fix_enabled;
2751
2752 /**
2753 * Whether or not we know for certain that HiZ is enabled for the current
2754 * subpass. If, for whatever reason, we are unsure as to whether HiZ is
2755 * enabled or not, this will be false.
2756 */
2757 bool hiz_enabled;
2758
2759 /* We ensure the registers for the gfx12 D16 fix are initialized at the
2760 * first non-NULL depth stencil packet emission of every command buffer.
2761 * For secondary command buffer execution, we transfer the state from the
2762 * last command buffer to the primary (if known).
2763 */
2764 enum anv_depth_reg_mode depth_reg_mode;
2765
2766 /**
2767 * Whether RHWO optimization is enabled (Wa_1508744258).
2768 */
2769 bool rhwo_optimization_enabled;
2770
2771 /**
2772 * Pending state of the RHWO optimization, to be applied at the next
2773 * genX(cmd_buffer_apply_pipe_flushes).
2774 */
2775 bool pending_rhwo_optimization_enabled;
2776
2777 bool conditional_render_enabled;
2778
2779 /**
2780 * Last rendering scale argument provided to
2781 * genX(cmd_buffer_emit_hashing_mode)().
2782 */
2783 unsigned current_hash_scale;
2784
2785 /**
2786 * A buffer used for spill/fill of ray queries.
2787 */
2788 struct anv_bo * ray_query_shadow_bo;
2789 };
2790
2791 #define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
2792 #define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
2793
2794 enum anv_cmd_buffer_exec_mode {
2795 ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
2796 ANV_CMD_BUFFER_EXEC_MODE_EMIT,
2797 ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
2798 ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
2799 ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
2800 ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
2801 };
2802
2803 struct anv_measure_batch;
2804
2805 struct anv_cmd_buffer {
2806 struct vk_command_buffer vk;
2807
2808 struct anv_device * device;
2809 struct anv_queue_family * queue_family;
2810
2811 struct anv_batch batch;
2812
2813 /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
2814 * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
2815    * rewrite the end to chain multiple batches together at vkQueueSubmit().
2816 */
2817 void * batch_end;
2818
2819 /* Fields required for the actual chain of anv_batch_bo's.
2820 *
2821 * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
2822 */
2823 struct list_head batch_bos;
2824 enum anv_cmd_buffer_exec_mode exec_mode;
2825
2826 /* A vector of anv_batch_bo pointers for every batch or surface buffer
2827 * referenced by this command buffer
2828 *
2829 * initialized by anv_cmd_buffer_init_batch_bo_chain()
2830 */
2831 struct u_vector seen_bbos;
2832
2833 /* A vector of int32_t's for every block of binding tables.
2834 *
2835 * initialized by anv_cmd_buffer_init_batch_bo_chain()
2836 */
2837 struct u_vector bt_block_states;
2838 struct anv_state bt_next;
2839
2840 struct anv_reloc_list surface_relocs;
2841 /** Last seen surface state block pool center bo offset */
2842 uint32_t last_ss_pool_center;
2843
2844 /* Serial for tracking buffer completion */
2845 uint32_t serial;
2846
2847 /* Stream objects for storing temporary data */
2848 struct anv_state_stream surface_state_stream;
2849 struct anv_state_stream dynamic_state_stream;
2850 struct anv_state_stream general_state_stream;
2851
2852 VkCommandBufferUsageFlags usage_flags;
2853
2854 struct anv_query_pool *perf_query_pool;
2855
2856 struct anv_cmd_state state;
2857
2858 struct anv_address return_addr;
2859
2860 /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
2861 uint64_t intel_perf_marker;
2862
2863 struct anv_measure_batch *measure;
2864
2865 /**
2866 * KHR_performance_query requires self modifying command buffers and this
2867 * array has the location of modifying commands to the query begin and end
2868 * instructions storing performance counters. The array length is
2869 * anv_physical_device::n_perf_query_commands.
2870 */
2871 struct mi_address_token *self_mod_locations;
2872
2873 /**
2874 * Index tracking which of the self_mod_locations items have already been
2875 * used.
2876 */
2877 uint32_t perf_reloc_idx;
2878
2879 /**
2880 * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2881 * Used to increase allocation size for long command buffers.
2882 */
2883 uint32_t total_batch_size;
2884
2885    /**
2886     * u_trace context used to record trace points while building this command buffer.
2887     */
2888 struct u_trace trace;
2889 };
2890
2891 /* Determine whether we can chain a given cmd_buffer to another one. We need
2892 * softpin and we also need to make sure that we can edit the end of the batch
2893  * to point to the next one, which requires that the command buffer not be
2894  * used simultaneously.
2895 */
2896 static inline bool
2897 anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
2898 {
2899 return !anv_use_relocations(cmd_buffer->device->physical) &&
2900 !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
2901 }
2902
2903 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2904 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2905 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2906 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
2907 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
2908 struct anv_cmd_buffer *secondary);
2909 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
2910 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
2911 struct anv_cmd_buffer *cmd_buffer,
2912 const VkSemaphore *in_semaphores,
2913 const uint64_t *in_wait_values,
2914 uint32_t num_in_semaphores,
2915 const VkSemaphore *out_semaphores,
2916 const uint64_t *out_signal_values,
2917 uint32_t num_out_semaphores,
2918 VkFence fence,
2919 int perf_query_pass);
2920
2921 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
2922
2923 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
2924 const void *data, uint32_t size, uint32_t alignment);
2925 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
2926 uint32_t *a, uint32_t *b,
2927 uint32_t dwords, uint32_t alignment);
2928
2929 struct anv_address
2930 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
2931 struct anv_state
2932 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
2933 uint32_t entries, uint32_t *state_offset);
2934 struct anv_state
2935 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
2936 struct anv_state
2937 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
2938 uint32_t size, uint32_t alignment);
2939
2940 VkResult
2941 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
2942
2943 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
2944
2945 struct anv_state
2946 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
2947 struct anv_state
2948 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
2949
2950 VkResult
2951 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
2952 uint32_t num_entries,
2953 uint32_t *state_offset,
2954 struct anv_state *bt_state);
2955
2956 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
2957
2958 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
2959
2960 enum anv_bo_sync_state {
2961    /** Indicates that this is a new (or newly reset) fence */
2962 ANV_BO_SYNC_STATE_RESET,
2963
2964 /** Indicates that this fence has been submitted to the GPU but is still
2965 * (as far as we know) in use by the GPU.
2966 */
2967 ANV_BO_SYNC_STATE_SUBMITTED,
2968
2969 ANV_BO_SYNC_STATE_SIGNALED,
2970 };
2971
2972 struct anv_bo_sync {
2973 struct vk_sync sync;
2974
2975 enum anv_bo_sync_state state;
2976 struct anv_bo *bo;
2977 };
2978
2979 extern const struct vk_sync_type anv_bo_sync_type;
2980
2981 static inline bool
2982 vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
2983 {
2984 return sync->type == &anv_bo_sync_type;
2985 }
2986
2987 VkResult anv_create_sync_for_memory(struct vk_device *device,
2988 VkDeviceMemory memory,
2989 bool signal_memory,
2990 struct vk_sync **sync_out);
2991
2992 struct anv_event {
2993 struct vk_object_base base;
2994 uint64_t semaphore;
2995 struct anv_state state;
2996 };
2997
2998 #define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
2999
3000 #define anv_foreach_stage(stage, stage_bits) \
3001 for (gl_shader_stage stage, \
3002 __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
3003 stage = __builtin_ffs(__tmp) - 1, __tmp; \
3004 __tmp &= ~(1 << (stage)))
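/* Illustrative usage sketch: iterating the stages set in a VkShaderStageFlags
 * mask (the classic stage bits match the mesa stage bit positions):
 *
 *    anv_foreach_stage(s, VK_SHADER_STAGE_VERTEX_BIT |
 *                         VK_SHADER_STAGE_FRAGMENT_BIT) {
 *       cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(s);
 *    }
 */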
3005
3006 struct anv_pipeline_bind_map {
3007 unsigned char surface_sha1[20];
3008 unsigned char sampler_sha1[20];
3009 unsigned char push_sha1[20];
3010
3011 uint32_t surface_count;
3012 uint32_t sampler_count;
3013
3014 struct anv_pipeline_binding * surface_to_descriptor;
3015 struct anv_pipeline_binding * sampler_to_descriptor;
3016
3017 struct anv_push_range push_ranges[4];
3018 };
3019
3020 struct anv_shader_bin {
3021 struct vk_pipeline_cache_object base;
3022
3023 gl_shader_stage stage;
3024
3025 struct anv_state kernel;
3026 uint32_t kernel_size;
3027
3028 const struct brw_stage_prog_data *prog_data;
3029 uint32_t prog_data_size;
3030
3031 struct brw_compile_stats stats[3];
3032 uint32_t num_stats;
3033
3034 struct nir_xfb_info *xfb_info;
3035
3036 struct anv_pipeline_bind_map bind_map;
3037 };
3038
3039 struct anv_shader_bin *
3040 anv_shader_bin_create(struct anv_device *device,
3041 gl_shader_stage stage,
3042 const void *key, uint32_t key_size,
3043 const void *kernel, uint32_t kernel_size,
3044 const struct brw_stage_prog_data *prog_data,
3045 uint32_t prog_data_size,
3046 const struct brw_compile_stats *stats, uint32_t num_stats,
3047 const struct nir_xfb_info *xfb_info,
3048 const struct anv_pipeline_bind_map *bind_map);
3049
3050 static inline void
3051 anv_shader_bin_ref(struct anv_shader_bin *shader)
3052 {
3053 vk_pipeline_cache_object_ref(&shader->base);
3054 }
3055
3056 static inline void
3057 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3058 {
3059 vk_pipeline_cache_object_unref(&shader->base);
3060 }
3061
3062 #define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
3063 assert((local_arg_offset) % 8 == 0); \
3064 const struct brw_bs_prog_data *prog_data = \
3065 brw_bs_prog_data_const(bin->prog_data); \
3066 assert(prog_data->simd_size == 8 || prog_data->simd_size == 16); \
3067 \
3068 (struct GFX_BINDLESS_SHADER_RECORD) { \
3069 .OffsetToLocalArguments = (local_arg_offset) / 8, \
3070 .BindlessShaderDispatchMode = \
3071 prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8, \
3072 .KernelStartPointer = bin->kernel.offset, \
3073 }; \
3074 })
3075
3076 struct anv_pipeline_executable {
3077 gl_shader_stage stage;
3078
3079 struct brw_compile_stats stats;
3080
3081 char *nir;
3082 char *disasm;
3083 };
3084
3085 enum anv_pipeline_type {
3086 ANV_PIPELINE_GRAPHICS,
3087 ANV_PIPELINE_COMPUTE,
3088 ANV_PIPELINE_RAY_TRACING,
3089 };
3090
3091 struct anv_pipeline {
3092 struct vk_object_base base;
3093
3094 struct anv_device * device;
3095
3096 struct anv_batch batch;
3097 struct anv_reloc_list batch_relocs;
3098
3099 void * mem_ctx;
3100
3101 enum anv_pipeline_type type;
3102 VkPipelineCreateFlags flags;
3103
3104 uint32_t ray_queries;
3105
3106 struct util_dynarray executables;
3107
3108 const struct intel_l3_config * l3_config;
3109 };
3110
3111 struct anv_graphics_pipeline {
3112 struct anv_pipeline base;
3113
3114 /* Shaders */
3115 struct anv_shader_bin * shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
3116
3117 VkShaderStageFlags active_stages;
3118
3119 struct vk_sample_locations_state sample_locations;
3120 struct vk_dynamic_graphics_state dynamic_state;
3121
3122    /* These fields are required with dynamic primitive topology;
3123     * rasterization_samples is used only with gen < 8.
3124 */
3125 VkLineRasterizationModeEXT line_mode;
3126 VkPolygonMode polygon_mode;
3127 uint32_t patch_control_points;
3128 uint32_t rasterization_samples;
3129
3130 VkColorComponentFlags color_comp_writes[MAX_RTS];
3131
3132 uint32_t view_mask;
3133 uint32_t instance_multiplier;
3134
3135 bool depth_clamp_enable;
3136 bool depth_clip_enable;
3137 bool kill_pixel;
3138 bool force_fragment_thread_dispatch;
3139 bool negative_one_to_one;
3140
3141 /* When primitive replication is used, subpass->view_mask will describe what
3142 * views to replicate.
3143 */
3144 bool use_primitive_replication;
3145
3146 uint32_t vb_used;
3147 struct anv_pipeline_vertex_binding {
3148 uint32_t stride;
3149 bool instanced;
3150 uint32_t instance_divisor;
3151 } vb[MAX_VBS];
3152
3153    /* Precomputed CS instructions that can be copied directly into
3154 * anv_cmd_buffer.
3155 */
3156 uint32_t batch_data[512];
3157
3158    /* Pre-packed CS instructions & structures that need to be merged later
3159 * with dynamic state.
3160 */
3161 struct {
3162 uint32_t sf[7];
3163 uint32_t clip[4];
3164 uint32_t xfb_bo_pitch[4];
3165 uint32_t wm[3];
3166 uint32_t blend_state[MAX_RTS * 2];
3167 uint32_t streamout_state[3];
3168 } gfx7;
3169
3170 struct {
3171 uint32_t sf[4];
3172 uint32_t raster[5];
3173 uint32_t wm[2];
3174 uint32_t ps_blend[2];
3175 uint32_t blend_state[1 + MAX_RTS * 2];
3176 uint32_t streamout_state[5];
3177 } gfx8;
3178 };
3179
3180 struct anv_compute_pipeline {
3181 struct anv_pipeline base;
3182
3183 struct anv_shader_bin * cs;
3184 uint32_t batch_data[9];
3185 uint32_t interface_descriptor_data[8];
3186 };
3187
3188 struct anv_rt_shader_group {
3189 VkRayTracingShaderGroupTypeKHR type;
3190
3191 struct anv_shader_bin *general;
3192 struct anv_shader_bin *closest_hit;
3193 struct anv_shader_bin *any_hit;
3194 struct anv_shader_bin *intersection;
3195
3196 /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
3197 uint32_t handle[8];
3198 };
3199
3200 struct anv_ray_tracing_pipeline {
3201 struct anv_pipeline base;
3202
3203 /* All shaders in the pipeline */
3204 struct util_dynarray shaders;
3205
3206 uint32_t group_count;
3207 struct anv_rt_shader_group * groups;
3208
3209 /* If non-zero, this is the default computed stack size as per the stack
3210 * size computation in the Vulkan spec. If zero, that indicates that the
3211 * client has requested a dynamic stack size.
3212 */
3213 uint32_t stack_size;
3214 };
3215
3216 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \
3217 static inline struct anv_##pipe_type##_pipeline * \
3218 anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline) \
3219 { \
3220 assert(pipeline->type == pipe_enum); \
3221 return (struct anv_##pipe_type##_pipeline *) pipeline; \
3222 }
3223
3224 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
3225 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
3226 ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
3227
3228 static inline bool
3229 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3230 gl_shader_stage stage)
3231 {
3232 return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3233 }
3234
3235 static inline bool
3236 anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
3237 {
3238 return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
3239 }
3240
3241 static inline bool
3242 anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
3243 {
3244 return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
3245 }
3246
3247 static inline bool
3248 anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
3249 {
3250 const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
3251 const struct vk_dynamic_graphics_state *dyn =
3252 &cmd_buffer->vk.dynamic_graphics_state;
3253 uint8_t color_writes = dyn->cb.color_write_enables;
3254
3255 /* All writes disabled through vkCmdSetColorWriteEnableEXT */
3256 if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
3257 return true;
3258
3259 /* Or all write masks are empty */
3260 for (uint32_t i = 0; i < state->color_att_count; i++) {
3261 if (state->pipeline->color_comp_writes[i] != 0)
3262 return false;
3263 }
3264
3265 return true;
3266 }
3267
3268 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage) \
3269 static inline const struct brw_##prefix##_prog_data * \
3270 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
3271 { \
3272 if (anv_pipeline_has_stage(pipeline, stage)) { \
3273 return (const struct brw_##prefix##_prog_data *) \
3274 pipeline->shaders[stage]->prog_data; \
3275 } else { \
3276 return NULL; \
3277 } \
3278 }
3279
3280 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
3281 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
3282 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
3283 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
3284 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3285 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
3286 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
3287
3288 static inline const struct brw_cs_prog_data *
3289 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3290 {
3291 assert(pipeline->cs);
3292 return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3293 }
3294
3295 static inline const struct brw_vue_prog_data *
3296 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3297 {
3298 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3299 return &get_gs_prog_data(pipeline)->base;
3300 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3301 return &get_tes_prog_data(pipeline)->base;
3302 else
3303 return &get_vs_prog_data(pipeline)->base;
3304 }
3305
3306 VkResult
3307 anv_device_init_rt_shaders(struct anv_device *device);
3308
3309 void
3310 anv_device_finish_rt_shaders(struct anv_device *device);
3311
3312 VkResult
3313 anv_pipeline_init(struct anv_pipeline *pipeline,
3314 struct anv_device *device,
3315 enum anv_pipeline_type type,
3316 VkPipelineCreateFlags flags,
3317 const VkAllocationCallbacks *pAllocator);
3318
3319 void
3320 anv_pipeline_finish(struct anv_pipeline *pipeline,
3321 struct anv_device *device,
3322 const VkAllocationCallbacks *pAllocator);
3323
3324 struct anv_format_plane {
3325 enum isl_format isl_format:16;
3326 struct isl_swizzle swizzle;
3327
3328 /* Whether this plane contains chroma channels */
3329 bool has_chroma;
3330
3331 /* For downscaling of YUV planes */
3332 uint8_t denominator_scales[2];
3333
3334 /* How to map sampled ycbcr planes to a single 4 component element. */
3335 struct isl_swizzle ycbcr_swizzle;
3336
3337 /* What aspect is associated to this plane */
3338 VkImageAspectFlags aspect;
3339 };
3340
3341
3342 struct anv_format {
3343 struct anv_format_plane planes[3];
3344 VkFormat vk_format;
3345 uint8_t n_planes;
3346 bool can_ycbcr;
3347 };
3348
3349 static inline void
3350 anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
3351 {
3352 if (util_bitcount(aspects) == 1) {
3353 assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
3354 VK_IMAGE_ASPECT_DEPTH_BIT |
3355 VK_IMAGE_ASPECT_STENCIL_BIT |
3356 VK_IMAGE_ASPECT_PLANE_0_BIT |
3357 VK_IMAGE_ASPECT_PLANE_1_BIT |
3358 VK_IMAGE_ASPECT_PLANE_2_BIT));
3359 } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
3360 assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
3361 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3362 VK_IMAGE_ASPECT_PLANE_1_BIT) ||
3363 aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3364 VK_IMAGE_ASPECT_PLANE_1_BIT |
3365 VK_IMAGE_ASPECT_PLANE_2_BIT));
3366 } else {
3367 assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
3368 VK_IMAGE_ASPECT_STENCIL_BIT));
3369 }
3370 }
3371
3372 /**
3373 * Return the aspect's plane relative to all_aspects. For an image, for
3374 * instance, all_aspects would be the set of aspects in the image. For
3375 * an image view, all_aspects would be the subset of aspects represented
3376 * by that particular view.
3377 */
3378 static inline uint32_t
3379 anv_aspect_to_plane(VkImageAspectFlags all_aspects,
3380 VkImageAspectFlagBits aspect)
3381 {
3382 anv_assert_valid_aspect_set(all_aspects);
3383 assert(util_bitcount(aspect) == 1);
3384 assert(!(aspect & ~all_aspects));
3385
3386 /* Because we always put image and view planes in aspect-bit-order, the
3387 * plane index is the number of bits in all_aspects before aspect.
3388 */
3389 return util_bitcount(all_aspects & (aspect - 1));
3390 }
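
/* Worked example (illustrative): for a two-plane YCbCr view where all_aspects
 * is (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT), querying the
 * plane of VK_IMAGE_ASPECT_PLANE_1_BIT counts the single lower aspect bit and
 * returns 1:
 *
 *    anv_aspect_to_plane(VK_IMAGE_ASPECT_PLANE_0_BIT |
 *                        VK_IMAGE_ASPECT_PLANE_1_BIT,
 *                        VK_IMAGE_ASPECT_PLANE_1_BIT) == 1
 */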
3391
3392 #define anv_foreach_image_aspect_bit(b, image, aspects) \
3393 u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
3394
3395 const struct anv_format *
3396 anv_get_format(VkFormat format);
3397
3398 static inline uint32_t
3399 anv_get_format_planes(VkFormat vk_format)
3400 {
3401 const struct anv_format *format = anv_get_format(vk_format);
3402
3403 return format != NULL ? format->n_planes : 0;
3404 }
3405
3406 struct anv_format_plane
3407 anv_get_format_plane(const struct intel_device_info *devinfo,
3408 VkFormat vk_format, uint32_t plane,
3409 VkImageTiling tiling);
3410
3411 struct anv_format_plane
3412 anv_get_format_aspect(const struct intel_device_info *devinfo,
3413 VkFormat vk_format,
3414 VkImageAspectFlagBits aspect, VkImageTiling tiling);
3415
3416 static inline enum isl_format
3417 anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
3418 VkImageAspectFlags aspect, VkImageTiling tiling)
3419 {
3420 return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
3421 }
3422
3423 bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
3424 VkImageCreateFlags create_flags,
3425 VkFormat vk_format, VkImageTiling vk_tiling,
3426 VkImageUsageFlags vk_usage,
3427 const VkImageFormatListCreateInfo *fmt_list);
3428
3429 extern VkFormat
3430 vk_format_from_android(unsigned android_format, unsigned android_usage);
3431
3432 static inline struct isl_swizzle
3433 anv_swizzle_for_render(struct isl_swizzle swizzle)
3434 {
3435 /* Sometimes the swizzle will have alpha map to one. We do this to fake
3436 * RGB as RGBA for texturing
3437 */
3438 assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3439 swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3440
3441 /* But it doesn't matter what we render to that channel */
3442 swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3443
3444 return swizzle;
3445 }
3446
3447 void
3448 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3449
3450 /**
3451 * Describes how each part of anv_image will be bound to memory.
3452 */
3453 struct anv_image_memory_range {
3454 /**
3455 * Disjoint bindings into which each portion of the image will be bound.
3456 *
3457 * Binding images to memory can be complicated and involve binding different
3458 * portions of the image to different memory objects or regions. For most
3459 * images, everything lives in the MAIN binding and gets bound by
3460 * vkBindImageMemory. For disjoint multi-planar images, each plane has
3461 * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
3462 * VkBindImagePlaneMemoryInfo. There may also exist bits of memory which are
3463 * implicit or driver-managed and live in special-case bindings.
3464 */
3465 enum anv_image_memory_binding {
3466 /**
3467 * Used if and only if image is not multi-planar disjoint. Bound by
3468 * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
3469 */
3470 ANV_IMAGE_MEMORY_BINDING_MAIN,
3471
3472 /**
3473 * Used if and only if image is multi-planar disjoint. Bound by
3474 * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
3475 */
3476 ANV_IMAGE_MEMORY_BINDING_PLANE_0,
3477 ANV_IMAGE_MEMORY_BINDING_PLANE_1,
3478 ANV_IMAGE_MEMORY_BINDING_PLANE_2,
3479
3480 /**
3481 * Driver-private bo. In special cases we may store the aux surface and/or
3482 * aux state in this binding.
3483 */
3484 ANV_IMAGE_MEMORY_BINDING_PRIVATE,
3485
3486 /** Sentinel */
3487 ANV_IMAGE_MEMORY_BINDING_END,
3488 } binding;
3489
3490 /**
3491 * Offset is relative to the start of the binding created by
3492 * vkBindImageMemory, not to the start of the bo.
3493 */
3494 uint64_t offset;
3495
3496 uint64_t size;
3497 uint32_t alignment;
3498 };
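
/* Illustrative only: for a non-disjoint image, every memory range uses
 * ANV_IMAGE_MEMORY_BINDING_MAIN and is bound by a single vkBindImageMemory2
 * call. For a disjoint two-plane image, plane 0's ranges use
 * ANV_IMAGE_MEMORY_BINDING_PLANE_0 and plane 1's use
 * ANV_IMAGE_MEMORY_BINDING_PLANE_1, each bound by its own VkBindImageMemoryInfo
 * carrying a VkBindImagePlaneMemoryInfo.
 */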
3499
3500 /**
3501 * Subsurface of an anv_image.
3502 */
3503 struct anv_surface {
3504 struct isl_surf isl;
3505 struct anv_image_memory_range memory_range;
3506 };
3507
3508 static inline bool MUST_CHECK
3509 anv_surface_is_valid(const struct anv_surface *surface)
3510 {
3511 return surface->isl.size_B > 0 && surface->memory_range.size > 0;
3512 }
3513
3514 struct anv_image {
3515 struct vk_image vk;
3516
3517 uint32_t n_planes;
3518
3519 /**
3520 * Image has multi-planar format and was created with
3521 * VK_IMAGE_CREATE_DISJOINT_BIT.
3522 */
3523 bool disjoint;
3524
3525 /**
3526 * Image was imported from a struct AHardwareBuffer. We have to delay
3527 * final image creation until bind time.
3528 */
3529 bool from_ahb;
3530
3531 /**
3532 * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
3533 * must be released when the image is destroyed.
3534 */
3535 bool from_gralloc;
3536
3537 /**
3538 * The memory bindings created by vkCreateImage and vkBindImageMemory.
3539 *
3540 * For details on the image's memory layout, see check_memory_bindings().
3541 *
3542 * vkCreateImage constructs the `memory_range` for each
3543 * anv_image_memory_binding. After vkCreateImage, each binding is valid if
3544 * and only if `memory_range::size > 0`.
3545 *
3546 * vkBindImageMemory binds each valid `memory_range` to an `address`.
3547 * Usually, the app will provide the address via the parameters of
3548 * vkBindImageMemory. However, special-case bindings may be bound to
3549 * driver-private memory.
3550 */
3551 struct anv_image_binding {
3552 struct anv_image_memory_range memory_range;
3553 struct anv_address address;
3554 } bindings[ANV_IMAGE_MEMORY_BINDING_END];
3555
3556 /**
3557 * Image subsurfaces
3558 *
3559 * For each aspect, anv_image::planes[x] is valid if and only if
3560 * anv_image::vk.aspects contains that aspect. Refer to
3561 * anv_image_aspect_to_plane() to find the plane index for a given aspect.
3562 *
3563 * The hardware requires that the depth buffer and stencil buffer be
3564 * separate surfaces. From Vulkan's perspective, though, depth and stencil
3565 * reside in the same VkImage. To satisfy both the hardware and Vulkan, we
3566 * allocate the depth and stencil buffers as separate surfaces in the same
3567 * bo.
3568 */
3569 struct anv_image_plane {
3570 struct anv_surface primary_surface;
3571
3572 /**
3573 * A surface which shadows the main surface and may have different
3574 * tiling. This is used for sampling using a tiling that isn't supported
3575 * for other operations.
3576 */
3577 struct anv_surface shadow_surface;
3578
3579 /**
3580 * The base aux usage for this image. For color images, this can be
3581 * either CCS_E or CCS_D depending on whether or not we can reliably
3582 * leave CCS on all the time.
3583 */
3584 enum isl_aux_usage aux_usage;
3585
3586 struct anv_surface aux_surface;
3587
3588 /** Location of the fast clear state. */
3589 struct anv_image_memory_range fast_clear_memory_range;
3590
3591 /**
3592 * Whether this image can be fast cleared with non-zero clear colors.
3593 * This can be an issue with mutable images when formats with different
3594 * per-component bit sizes are used.
3595 *
3596 * On Gfx9+, because the clear color is stored as four 32-bit components,
3597 * we can clear in R16G16_UNORM (storing two 16-bit values in components
3598 * 0 and 1 of the clear color) and then draw in R32_UINT, which interprets
3599 * the clear color as a single-component value and uses only the first
3600 * 16-bit component of the previously written clear color.
3601 *
3602 * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported; this
3603 * boolean prevents the use of CC_ONE.
3604 */
3605 bool can_non_zero_fast_clear;
3606 } planes[3];
3607 };
3608
3609 static inline bool
3610 anv_image_is_externally_shared(const struct anv_image *image)
3611 {
3612 return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
3613 image->vk.external_handle_types != 0;
3614 }
3615
3616 static inline bool
3617 anv_image_has_private_binding(const struct anv_image *image)
3618 {
3619 const struct anv_image_binding private_binding =
3620 image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
3621 return private_binding.memory_range.size != 0;
3622 }
3623
3624 /* The ordering of this enum is important */
3625 enum anv_fast_clear_type {
3626 /** Image does not have/support any fast-clear blocks */
3627 ANV_FAST_CLEAR_NONE = 0,
3628 /** Image has/supports fast-clear but only to the default value */
3629 ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
3630 /** Image has/supports fast-clear with an arbitrary fast-clear value */
3631 ANV_FAST_CLEAR_ANY = 2,
3632 };
3633
3634 /**
3635 * Return the aspect's _format_ plane, not its _memory_ plane (using the
3636 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
3637 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
3638 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
3639 */
3640 static inline uint32_t
3641 anv_image_aspect_to_plane(const struct anv_image *image,
3642 VkImageAspectFlagBits aspect)
3643 {
3644 return anv_aspect_to_plane(image->vk.aspects, aspect);
3645 }
3646
3647 /* Returns the number of auxiliary buffer levels attached to an image. */
3648 static inline uint8_t
3649 anv_image_aux_levels(const struct anv_image * const image,
3650 VkImageAspectFlagBits aspect)
3651 {
3652 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3653 if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
3654 return 0;
3655
3656 return image->vk.mip_levels;
3657 }
3658
3659 /* Returns the number of auxiliary buffer layers attached to an image. */
3660 static inline uint32_t
3661 anv_image_aux_layers(const struct anv_image * const image,
3662 VkImageAspectFlagBits aspect,
3663 const uint8_t miplevel)
3664 {
3665 assert(image);
3666
3667 /* The miplevel must exist in the main buffer. */
3668 assert(miplevel < image->vk.mip_levels);
3669
3670 if (miplevel >= anv_image_aux_levels(image, aspect)) {
3671 /* There are no layers with auxiliary data because the miplevel has no
3672 * auxiliary data.
3673 */
3674 return 0;
3675 }
3676
3677 return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
3678 }
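
/* Worked example (illustrative): for a 3D image with extent.depth == 16 and
 * array_layers == 1 that has auxiliary data at miplevel 2,
 * anv_image_aux_layers(image, aspect, 2) returns MAX2(1, 16 >> 2) == 4.
 */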
3679
3680 static inline struct anv_address MUST_CHECK
3681 anv_image_address(const struct anv_image *image,
3682 const struct anv_image_memory_range *mem_range)
3683 {
3684 const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
3685 assert(binding->memory_range.offset == 0);
3686
3687 if (mem_range->size == 0)
3688 return ANV_NULL_ADDRESS;
3689
3690 return anv_address_add(binding->address, mem_range->offset);
3691 }
3692
3693 static inline struct anv_address
3694 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
3695 const struct anv_image *image,
3696 VkImageAspectFlagBits aspect)
3697 {
3698 assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
3699 VK_IMAGE_ASPECT_DEPTH_BIT));
3700
3701 uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3702 const struct anv_image_memory_range *mem_range =
3703 &image->planes[plane].fast_clear_memory_range;
3704
3705 return anv_image_address(image, mem_range);
3706 }
3707
3708 static inline struct anv_address
3709 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
3710 const struct anv_image *image,
3711 VkImageAspectFlagBits aspect)
3712 {
3713 struct anv_address addr =
3714 anv_image_get_clear_color_addr(device, image, aspect);
3715
3716 const unsigned clear_color_state_size = device->info.ver >= 10 ?
3717 device->isl_dev.ss.clear_color_state_size :
3718 device->isl_dev.ss.clear_value_size;
3719 return anv_address_add(addr, clear_color_state_size);
3720 }
3721
3722 static inline struct anv_address
3723 anv_image_get_compression_state_addr(const struct anv_device *device,
3724 const struct anv_image *image,
3725 VkImageAspectFlagBits aspect,
3726 uint32_t level, uint32_t array_layer)
3727 {
3728 assert(level < anv_image_aux_levels(image, aspect));
3729 assert(array_layer < anv_image_aux_layers(image, aspect, level));
3730 UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3731 assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
3732
3733 /* Relative to start of the plane's fast clear memory range */
3734 uint32_t offset;
3735
3736 offset = 4; /* Go past the fast clear type */
3737
3738 if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
3739 for (uint32_t l = 0; l < level; l++)
3740 offset += anv_minify(image->vk.extent.depth, l) * 4;
3741 } else {
3742 offset += level * image->vk.array_layers * 4;
3743 }
3744
3745 offset += array_layer * 4;
3746
3747 assert(offset < image->planes[plane].fast_clear_memory_range.size);
3748
3749 return anv_address_add(
3750 anv_image_get_fast_clear_type_addr(device, image, aspect),
3751 offset);
3752 }
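
/* Worked example (illustrative): for a 2D image with array_layers == 6, the
 * compression state for level 1, array layer 2 lives at
 *
 *    offset = 4            (skip the fast clear type dword)
 *           + 1 * 6 * 4    (whole levels below, 4 bytes per layer)
 *           + 2 * 4        (layers below within the level)
 *           = 36
 *
 * bytes past the address returned by anv_image_get_fast_clear_type_addr().
 */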
3753
3754 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
3755 static inline bool
3756 anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
3757 const struct anv_image *image)
3758 {
3759 if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
3760 return false;
3761
3762 /* For Gfx8-11, there are some restrictions around sampling from HiZ.
3763 * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
3764 * say:
3765 *
3766 * "If this field is set to AUX_HIZ, Number of Multisamples must
3767 * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
3768 */
3769 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
3770 return false;
3771
3772 /* Allow this feature on BDW even though it is disabled in the BDW devinfo
3773 * struct. There's documentation which suggests that this feature actually
3774 * reduces performance on BDW, but it has only been observed to help so
3775 * far. Sampling fast-cleared blocks on BDW must also be handled with care
3776 * (see depth_stencil_attachment_compute_aux_usage() for more info).
3777 */
3778 if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
3779 return false;
3780
3781 return image->vk.samples == 1;
3782 }
3783
3784 /* Returns true if an MCS-enabled buffer can be sampled from. */
3785 static inline bool
3786 anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
3787 const struct anv_image *image)
3788 {
3789 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
3790 const uint32_t plane =
3791 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
3792
3793 assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
3794
3795 const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
3796
3797 /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
3798 * See HSD 1707282275, wa_14013111325. Due to the use of
3799 * format-reinterpretation, a simplified workaround is implemented.
3800 */
3801 if (devinfo->ver >= 12 &&
3802 isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
3803 return false;
3804 }
3805
3806 return true;
3807 }
3808
3809 static inline bool
3810 anv_image_plane_uses_aux_map(const struct anv_device *device,
3811 const struct anv_image *image,
3812 uint32_t plane)
3813 {
3814 return device->info.has_aux_map &&
3815 isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
3816 }
3817
3818 void
3819 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
3820 const struct anv_image *image,
3821 VkImageAspectFlagBits aspect,
3822 enum isl_aux_usage aux_usage,
3823 uint32_t level,
3824 uint32_t base_layer,
3825 uint32_t layer_count);
3826
3827 void
3828 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
3829 const struct anv_image *image,
3830 VkImageAspectFlagBits aspect,
3831 enum isl_aux_usage aux_usage,
3832 enum isl_format format, struct isl_swizzle swizzle,
3833 uint32_t level, uint32_t base_layer, uint32_t layer_count,
3834 VkRect2D area, union isl_color_value clear_color);
3835 void
3836 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3837 const struct anv_image *image,
3838 VkImageAspectFlags aspects,
3839 enum isl_aux_usage depth_aux_usage,
3840 uint32_t level,
3841 uint32_t base_layer, uint32_t layer_count,
3842 VkRect2D area,
3843 float depth_value, uint8_t stencil_value);
3844 void
3845 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
3846 const struct anv_image *src_image,
3847 enum isl_aux_usage src_aux_usage,
3848 uint32_t src_level, uint32_t src_base_layer,
3849 const struct anv_image *dst_image,
3850 enum isl_aux_usage dst_aux_usage,
3851 uint32_t dst_level, uint32_t dst_base_layer,
3852 VkImageAspectFlagBits aspect,
3853 uint32_t src_x, uint32_t src_y,
3854 uint32_t dst_x, uint32_t dst_y,
3855 uint32_t width, uint32_t height,
3856 uint32_t layer_count,
3857 enum blorp_filter filter);
3858 void
3859 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
3860 const struct anv_image *image,
3861 VkImageAspectFlagBits aspect, uint32_t level,
3862 uint32_t base_layer, uint32_t layer_count,
3863 enum isl_aux_op hiz_op);
3864 void
3865 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
3866 const struct anv_image *image,
3867 VkImageAspectFlags aspects,
3868 uint32_t level,
3869 uint32_t base_layer, uint32_t layer_count,
3870 VkRect2D area, uint8_t stencil_value);
3871 void
3872 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
3873 const struct anv_image *image,
3874 enum isl_format format, struct isl_swizzle swizzle,
3875 VkImageAspectFlagBits aspect,
3876 uint32_t base_layer, uint32_t layer_count,
3877 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
3878 bool predicate);
3879 void
3880 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
3881 const struct anv_image *image,
3882 enum isl_format format, struct isl_swizzle swizzle,
3883 VkImageAspectFlagBits aspect, uint32_t level,
3884 uint32_t base_layer, uint32_t layer_count,
3885 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
3886 bool predicate);
3887
3888 void
3889 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
3890 const struct anv_image *image,
3891 VkImageAspectFlagBits aspect,
3892 uint32_t base_level, uint32_t level_count,
3893 uint32_t base_layer, uint32_t layer_count);
3894
3895 enum isl_aux_state ATTRIBUTE_PURE
3896 anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
3897 const struct anv_image *image,
3898 const VkImageAspectFlagBits aspect,
3899 const VkImageLayout layout);
3900
3901 enum isl_aux_usage ATTRIBUTE_PURE
3902 anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
3903 const struct anv_image *image,
3904 const VkImageAspectFlagBits aspect,
3905 const VkImageUsageFlagBits usage,
3906 const VkImageLayout layout);
3907
3908 enum anv_fast_clear_type ATTRIBUTE_PURE
3909 anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
3910 const struct anv_image * const image,
3911 const VkImageAspectFlagBits aspect,
3912 const VkImageLayout layout);
3913
3914 static inline bool
3915 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
3916 VkImageAspectFlags aspects2)
3917 {
3918 if (aspects1 == aspects2)
3919 return true;
3920
3921 /* Only color aspects are compatible. */
3922 if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3923 (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3924 util_bitcount(aspects1) == util_bitcount(aspects2))
3925 return true;
3926
3927 return false;
3928 }
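
/* Illustrative only: VK_IMAGE_ASPECT_COLOR_BIT and VK_IMAGE_ASPECT_PLANE_0_BIT
 * are considered compatible here (both are single color aspects), while
 * VK_IMAGE_ASPECT_COLOR_BIT and VK_IMAGE_ASPECT_DEPTH_BIT are not.
 */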
3929
3930 struct anv_image_view {
3931 struct vk_image_view vk;
3932
3933 const struct anv_image *image; /**< VkImageViewCreateInfo::image */
3934
3935 unsigned n_planes;
3936 struct {
3937 uint32_t image_plane;
3938
3939 struct isl_view isl;
3940
3941 /**
3942 * RENDER_SURFACE_STATE when using image as a sampler surface with an
3943 * image layout of SHADER_READ_ONLY_OPTIMAL or
3944 * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
3945 */
3946 struct anv_surface_state optimal_sampler_surface_state;
3947
3948 /**
3949 * RENDER_SURFACE_STATE when using image as a sampler surface with an
3950 * image layout of GENERAL.
3951 */
3952 struct anv_surface_state general_sampler_surface_state;
3953
3954 /**
3955 * RENDER_SURFACE_STATE when using image as a storage image. Separate
3956 * states for vanilla (with the original format) and one which has been
3957 * lowered to a format suitable for reading. This may be a raw surface
3958 * in extreme cases or simply a surface with a different format where we
3959 * expect some conversion to be done in the shader.
3960 */
3961 struct anv_surface_state storage_surface_state;
3962 struct anv_surface_state lowered_storage_surface_state;
3963
3964 struct brw_image_param lowered_storage_image_param;
3965 } planes[3];
3966 };
3967
3968 enum anv_image_view_state_flags {
3969 ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED = (1 << 0),
3970 ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 1),
3971 };
3972
3973 void anv_image_fill_surface_state(struct anv_device *device,
3974 const struct anv_image *image,
3975 VkImageAspectFlagBits aspect,
3976 const struct isl_view *view,
3977 isl_surf_usage_flags_t view_usage,
3978 enum isl_aux_usage aux_usage,
3979 const union isl_color_value *clear_color,
3980 enum anv_image_view_state_flags flags,
3981 struct anv_surface_state *state_inout,
3982 struct brw_image_param *image_param_out);
3983
3984 struct anv_image_create_info {
3985 const VkImageCreateInfo *vk_info;
3986
3987 /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
3988 isl_tiling_flags_t isl_tiling_flags;
3989
3990 /** These flags will be added to the usage flags derived from VkImageCreateInfo. */
3991 isl_surf_usage_flags_t isl_extra_usage_flags;
3992 };
3993
3994 VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
3995 const struct anv_image_create_info *create_info);
3996
3997 void anv_image_finish(struct anv_image *image);
3998
3999 void anv_image_get_memory_requirements(struct anv_device *device,
4000 struct anv_image *image,
4001 VkImageAspectFlags aspects,
4002 VkMemoryRequirements2 *pMemoryRequirements);
4003
4004 enum isl_format
4005 anv_isl_format_for_descriptor_type(const struct anv_device *device,
4006 VkDescriptorType type);
4007
4008 static inline uint32_t
4009 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
4010 VkLineRasterizationModeEXT line_mode)
4011 {
4012 if (raster_mode == VK_POLYGON_MODE_LINE &&
4013 line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
4014 return true;
4015 return false;
4016 }
4017
4018 VkFormatFeatureFlags2
4019 anv_get_image_format_features2(const struct intel_device_info *devinfo,
4020 VkFormat vk_format,
4021 const struct anv_format *anv_format,
4022 VkImageTiling vk_tiling,
4023 const struct isl_drm_modifier_info *isl_mod_info);
4024
4025 void anv_fill_buffer_surface_state(struct anv_device *device,
4026 struct anv_state state,
4027 enum isl_format format,
4028 struct isl_swizzle swizzle,
4029 isl_surf_usage_flags_t usage,
4030 struct anv_address address,
4031 uint32_t range, uint32_t stride);
4032
4033
4034 /* Haswell border color is a bit of a disaster. Float and unorm formats use a
4035 * straightforward 32-bit float color in the first 64 bytes. Instead of using
4036 * a nice float/integer union like Gfx8+, Haswell specifies the integer border
4037 * color as a separate entry /after/ the float color. The layout of this entry
4038 * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
4039 *
4040 * Since we don't know the format/bpp, we can't make any of the border colors
4041 * containing '1' work for all formats, as it would be in the wrong place for
4042 * some of them. We opt to make 32-bit integers work as this seems like the
4043 * most common option. Fortunately, transparent black works regardless, as
4044 * all zeroes is the same in every bit-size.
4045 */
4046 struct hsw_border_color {
4047 float float32[4];
4048 uint32_t _pad0[12];
4049 uint32_t uint32[4];
4050 uint32_t _pad1[108];
4051 };
4052
4053 struct gfx8_border_color {
4054 union {
4055 float float32[4];
4056 uint32_t uint32[4];
4057 };
4058 /* Pad out to 64 bytes */
4059 uint32_t _pad[12];
4060 };
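
/* Size check (illustrative): the Haswell layout above is
 * 4*4 + 12*4 + 4*4 + 108*4 = 512 bytes, while the Gfx8+ layout is
 * 4*4 + 12*4 = 64 bytes, matching its "Pad out to 64 bytes" comment.
 */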
4061
4062 struct anv_ycbcr_conversion {
4063 struct vk_object_base base;
4064
4065 const struct anv_format * format;
4066 VkSamplerYcbcrModelConversion ycbcr_model;
4067 VkSamplerYcbcrRange ycbcr_range;
4068 VkComponentSwizzle mapping[4];
4069 VkChromaLocation chroma_offsets[2];
4070 VkFilter chroma_filter;
4071 bool chroma_reconstruction;
4072 };
4073
4074 struct anv_sampler {
4075 struct vk_object_base base;
4076
4077 uint32_t state[3][4];
4078 uint32_t n_planes;
4079 struct anv_ycbcr_conversion *conversion;
4080
4081 /* Blob of sampler state data which is guaranteed to be 32-byte aligned
4082 * and with a 32-byte stride for use as bindless samplers.
4083 */
4084 struct anv_state bindless_state;
4085
4086 struct anv_state custom_border_color;
4087 };
4088
4089 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4090
4091 struct anv_query_pool {
4092 struct vk_object_base base;
4093
4094 VkQueryType type;
4095 VkQueryPipelineStatisticFlags pipeline_statistics;
4096 /** Stride between slots, in bytes */
4097 uint32_t stride;
4098 /** Number of slots in this query pool */
4099 uint32_t slots;
4100 struct anv_bo * bo;
4101
4102 /* KHR perf queries: */
4103 uint32_t pass_size;
4104 uint32_t data_offset;
4105 uint32_t snapshot_size;
4106 uint32_t n_counters;
4107 struct intel_perf_counter_pass *counter_pass;
4108 uint32_t n_passes;
4109 struct intel_perf_query_info **pass_query;
4110 };
4111
4112 static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
4113 uint32_t pass)
4114 {
4115 return pool->pass_size * pass + 8;
4116 }
4117
4118 struct anv_acceleration_structure {
4119 struct vk_object_base base;
4120
4121 VkDeviceSize size;
4122 struct anv_address address;
4123 };
4124
4125 int anv_get_instance_entrypoint_index(const char *name);
4126 int anv_get_device_entrypoint_index(const char *name);
4127 int anv_get_physical_device_entrypoint_index(const char *name);
4128
4129 const char *anv_get_instance_entry_name(int index);
4130 const char *anv_get_physical_device_entry_name(int index);
4131 const char *anv_get_device_entry_name(int index);
4132
4133 bool
4134 anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
4135 const struct vk_instance_extension_table *instance);
4136 bool
4137 anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
4138 const struct vk_instance_extension_table *instance);
4139 bool
4140 anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
4141 const struct vk_instance_extension_table *instance,
4142 const struct vk_device_extension_table *device);
4143
4144 const struct vk_device_dispatch_table *
4145 anv_get_device_dispatch_table(const struct intel_device_info *devinfo);
4146
4147 void
4148 anv_dump_pipe_bits(enum anv_pipe_bits bits);
4149
4150 static inline void
4151 anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
4152 enum anv_pipe_bits bits,
4153 const char* reason)
4154 {
4155 cmd_buffer->state.pending_pipe_bits |= bits;
4156 if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits)
4157 {
4158 fputs("pc: add ", stderr);
4159 anv_dump_pipe_bits(bits);
4160 fprintf(stderr, "reason: %s\n", reason);
4161 }
4162 }
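
/* Illustrative usage (a sketch): callers pass a human-readable reason string
 * that is only printed when the pipe-control debug flag is enabled, e.g.
 *
 *    anv_add_pending_pipe_bits(cmd_buffer,
 *                              ANV_PIPE_CS_STALL_BIT,
 *                              "example: stall before reconfiguring state");
 *
 * where ANV_PIPE_CS_STALL_BIT is one of the anv_pipe_bits values declared
 * earlier in this header.
 */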
4163
4164 struct anv_performance_configuration_intel {
4165 struct vk_object_base base;
4166
4167 struct intel_perf_registers *register_config;
4168
4169 uint64_t config_id;
4170 };
4171
4172 void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
4173 void anv_device_perf_init(struct anv_device *device);
4174 void anv_perf_write_pass_results(struct intel_perf_config *perf,
4175 struct anv_query_pool *pool, uint32_t pass,
4176 const struct intel_perf_query_result *accumulated_results,
4177 union VkPerformanceCounterResultKHR *results);
4178
4179 /* Used to emit a series of memcpy operations */
4180 struct anv_memcpy_state {
4181 struct anv_device *device;
4182 struct anv_batch *batch;
4183
4184 struct anv_vb_cache_range vb_bound;
4185 struct anv_vb_cache_range vb_dirty;
4186 };
4187
4188 struct anv_utrace_flush_copy {
4189 /* Needs to be the first field */
4190 struct intel_ds_flush_data ds;
4191
4192 /* Batch stuff to implement a copy of timestamps recorded in another
4193 * buffer.
4194 */
4195 struct anv_reloc_list relocs;
4196 struct anv_batch batch;
4197 struct anv_bo *batch_bo;
4198
4199 /* Buffer of 64bits timestamps */
4200 struct anv_bo *trace_bo;
4201
4202 /* Syncobj to be signaled when the batch completes */
4203 struct vk_sync *sync;
4204
4205 /* Queue on which all the recorded traces are submitted */
4206 struct anv_queue *queue;
4207
4208 struct anv_memcpy_state memcpy_state;
4209 };
4210
4211 void anv_device_utrace_init(struct anv_device *device);
4212 void anv_device_utrace_finish(struct anv_device *device);
4213 VkResult
4214 anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
4215 uint32_t cmd_buffer_count,
4216 struct anv_cmd_buffer **cmd_buffers,
4217 struct anv_utrace_flush_copy **out_flush_data);
4218
4219 #ifdef HAVE_PERFETTO
4220 void anv_perfetto_init(void);
4221 uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
4222 void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
4223 uint64_t start_ts);
4224 #else
4225 static inline void anv_perfetto_init(void)
4226 {
4227 }
4228 static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
4229 {
4230 return 0;
4231 }
4232 static inline void anv_perfetto_end_submit(struct anv_queue *queue,
4233 uint32_t submission_id,
4234 uint64_t start_ts)
4235 {}
4236 #endif
4237
4238
4239 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
4240 VK_FROM_HANDLE(__anv_type, __name, __handle)
4241
4242 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
4243 VK_OBJECT_TYPE_COMMAND_BUFFER)
4244 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
4245 VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
4246 VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
4247 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
4248 VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
4249
4250 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
4251 VkAccelerationStructureKHR,
4252 VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
4253 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
4254 VK_OBJECT_TYPE_BUFFER)
4255 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
4256 VK_OBJECT_TYPE_BUFFER_VIEW)
4257 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
4258 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
4259 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
4260 VK_OBJECT_TYPE_DESCRIPTOR_SET)
4261 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
4262 VkDescriptorSetLayout,
4263 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
4264 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
4265 VkDescriptorUpdateTemplate,
4266 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
4267 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
4268 VK_OBJECT_TYPE_DEVICE_MEMORY)
4269 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
4270 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
4271 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
4272 VK_OBJECT_TYPE_IMAGE_VIEW);
4273 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
4274 VK_OBJECT_TYPE_PIPELINE)
4275 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
4276 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
4277 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
4278 VK_OBJECT_TYPE_QUERY_POOL)
4279 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
4280 VK_OBJECT_TYPE_SAMPLER)
4281 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
4282 VkSamplerYcbcrConversion,
4283 VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
4284 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
4285 VkPerformanceConfigurationINTEL,
4286 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
4287
4288 #define anv_genX(devinfo, thing) ({ \
4289 __typeof(&gfx9_##thing) genX_thing; \
4290 switch ((devinfo)->verx10) { \
4291 case 70: \
4292 genX_thing = &gfx7_##thing; \
4293 break; \
4294 case 75: \
4295 genX_thing = &gfx75_##thing; \
4296 break; \
4297 case 80: \
4298 genX_thing = &gfx8_##thing; \
4299 break; \
4300 case 90: \
4301 genX_thing = &gfx9_##thing; \
4302 break; \
4303 case 110: \
4304 genX_thing = &gfx11_##thing; \
4305 break; \
4306 case 120: \
4307 genX_thing = &gfx12_##thing; \
4308 break; \
4309 case 125: \
4310 genX_thing = &gfx125_##thing; \
4311 break; \
4312 default: \
4313 unreachable("Unknown hardware generation"); \
4314 } \
4315 genX_thing; \
4316 })
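
/* Illustrative usage (a sketch): anv_genX() resolves a per-generation entry
 * point from a devinfo at run time, e.g. calling a hypothetical
 * genX(emit_something) implementation for the current GPU:
 *
 *    anv_genX(devinfo, emit_something)(cmd_buffer);
 *
 * This requires gfx7_emit_something, gfx75_emit_something, ...,
 * gfx125_emit_something to all exist with identical signatures.
 */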
4317
4318 /* Gen-specific function declarations */
4319 #ifdef genX
4320 # include "anv_genX.h"
4321 #else
4322 # define genX(x) gfx7_##x
4323 # include "anv_genX.h"
4324 # undef genX
4325 # define genX(x) gfx75_##x
4326 # include "anv_genX.h"
4327 # undef genX
4328 # define genX(x) gfx8_##x
4329 # include "anv_genX.h"
4330 # undef genX
4331 # define genX(x) gfx9_##x
4332 # include "anv_genX.h"
4333 # undef genX
4334 # define genX(x) gfx11_##x
4335 # include "anv_genX.h"
4336 # undef genX
4337 # define genX(x) gfx12_##x
4338 # include "anv_genX.h"
4339 # undef genX
4340 # define genX(x) gfx125_##x
4341 # include "anv_genX.h"
4342 # undef genX
4343 #endif
4344
4345 #endif /* ANV_PRIVATE_H */
4346