1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* Resource binding slots and sampler states (each described with 8 or
8 * 4 dwords) are stored in lists in memory which is accessed by shaders
9 * using scalar load instructions.
10 *
11 * This file is responsible for managing such lists. It keeps a copy of all
12 * descriptors in CPU memory and re-uploads a whole list if some slots have
13 * been changed.
14 *
15 * This code is also responsible for updating shader pointers to those lists.
16 *
17 * Note that CP DMA can't be used for updating the lists, because a GPU hang
18 * could leave the list in a mid-IB state and the next IB would get wrong
19 * descriptors and the whole context would be unusable at that point.
20 * (Note: Register shadowing can't be used for the same reason.)
21 *
22 * Also, uploading descriptors to newly allocated memory doesn't require
23 * a KCACHE flush.
24 *
25 *
26 * Possible scenarios for one 16 dword image+sampler slot:
27 *
28 * | Image | w/ FMASK | Buffer | NULL
29 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
30 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
31 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
32 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
33 *
34 * FMASK implies MSAA, therefore no sampler state.
35 * Sampler states are never unbound except when FMASK is bound.
36 */
37
38 #include "si_pipe.h"
39 #include "si_build_pm4.h"
40 #include "sid.h"
41 #include "util/format/u_format.h"
42 #include "util/hash_table.h"
43 #include "util/u_idalloc.h"
44 #include "util/u_memory.h"
45 #include "util/u_upload_mgr.h"
46
47 /* NULL image and buffer descriptor for textures (alpha = 1) and images
48 * (alpha = 0).
49 *
50 * For images, all fields must be zero except for the swizzle, which
51 * supports arbitrary combinations of 0s and 1s. The texture type must be
52 * set to any valid type (e.g. 1D); if it isn't set, the hw hangs.
53 *
54 * For buffers, all fields must be zero. If they are not, the hw hangs.
55 *
56 * This is the only reason why the buffer descriptor must be in words [4:7].
57 */
58 static uint32_t null_texture_descriptor[8] = {
59 0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
60 /* the rest must contain zeros, which is also used by the buffer
61 * descriptor */
62 };
63
64 static uint32_t null_image_descriptor[8] = {
65 0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
66 /* the rest must contain zeros, which is also used by the buffer
67 * descriptor */
68 };
69
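/* Extract the 64-bit virtual address from dwords 0-1 of a buffer descriptor
 * and sign-extend it from 48 bits.
 */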
70 static uint64_t si_desc_extract_buffer_address(const uint32_t *desc)
71 {
72 uint64_t va = desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
73
74 /* Sign-extend the 48-bit address. */
75 va <<= 16;
76 va = (int64_t)va >> 16;
77 return va;
78 }
79
80 static void si_init_descriptor_list(uint32_t *desc_list, unsigned element_dw_size,
81 unsigned num_elements, const uint32_t *null_descriptor)
82 {
83 int i;
84
85 /* Initialize the array to NULL descriptors if the element size is a multiple of 8 dwords. */
86 if (null_descriptor) {
87 assert(element_dw_size % 8 == 0);
88 for (i = 0; i < num_elements * element_dw_size / 8; i++)
89 memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
90 }
91 }
92
93 static void si_init_descriptors(struct si_descriptors *desc, short shader_userdata_rel_index,
94 unsigned element_dw_size, unsigned num_elements)
95 {
96 desc->list = CALLOC(num_elements, element_dw_size * 4);
97 desc->element_dw_size = element_dw_size;
98 desc->num_elements = num_elements;
99 desc->shader_userdata_offset = shader_userdata_rel_index * 4;
100 desc->slot_index_to_bind_directly = -1;
101 }
102
103 static void si_release_descriptors(struct si_descriptors *desc)
104 {
105 si_resource_reference(&desc->buffer, NULL);
106 FREE(desc->list);
107 }
108
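/* Upload the active slot range of the CPU descriptor list to newly allocated
 * GPU memory and set desc->gpu_address so that the shader pointer points to
 * slot 0. If the only active slot is the one marked for direct binding, skip
 * the upload and take the address from that buffer descriptor instead.
 */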
109 static void si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc)
110 {
111 unsigned slot_size = desc->element_dw_size * 4;
112 unsigned first_slot_offset = desc->first_active_slot * slot_size;
113 unsigned upload_size = desc->num_active_slots * slot_size;
114
115 /* Skip the upload if no shader is using the descriptors. dirty_mask
116 * will stay dirty and the descriptors will be uploaded when there is
117 * a shader using them.
118 */
119 if (!upload_size)
120 return;
121
122 /* If there is just one active descriptor, bind it directly. */
123 if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
124 desc->num_active_slots == 1) {
125 uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * desc->element_dw_size];
126
127 /* The buffer is already in the buffer list. */
128 si_resource_reference(&desc->buffer, NULL);
129 desc->gpu_list = NULL;
130 desc->gpu_address = si_desc_extract_buffer_address(descriptor);
131 return;
132 }
133
134 uint32_t *ptr;
135 unsigned buffer_offset;
136 u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size,
137 si_optimal_tcc_alignment(sctx, upload_size), &buffer_offset,
138 (struct pipe_resource **)&desc->buffer, (void **)&ptr);
139 if (!desc->buffer) {
140 sctx->ws->ctx_set_sw_reset_status(sctx->ctx, PIPE_GUILTY_CONTEXT_RESET,
141 "radeonsi: not enough memory to upload descriptors\n");
142 return;
143 }
144
145 util_memcpy_cpu_to_le32(ptr, (char *)desc->list + first_slot_offset, upload_size);
146 desc->gpu_list = ptr - first_slot_offset / 4;
147
148 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer,
149 RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS);
150
151 /* The shader pointer should point to slot 0. */
152 buffer_offset -= first_slot_offset;
153 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
154
155 assert(desc->buffer->flags & RADEON_FLAG_32BIT);
156 assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi);
157 assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi);
158 }
159
160 static void
161 si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *desc)
162 {
163 if (!desc->buffer)
164 return;
165
166 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer,
167 RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS);
168 }
169
170 /* SAMPLER VIEWS */
171
172 static inline unsigned si_get_sampler_view_priority(struct si_resource *res)
173 {
174 if (res->b.b.target == PIPE_BUFFER)
175 return RADEON_PRIO_SAMPLER_BUFFER;
176
177 if (res->b.b.nr_samples > 1)
178 return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
179
180 return RADEON_PRIO_SAMPLER_TEXTURE;
181 }
182
183 static struct si_descriptors *si_sampler_and_image_descriptors(struct si_context *sctx,
184 unsigned shader)
185 {
186 return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
187 }
188
189 static void si_release_sampler_views(struct si_samplers *samplers)
190 {
191 int i;
192
193 for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
194 pipe_sampler_view_reference(&samplers->views[i], NULL);
195 }
196 }
197
198 static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_resource *resource,
199 unsigned usage, bool is_stencil_sampler)
200 {
201 struct si_texture *tex = (struct si_texture *)resource;
202 unsigned priority;
203
204 if (!resource)
205 return;
206
207 /* Use the flushed depth texture if direct sampling is unsupported. */
208 if (resource->target != PIPE_BUFFER && tex->is_depth &&
209 !si_can_sample_zs(tex, is_stencil_sampler))
210 tex = tex->flushed_depth_texture;
211
212 priority = si_get_sampler_view_priority(&tex->buffer);
213 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, usage | priority);
214 }
215
216 static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers)
217 {
218 unsigned mask = samplers->enabled_mask;
219
220 /* Add buffers to the CS. */
221 while (mask) {
222 int i = u_bit_scan(&mask);
223 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
224
225 si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
226 sview->is_stencil_sampler);
227 }
228 }
229
230 static bool si_sampler_views_check_encrypted(struct si_context *sctx, struct si_samplers *samplers,
231 unsigned samplers_declared)
232 {
233 unsigned mask = samplers->enabled_mask & samplers_declared;
234
235 /* Check whether any sampler uses an encrypted resource. */
236 while (mask) {
237 int i = u_bit_scan(&mask);
238 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
239
240 struct si_resource *res = si_resource(sview->base.texture);
241 if (res->flags & RADEON_FLAG_ENCRYPTED)
242 return true;
243 }
244 return false;
245 }
246
247 /* Set buffer descriptor fields that can be changed by reallocations. */
248 static void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state)
249 {
250 uint64_t va = buf->gpu_address + offset;
251
252 state[0] = va;
253 state[1] &= C_008F04_BASE_ADDRESS_HI;
254 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
255 }
256
257 /* Set texture descriptor fields that can be changed by reallocations.
258 *
259 * \param tex texture
260 * \param base_level_info information of the level of BASE_ADDRESS
261 * \param base_level the level of BASE_ADDRESS
262 * \param first_level pipe_sampler_view.u.tex.first_level
263 * \param block_width util_format_get_blockwidth()
264 * \param is_stencil select between separate Z & Stencil
265 * \param state descriptor to update
266 */
267 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex,
268 const struct legacy_surf_level *base_level_info,
269 unsigned base_level, unsigned first_level, unsigned block_width,
270 /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
271 bool is_stencil, uint16_t access, uint32_t * restrict state)
272 {
273 uint64_t va, meta_va = 0;
274
275 if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) {
276 tex = tex->flushed_depth_texture;
277 is_stencil = false;
278 }
279
280 va = tex->buffer.gpu_address;
281
282 if (sscreen->info.gfx_level >= GFX9) {
283 /* Only stencil_offset needs to be added here. */
284 if (is_stencil)
285 va += tex->surface.u.gfx9.zs.stencil_offset;
286 else
287 va += tex->surface.u.gfx9.surf_offset;
288 } else {
289 va += (uint64_t)base_level_info->offset_256B * 256;
290 }
291
292 if (!sscreen->info.has_image_opcodes) {
293 /* Set it as a buffer descriptor. */
294 state[0] = va;
295 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
296 return;
297 }
298
299 state[0] = va >> 8;
300 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
301
302 if (sscreen->info.gfx_level >= GFX8) {
303 if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) {
304 meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;
305
306 if (sscreen->info.gfx_level == GFX8) {
307 meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset;
308 assert(base_level_info->mode == RADEON_SURF_MODE_2D);
309 }
310
311 unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8;
312 dcc_tile_swizzle &= (1 << tex->surface.meta_alignment_log2) - 1;
313 meta_va |= dcc_tile_swizzle;
314 } else if (vi_tc_compat_htile_enabled(tex, first_level,
315 is_stencil ? PIPE_MASK_S : PIPE_MASK_Z)) {
316 meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;
317 }
318 }
319
320 if (sscreen->info.gfx_level >= GFX10) {
321 state[0] |= tex->surface.tile_swizzle;
322
323 if (is_stencil) {
324 state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
325 } else {
326 state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
327 }
328
329 /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple
330 * of 256B.
331 */
332 if (sscreen->info.gfx_level >= GFX10_3 && tex->surface.u.gfx9.uses_custom_pitch) {
333 ASSERTED unsigned min_alignment = 256;
334 assert((tex->surface.u.gfx9.surf_pitch * tex->surface.bpe) % min_alignment == 0);
335 assert(tex->buffer.b.b.target == PIPE_TEXTURE_2D ||
336 tex->buffer.b.b.target == PIPE_TEXTURE_RECT);
337 assert(tex->surface.is_linear);
338 unsigned pitch = tex->surface.u.gfx9.surf_pitch;
339
340 /* Subsampled images have the pitch in the units of blocks. */
341 if (tex->surface.blk_w == 2)
342 pitch *= 2;
343
344 state[4] |= S_00A010_DEPTH(pitch - 1) | /* DEPTH contains low bits of PITCH. */
345 S_00A010_PITCH_MSB((pitch - 1) >> 13);
346 }
347
348 if (meta_va) {
349 struct gfx9_surf_meta_flags meta = {
350 .rb_aligned = 1,
351 .pipe_aligned = 1,
352 };
353
354 if (!tex->is_depth && tex->surface.meta_offset)
355 meta = tex->surface.u.gfx9.color.dcc;
356
357 state[6] |= S_00A018_COMPRESSION_EN(1) |
358 S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
359 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) |
360 /* DCC image stores require the following settings:
361 * - INDEPENDENT_64B_BLOCKS = 0
362 * - INDEPENDENT_128B_BLOCKS = 1
363 * - MAX_COMPRESSED_BLOCK_SIZE = 128B
364 * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
365 *
366 * The same limitations apply to SDMA compressed stores because
367 * SDMA uses the same DCC codec.
368 */
369 S_00A018_WRITE_COMPRESS_ENABLE(ac_surface_supports_dcc_image_stores(sscreen->info.gfx_level, &tex->surface) &&
370 (access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE));
371
372 /* TC-compatible MSAA HTILE requires ITERATE_256. */
373 if (tex->is_depth && tex->buffer.b.b.nr_samples >= 2)
374 state[6] |= S_00A018_ITERATE_256(1);
375
376 state[7] = meta_va >> 16;
377 }
378 } else if (sscreen->info.gfx_level == GFX9) {
379 state[0] |= tex->surface.tile_swizzle;
380
381 if (is_stencil) {
382 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
383 state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch);
384 } else {
385 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
386
387 uint32_t hw_format = G_008F14_DATA_FORMAT(state[1]);
388 uint16_t epitch = tex->surface.u.gfx9.epitch;
389
390 * epitch is surf_pitch - 1 and is in units of elements.
391 * For some reason I don't understand, when a packed YUV format
392 * like UYUV is used, we have to double epitch (making it a pixel
393 * pitch instead of an element pitch). Note that it's only done
394 * when sampling the texture using its native format; we don't
395 * need to do this when sampling it as UINT32 (as done by
396 * SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT).
397 * This looks broken, so it's possible that surf_pitch / epitch
398 * are computed incorrectly, but that's the only way I found
399 * to get these use cases to work properly:
400 * - yuyv dmabuf import (#6131)
401 * - jpeg vaapi decode
402 * - yuyv texture sampling (!26947)
403 * - jpeg vaapi get image (#10375)
404 */
405 if ((tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM ||
406 tex->buffer.b.b.format == PIPE_FORMAT_G8R8_B8R8_UNORM) &&
407 (hw_format == V_008F14_IMG_DATA_FORMAT_GB_GR ||
408 hw_format == V_008F14_IMG_DATA_FORMAT_BG_RG)) {
409 epitch = (epitch + 1) * 2 - 1;
410 }
411
412 state[4] |= S_008F20_PITCH(epitch);
413 }
414
415 if (meta_va) {
416 struct gfx9_surf_meta_flags meta = {
417 .rb_aligned = 1,
418 .pipe_aligned = 1,
419 };
420
421 if (!tex->is_depth && tex->surface.meta_offset)
422 meta = tex->surface.u.gfx9.color.dcc;
423
424 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
425 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
426 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
427 state[6] |= S_008F28_COMPRESSION_EN(1);
428 state[7] = meta_va >> 8;
429 }
430 } else {
431 /* GFX6-GFX8 */
432 unsigned pitch = base_level_info->nblk_x * block_width;
433 unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
434
435 /* Only macrotiled modes can set tile swizzle. */
436 if (base_level_info->mode == RADEON_SURF_MODE_2D)
437 state[0] |= tex->surface.tile_swizzle;
438
439 state[3] |= S_008F1C_TILING_INDEX(index);
440 state[4] |= S_008F20_PITCH(pitch - 1);
441
442 if (sscreen->info.gfx_level == GFX8 && meta_va) {
443 state[6] |= S_008F28_COMPRESSION_EN(1);
444 state[7] = meta_va >> 8;
445 }
446 }
447
448 if (tex->swap_rgb_to_bgr) {
449 unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]);
450 unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]);
451
452 state[3] &= C_008F1C_DST_SEL_X;
453 state[3] |= S_008F1C_DST_SEL_X(swizzle_z);
454 state[3] &= C_008F1C_DST_SEL_Z;
455 state[3] |= S_008F1C_DST_SEL_Z(swizzle_x);
456 }
457 }
458
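/* Copy the 4-dword sampler state into the descriptor. Non-stencil views of
 * depth textures whose format was upgraded use the alternate sampler words.
 */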
459 static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
460 struct si_sampler_view *sview, struct si_texture *tex,
461 uint32_t *desc)
462 {
463 if (tex && tex->upgraded_depth && sview && !sview->is_stencil_sampler)
464 memcpy(desc, sstate->upgraded_depth_val, 4 * 4);
465 else
466 memcpy(desc, sstate->val, 4 * 4);
467 }
468
469 static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_view *sview,
470 struct si_sampler_state *sstate,
471 /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
472 uint32_t * restrict desc)
473 {
474 struct pipe_sampler_view *view = &sview->base;
475 struct si_texture *tex = (struct si_texture *)view->texture;
476
477 assert(tex); /* views with texture == NULL aren't supported */
478
479 if (tex->buffer.b.b.target == PIPE_BUFFER) {
480 memcpy(desc, sview->state, 8 * 4);
481 memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. */
482 si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4);
483 return;
484 }
485
486 if (unlikely(sview->dcc_incompatible)) {
487 if (vi_dcc_enabled(tex, view->u.tex.first_level))
488 if (!si_texture_disable_dcc(sctx, tex))
489 si_decompress_dcc(sctx, tex);
490
491 sview->dcc_incompatible = false;
492 }
493
494 bool is_separate_stencil = tex->db_compatible && sview->is_stencil_sampler;
495
496 memcpy(desc, sview->state, 8 * 4);
497 si_set_mutable_tex_desc_fields(sctx->screen, tex, sview->base_level_info, 0,
498 sview->base.u.tex.first_level, sview->block_width,
499 is_separate_stencil, 0, desc);
500
501 if (tex->surface.fmask_size) {
502 memcpy(desc + 8, sview->fmask_state, 8 * 4);
503 } else {
504 /* Disable FMASK and bind sampler state in [12:15]. */
505 memcpy(desc + 8, null_texture_descriptor, 4 * 4);
506
507 if (sstate)
508 si_set_sampler_state_desc(sstate, sview, tex, desc + 12);
509 }
510 }
511
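/* Return whether sampling this color texture requires a decompress pass:
 * it has FMASK, or dirty levels together with CMASK or DCC. Always false
 * on GFX11+ and for depth textures.
 */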
512 static bool color_needs_decompression(struct si_texture *tex)
513 {
514 struct si_screen *sscreen = (struct si_screen *)tex->buffer.b.b.screen;
515
516 if (sscreen->info.gfx_level >= GFX11 || tex->is_depth)
517 return false;
518
519 return tex->surface.fmask_size ||
520 (tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.meta_offset));
521 }
522
523 static bool depth_needs_decompression(struct si_texture *tex, bool is_stencil)
524 {
525 /* If the depth/stencil texture is TC-compatible, no decompression
526 * will be done. The decompression function will only flush DB caches
527 * to make it coherent with shaders. That's necessary because the driver
528 * doesn't flush DB caches in any other case.
529 */
530 return tex->db_compatible && (tex->dirty_level_mask || (is_stencil && tex->stencil_dirty_level_mask));
531 }
532
533 static void si_reset_sampler_view_slot(struct si_samplers *samplers, unsigned slot,
534 uint32_t * restrict desc)
535 {
536 pipe_sampler_view_reference(&samplers->views[slot], NULL);
537 memcpy(desc, null_texture_descriptor, 8 * 4);
538 /* Only clear the lower dwords of FMASK. */
539 memcpy(desc + 8, null_texture_descriptor, 4 * 4);
540 /* Re-set the sampler state if we are transitioning from FMASK. */
541 if (samplers->sampler_states[slot])
542 si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL, desc + 12);
543 }
544
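/* Bind sampler views for one shader stage: write the image/FMASK/sampler
 * descriptors, update the enabled and decompression masks, and mark the
 * descriptor list dirty. Trailing slots (or all slots if views is NULL)
 * are reset to NULL descriptors.
 */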
545 static void si_set_sampler_views(struct si_context *sctx, unsigned shader,
546 unsigned start_slot, unsigned count,
547 unsigned unbind_num_trailing_slots,
548 bool take_ownership, struct pipe_sampler_view **views,
549 bool disallow_early_out)
550 {
551 struct si_samplers *samplers = &sctx->samplers[shader];
552 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
553 uint32_t unbound_mask = 0;
554
555 if (views) {
556 for (unsigned i = 0; i < count; i++) {
557 unsigned slot = start_slot + i;
558 struct si_sampler_view *sview = (struct si_sampler_view *)views[i];
559 unsigned desc_slot = si_get_sampler_slot(slot);
560 /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
561 uint32_t *restrict desc = descs->list + desc_slot * 16;
562
563 if (samplers->views[slot] == &sview->base && !disallow_early_out) {
564 if (take_ownership) {
565 struct pipe_sampler_view *view = views[i];
566 pipe_sampler_view_reference(&view, NULL);
567 }
568 continue;
569 }
570
571 if (sview) {
572 struct si_texture *tex = (struct si_texture *)sview->base.texture;
573
574 si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc);
575
576 if (tex->buffer.b.b.target == PIPE_BUFFER) {
577 tex->buffer.bind_history |= SI_BIND_SAMPLER_BUFFER(shader);
578 samplers->needs_depth_decompress_mask &= ~(1u << slot);
579 samplers->needs_color_decompress_mask &= ~(1u << slot);
580 } else {
581 if (tex->is_depth) {
582 samplers->has_depth_tex_mask |= 1u << slot;
583 samplers->needs_color_decompress_mask &= ~(1u << slot);
584
585 if (depth_needs_decompression(tex, sview->is_stencil_sampler)) {
586 samplers->needs_depth_decompress_mask |= 1u << slot;
587 } else {
588 samplers->needs_depth_decompress_mask &= ~(1u << slot);
589 }
590 } else {
591 samplers->has_depth_tex_mask &= ~(1u << slot);
592 samplers->needs_depth_decompress_mask &= ~(1u << slot);
593
594 if (color_needs_decompression(tex)) {
595 samplers->needs_color_decompress_mask |= 1u << slot;
596 } else {
597 samplers->needs_color_decompress_mask &= ~(1u << slot);
598 }
599 }
600
601 if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
602 p_atomic_read(&tex->framebuffers_bound))
603 sctx->need_check_render_feedback = true;
604 }
605
606 if (take_ownership) {
607 pipe_sampler_view_reference(&samplers->views[slot], NULL);
608 samplers->views[slot] = &sview->base;
609 } else {
610 pipe_sampler_view_reference(&samplers->views[slot], &sview->base);
611 }
612 samplers->enabled_mask |= 1u << slot;
613
614 /* Since this can flush, it must be done after enabled_mask is
615 * updated. */
616 si_sampler_view_add_buffer(sctx, &tex->buffer.b.b, RADEON_USAGE_READ,
617 sview->is_stencil_sampler);
618 } else {
619 si_reset_sampler_view_slot(samplers, slot, desc);
620 unbound_mask |= 1u << slot;
621 }
622 }
623 } else {
624 unbind_num_trailing_slots += count;
625 count = 0;
626 }
627
628 for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
629 unsigned slot = start_slot + count + i;
630 unsigned desc_slot = si_get_sampler_slot(slot);
631 uint32_t * restrict desc = descs->list + desc_slot * 16;
632
633 if (samplers->views[slot])
634 si_reset_sampler_view_slot(samplers, slot, desc);
635 }
636
637 unbound_mask |= BITFIELD_RANGE(start_slot + count, unbind_num_trailing_slots);
638 samplers->enabled_mask &= ~unbound_mask;
639 samplers->has_depth_tex_mask &= ~unbound_mask;
640 samplers->needs_depth_decompress_mask &= ~unbound_mask;
641 samplers->needs_color_decompress_mask &= ~unbound_mask;
642
643 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
644 if (shader != PIPE_SHADER_COMPUTE)
645 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
646 }
647
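/* Recompute the per-shader bits that indicate whether any bound sampler or
 * image needs decompression and whether a depth texture is bound.
 */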
648 static void si_update_shader_needs_decompress_mask(struct si_context *sctx, unsigned shader)
649 {
650 struct si_samplers *samplers = &sctx->samplers[shader];
651 unsigned shader_bit = 1 << shader;
652
653 if (samplers->needs_depth_decompress_mask || samplers->needs_color_decompress_mask ||
654 sctx->images[shader].needs_color_decompress_mask)
655 sctx->shader_needs_decompress_mask |= shader_bit;
656 else
657 sctx->shader_needs_decompress_mask &= ~shader_bit;
658
659 if (samplers->has_depth_tex_mask)
660 sctx->shader_has_depth_tex |= shader_bit;
661 else
662 sctx->shader_has_depth_tex &= ~shader_bit;
663 }
664
665 static void si_pipe_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type shader,
666 unsigned start, unsigned count,
667 unsigned unbind_num_trailing_slots,
668 bool take_ownership, struct pipe_sampler_view **views)
669 {
670 struct si_context *sctx = (struct si_context *)ctx;
671
672 if ((!count && !unbind_num_trailing_slots) || shader >= SI_NUM_SHADERS)
673 return;
674
675 si_set_sampler_views(sctx, shader, start, count, unbind_num_trailing_slots,
676 take_ownership, views, false);
677 si_update_shader_needs_decompress_mask(sctx, shader);
678 }
679
680 static void si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
681 {
682 unsigned mask = samplers->enabled_mask;
683
684 while (mask) {
685 int i = u_bit_scan(&mask);
686 struct pipe_resource *res = samplers->views[i]->texture;
687
688 if (res && res->target != PIPE_BUFFER) {
689 struct si_texture *tex = (struct si_texture *)res;
690
691 if (color_needs_decompression(tex)) {
692 samplers->needs_color_decompress_mask |= 1u << i;
693 } else {
694 samplers->needs_color_decompress_mask &= ~(1u << i);
695 }
696 }
697 }
698 }
699
700 /* IMAGE VIEWS */
701
702 static void si_release_image_views(struct si_images *images)
703 {
704 unsigned i;
705
706 for (i = 0; i < SI_NUM_IMAGES; ++i) {
707 struct pipe_image_view *view = &images->views[i];
708
709 pipe_resource_reference(&view->resource, NULL);
710 }
711 }
712
713 static void si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
714 {
715 uint mask = images->enabled_mask;
716
717 /* Add buffers to the CS. */
718 while (mask) {
719 int i = u_bit_scan(&mask);
720 struct pipe_image_view *view = &images->views[i];
721
722 assert(view->resource);
723
724 si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false);
725 }
726 }
727
728 static bool si_image_views_check_encrypted(struct si_context *sctx, struct si_images *images,
729 unsigned images_declared)
730 {
731 uint mask = images->enabled_mask & images_declared;
732
733 while (mask) {
734 int i = u_bit_scan(&mask);
735 struct pipe_image_view *view = &images->views[i];
736
737 assert(view->resource);
738
739 struct si_texture *tex = (struct si_texture *)view->resource;
740 if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
741 return true;
742 }
743 return false;
744 }
745
746 static void si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
747 {
748 struct si_images *images = &ctx->images[shader];
749
750 if (images->enabled_mask & (1u << slot)) {
751 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
752 unsigned desc_slot = si_get_image_slot(slot);
753
754 pipe_resource_reference(&images->views[slot].resource, NULL);
755 images->needs_color_decompress_mask &= ~(1 << slot);
756
757 memcpy(descs->list + desc_slot * 8, null_image_descriptor, 8 * 4);
758 images->enabled_mask &= ~(1u << slot);
759 images->display_dcc_store_mask &= ~(1u << slot);
760 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
761 if (shader != PIPE_SHADER_COMPUTE)
762 si_mark_atom_dirty(ctx, &ctx->atoms.s.gfx_shader_pointers);
763 }
764 }
765
766 static void si_mark_image_range_valid(const struct pipe_image_view *view)
767 {
768 struct si_resource *res = si_resource(view->resource);
769
770 if (res->b.b.target != PIPE_BUFFER)
771 return;
772
773 util_range_add(&res->b.b, &res->valid_buffer_range, view->u.buf.offset,
774 view->u.buf.offset + view->u.buf.size);
775 }
776
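/* Fill the 8-dword image descriptor (and optional FMASK descriptor) for a
 * shader image view. Buffer resources get a buffer descriptor; textures may
 * get DCC disabled or decompressed first if the view can't use it.
 */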
777 static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view,
778 bool skip_decompress, uint32_t *desc, uint32_t *fmask_desc)
779 {
780 struct si_screen *screen = ctx->screen;
781 struct si_resource *res;
782
783 res = si_resource(view->resource);
784
785 if (res->b.b.target == PIPE_BUFFER) {
786 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
787 si_mark_image_range_valid(view);
788 uint32_t elements = si_clamp_texture_texel_count(screen->max_texel_buffer_elements,
789 view->format, view->u.buf.size);
790
791 si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, elements,
792 desc);
793 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
794 } else {
795 static const unsigned char swizzle[4] = {0, 1, 2, 3};
796 struct si_texture *tex = (struct si_texture *)res;
797 unsigned level = view->u.tex.level;
798 bool uses_dcc = vi_dcc_enabled(tex, level);
799 unsigned access = view->access;
800
801 if (uses_dcc && screen->always_allow_dcc_stores)
802 access |= SI_IMAGE_ACCESS_ALLOW_DCC_STORE;
803
804 assert(!tex->is_depth);
805 assert(fmask_desc || tex->surface.fmask_offset == 0);
806
807 if (uses_dcc && !skip_decompress &&
808 !(access & SI_IMAGE_ACCESS_DCC_OFF) &&
809 ((!(access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE) && (access & PIPE_IMAGE_ACCESS_WRITE)) ||
810 !vi_dcc_formats_compatible(screen, res->b.b.format, view->format))) {
811 /* If DCC can't be disabled, at least decompress it.
812 * The decompression is relatively cheap if the surface
813 * has been decompressed already.
814 */
815 if (!si_texture_disable_dcc(ctx, tex))
816 si_decompress_dcc(ctx, tex);
817 }
818
819 unsigned width = res->b.b.width0;
820 unsigned height = res->b.b.height0;
821 unsigned depth = res->b.b.depth0;
822 unsigned hw_level = level;
823
824 if (ctx->gfx_level <= GFX8) {
825 /* Always force the base level to the selected level.
826 *
827 * This is required for 3D textures, where otherwise
828 * selecting a single slice for non-layered bindings
829 * fails. It doesn't hurt the other targets.
830 */
831 width = u_minify(width, level);
832 height = u_minify(height, level);
833 depth = u_minify(depth, level);
834 hw_level = 0;
835 }
836
837 if (access & SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT) {
838 if (ctx->gfx_level >= GFX9) {
839 /* Since the aligned width and height are derived from the width and height
840 * by the hw, set them directly as the width and height, so that UINT formats
841 * get exactly the same layout as BCn formats.
842 */
843 width = tex->surface.u.gfx9.base_mip_width;
844 height = tex->surface.u.gfx9.base_mip_height;
845 } else {
846 width = util_format_get_nblocksx(tex->buffer.b.b.format, width);
847 height = util_format_get_nblocksy(tex->buffer.b.b.format, height);
848 }
849 }
850
851 screen->make_texture_descriptor(
852 screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level,
853 view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, false,
854 desc, fmask_desc);
855 si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level,
856 util_format_get_blockwidth(view->format),
857 false, access, desc);
858 }
859 }
860
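/* Bind one shader image slot: update the descriptor, the stored view, the
 * decompression and DCC-store masks, and add the resource to the gfx CS.
 */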
861 static void si_set_shader_image(struct si_context *ctx, unsigned shader, unsigned slot,
862 const struct pipe_image_view *view, bool skip_decompress)
863 {
864 struct si_images *images = &ctx->images[shader];
865 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
866 struct si_resource *res;
867
868 if (!view || !view->resource) {
869 si_disable_shader_image(ctx, shader, slot);
870 return;
871 }
872
873 res = si_resource(view->resource);
874
875 si_set_shader_image_desc(ctx, view, skip_decompress, descs->list + si_get_image_slot(slot) * 8,
876 descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8);
877
878 if (&images->views[slot] != view)
879 util_copy_image_view(&images->views[slot], view);
880
881 if (res->b.b.target == PIPE_BUFFER) {
882 images->needs_color_decompress_mask &= ~(1 << slot);
883 images->display_dcc_store_mask &= ~(1u << slot);
884 res->bind_history |= SI_BIND_IMAGE_BUFFER(shader);
885 } else {
886 struct si_texture *tex = (struct si_texture *)res;
887 unsigned level = view->u.tex.level;
888
889 if (color_needs_decompression(tex)) {
890 images->needs_color_decompress_mask |= 1 << slot;
891 } else {
892 images->needs_color_decompress_mask &= ~(1 << slot);
893 }
894
895 if (tex->surface.display_dcc_offset && view->access & PIPE_IMAGE_ACCESS_WRITE) {
896 images->display_dcc_store_mask |= 1u << slot;
897
898 /* Set displayable_dcc_dirty for non-compute stages conservatively (before draw calls). */
899 if (shader != PIPE_SHADER_COMPUTE)
900 tex->displayable_dcc_dirty = true;
901 } else {
902 images->display_dcc_store_mask &= ~(1u << slot);
903 }
904
905 if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound))
906 ctx->need_check_render_feedback = true;
907 }
908
909 images->enabled_mask |= 1u << slot;
910 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
911 if (shader != PIPE_SHADER_COMPUTE)
912 si_mark_atom_dirty(ctx, &ctx->atoms.s.gfx_shader_pointers);
913
914 /* Since this can flush, it must be done after enabled_mask is updated. */
915 si_sampler_view_add_buffer(ctx, &res->b.b,
916 (view->access & PIPE_IMAGE_ACCESS_WRITE) ?
917 RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false);
918 }
919
920 static void si_set_shader_images(struct pipe_context *pipe, enum pipe_shader_type shader,
921 unsigned start_slot, unsigned count,
922 unsigned unbind_num_trailing_slots,
923 const struct pipe_image_view *views)
924 {
925 struct si_context *ctx = (struct si_context *)pipe;
926 unsigned i, slot;
927
928 assert(shader < SI_NUM_SHADERS);
929
930 if (!count && !unbind_num_trailing_slots)
931 return;
932
933 assert(start_slot + count + unbind_num_trailing_slots <= SI_NUM_IMAGES);
934
935 if (views) {
936 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
937 si_set_shader_image(ctx, shader, slot, &views[i], false);
938 } else {
939 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
940 si_set_shader_image(ctx, shader, slot, NULL, false);
941 }
942
943 for (i = 0; i < unbind_num_trailing_slots; ++i, ++slot)
944 si_set_shader_image(ctx, shader, slot, NULL, false);
945
946 if (shader == PIPE_SHADER_COMPUTE &&
947 ctx->cs_shader_state.program &&
948 start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs)
949 ctx->compute_image_sgprs_dirty = true;
950
951 si_update_shader_needs_decompress_mask(ctx, shader);
952 }
953
954 static void si_images_update_needs_color_decompress_mask(struct si_images *images)
955 {
956 unsigned mask = images->enabled_mask;
957
958 while (mask) {
959 int i = u_bit_scan(&mask);
960 struct pipe_resource *res = images->views[i].resource;
961
962 if (res && res->target != PIPE_BUFFER) {
963 struct si_texture *tex = (struct si_texture *)res;
964
965 if (color_needs_decompression(tex)) {
966 images->needs_color_decompress_mask |= 1 << i;
967 } else {
968 images->needs_color_decompress_mask &= ~(1 << i);
969 }
970 }
971 }
972 }
973
974 void si_force_disable_ps_colorbuf0_slot(struct si_context *sctx)
975 {
976 if (sctx->ps_uses_fbfetch) {
977 sctx->ps_uses_fbfetch = false;
978 si_update_ps_iter_samples(sctx);
979 }
980 }
981
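/* Bind or unbind color buffer 0 as an internal image for FBFETCH. DCC and
 * CMASK are eliminated first because the surface is read as an image while
 * also being bound as a color buffer.
 */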
982 void si_update_ps_colorbuf0_slot(struct si_context *sctx)
983 {
984 struct si_buffer_resources *buffers = &sctx->internal_bindings;
985 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
986 unsigned slot = SI_PS_IMAGE_COLORBUF0;
987 struct pipe_surface *surf = NULL;
988 struct si_texture *tex = NULL;
989
990 /* FBFETCH is always disabled for u_blitter, and will be re-enabled after u_blitter is done. */
991 if (sctx->blitter_running || sctx->suppress_update_ps_colorbuf0_slot) {
992 assert(!sctx->ps_uses_fbfetch);
993 return;
994 }
995
996 /* Get the color buffer if FBFETCH should be enabled. */
997 if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.fs.uses_fbfetch_output &&
998 sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0]) {
999 surf = sctx->framebuffer.state.cbufs[0];
1000 if (surf) {
1001 tex = (struct si_texture *)surf->texture;
1002 assert(tex && !tex->is_depth);
1003 }
1004 }
1005
1006 /* Return if FBFETCH transitions from disabled to disabled. */
1007 if (!sctx->ps_uses_fbfetch && !surf)
1008 return;
1009
1010 if (surf) {
1011 bool disable_dcc = tex->surface.meta_offset != 0;
1012 bool disable_cmask = tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer;
1013
1014 /* Disable DCC and eliminate fast clear because the texture is used as both a sampler
1015 * and color buffer.
1016 */
1017 if (disable_dcc || disable_cmask) {
1018 /* Disable fbfetch only for decompression. */
1019 si_force_disable_ps_colorbuf0_slot(sctx);
1020 sctx->suppress_update_ps_colorbuf0_slot = true;
1021
1022 si_texture_disable_dcc(sctx, tex);
1023
1024 if (disable_cmask) {
1025 assert(tex->cmask_buffer != &tex->buffer);
1026 si_eliminate_fast_color_clear(sctx, tex, NULL);
1027 si_texture_discard_cmask(sctx->screen, tex);
1028 }
1029
1030 sctx->suppress_update_ps_colorbuf0_slot = false;
1031 }
1032
1033 /* Bind color buffer 0 as a shader image. */
1034 struct pipe_image_view view = {0};
1035 view.resource = surf->texture;
1036 view.format = surf->format;
1037 view.access = PIPE_IMAGE_ACCESS_READ;
1038 view.u.tex.first_layer = surf->u.tex.first_layer;
1039 view.u.tex.last_layer = surf->u.tex.last_layer;
1040 view.u.tex.level = surf->u.tex.level;
1041
1042 /* Set the descriptor. */
1043 uint32_t *desc = descs->list + slot * 4;
1044 memset(desc, 0, 16 * 4);
1045 si_set_shader_image_desc(sctx, &view, true, desc, desc + 8);
1046
1047 pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b);
1048 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer,
1049 RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_IMAGE);
1050 buffers->enabled_mask |= 1llu << slot;
1051 } else {
1052 /* Clear the descriptor. */
1053 memset(descs->list + slot * 4, 0, 8 * 4);
1054 pipe_resource_reference(&buffers->buffers[slot], NULL);
1055 buffers->enabled_mask &= ~(1llu << slot);
1056 }
1057
1058 sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
1059 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1060 sctx->ps_uses_fbfetch = surf != NULL;
1061 si_update_ps_iter_samples(sctx);
1062 si_ps_key_update_framebuffer(sctx);
1063 }
1064
1065 /* SAMPLER STATES */
1066
1067 static void si_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type shader,
1068 unsigned start, unsigned count, void **states)
1069 {
1070 struct si_context *sctx = (struct si_context *)ctx;
1071 struct si_samplers *samplers = &sctx->samplers[shader];
1072 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
1073 struct si_sampler_state **sstates = (struct si_sampler_state **)states;
1074 int i;
1075
1076 if (!count || shader >= SI_NUM_SHADERS || !sstates)
1077 return;
1078
1079 for (i = 0; i < count; i++) {
1080 unsigned slot = start + i;
1081 unsigned desc_slot = si_get_sampler_slot(slot);
1082
1083 if (!sstates[i] || sstates[i] == samplers->sampler_states[slot])
1084 continue;
1085
1086 #ifndef NDEBUG
1087 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
1088 #endif
1089 samplers->sampler_states[slot] = sstates[i];
1090
1091 /* If FMASK is bound, don't overwrite it.
1092 * The sampler state will be set after FMASK is unbound.
1093 */
1094 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[slot];
1095
1096 struct si_texture *tex = NULL;
1097
1098 if (sview && sview->base.texture && sview->base.texture->target != PIPE_BUFFER)
1099 tex = (struct si_texture *)sview->base.texture;
1100
1101 if (tex && tex->surface.fmask_size)
1102 continue;
1103
1104 si_set_sampler_state_desc(sstates[i], sview, tex, desc->list + desc_slot * 16 + 12);
1105
1106 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
1107 if (shader != PIPE_SHADER_COMPUTE)
1108 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1109 }
1110 }
1111
1112 /* BUFFER RESOURCES */
1113
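/* Allocate the buffer binding arrays and pre-initialize the immutable dword 3
 * (dst_sel and format) of every buffer descriptor, so binds only write
 * dwords 0-2.
 */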
1114 static void si_init_buffer_resources(struct si_context *sctx,
1115 struct si_buffer_resources *buffers,
1116 struct si_descriptors *descs, unsigned num_buffers,
1117 short shader_userdata_rel_index,
1118 unsigned priority,
1119 unsigned priority_constbuf)
1120 {
1121 buffers->priority = priority;
1122 buffers->priority_constbuf = priority_constbuf;
1123 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource *));
1124 buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0]));
1125
1126 si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers);
1127
1128 /* Initialize buffer descriptors, so that we don't have to do it at bind time. */
1129 for (unsigned i = 0; i < num_buffers; i++) {
1130 uint32_t *desc = descs->list + i * 4;
1131
1132 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1133 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
1134
1135 if (sctx->gfx_level >= GFX11) {
1136 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
1137 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
1138 } else if (sctx->gfx_level >= GFX10) {
1139 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
1140 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
1141 } else {
1142 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1143 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1144 }
1145 }
1146 }
1147
1148 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
1149 struct si_descriptors *descs)
1150 {
1151 int i;
1152
1153 for (i = 0; i < descs->num_elements; i++) {
1154 pipe_resource_reference(&buffers->buffers[i], NULL);
1155 }
1156
1157 FREE(buffers->buffers);
1158 FREE(buffers->offsets);
1159 }
1160
1161 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
1162 struct si_buffer_resources *buffers)
1163 {
1164 uint64_t mask = buffers->enabled_mask;
1165
1166 /* Add buffers to the CS. */
1167 while (mask) {
1168 int i = u_bit_scan64(&mask);
1169
1170 radeon_add_to_buffer_list(
1171 sctx, &sctx->gfx_cs, si_resource(buffers->buffers[i]),
1172 (buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) |
1173 (i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf));
1174 }
1175 }
1176
1177 static bool si_buffer_resources_check_encrypted(struct si_context *sctx,
1178 struct si_buffer_resources *buffers)
1179 {
1180 uint64_t mask = buffers->enabled_mask;
1181
1182 while (mask) {
1183 int i = u_bit_scan64(&mask);
1184
1185 if (si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED)
1186 return true;
1187 }
1188
1189 return false;
1190 }
1191
1192 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
1193 struct si_descriptors *descs, unsigned idx,
1194 struct pipe_resource **buf, unsigned *offset,
1195 unsigned *size)
1196 {
1197 pipe_resource_reference(buf, buffers->buffers[idx]);
1198 if (*buf) {
1199 struct si_resource *res = si_resource(*buf);
1200 const uint32_t *desc = descs->list + idx * 4;
1201 uint64_t va;
1202
1203 *size = desc[2];
1204
1205 assert(G_008F04_STRIDE(desc[1]) == 0);
1206 va = si_desc_extract_buffer_address(desc);
1207
1208 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
1209 *offset = va - res->gpu_address;
1210 }
1211 }
1212
1213 /* CONSTANT BUFFERS */
1214
1215 static struct si_descriptors *si_const_and_shader_buffer_descriptors(struct si_context *sctx,
1216 unsigned shader)
1217 {
1218 return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
1219 }
1220
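/* Copy user constants into GPU-visible memory through the const uploader and
 * return the resulting buffer and offset.
 */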
1221 static void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf,
1222 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1223 {
1224 void *tmp;
1225
1226 u_upload_alloc(sctx->b.const_uploader, 0, size, si_optimal_tcc_alignment(sctx, size),
1227 const_offset, (struct pipe_resource **)buf, &tmp);
1228 if (*buf)
1229 util_memcpy_cpu_to_le32(tmp, ptr, size);
1230 }
1231
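/* Write a constant buffer descriptor into the given slot, uploading user
 * buffers first. A NULL input clears the slot (GFX7 substitutes a dummy
 * buffer, see below).
 */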
1232 static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
1233 unsigned descriptors_idx, uint slot, bool take_ownership,
1234 const struct pipe_constant_buffer *input)
1235 {
1236 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1237 assert(slot < descs->num_elements);
1238 pipe_resource_reference(&buffers->buffers[slot], NULL);
1239
1240 /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1241 * with a NULL buffer). We need to use a dummy buffer instead. */
1242 if (sctx->gfx_level == GFX7 && (!input || (!input->buffer && !input->user_buffer)))
1243 input = &sctx->null_const_buf;
1244
1245 if (input && (input->buffer || input->user_buffer)) {
1246 struct pipe_resource *buffer = NULL;
1247 uint64_t va;
1248 unsigned buffer_offset;
1249
1250 /* Upload the user buffer if needed. */
1251 if (input->user_buffer) {
1252 si_upload_const_buffer(sctx, (struct si_resource **)&buffer, input->user_buffer,
1253 input->buffer_size, &buffer_offset);
1254 if (!buffer) {
1255 /* Just unbind on failure. */
1256 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, false, NULL);
1257 return;
1258 }
1259 } else {
1260 if (take_ownership) {
1261 buffer = input->buffer;
1262 } else {
1263 pipe_resource_reference(&buffer, input->buffer);
1264 }
1265 buffer_offset = input->buffer_offset;
1266 }
1267
1268 va = si_resource(buffer)->gpu_address + buffer_offset;
1269
1270 /* Set the descriptor. */
1271 uint32_t *desc = descs->list + slot * 4;
1272 desc[0] = va;
1273 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0);
1274 desc[2] = input->buffer_size;
1275
1276 buffers->buffers[slot] = buffer;
1277 buffers->offsets[slot] = buffer_offset;
1278 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer),
1279 RADEON_USAGE_READ | buffers->priority_constbuf);
1280 buffers->enabled_mask |= 1llu << slot;
1281 } else {
1282 /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */
1283 memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 3);
1284 buffers->enabled_mask &= ~(1llu << slot);
1285 }
1286
1287 sctx->descriptors_dirty |= 1u << descriptors_idx;
1288 if (descriptors_idx < SI_DESCS_FIRST_COMPUTE)
1289 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1290 }
1291
1292 void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader,
1293 bool *inline_uniforms, uint32_t **inlined_values)
1294 {
1295 if (shader == PIPE_SHADER_FRAGMENT) {
1296 *inline_uniforms = key->ps.opt.inline_uniforms;
1297 *inlined_values = key->ps.opt.inlined_uniform_values;
1298 } else {
1299 *inline_uniforms = key->ge.opt.inline_uniforms;
1300 *inlined_values = key->ge.opt.inlined_uniform_values;
1301 }
1302 }
1303
1304 void si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader)
1305 {
1306 if (shader == PIPE_SHADER_COMPUTE)
1307 return;
1308
1309 bool inline_uniforms;
1310 uint32_t *inlined_values;
1311 si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values);
1312
1313 if (inline_uniforms) {
1314 if (shader == PIPE_SHADER_FRAGMENT)
1315 sctx->shaders[shader].key.ps.opt.inline_uniforms = false;
1316 else
1317 sctx->shaders[shader].key.ge.opt.inline_uniforms = false;
1318
1319 memset(inlined_values, 0, MAX_INLINABLE_UNIFORMS * 4);
1320 sctx->do_update_shaders = true;
1321 }
1322 }
1323
1324 static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader,
1325 uint slot, bool take_ownership,
1326 const struct pipe_constant_buffer *input)
1327 {
1328 struct si_context *sctx = (struct si_context *)ctx;
1329
1330 if (shader >= SI_NUM_SHADERS)
1331 return;
1332
1333 if (input) {
1334 if (input->buffer) {
1335 if (slot == 0 &&
1336 !(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) {
1337 assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader");
1338 return;
1339 }
1340 si_resource(input->buffer)->bind_history |= SI_BIND_CONSTANT_BUFFER(shader);
1341 }
1342
1343 if (slot == 0)
1344 si_invalidate_inlinable_uniforms(sctx, shader);
1345 }
1346
1347 slot = si_get_constbuf_slot(slot);
1348 si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
1349 si_const_and_shader_buffer_descriptors_idx(shader), slot,
1350 take_ownership, input);
1351 }
1352
1353 static void si_set_inlinable_constants(struct pipe_context *ctx,
1354 enum pipe_shader_type shader,
1355 uint num_values, uint32_t *values)
1356 {
1357 struct si_context *sctx = (struct si_context *)ctx;
1358
1359 if (shader == PIPE_SHADER_COMPUTE)
1360 return;
1361
1362 bool inline_uniforms;
1363 uint32_t *inlined_values;
1364 si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values);
1365
1366 if (!inline_uniforms) {
1367 /* It's the first time we set the constants. Always update shaders. */
1368 if (shader == PIPE_SHADER_FRAGMENT)
1369 sctx->shaders[shader].key.ps.opt.inline_uniforms = true;
1370 else
1371 sctx->shaders[shader].key.ge.opt.inline_uniforms = true;
1372
1373 memcpy(inlined_values, values, num_values * 4);
1374 sctx->do_update_shaders = true;
1375 return;
1376 }
1377
1378 /* We have already set inlinable constants for this shader. Update the shader only if
1379 * the constants are being changed so as not to update shaders needlessly.
1380 */
1381 if (memcmp(inlined_values, values, num_values * 4)) {
1382 memcpy(inlined_values, values, num_values * 4);
1383 sctx->do_update_shaders = true;
1384 }
1385 }
1386
1387 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot,
1388 struct pipe_constant_buffer *cbuf)
1389 {
1390 cbuf->user_buffer = NULL;
1391 si_get_buffer_from_descriptors(
1392 &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors(sctx, shader),
1393 si_get_constbuf_slot(slot), &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1394 }
1395
1396 /* SHADER BUFFERS */
1397
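/* Write a shader buffer descriptor into the given slot and update the
 * enabled/writable masks; a NULL sbuffer clears the slot.
 */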
1398 static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
1399 unsigned descriptors_idx, uint slot,
1400 const struct pipe_shader_buffer *sbuffer, bool writable,
1401 unsigned priority)
1402 {
1403 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1404 uint32_t *desc = descs->list + slot * 4;
1405
1406 if (!sbuffer || !sbuffer->buffer) {
1407 pipe_resource_reference(&buffers->buffers[slot], NULL);
1408 /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */
1409 memset(desc, 0, sizeof(uint32_t) * 3);
1410 buffers->enabled_mask &= ~(1llu << slot);
1411 buffers->writable_mask &= ~(1llu << slot);
1412 sctx->descriptors_dirty |= 1u << descriptors_idx;
1413 if (descriptors_idx < SI_DESCS_FIRST_COMPUTE)
1414 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1415 return;
1416 }
1417
1418 struct si_resource *buf = si_resource(sbuffer->buffer);
1419 uint64_t va = buf->gpu_address + sbuffer->buffer_offset;
1420
1421 desc[0] = va;
1422 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0);
1423 desc[2] = sbuffer->buffer_size;
1424
1425 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1426 buffers->offsets[slot] = sbuffer->buffer_offset;
1427 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buf,
1428 (writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority);
1429 if (writable)
1430 buffers->writable_mask |= 1llu << slot;
1431 else
1432 buffers->writable_mask &= ~(1llu << slot);
1433
1434 buffers->enabled_mask |= 1llu << slot;
1435 sctx->descriptors_dirty |= 1lu << descriptors_idx;
1436 if (descriptors_idx < SI_DESCS_FIRST_COMPUTE)
1437 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1438
1439 util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
1440 sbuffer->buffer_offset + sbuffer->buffer_size);
1441 }
1442
1443 void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader,
1444 unsigned start_slot, unsigned count,
1445 const struct pipe_shader_buffer *sbuffers,
1446 unsigned writable_bitmask, bool internal_blit)
1447 {
1448 struct si_context *sctx = (struct si_context *)ctx;
1449 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1450 unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader);
1451 unsigned i;
1452
1453 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1454
1455 if (shader == PIPE_SHADER_COMPUTE &&
1456 sctx->cs_shader_state.program &&
1457 start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs)
1458 sctx->compute_shaderbuf_sgprs_dirty = true;
1459
1460 for (i = 0; i < count; ++i) {
1461 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1462 unsigned slot = si_get_shaderbuf_slot(start_slot + i);
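      /* Constant buffers and shader buffers share one descriptor list per stage;
       * si_get_shaderbuf_slot() maps the API slot into that combined array.
       */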
1463
1464       /* Don't track bind history for internal blits, such as clear_buffer and copy_buffer,
1465 * to prevent unnecessary synchronization before compute blits later.
1466 */
1467 if (!internal_blit && sbuffer && sbuffer->buffer)
1468 si_resource(sbuffer->buffer)->bind_history |= SI_BIND_SHADER_BUFFER(shader);
1469
1470 si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer,
1471 !!(writable_bitmask & (1u << i)), buffers->priority);
1472 }
1473 }
1474
1475 static void si_pipe_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader,
1476 unsigned start_slot, unsigned count,
1477 const struct pipe_shader_buffer *sbuffers,
1478 unsigned writable_bitmask)
1479 {
1480 si_set_shader_buffers(ctx, shader, start_slot, count, sbuffers, writable_bitmask, false);
1481 }
1482
1483 void si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot,
1484 uint count, struct pipe_shader_buffer *sbuf)
1485 {
1486 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1487 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1488
1489 for (unsigned i = 0; i < count; ++i) {
1490 si_get_buffer_from_descriptors(buffers, descs, si_get_shaderbuf_slot(start_slot + i),
1491 &sbuf[i].buffer, &sbuf[i].buffer_offset, &sbuf[i].buffer_size);
1492 }
1493 }
1494
1495 /* RING BUFFERS */
1496
1497 void si_set_internal_const_buffer(struct si_context *sctx, uint slot,
1498 const struct pipe_constant_buffer *input)
1499 {
1500 si_set_constant_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, false, input);
1501 }
1502
1503 void si_set_internal_shader_buffer(struct si_context *sctx, uint slot,
1504 const struct pipe_shader_buffer *sbuffer)
1505 {
1506 si_set_shader_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, sbuffer, true,
1507 RADEON_PRIO_SHADER_RW_BUFFER);
1508 }
1509
1510 void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer,
1511 unsigned stride, unsigned num_records, bool add_tid, bool swizzle,
1512 unsigned element_size, unsigned index_stride, uint64_t offset)
1513 {
1514 struct si_buffer_resources *buffers = &sctx->internal_bindings;
1515 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
1516
1517 /* The stride field in the resource descriptor has 14 bits */
1518 assert(stride < (1 << 14));
1519
1520 assert(slot < descs->num_elements);
1521 pipe_resource_reference(&buffers->buffers[slot], NULL);
1522
1523 if (buffer) {
1524 uint64_t va;
1525
1526 va = si_resource(buffer)->gpu_address + offset;
1527
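      /* Encode the element size in bytes into the hw field:
       * 0 (unspecified) or 2 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
       */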
1528 switch (element_size) {
1529 default:
1530 unreachable("Unsupported ring buffer element size");
1531 case 0:
1532 case 2:
1533 element_size = 0;
1534 break;
1535 case 4:
1536 element_size = 1;
1537 break;
1538 case 8:
1539 element_size = 2;
1540 break;
1541 case 16:
1542 element_size = 3;
1543 break;
1544 }
1545
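      /* Encode the index stride in bytes into the hw field:
       * 0 (unspecified) or 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3.
       */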
1546 switch (index_stride) {
1547 default:
1548 unreachable("Unsupported ring buffer index stride");
1549 case 0:
1550 case 8:
1551 index_stride = 0;
1552 break;
1553 case 16:
1554 index_stride = 1;
1555 break;
1556 case 32:
1557 index_stride = 2;
1558 break;
1559 case 64:
1560 index_stride = 3;
1561 break;
1562 }
1563
1564 if (sctx->gfx_level >= GFX8 && stride)
1565 num_records *= stride;
1566
1567 /* Set the descriptor. */
1568 uint32_t *desc = descs->list + slot * 4;
1569 desc[0] = va;
1570 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
1571 desc[2] = num_records;
1572 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1573 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1574 S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid);
1575
1576 if (sctx->gfx_level >= GFX11) {
1577 assert(!swizzle || element_size == 1 || element_size == 3); /* 4 or 16 bytes */
1578 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(swizzle ? element_size : 0);
1579 } else if (sctx->gfx_level >= GFX9) {
1580 assert(!swizzle || element_size == 1); /* only 4 bytes on GFX9 */
1581 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle);
1582 } else {
1583 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle);
1584 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
1585 }
1586
1587 if (sctx->gfx_level >= GFX11) {
1588 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
1589 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
1590 } else if (sctx->gfx_level >= GFX10) {
1591 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
1592 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
1593 } else {
1594 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1595 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1596 }
1597
1598 pipe_resource_reference(&buffers->buffers[slot], buffer);
1599 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer),
1600 RADEON_USAGE_READWRITE | buffers->priority);
1601 buffers->enabled_mask |= 1llu << slot;
1602 } else {
1603 /* Clear the descriptor. */
1604 memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
1605 buffers->enabled_mask &= ~(1llu << slot);
1606 }
1607
1608 sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
1609 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1610 }
1611
1612 /* INTERNAL CONST BUFFERS */
1613
1614 static void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state)
1615 {
1616 struct si_context *sctx = (struct si_context *)ctx;
1617 struct pipe_constant_buffer cb = {};
1618 unsigned stipple[32];
1619 int i;
1620
1621 for (i = 0; i < 32; i++)
1622 stipple[i] = util_bitreverse(state->stipple[i]);
1623
1624 cb.user_buffer = stipple;
1625 cb.buffer_size = sizeof(stipple);
1626
1627 si_set_internal_const_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1628 }
1629
1630 /* TEXTURE METADATA ENABLE/DISABLE */
1631
1632 static void si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
1633 {
1634 util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
1635 util_dynarray_clear(&sctx->resident_img_needs_color_decompress);
1636
1637 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
1638 struct pipe_resource *res = (*tex_handle)->view->texture;
1639 struct si_texture *tex;
1640
1641 if (!res || res->target == PIPE_BUFFER)
1642 continue;
1643
1644 tex = (struct si_texture *)res;
1645 if (!color_needs_decompression(tex))
1646 continue;
1647
1648 util_dynarray_append(&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *,
1649 *tex_handle);
1650 }
1651
1652 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
1653 struct pipe_image_view *view = &(*img_handle)->view;
1654 struct pipe_resource *res = view->resource;
1655 struct si_texture *tex;
1656
1657 if (!res || res->target == PIPE_BUFFER)
1658 continue;
1659
1660 tex = (struct si_texture *)res;
1661 if (!color_needs_decompression(tex))
1662 continue;
1663
1664 util_dynarray_append(&sctx->resident_img_needs_color_decompress, struct si_image_handle *,
1665 *img_handle);
1666 }
1667 }
1668
1669 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1670 * while the texture is bound, possibly by a different context. In that case,
1671 * call this function to update needs_*_decompress_masks.
1672 */
1673 void si_update_needs_color_decompress_masks(struct si_context *sctx)
1674 {
1675 assert(sctx->gfx_level < GFX11);
1676
1677 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1678 si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
1679 si_images_update_needs_color_decompress_mask(&sctx->images[i]);
1680 si_update_shader_needs_decompress_mask(sctx, i);
1681 }
1682
1683 si_resident_handles_update_needs_color_decompress(sctx);
1684 }
1685
1686 /* BUFFER DISCARD/INVALIDATION */
1687
1688 /* Reset descriptors of buffer resources after \p buf has been invalidated.
1689 * If buf == NULL, reset all descriptors.
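 *
 * Returns true if at least one descriptor was updated.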
1690 */
1691 static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
1692 unsigned descriptors_idx, uint64_t slot_mask,
1693 struct pipe_resource *buf, unsigned priority)
1694 {
1695 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1696 bool noop = true;
1697 uint64_t mask = buffers->enabled_mask & slot_mask;
1698
1699 while (mask) {
1700 unsigned i = u_bit_scan64(&mask);
1701 struct pipe_resource *buffer = buffers->buffers[i];
1702
1703 if (buffer && (!buf || buffer == buf)) {
1704 si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4);
1705 sctx->descriptors_dirty |= 1u << descriptors_idx;
1706 if (descriptors_idx < SI_DESCS_FIRST_COMPUTE)
1707 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1708
1709 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer),
1710 (buffers->writable_mask & (1llu << i) ?
1711 RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority);
1712 noop = false;
1713 }
1714 }
1715 return !noop;
1716 }
1717
1718 static void si_mark_bindless_descriptors_dirty(struct si_context *sctx)
1719 {
1720 sctx->bindless_descriptors_dirty = true;
1721 /* gfx_shader_pointers uploads bindless descriptors. */
1722 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1723    /* gfx_shader_pointers can set cache-flush flags, so dirty the cache_flush atom too. */
1724 si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
1725 }
1726
1727 /* Update all buffer bindings where the buffer is bound, including
1728 * all resource descriptors. This is invalidate_buffer without
1729 * the invalidation.
1730 *
1731 * If buf == NULL, update all buffer bindings.
1732 */
1733 void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
1734 {
1735 struct si_resource *buffer = si_resource(buf);
1736 unsigned i;
1737 unsigned num_elems = sctx->num_vertex_elements;
1738
1739 /* We changed the buffer, now we need to bind it where the old one
1740 * was bound. This consists of 2 things:
1741 * 1) Updating the resource descriptor and dirtying it.
1742 * 2) Adding a relocation to the CS, so that it's usable.
1743 */
1744
1745 /* Vertex buffers. */
1746 if (!buffer) {
1747 sctx->vertex_buffers_dirty = num_elems > 0;
1748
1749 /* We don't know which buffer was invalidated, so we have to add all of them. */
1750 unsigned num_vb = sctx->num_vertex_buffers;
1751 for (unsigned i = 0; i < num_vb; i++) {
1752 struct si_resource *buf = si_resource(sctx->vertex_buffer[i].buffer.resource);
1753 if (buf) {
1754 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buf,
1755 RADEON_USAGE_READ |
1756 RADEON_PRIO_VERTEX_BUFFER);
1757 }
1758 }
1759 } else if (buffer->bind_history & SI_BIND_VERTEX_BUFFER) {
1760 unsigned num_vb = sctx->num_vertex_buffers;
1761
1762 for (i = 0; i < num_elems; i++) {
1763 int vb = sctx->vertex_elements->vertex_buffer_index[i];
1764
1765 if (vb >= num_vb)
1766 continue;
1767 if (!sctx->vertex_buffer[vb].buffer.resource)
1768 continue;
1769
1770 if (sctx->vertex_buffer[vb].buffer.resource == buf) {
1771 sctx->vertex_buffers_dirty = num_elems > 0;
1772 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buffer,
1773 RADEON_USAGE_READ |
1774 RADEON_PRIO_VERTEX_BUFFER);
1775 break;
1776 }
1777 }
1778 }
1779
1780 /* Streamout buffers. (other internal buffers can't be invalidated) */
1781 if (!buffer || buffer->bind_history & SI_BIND_STREAMOUT_BUFFER) {
1782 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1783 struct si_buffer_resources *buffers = &sctx->internal_bindings;
1784 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
1785 struct pipe_resource *buffer = buffers->buffers[i];
1786
1787 if (!buffer || (buf && buffer != buf))
1788 continue;
1789
1790 si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4);
1791 sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
1792 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1793
1794 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_WRITE |
1795 RADEON_PRIO_SHADER_RW_BUFFER);
1796
1797 /* Update the streamout state. */
1798 if (sctx->streamout.begin_emitted)
1799 si_emit_streamout_end(sctx);
1800 sctx->streamout.append_bitmask = sctx->streamout.enabled_mask;
1801 si_streamout_buffers_dirty(sctx);
1802 }
1803 }
1804
1805 /* Constant and shader buffers. */
1806 if (!buffer || buffer->bind_history & SI_BIND_CONSTANT_BUFFER_ALL) {
1807 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_CONSTANT_BUFFER_ALL) >>
1808 SI_BIND_CONSTANT_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS);
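      /* bind_history packs one bit per shader stage for each bind-point type;
       * shifting the constant-buffer group down yields a per-shader-stage mask.
       */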
1809 u_foreach_bit(shader, mask) {
1810 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1811 si_const_and_shader_buffer_descriptors_idx(shader),
1812 u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
1813 buf, sctx->const_and_shader_buffers[shader].priority_constbuf);
1814 }
1815 }
1816
1817 if (!buffer || buffer->bind_history & SI_BIND_SHADER_BUFFER_ALL) {
1818 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_SHADER_BUFFER_ALL) >>
1819 SI_BIND_SHADER_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS);
1820 u_foreach_bit(shader, mask) {
1821 if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1822 si_const_and_shader_buffer_descriptors_idx(shader),
1823 u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
1824 sctx->const_and_shader_buffers[shader].priority) &&
1825 shader == PIPE_SHADER_COMPUTE) {
1826 sctx->compute_shaderbuf_sgprs_dirty = true;
1827 }
1828 }
1829 }
1830
1831 if (!buffer || buffer->bind_history & SI_BIND_SAMPLER_BUFFER_ALL) {
1832 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_SAMPLER_BUFFER_ALL) >>
1833 SI_BIND_SAMPLER_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS);
1834 /* Texture buffers - update bindings. */
1835 u_foreach_bit(shader, mask) {
1836 struct si_samplers *samplers = &sctx->samplers[shader];
1837 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
1838 unsigned mask = samplers->enabled_mask;
1839
1840 while (mask) {
1841 unsigned i = u_bit_scan(&mask);
1842 struct pipe_resource *buffer = samplers->views[i]->texture;
1843
1844 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
1845 unsigned desc_slot = si_get_sampler_slot(i);
1846
1847 si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset,
1848 descs->list + desc_slot * 16 + 4);
1849 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
1850 if (shader != PIPE_SHADER_COMPUTE)
1851 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1852
1853 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READ |
1854 RADEON_PRIO_SAMPLER_BUFFER);
1855 }
1856 }
1857 }
1858 }
1859
1860 /* Shader images */
1861 if (!buffer || buffer->bind_history & SI_BIND_IMAGE_BUFFER_ALL) {
1862       unsigned mask = buffer ? (buffer->bind_history & SI_BIND_IMAGE_BUFFER_ALL) >>
1863 SI_BIND_IMAGE_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS);
1864 u_foreach_bit(shader, mask) {
1865 struct si_images *images = &sctx->images[shader];
1866 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
1867 unsigned mask = images->enabled_mask;
1868
1869 while (mask) {
1870 unsigned i = u_bit_scan(&mask);
1871 struct pipe_resource *buffer = images->views[i].resource;
1872
1873 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
1874 unsigned desc_slot = si_get_image_slot(i);
1875
1876 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1877 si_mark_image_range_valid(&images->views[i]);
1878
1879 si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset,
1880 descs->list + desc_slot * 8 + 4);
1881 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
1882 if (shader != PIPE_SHADER_COMPUTE)
1883 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
1884
1885 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer),
1886 RADEON_USAGE_READWRITE |
1887 RADEON_PRIO_SAMPLER_BUFFER);
1888
1889 if (shader == PIPE_SHADER_COMPUTE)
1890 sctx->compute_image_sgprs_dirty = true;
1891 }
1892 }
1893 }
1894 }
1895
1896 /* Bindless texture handles */
1897 if (!buffer || buffer->texture_handle_allocated) {
1898 struct si_descriptors *descs = &sctx->bindless_descriptors;
1899
1900 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
1901 struct pipe_sampler_view *view = (*tex_handle)->view;
1902 unsigned desc_slot = (*tex_handle)->desc_slot;
1903 struct pipe_resource *buffer = view->texture;
1904
1905 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
1906 si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
1907 descs->list + desc_slot * 16 + 4);
1908
1909 (*tex_handle)->desc_dirty = true;
1910 si_mark_bindless_descriptors_dirty(sctx);
1911
1912 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READ |
1913 RADEON_PRIO_SAMPLER_BUFFER);
1914 }
1915 }
1916 }
1917
1918 /* Bindless image handles */
1919 if (!buffer || buffer->image_handle_allocated) {
1920 struct si_descriptors *descs = &sctx->bindless_descriptors;
1921
1922 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
1923 struct pipe_image_view *view = &(*img_handle)->view;
1924 unsigned desc_slot = (*img_handle)->desc_slot;
1925 struct pipe_resource *buffer = view->resource;
1926
1927 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
1928 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1929 si_mark_image_range_valid(view);
1930
1931 si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
1932 descs->list + desc_slot * 16 + 4);
1933
1934 (*img_handle)->desc_dirty = true;
1935 si_mark_bindless_descriptors_dirty(sctx);
1936
1937 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer),
1938 RADEON_USAGE_READWRITE | RADEON_PRIO_SAMPLER_BUFFER);
1939 }
1940 }
1941 }
1942
1943 if (buffer) {
1944 /* Do the same for other contexts. They will invoke this function
1945 * with buffer == NULL.
1946 */
1947 unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);
1948
1949 /* Skip the update for the current context, because we have already updated
1950 * the buffer bindings.
1951 */
1952 if (new_counter == sctx->last_dirty_buf_counter + 1)
1953 sctx->last_dirty_buf_counter = new_counter;
1954 }
1955 }
1956
1957 static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot,
1958 unsigned num_dwords)
1959 {
1960 struct si_descriptors *desc = &sctx->bindless_descriptors;
1961 unsigned desc_slot_offset = desc_slot * 16;
1962 uint32_t *data;
1963 uint64_t va;
1964
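   /* Each bindless slot occupies 16 dwords; desc->list is the CPU copy and
    * desc->buffer/gpu_address the GPU copy, so the byte offset of the slot
    * within the buffer is desc_slot_offset * 4.
    */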
1965 data = desc->list + desc_slot_offset;
1966 va = desc->gpu_address + desc_slot_offset * 4;
1967
1968 si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_370_TC_L2,
1969 V_370_ME, data);
1970 }
1971
1972 static void si_upload_bindless_descriptors(struct si_context *sctx)
1973 {
1974 if (!sctx->bindless_descriptors_dirty)
1975 return;
1976
1977 /* Wait for graphics/compute to be idle before updating the resident
1978 * descriptors directly in memory, in case the GPU is using them.
1979 */
1980 sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
1981 si_emit_cache_flush_direct(sctx);
1982
1983 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
1984 unsigned desc_slot = (*tex_handle)->desc_slot;
1985
1986 if (!(*tex_handle)->desc_dirty)
1987 continue;
1988
1989 si_upload_bindless_descriptor(sctx, desc_slot, 16);
1990 (*tex_handle)->desc_dirty = false;
1991 }
1992
1993 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
1994 unsigned desc_slot = (*img_handle)->desc_slot;
1995
1996 if (!(*img_handle)->desc_dirty)
1997 continue;
1998
1999 si_upload_bindless_descriptor(sctx, desc_slot, 8);
2000 (*img_handle)->desc_dirty = false;
2001 }
2002
2003 /* Invalidate scalar L0 because the cache doesn't know that L2 changed. */
2004 sctx->flags |= SI_CONTEXT_INV_SCACHE;
2005 sctx->bindless_descriptors_dirty = false;
2006 }
2007
2008 /* Update mutable image descriptor fields of a resident texture. */
2009 static void si_update_bindless_texture_descriptor(struct si_context *sctx,
2010 struct si_texture_handle *tex_handle)
2011 {
2012 struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
2013 struct si_descriptors *desc = &sctx->bindless_descriptors;
2014 unsigned desc_slot_offset = tex_handle->desc_slot * 16;
2015 uint32_t desc_list[16];
2016
2017 if (sview->base.texture->target == PIPE_BUFFER)
2018 return;
2019
2020 memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
2021 si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate, desc->list + desc_slot_offset);
2022
2023 if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) {
2024 tex_handle->desc_dirty = true;
2025 si_mark_bindless_descriptors_dirty(sctx);
2026 }
2027 }
2028
2029 static void si_update_bindless_image_descriptor(struct si_context *sctx,
2030 struct si_image_handle *img_handle)
2031 {
2032 struct si_descriptors *desc = &sctx->bindless_descriptors;
2033 unsigned desc_slot_offset = img_handle->desc_slot * 16;
2034 struct pipe_image_view *view = &img_handle->view;
2035 struct pipe_resource *res = view->resource;
2036 uint32_t image_desc[16];
2037 unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4;
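   /* MSAA images carry an FMASK descriptor in dwords [8:15], so compare all
    * 16 dwords; single-sample images only use the first 8.
    */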
2038
2039 if (res->target == PIPE_BUFFER)
2040 return;
2041
2042 memcpy(image_desc, desc->list + desc_slot_offset, desc_size);
2043 si_set_shader_image_desc(sctx, view, true, desc->list + desc_slot_offset,
2044 desc->list + desc_slot_offset + 8);
2045
2046 if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) {
2047 img_handle->desc_dirty = true;
2048 si_mark_bindless_descriptors_dirty(sctx);
2049 }
2050 }
2051
2052 static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
2053 {
2054 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
2055 si_update_bindless_texture_descriptor(sctx, *tex_handle);
2056 }
2057
2058 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
2059 si_update_bindless_image_descriptor(sctx, *img_handle);
2060 }
2061 }
2062
2063 /* Update mutable image descriptor fields of all bound textures. */
2064 void si_update_all_texture_descriptors(struct si_context *sctx)
2065 {
2066 unsigned shader;
2067
2068 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
2069 struct si_samplers *samplers = &sctx->samplers[shader];
2070 struct si_images *images = &sctx->images[shader];
2071 unsigned mask;
2072
2073 /* Images. */
2074 mask = images->enabled_mask;
2075 while (mask) {
2076 unsigned i = u_bit_scan(&mask);
2077 struct pipe_image_view *view = &images->views[i];
2078
2079 if (!view->resource || view->resource->target == PIPE_BUFFER)
2080 continue;
2081
2082 si_set_shader_image(sctx, shader, i, view, true);
2083 }
2084
2085 /* Sampler views. */
2086 mask = samplers->enabled_mask;
2087 while (mask) {
2088 unsigned i = u_bit_scan(&mask);
2089 struct pipe_sampler_view *view = samplers->views[i];
2090
2091 if (!view || !view->texture || view->texture->target == PIPE_BUFFER)
2092 continue;
2093
2094 si_set_sampler_views(sctx, shader, i, 1, 0, false, &samplers->views[i], true);
2095 }
2096
2097 si_update_shader_needs_decompress_mask(sctx, shader);
2098 }
2099
2100 si_update_all_resident_texture_descriptors(sctx);
2101 si_update_ps_colorbuf0_slot(sctx);
2102 }
2103
2104 /* SHADER USER DATA */
2105
2106 static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader)
2107 {
2108 sctx->shader_pointers_dirty |=
2109 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS);
2110
2111 if (shader == PIPE_SHADER_VERTEX)
2112 sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
2113
2114 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
2115 }
2116
2117 void si_shader_pointers_mark_dirty(struct si_context *sctx)
2118 {
2119 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
2120 sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
2121 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
2122 sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
2123 sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
2124 sctx->compute_shaderbuf_sgprs_dirty = true;
2125 sctx->compute_image_sgprs_dirty = true;
2126 if (sctx->gfx_level >= GFX11)
2127 sctx->gs_attribute_ring_pointer_dirty = true;
2128 }
2129
2130 /* Set a base register address for user data constants in the given shader.
2131 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
2132 */
2133 static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base)
2134 {
2135 uint32_t *base = &sctx->shader_pointers.sh_base[shader];
2136
2137 if (*base != new_base) {
2138 *base = new_base;
2139
2140 if (new_base)
2141 si_mark_shader_pointers_dirty(sctx, shader);
2142
2143 /* Any change in enabled shader stages requires re-emitting
2144 * the VS state SGPR, because it contains the clamp_vertex_color
2145 * state, which can be done in VS, TES, and GS.
2146 */
2147 sctx->last_vs_state = ~0;
2148 sctx->last_gs_state = ~0;
2149 }
2150 }
2151
2152 /* This must be called when these are changed between enabled and disabled
2153 * - geometry shader
2154 * - tessellation evaluation shader
2155 * - NGG
2156 */
2157 void si_shader_change_notify(struct si_context *sctx)
2158 {
2159 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
2160 si_get_user_data_base(sctx->gfx_level,
2161 sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
2162 sctx->shader.gs.cso ? GS_ON : GS_OFF,
2163 sctx->ngg ? NGG_ON : NGG_OFF,
2164 PIPE_SHADER_VERTEX));
2165
2166 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
2167 si_get_user_data_base(sctx->gfx_level,
2168 sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
2169 sctx->shader.gs.cso ? GS_ON : GS_OFF,
2170 sctx->ngg ? NGG_ON : NGG_OFF,
2171 PIPE_SHADER_TESS_EVAL));
2172
2173 /* Update as_* flags in shader keys. Ignore disabled shader stages.
2174 * as_ls = VS before TCS
2175 * as_es = VS before GS or TES before GS
2176 * as_ngg = NGG enabled for the last geometry stage.
2177 * If GS sets as_ngg, the previous stage must set as_ngg too.
2178 */
2179 if (sctx->shader.tes.cso) {
2180 sctx->shader.vs.key.ge.as_ls = 1;
2181 sctx->shader.vs.key.ge.as_es = 0;
2182 sctx->shader.vs.key.ge.as_ngg = 0;
2183
2184 if (sctx->shader.gs.cso) {
2185 sctx->shader.tes.key.ge.as_es = 1;
2186 sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
2187 sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
2188 } else {
2189 sctx->shader.tes.key.ge.as_es = 0;
2190 sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
2191 }
2192 } else if (sctx->shader.gs.cso) {
2193 sctx->shader.vs.key.ge.as_ls = 0;
2194 sctx->shader.vs.key.ge.as_es = 1;
2195 sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
2196 sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
2197 } else {
2198 sctx->shader.vs.key.ge.as_ls = 0;
2199 sctx->shader.vs.key.ge.as_es = 0;
2200 sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
2201 }
2202 }
2203
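/* Emit pointers for all dirty descriptor lists selected by pointer_mask, one
 * SET_SH_REG sequence per run of consecutive dirty bits. sh_base is the stage's
 * USER_DATA_0 register; a base of 0 means the stage is currently unused and is
 * skipped.
 */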
2204 #define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base, type) do { \
2205 unsigned sh_reg_base = (sh_base); \
2206 if (sh_reg_base) { \
2207 unsigned mask = shader_pointers_dirty & (pointer_mask); \
2208 \
2209 while (mask) { \
2210 int start, count; \
2211 u_bit_scan_consecutive_range(&mask, &start, &count); \
2212 \
2213 struct si_descriptors *descs = &sctx->descriptors[start]; \
2214 unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \
2215 \
2216 radeon_set_sh_reg_seq(sh_offset, count); \
2217 for (int i = 0; i < count; i++) \
2218 radeon_emit_32bit_pointer(sctx->screen, descs[i].gpu_address); \
2219 } \
2220 } \
2221 } while (0)
2222
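/* Same as above, but queues the pointers through the GFX11 packed SH-register
 * helpers (gfx11_push_*_sh_reg) instead of emitting SET_SH_REG packets directly.
 */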
2223 #define gfx11_push_consecutive_shader_pointers(sctx, pointer_mask, sh_base, type) do { \
2224 unsigned sh_reg_base = (sh_base); \
2225 if (sh_reg_base) { \
2226 unsigned mask = shader_pointers_dirty & (pointer_mask); \
2227 \
2228 u_foreach_bit(i, mask) { \
2229 struct si_descriptors *descs = &sctx->descriptors[i]; \
2230 unsigned sh_reg = sh_reg_base + descs->shader_userdata_offset; \
2231 \
2232 gfx11_push_##type##_sh_reg(sh_reg, descs->gpu_address); \
2233 } \
2234 } \
2235 } while (0)
2236
2237 static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
2238 {
2239 assert(!sctx->screen->info.has_set_sh_pairs_packed);
2240
2241 radeon_begin(&sctx->gfx_cs);
2242
2243 if (sctx->gfx_level >= GFX11) {
2244 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2245 radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
2246 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
2247 } else if (sctx->gfx_level >= GFX10) {
2248 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2249 /* HW VS stage only used in non-NGG mode. */
2250 radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2251 radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
2252 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
2253 } else if (sctx->gfx_level == GFX9 && sctx->shadowing.registers) {
2254 /* We can't use the COMMON registers with register shadowing. */
2255 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2256 radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2257 radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
2258 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
2259 } else if (sctx->gfx_level == GFX9) {
2260 /* Broadcast it to all shader stages. */
2261 radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
2262 } else {
2263 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2264 radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2265 radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
2266 radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
2267 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
2268 radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
2269 }
2270 radeon_end();
2271 }
2272
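/* GFX11 variant of si_emit_global_shader_pointers: write the pointer at the PS,
 * GS and HS user-data bases, the only graphics hw stages on GFX11.
 */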
2273 static void gfx11_push_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
2274 {
2275 gfx11_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + descs->shader_userdata_offset,
2276 descs->gpu_address);
2277 gfx11_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + descs->shader_userdata_offset,
2278 descs->gpu_address);
2279 gfx11_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 + descs->shader_userdata_offset,
2280 descs->gpu_address);
2281 }
2282
2283 void si_emit_graphics_shader_pointers(struct si_context *sctx, unsigned index)
2284 {
2285 uint32_t *sh_base = sctx->shader_pointers.sh_base;
2286 unsigned all_gfx_desc_mask = BITFIELD_RANGE(0, SI_DESCS_FIRST_COMPUTE);
2287 unsigned descriptors_dirty = sctx->descriptors_dirty & all_gfx_desc_mask;
2288 unsigned shader_pointers_dirty = sctx->shader_pointers_dirty | descriptors_dirty;
2289
2290 /* Blits shouldn't set VS shader pointers. */
2291 if (sctx->num_vs_blit_sgprs)
2292 shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(VERTEX);
2293
2294 /* Upload descriptors. */
2295 if (descriptors_dirty) {
2296 sctx->descriptors_dirty &= ~descriptors_dirty;
2297
2298 do {
2299 si_upload_descriptors(sctx, &sctx->descriptors[u_bit_scan(&descriptors_dirty)]);
2300 } while (descriptors_dirty);
2301 }
2302
2303 si_upload_bindless_descriptors(sctx);
2304
2305 /* Set shader pointers. */
2306 if (sctx->screen->info.has_set_sh_pairs_packed) {
2307 gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
2308 sh_base[PIPE_SHADER_VERTEX], gfx);
2309 gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
2310 sh_base[PIPE_SHADER_TESS_EVAL], gfx);
2311 gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
2312 sh_base[PIPE_SHADER_FRAGMENT], gfx);
2313 gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
2314 sh_base[PIPE_SHADER_TESS_CTRL], gfx);
2315 gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
2316 sh_base[PIPE_SHADER_GEOMETRY], gfx);
2317
2318 if (sctx->gs_attribute_ring_pointer_dirty) {
2319 gfx11_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 +
2320 GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4,
2321 sctx->screen->attribute_ring->gpu_address);
2322 sctx->gs_attribute_ring_pointer_dirty = false;
2323 }
2324
2325 if (shader_pointers_dirty & (1 << SI_DESCS_INTERNAL))
2326 gfx11_push_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_INTERNAL]);
2327
2328 if (sctx->graphics_bindless_pointer_dirty) {
2329 gfx11_push_global_shader_pointers(sctx, &sctx->bindless_descriptors);
2330 sctx->graphics_bindless_pointer_dirty = false;
2331 }
2332 } else {
2333 radeon_begin(&sctx->gfx_cs);
2334 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
2335 sh_base[PIPE_SHADER_VERTEX], gfx);
2336 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
2337 sh_base[PIPE_SHADER_TESS_EVAL], gfx);
2338 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
2339 sh_base[PIPE_SHADER_FRAGMENT], gfx);
2340 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
2341 sh_base[PIPE_SHADER_TESS_CTRL], gfx);
2342 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
2343 sh_base[PIPE_SHADER_GEOMETRY], gfx);
2344
2345 if (sctx->gs_attribute_ring_pointer_dirty) {
2346 assert(sctx->gfx_level >= GFX11);
2347 radeon_set_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 +
2348 GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4,
2349 sctx->screen->attribute_ring->gpu_address);
2350 sctx->gs_attribute_ring_pointer_dirty = false;
2351 }
2352 radeon_end();
2353
2354 if (shader_pointers_dirty & (1 << SI_DESCS_INTERNAL))
2355 si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_INTERNAL]);
2356
2357 if (sctx->graphics_bindless_pointer_dirty) {
2358 si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);
2359 sctx->graphics_bindless_pointer_dirty = false;
2360 }
2361 }
2362
2363 sctx->shader_pointers_dirty &= ~all_gfx_desc_mask;
2364 }
2365
2366 void si_emit_compute_shader_pointers(struct si_context *sctx)
2367 {
2368 /* This does not update internal bindings as that is not needed for compute shaders. */
2369 unsigned descriptors_dirty = sctx->descriptors_dirty & SI_DESCS_SHADER_MASK(COMPUTE);
2370 unsigned shader_pointers_dirty = sctx->shader_pointers_dirty | descriptors_dirty;
2371
2372 /* Upload descriptors. */
2373 if (descriptors_dirty) {
2374 sctx->descriptors_dirty &= ~descriptors_dirty;
2375
2376 do {
2377 si_upload_descriptors(sctx, &sctx->descriptors[u_bit_scan(&descriptors_dirty)]);
2378 } while (descriptors_dirty);
2379 }
2380
2381 si_upload_bindless_descriptors(sctx);
2382
2383 radeon_begin(&sctx->gfx_cs);
2384
2385 /* Set shader pointers. */
2386 if (sctx->screen->info.has_set_sh_pairs_packed) {
2387 gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
2388 R_00B900_COMPUTE_USER_DATA_0, compute);
2389
2390 if (sctx->compute_bindless_pointer_dirty) {
2391 gfx11_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
2392 sctx->bindless_descriptors.shader_userdata_offset,
2393 sctx->bindless_descriptors.gpu_address);
2394 sctx->compute_bindless_pointer_dirty = false;
2395 }
2396 } else {
2397 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
2398 R_00B900_COMPUTE_USER_DATA_0, compute);
2399
2400 if (sctx->compute_bindless_pointer_dirty) {
2401 radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors,
2402 R_00B900_COMPUTE_USER_DATA_0);
2403 sctx->compute_bindless_pointer_dirty = false;
2404 }
2405 }
2406
2407 sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
2408
2409 /* Set shader buffer descriptors in user SGPRs. */
2410 struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
2411 unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
2412
2413 if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
2414 struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);
2415
2416 radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 +
2417 shader->cs_shaderbufs_sgpr_index * 4,
2418 num_shaderbufs * 4);
2419
2420 for (unsigned i = 0; i < num_shaderbufs; i++)
2421 radeon_emit_array(&desc->list[si_get_shaderbuf_slot(i) * 4], 4);
2422
2423 sctx->compute_shaderbuf_sgprs_dirty = false;
2424 }
2425
2426 /* Set image descriptors in user SGPRs. */
2427 unsigned num_images = shader->cs_num_images_in_user_sgprs;
2428 if (num_images && sctx->compute_image_sgprs_dirty) {
2429 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE);
2430
2431 radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 +
2432 shader->cs_images_sgpr_index * 4,
2433 shader->cs_images_num_sgprs);
2434
2435 for (unsigned i = 0; i < num_images; i++) {
2436 unsigned desc_offset = si_get_image_slot(i) * 8;
2437 unsigned num_sgprs = 8;
2438
2439 /* Image buffers are in desc[4..7]. */
2440 if (BITSET_TEST(shader->info.base.image_buffers, i)) {
2441 desc_offset += 4;
2442 num_sgprs = 4;
2443 }
2444
2445 radeon_emit_array(&desc->list[desc_offset], num_sgprs);
2446 }
2447
2448 sctx->compute_image_sgprs_dirty = false;
2449 }
2450 radeon_end();
2451 }
2452
2453 /* BINDLESS */
2454
2455 static void si_init_bindless_descriptors(struct si_context *sctx, struct si_descriptors *desc,
2456 short shader_userdata_rel_index, unsigned num_elements)
2457 {
2458 ASSERTED unsigned desc_slot;
2459
2460 si_init_descriptors(desc, shader_userdata_rel_index, 16, num_elements);
2461 sctx->bindless_descriptors.num_active_slots = num_elements;
2462
2463 /* The first bindless descriptor is stored at slot 1, because 0 is not
2464 * considered to be a valid handle.
2465 */
2466 sctx->num_bindless_descriptors = 1;
2467
2468 /* Track which bindless slots are used (or not). */
2469 util_idalloc_init(&sctx->bindless_used_slots, num_elements);
2470
2471 /* Reserve slot 0 because it's an invalid handle for bindless. */
2472 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2473 assert(desc_slot == 0);
2474 }
2475
2476 static void si_release_bindless_descriptors(struct si_context *sctx)
2477 {
2478 si_release_descriptors(&sctx->bindless_descriptors);
2479 util_idalloc_fini(&sctx->bindless_used_slots);
2480 }
2481
2482 static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
2483 {
2484 struct si_descriptors *desc = &sctx->bindless_descriptors;
2485 unsigned desc_slot;
2486
2487 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2488 if (desc_slot >= desc->num_elements) {
2489 /* The array of bindless descriptors is full, resize it. */
2490 unsigned slot_size = desc->element_dw_size * 4;
2491 unsigned new_num_elements = desc->num_elements * 2;
2492
2493 desc->list =
2494 REALLOC(desc->list, desc->num_elements * slot_size, new_num_elements * slot_size);
2495 desc->num_elements = new_num_elements;
2496 desc->num_active_slots = new_num_elements;
2497 }
2498
2499 assert(desc_slot);
2500 return desc_slot;
2501 }
2502
2503 static unsigned si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
2504 unsigned size)
2505 {
2506 struct si_descriptors *desc = &sctx->bindless_descriptors;
2507 unsigned desc_slot, desc_slot_offset;
2508
2509 /* Find a free slot. */
2510 desc_slot = si_get_first_free_bindless_slot(sctx);
2511
2512 /* For simplicity, sampler and image bindless descriptors use fixed
2513     * 16-dword slots for now. Image descriptors only need 8 dwords, but this
2514 * doesn't really matter because no real apps use image handles.
2515 */
2516 desc_slot_offset = desc_slot * 16;
2517
2518 /* Copy the descriptor into the array. */
2519 memcpy(desc->list + desc_slot_offset, desc_list, size);
2520
2521 /* Re-upload the whole array of bindless descriptors into a new buffer.
2522 */
2523 si_upload_descriptors(sctx, desc);
2524
2525 /* Make sure to re-emit the shader pointers for all stages. */
2526 sctx->graphics_bindless_pointer_dirty = true;
2527 sctx->compute_bindless_pointer_dirty = true;
2528 si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
2529
2530 return desc_slot;
2531 }
2532
2533 static void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsigned desc_slot,
2534 struct pipe_resource *resource, uint64_t offset,
2535 bool *desc_dirty)
2536 {
2537 struct si_descriptors *desc = &sctx->bindless_descriptors;
2538 struct si_resource *buf = si_resource(resource);
2539 unsigned desc_slot_offset = desc_slot * 16;
2540 uint32_t *desc_list = desc->list + desc_slot_offset + 4;
2541 uint64_t old_desc_va;
2542
2543 assert(resource->target == PIPE_BUFFER);
2544
2545 /* Retrieve the old buffer addr from the descriptor. */
2546 old_desc_va = si_desc_extract_buffer_address(desc_list);
2547
2548 if (old_desc_va != buf->gpu_address + offset) {
2549 /* The buffer has been invalidated when the handle wasn't
2550 * resident, update the descriptor and the dirty flag.
2551 */
2552 si_set_buf_desc_address(buf, offset, &desc_list[0]);
2553
2554 *desc_dirty = true;
2555 }
2556 }
2557
2558 static uint64_t si_create_texture_handle(struct pipe_context *ctx, struct pipe_sampler_view *view,
2559 const struct pipe_sampler_state *state)
2560 {
2561 struct si_sampler_view *sview = (struct si_sampler_view *)view;
2562 struct si_context *sctx = (struct si_context *)ctx;
2563 struct si_texture_handle *tex_handle;
2564 struct si_sampler_state *sstate;
2565 uint32_t desc_list[16];
2566 uint64_t handle;
2567
2568 tex_handle = CALLOC_STRUCT(si_texture_handle);
2569 if (!tex_handle)
2570 return 0;
2571
2572 memset(desc_list, 0, sizeof(desc_list));
2573 si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);
2574
2575 sstate = ctx->create_sampler_state(ctx, state);
2576 if (!sstate) {
2577 FREE(tex_handle);
2578 return 0;
2579 }
2580
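   /* Build the descriptor from the view and sampler state, keep a copy of the
    * sampler state in the handle, then free the temporary sampler CSO.
    */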
2581 si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
2582 memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
2583 ctx->delete_sampler_state(ctx, sstate);
2584
2585 tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list));
2586 if (!tex_handle->desc_slot) {
2587 FREE(tex_handle);
2588 return 0;
2589 }
2590
2591 handle = tex_handle->desc_slot;
2592
2593 if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)(uintptr_t)handle, tex_handle)) {
2594 FREE(tex_handle);
2595 return 0;
2596 }
2597
2598 pipe_sampler_view_reference(&tex_handle->view, view);
2599
2600 si_resource(sview->base.texture)->texture_handle_allocated = true;
2601
2602 return handle;
2603 }
2604
2605 static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
2606 {
2607 struct si_context *sctx = (struct si_context *)ctx;
2608 struct si_texture_handle *tex_handle;
2609 struct hash_entry *entry;
2610
2611 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle);
2612 if (!entry)
2613 return;
2614
2615 tex_handle = (struct si_texture_handle *)entry->data;
2616
2617 /* Allow this descriptor slot to be re-used. */
2618 util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);
2619
2620 pipe_sampler_view_reference(&tex_handle->view, NULL);
2621 _mesa_hash_table_remove(sctx->tex_handles, entry);
2622 FREE(tex_handle);
2623 }
2624
2625 static void si_make_texture_handle_resident(struct pipe_context *ctx, uint64_t handle,
2626 bool resident)
2627 {
2628 struct si_context *sctx = (struct si_context *)ctx;
2629 struct si_texture_handle *tex_handle;
2630 struct si_sampler_view *sview;
2631 struct hash_entry *entry;
2632
2633 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle);
2634 if (!entry)
2635 return;
2636
2637 tex_handle = (struct si_texture_handle *)entry->data;
2638 sview = (struct si_sampler_view *)tex_handle->view;
2639
2640 if (resident) {
2641 if (sview->base.texture->target != PIPE_BUFFER) {
2642 struct si_texture *tex = (struct si_texture *)sview->base.texture;
2643
2644 if (depth_needs_decompression(tex, sview->is_stencil_sampler)) {
2645 util_dynarray_append(&sctx->resident_tex_needs_depth_decompress,
2646 struct si_texture_handle *, tex_handle);
2647 }
2648
2649 if (color_needs_decompression(tex)) {
2650 util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
2651 struct si_texture_handle *, tex_handle);
2652 }
2653
2654 if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
2655 p_atomic_read(&tex->framebuffers_bound))
2656 sctx->need_check_render_feedback = true;
2657
2658 si_update_bindless_texture_descriptor(sctx, tex_handle);
2659 } else {
2660 si_update_bindless_buffer_descriptor(sctx, tex_handle->desc_slot, sview->base.texture,
2661 sview->base.u.buf.offset, &tex_handle->desc_dirty);
2662 }
2663
2664 /* Re-upload the descriptor if it has been updated while it
2665 * wasn't resident.
2666 */
2667 if (tex_handle->desc_dirty)
2668 si_mark_bindless_descriptors_dirty(sctx);
2669
2670 /* Add the texture handle to the per-context list. */
2671 util_dynarray_append(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle);
2672
2673 /* Add the buffers to the current CS in case si_begin_new_cs()
2674 * is not going to be called.
2675 */
2676 si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
2677 sview->is_stencil_sampler);
2678 } else {
2679 /* Remove the texture handle from the per-context list. */
2680 util_dynarray_delete_unordered(&sctx->resident_tex_handles, struct si_texture_handle *,
2681 tex_handle);
2682
2683 if (sview->base.texture->target != PIPE_BUFFER) {
2684 util_dynarray_delete_unordered(&sctx->resident_tex_needs_depth_decompress,
2685 struct si_texture_handle *, tex_handle);
2686
2687 util_dynarray_delete_unordered(&sctx->resident_tex_needs_color_decompress,
2688 struct si_texture_handle *, tex_handle);
2689 }
2690 }
2691 }
2692
2693 static uint64_t si_create_image_handle(struct pipe_context *ctx, const struct pipe_image_view *view)
2694 {
2695 struct si_context *sctx = (struct si_context *)ctx;
2696 struct si_image_handle *img_handle;
2697 uint32_t desc_list[16];
2698 uint64_t handle;
2699
2700 if (!view || !view->resource)
2701 return 0;
2702
2703 img_handle = CALLOC_STRUCT(si_image_handle);
2704 if (!img_handle)
2705 return 0;
2706
2707 memset(desc_list, 0, sizeof(desc_list));
2708 si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor);
2709
2710 si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]);
2711
2712 img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list));
2713 if (!img_handle->desc_slot) {
2714 FREE(img_handle);
2715 return 0;
2716 }
2717
2718 handle = img_handle->desc_slot;
2719
2720 if (!_mesa_hash_table_insert(sctx->img_handles, (void *)(uintptr_t)handle, img_handle)) {
2721 FREE(img_handle);
2722 return 0;
2723 }
2724
2725 util_copy_image_view(&img_handle->view, view);
2726
2727 si_resource(view->resource)->image_handle_allocated = true;
2728
2729 return handle;
2730 }
2731
2732 static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
2733 {
2734 struct si_context *sctx = (struct si_context *)ctx;
2735 struct si_image_handle *img_handle;
2736 struct hash_entry *entry;
2737
2738 entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle);
2739 if (!entry)
2740 return;
2741
2742 img_handle = (struct si_image_handle *)entry->data;
2743
2744 util_copy_image_view(&img_handle->view, NULL);
2745 _mesa_hash_table_remove(sctx->img_handles, entry);
2746 FREE(img_handle);
2747 }
2748
2749 static void si_make_image_handle_resident(struct pipe_context *ctx, uint64_t handle,
2750 unsigned access, bool resident)
2751 {
2752 struct si_context *sctx = (struct si_context *)ctx;
2753 struct si_image_handle *img_handle;
2754 struct pipe_image_view *view;
2755 struct si_resource *res;
2756 struct hash_entry *entry;
2757
2758 entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle);
2759 if (!entry)
2760 return;
2761
2762 img_handle = (struct si_image_handle *)entry->data;
2763 view = &img_handle->view;
2764 res = si_resource(view->resource);
2765
2766 if (resident) {
2767 if (res->b.b.target != PIPE_BUFFER) {
2768 struct si_texture *tex = (struct si_texture *)res;
2769 unsigned level = view->u.tex.level;
2770
2771 if (color_needs_decompression(tex)) {
2772 util_dynarray_append(&sctx->resident_img_needs_color_decompress,
2773 struct si_image_handle *, img_handle);
2774 }
2775
2776 if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound))
2777 sctx->need_check_render_feedback = true;
2778
2779 si_update_bindless_image_descriptor(sctx, img_handle);
2780 } else {
2781 si_update_bindless_buffer_descriptor(sctx, img_handle->desc_slot, view->resource,
2782 view->u.buf.offset, &img_handle->desc_dirty);
2783 }
2784
2785 /* Re-upload the descriptor if it has been updated while it
2786 * wasn't resident.
2787 */
2788 if (img_handle->desc_dirty)
2789 si_mark_bindless_descriptors_dirty(sctx);
2790
2791 /* Add the image handle to the per-context list. */
2792 util_dynarray_append(&sctx->resident_img_handles, struct si_image_handle *, img_handle);
2793
2794 /* Add the buffers to the current CS in case si_begin_new_cs()
2795 * is not going to be called.
2796 */
2797 si_sampler_view_add_buffer(sctx, view->resource,
2798 (access & PIPE_IMAGE_ACCESS_WRITE) ?
2799 RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false);
2800 } else {
2801 /* Remove the image handle from the per-context list. */
2802 util_dynarray_delete_unordered(&sctx->resident_img_handles, struct si_image_handle *,
2803 img_handle);
2804
2805 if (res->b.b.target != PIPE_BUFFER) {
2806 util_dynarray_delete_unordered(&sctx->resident_img_needs_color_decompress,
2807 struct si_image_handle *, img_handle);
2808 }
2809 }
2810 }
2811
2812 static void si_resident_buffers_add_all_to_bo_list(struct si_context *sctx)
2813 {
2814 unsigned num_resident_tex_handles, num_resident_img_handles;
2815
2816 num_resident_tex_handles = sctx->resident_tex_handles.size / sizeof(struct si_texture_handle *);
2817 num_resident_img_handles = sctx->resident_img_handles.size / sizeof(struct si_image_handle *);
2818
2819 /* Add all resident texture handles. */
2820 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
2821 struct si_sampler_view *sview = (struct si_sampler_view *)(*tex_handle)->view;
2822
2823 si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
2824 sview->is_stencil_sampler);
2825 }
2826
2827 /* Add all resident image handles. */
2828 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
2829 struct pipe_image_view *view = &(*img_handle)->view;
2830
2831 si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false);
2832 }
2833
2834 sctx->num_resident_handles += num_resident_tex_handles + num_resident_img_handles;
2835 assert(sctx->bo_list_add_all_resident_resources);
2836 sctx->bo_list_add_all_resident_resources = false;
2837 }
2838
2839 static void si_emit_gfx_resources_add_all_to_bo_list(struct si_context *sctx, unsigned index);
2840
2841 /* INIT/DEINIT/UPLOAD */
2842
2843 void si_init_all_descriptors(struct si_context *sctx)
2844 {
2845 int i;
2846 unsigned first_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
2847 unsigned hs_sgpr0, gs_sgpr0;
2848
2849 if (sctx->gfx_level >= GFX11) {
2850 hs_sgpr0 = R_00B420_SPI_SHADER_PGM_LO_HS;
2851 gs_sgpr0 = R_00B220_SPI_SHADER_PGM_LO_GS;
2852 } else {
2853 hs_sgpr0 = R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS;
2854 gs_sgpr0 = R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS;
2855 }
2856
2857 for (i = first_shader; i < SI_NUM_SHADERS; i++) {
2858 bool is_2nd =
2859 sctx->gfx_level >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || i == PIPE_SHADER_GEOMETRY);
2860 unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS;
2861 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
2862 int rel_dw_offset;
2863 struct si_descriptors *desc;
2864
2865 if (is_2nd) {
2866 if (i == PIPE_SHADER_TESS_CTRL) {
2867 rel_dw_offset =
2868 (hs_sgpr0 - R_00B430_SPI_SHADER_USER_DATA_HS_0) / 4;
2869 } else if (sctx->gfx_level >= GFX10) { /* PIPE_SHADER_GEOMETRY */
2870 rel_dw_offset =
2871 (gs_sgpr0 - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
2872 } else {
2873 rel_dw_offset =
2874 (gs_sgpr0 - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
2875 }
2876 } else {
2877 rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS;
2878 }
2879 desc = si_const_and_shader_buffer_descriptors(sctx, i);
2880 si_init_buffer_resources(sctx, &sctx->const_and_shader_buffers[i], desc, num_buffer_slots,
2881 rel_dw_offset, RADEON_PRIO_SHADER_RW_BUFFER,
2882 RADEON_PRIO_CONST_BUFFER);
2883 desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
2884
2885 if (is_2nd) {
2886 if (i == PIPE_SHADER_TESS_CTRL) {
2887 rel_dw_offset =
2888 (hs_sgpr0 + 4 - R_00B430_SPI_SHADER_USER_DATA_HS_0) / 4;
2889 } else if (sctx->gfx_level >= GFX10) { /* PIPE_SHADER_GEOMETRY */
2890 rel_dw_offset =
2891 (gs_sgpr0 + 4 - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
2892 } else {
2893 rel_dw_offset =
2894 (gs_sgpr0 + 4 - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
2895 }
2896 } else {
2897 rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
2898 }
2899
2900 desc = si_sampler_and_image_descriptors(sctx, i);
2901 si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);
2902
2903 int j;
2904 for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++)
2905 memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
2906 for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++)
2907 memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
2908 }
2909
2910 si_init_buffer_resources(sctx, &sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL],
2911 SI_NUM_INTERNAL_BINDINGS, SI_SGPR_INTERNAL_BINDINGS,
2912 /* The second priority is used by
2913 * const buffers in RW buffer slots. */
2914 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
2915 sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots = SI_NUM_INTERNAL_BINDINGS;
2916
   /* Initialize an array of 1024 bindless descriptors. When the limit is
    * reached, the array is simply grown and re-uploaded as a whole.
    */
   si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
                                SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 1024);

   sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);

   /* Set pipe_context functions. */
   sctx->b.bind_sampler_states = si_bind_sampler_states;
   sctx->b.set_shader_images = si_set_shader_images;
   sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
   sctx->b.set_inlinable_constants = si_set_inlinable_constants;
   sctx->b.set_shader_buffers = si_pipe_set_shader_buffers;
   sctx->b.set_sampler_views = si_pipe_set_sampler_views;
   sctx->b.create_texture_handle = si_create_texture_handle;
   sctx->b.delete_texture_handle = si_delete_texture_handle;
   sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
   sctx->b.create_image_handle = si_create_image_handle;
   sctx->b.delete_image_handle = si_delete_image_handle;
   sctx->b.make_image_handle_resident = si_make_image_handle_resident;

   if (!sctx->has_graphics)
      return;

   sctx->b.set_polygon_stipple = si_set_polygon_stipple;

   sctx->atoms.s.gfx_add_all_to_bo_list.emit = si_emit_gfx_resources_add_all_to_bo_list;
   sctx->atoms.s.gfx_shader_pointers.emit = si_emit_graphics_shader_pointers;

   /* Set default and immutable mappings. */
   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                         si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF,
                                               sctx->ngg, PIPE_SHADER_VERTEX));
   si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
                         si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF,
                                               NGG_OFF, PIPE_SHADER_TESS_CTRL));
   si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
                         si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF,
                                               NGG_OFF, PIPE_SHADER_GEOMETRY));
   si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}

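/* Free all descriptor lists and drop the resource references they hold. */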
void si_release_all_descriptors(struct si_context *sctx)
{
   int i;

   for (i = 0; i < SI_NUM_SHADERS; i++) {
      si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
                                  si_const_and_shader_buffer_descriptors(sctx, i));
      si_release_sampler_views(&sctx->samplers[i]);
      si_release_image_views(&sctx->images[i]);
   }
   si_release_buffer_resources(&sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL]);
   for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
      pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);

   for (i = 0; i < SI_NUM_DESCS; ++i)
      si_release_descriptors(&sctx->descriptors[i]);

   si_release_bindless_descriptors(sctx);
}

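/* Return true if any resource that the current graphics state can read or
 * write (shader buffers, samplers, images, internal bindings, or framebuffer
 * attachments that are actually accessed) is stored in an encrypted BO.
 */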
bool si_gfx_resources_check_encrypted(struct si_context *sctx)
{
   bool use_encrypted_bo = false;

   for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS && !use_encrypted_bo; i++) {
      struct si_shader_ctx_state *current_shader = &sctx->shaders[i];
      if (!current_shader->cso)
         continue;

      use_encrypted_bo |=
         si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[i]);
      use_encrypted_bo |=
         si_sampler_views_check_encrypted(sctx, &sctx->samplers[i],
                                          current_shader->cso->info.base.textures_used[0]);
      use_encrypted_bo |=
         si_image_views_check_encrypted(sctx, &sctx->images[i],
                                        u_bit_consecutive(0, current_shader->cso->info.base.num_images));
   }
   use_encrypted_bo |= si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);

   struct si_state_blend *blend = sctx->queued.named.blend;
   for (int i = 0; i < sctx->framebuffer.state.nr_cbufs && !use_encrypted_bo; i++) {
      struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
      if (surf && surf->texture) {
         struct si_texture *tex = (struct si_texture *)surf->texture;
         if (!(tex->buffer.flags & RADEON_FLAG_ENCRYPTED))
            continue;

         /* Are we reading from this framebuffer (blending or DCC enabled)? */
         if (((blend->blend_enable_4bit >> (4 * i)) & 0xf) ||
             vi_dcc_enabled(tex, 0)) {
            use_encrypted_bo = true;
         }
      }
   }

   if (sctx->framebuffer.state.zsbuf) {
      struct si_texture *zs = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
      if (zs &&
          (zs->buffer.flags & RADEON_FLAG_ENCRYPTED)) {
         /* TODO: This isn't needed if depth.func is PIPE_FUNC_NEVER or PIPE_FUNC_ALWAYS */
         use_encrypted_bo = true;
      }
   }

#ifndef NDEBUG
   if (use_encrypted_bo) {
      /* Verify that color buffers are encrypted */
      for (int i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
         struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
         if (!surf)
            continue;
         struct si_texture *tex = (struct si_texture *)surf->texture;
         assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED));
      }
      /* Verify that depth/stencil buffer is encrypted */
      if (sctx->framebuffer.state.zsbuf) {
         struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
         struct si_texture *tex = (struct si_texture *)surf->texture;
         assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED));
      }
   }
#endif

   return use_encrypted_bo;
}

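/* Re-add all bound graphics resources (shader buffers, samplers, images,
 * vertex buffers and, if requested, resident bindless handles) to the buffer
 * list of the current CS. Emitted as a state atom once
 * si_add_all_descriptors_to_bo_list has marked it dirty.
 */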
static void si_emit_gfx_resources_add_all_to_bo_list(struct si_context *sctx, unsigned index)
{
   for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
      si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
      si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
      si_image_views_begin_new_cs(sctx, &sctx->images[i]);
   }
   si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings);

   unsigned num_vb = sctx->num_vertex_buffers;
   for (unsigned i = 0; i < num_vb; i++) {
      struct si_resource *buf = si_resource(sctx->vertex_buffer[i].buffer.resource);
      if (buf) {
         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buf,
                                   RADEON_USAGE_READ | RADEON_PRIO_VERTEX_BUFFER);
      }
   }

   if (sctx->bo_list_add_all_resident_resources)
      si_resident_buffers_add_all_to_bo_list(sctx);
}

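/* Compute counterpart of si_gfx_resources_check_encrypted: return true if any
 * resource bound to the compute shader is stored in an encrypted BO.
 */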
bool si_compute_resources_check_encrypted(struct si_context *sctx)
{
   unsigned sh = PIPE_SHADER_COMPUTE;

   struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info;

   /* TODO: we should assert that either use_encrypted_bo is false,
    * or all writable buffers are encrypted.
    */
   return si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[sh]) ||
          si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh], info->base.textures_used[0]) ||
          si_image_views_check_encrypted(sctx, &sctx->images[sh], u_bit_consecutive(0, info->base.num_images)) ||
          si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);
}

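/* Compute counterpart of si_emit_gfx_resources_add_all_to_bo_list: add all
 * compute-bound resources to the buffer list of the current CS.
 */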
void si_compute_resources_add_all_to_bo_list(struct si_context *sctx)
{
   unsigned sh = PIPE_SHADER_COMPUTE;

   si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[sh]);
   si_sampler_views_begin_new_cs(sctx, &sctx->samplers[sh]);
   si_image_views_begin_new_cs(sctx, &sctx->images[sh]);
   si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings);

   if (sctx->bo_list_add_all_resident_resources)
      si_resident_buffers_add_all_to_bo_list(sctx);

   assert(sctx->bo_list_add_all_compute_resources);
   sctx->bo_list_add_all_compute_resources = false;
}

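/* Add all descriptor buffers to the buffer list of the current CS and
 * schedule the bound graphics/compute resources and resident bindless
 * handles to be re-added before the next draw or dispatch.
 */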
void si_add_all_descriptors_to_bo_list(struct si_context *sctx)
{
   for (unsigned i = 0; i < SI_NUM_DESCS; ++i)
      si_add_descriptors_to_bo_list(sctx, &sctx->descriptors[i]);
   si_add_descriptors_to_bo_list(sctx, &sctx->bindless_descriptors);

   sctx->bo_list_add_all_resident_resources = true;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_add_all_to_bo_list);
   sctx->bo_list_add_all_compute_resources = true;
}

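/* Set which consecutive range of slots in a descriptor list is considered
 * active. new_active_mask must contain exactly one consecutive run of set
 * bits (e.g. 0x3c activates slots 2..5); the list is re-uploaded only if
 * the active range grows beyond the current one.
 */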
void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask)
{
   struct si_descriptors *desc = &sctx->descriptors[desc_idx];

   /* Ignore no-op updates and updates that disable all slots. */
   if (!new_active_mask ||
       new_active_mask == u_bit_consecutive64(desc->first_active_slot, desc->num_active_slots))
      return;

   int first, count;
   u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
   assert(new_active_mask == 0);

   /* Upload/dump descriptors if slots are being enabled. */
   if (first < desc->first_active_slot ||
       first + count > desc->first_active_slot + desc->num_active_slots) {
      sctx->descriptors_dirty |= 1u << desc_idx;
      if (desc_idx < SI_DESCS_FIRST_COMPUTE)
         si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
   }

   desc->first_active_slot = first;
   desc->num_active_slots = count;
}

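/* Update the active slot ranges of both per-shader descriptor lists
 * (const & shader buffers, samplers & images) from a shader selector.
 */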
void si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel)
{
   if (!sel)
      return;

   si_set_active_descriptors(sctx, sel->const_and_shader_buf_descriptors_index,
                             sel->active_const_and_shader_buffers);
   si_set_active_descriptors(sctx, sel->sampler_and_images_descriptors_index,
                             sel->active_samplers_and_images);
}
