1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 /* Resource binding slots and sampler states (each described with 8 or
25 * 4 dwords) are stored in lists in memory which is accessed by shaders
26 * using scalar load instructions.
27 *
28 * This file is responsible for managing such lists. It keeps a copy of all
29 * descriptors in CPU memory and re-uploads a whole list if some slots have
30 * been changed.
31 *
32 * This code is also responsible for updating shader pointers to those lists.
33 *
34 * Note that CP DMA can't be used for updating the lists, because a GPU hang
35 * could leave the list in a mid-IB state and the next IB would get wrong
36 * descriptors and the whole context would be unusable at that point.
37 * (Note: register shadowing can't be used for the same reason.)
38 *
39 * Also, uploading descriptors to newly allocated memory doesn't require
40 * a KCACHE flush.
41 *
42 *
43 * Possible scenarios for one 16 dword image+sampler slot:
44 *
45 *           | Image        | w/ FMASK     | Buffer       | NULL
46 * [ 0: 3]   | Image[0:3]   | Image[0:3]   | Null[0:3]    | Null[0:3]
47 * [ 4: 7]   | Image[4:7]   | Image[4:7]   | Buffer[0:3]  | 0
48 * [ 8:11]   | Null[0:3]    | Fmask[0:3]   | Null[0:3]    | Null[0:3]
49 * [12:15]   | Sampler[0:3] | Fmask[4:7]   | Sampler[0:3] | Sampler[0:3]
50 *
51 * FMASK implies MSAA, therefore no sampler state.
52 * Sampler states are never unbound except when FMASK is bound.
53 */
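/* Illustrative sketch (not functional code): with the layout above, the
 * combined image+sampler descriptor for sampler slot N lives at
 *
 *     uint32_t *desc = descs->list + si_get_sampler_slot(N) * 16;
 *
 * and the sampler state words are written at desc + 12 (see
 * si_set_sampler_view and si_bind_sampler_states below).
 */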
54
55 #include "radeon/r600_cs.h"
56 #include "si_pipe.h"
57 #include "sid.h"
58 #include "gfx9d.h"
59
60 #include "util/hash_table.h"
61 #include "util/u_idalloc.h"
62 #include "util/u_format.h"
63 #include "util/u_memory.h"
64 #include "util/u_upload_mgr.h"
65
66
67 /* NULL image and buffer descriptor for textures (alpha = 1) and images
68 * (alpha = 0).
69 *
70 * For images, all fields must be zero except for the swizzle, which
71 * supports arbitrary combinations of 0s and 1s. The texture type must be
72 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
73 *
74 * For buffers, all fields must be zero. If they are not, the hw hangs.
75 *
76 * This is the only reason why the buffer descriptor must be in words [4:7].
77 */
78 static uint32_t null_texture_descriptor[8] = {
79 0,
80 0,
81 0,
82 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) |
83 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
84 /* the rest must contain zeros, which is also used by the buffer
85 * descriptor */
86 };
87
88 static uint32_t null_image_descriptor[8] = {
89 0,
90 0,
91 0,
92 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
93 /* the rest must contain zeros, which is also used by the buffer
94 * descriptor */
95 };
96
97 static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
98 {
99 return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
100 }
101
102 static void si_init_descriptor_list(uint32_t *desc_list,
103 unsigned element_dw_size,
104 unsigned num_elements,
105 const uint32_t *null_descriptor)
106 {
107 int i;
108
109 /* Initialize the array to NULL descriptors if one is provided (the element size must be a multiple of 8). */
110 if (null_descriptor) {
111 assert(element_dw_size % 8 == 0);
112 for (i = 0; i < num_elements * element_dw_size / 8; i++)
113 memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
114 }
115 }
116
117 static void si_init_descriptors(struct si_descriptors *desc,
118 unsigned shader_userdata_index,
119 unsigned element_dw_size,
120 unsigned num_elements)
121 {
122 desc->list = CALLOC(num_elements, element_dw_size * 4);
123 desc->element_dw_size = element_dw_size;
124 desc->num_elements = num_elements;
125 desc->shader_userdata_offset = shader_userdata_index * 4;
126 desc->slot_index_to_bind_directly = -1;
127 }
128
129 static void si_release_descriptors(struct si_descriptors *desc)
130 {
131 r600_resource_reference(&desc->buffer, NULL);
132 FREE(desc->list);
133 }
134
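/* Upload the CPU copy of a descriptor list to fresh GPU memory and mark the
 * shader pointer atom dirty. If only the directly-bindable slot is active,
 * the upload is skipped and that descriptor's buffer address is bound
 * directly. Returns false if the allocation fails, in which case the caller
 * skips the draw call.
 */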
135 static bool si_upload_descriptors(struct si_context *sctx,
136 struct si_descriptors *desc)
137 {
138 unsigned slot_size = desc->element_dw_size * 4;
139 unsigned first_slot_offset = desc->first_active_slot * slot_size;
140 unsigned upload_size = desc->num_active_slots * slot_size;
141
142 /* Skip the upload if no shader is using the descriptors. dirty_mask
143 * will stay dirty and the descriptors will be uploaded when there is
144 * a shader using them.
145 */
146 if (!upload_size)
147 return true;
148
149 /* If there is just one active descriptor, bind it directly. */
150 if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
151 desc->num_active_slots == 1) {
152 uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
153 desc->element_dw_size];
154
155 /* The buffer is already in the buffer list. */
156 r600_resource_reference(&desc->buffer, NULL);
157 desc->gpu_list = NULL;
158 desc->gpu_address = si_desc_extract_buffer_address(descriptor);
159 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
160 return true;
161 }
162
163 uint32_t *ptr;
164 unsigned buffer_offset;
165 u_upload_alloc(sctx->b.b.const_uploader, first_slot_offset, upload_size,
166 si_optimal_tcc_alignment(sctx, upload_size),
167 &buffer_offset, (struct pipe_resource**)&desc->buffer,
168 (void**)&ptr);
169 if (!desc->buffer) {
170 desc->gpu_address = 0;
171 return false; /* skip the draw call */
172 }
173
174 util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
175 upload_size);
176 desc->gpu_list = ptr - first_slot_offset / 4;
177
178 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
179 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
180
181 /* The shader pointer should point to slot 0. */
182 buffer_offset -= first_slot_offset;
183 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
184
185 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
186 return true;
187 }
188
189 static void
190 si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
191 {
192 if (!desc->buffer)
193 return;
194
195 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
196 RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
197 }
198
199 /* SAMPLER VIEWS */
200
201 static inline enum radeon_bo_priority
202 si_get_sampler_view_priority(struct r600_resource *res)
203 {
204 if (res->b.b.target == PIPE_BUFFER)
205 return RADEON_PRIO_SAMPLER_BUFFER;
206
207 if (res->b.b.nr_samples > 1)
208 return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
209
210 return RADEON_PRIO_SAMPLER_TEXTURE;
211 }
212
213 static unsigned
214 si_sampler_and_image_descriptors_idx(unsigned shader)
215 {
216 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
217 SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
218 }
219
220 static struct si_descriptors *
221 si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
222 {
223 return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
224 }
225
226 static void si_release_sampler_views(struct si_samplers *samplers)
227 {
228 int i;
229
230 for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
231 pipe_sampler_view_reference(&samplers->views[i], NULL);
232 }
233 }
234
235 static void si_sampler_view_add_buffer(struct si_context *sctx,
236 struct pipe_resource *resource,
237 enum radeon_bo_usage usage,
238 bool is_stencil_sampler,
239 bool check_mem)
240 {
241 struct r600_resource *rres;
242 struct r600_texture *rtex;
243 enum radeon_bo_priority priority;
244
245 if (!resource)
246 return;
247
248 if (resource->target != PIPE_BUFFER) {
249 struct r600_texture *tex = (struct r600_texture*)resource;
250
251 if (tex->is_depth && !si_can_sample_zs(tex, is_stencil_sampler))
252 resource = &tex->flushed_depth_texture->resource.b.b;
253 }
254
255 rres = (struct r600_resource*)resource;
256 priority = si_get_sampler_view_priority(rres);
257
258 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
259 rres, usage, priority,
260 check_mem);
261
262 if (resource->target == PIPE_BUFFER)
263 return;
264
265 /* Now add separate DCC or HTILE. */
266 rtex = (struct r600_texture*)resource;
267 if (rtex->dcc_separate_buffer) {
268 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
269 rtex->dcc_separate_buffer, usage,
270 RADEON_PRIO_DCC, check_mem);
271 }
272 }
273
274 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
275 struct si_samplers *samplers)
276 {
277 unsigned mask = samplers->enabled_mask;
278
279 /* Add buffers to the CS. */
280 while (mask) {
281 int i = u_bit_scan(&mask);
282 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
283
284 si_sampler_view_add_buffer(sctx, sview->base.texture,
285 RADEON_USAGE_READ,
286 sview->is_stencil_sampler, false);
287 }
288 }
289
290 /* Set buffer descriptor fields that can be changed by reallocations. */
291 static void si_set_buf_desc_address(struct r600_resource *buf,
292 uint64_t offset, uint32_t *state)
293 {
294 uint64_t va = buf->gpu_address + offset;
295
296 state[0] = va;
297 state[1] &= C_008F04_BASE_ADDRESS_HI;
298 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
299 }
300
301 /* Set texture descriptor fields that can be changed by reallocations.
302 *
303 * \param tex texture
304 * \param base_level_info information of the level of BASE_ADDRESS
305 * \param base_level the level of BASE_ADDRESS
306 * \param first_level pipe_sampler_view.u.tex.first_level
307 * \param block_width util_format_get_blockwidth()
308 * \param is_stencil select between separate Z & Stencil
309 * \param state descriptor to update
310 */
311 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
312 struct r600_texture *tex,
313 const struct legacy_surf_level *base_level_info,
314 unsigned base_level, unsigned first_level,
315 unsigned block_width, bool is_stencil,
316 uint32_t *state)
317 {
318 uint64_t va, meta_va = 0;
319
320 if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) {
321 tex = tex->flushed_depth_texture;
322 is_stencil = false;
323 }
324
325 va = tex->resource.gpu_address;
326
327 if (sscreen->info.chip_class >= GFX9) {
328 /* Only stencil_offset needs to be added here. */
329 if (is_stencil)
330 va += tex->surface.u.gfx9.stencil_offset;
331 else
332 va += tex->surface.u.gfx9.surf_offset;
333 } else {
334 va += base_level_info->offset;
335 }
336
337 state[0] = va >> 8;
338 state[1] &= C_008F14_BASE_ADDRESS_HI;
339 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
340
341 /* Only macrotiled modes can set tile swizzle.
342 * GFX9 doesn't use (legacy) base_level_info.
343 */
344 if (sscreen->info.chip_class >= GFX9 ||
345 base_level_info->mode == RADEON_SURF_MODE_2D)
346 state[0] |= tex->surface.tile_swizzle;
347
348 if (sscreen->info.chip_class >= VI) {
349 state[6] &= C_008F28_COMPRESSION_EN;
350 state[7] = 0;
351
352 if (vi_dcc_enabled(tex, first_level)) {
353 meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
354 tex->dcc_offset;
355
356 if (sscreen->info.chip_class == VI) {
357 meta_va += base_level_info->dcc_offset;
358 assert(base_level_info->mode == RADEON_SURF_MODE_2D);
359 }
360
361 meta_va |= (uint32_t)tex->surface.tile_swizzle << 8;
362 } else if (vi_tc_compat_htile_enabled(tex, first_level)) {
363 meta_va = tex->resource.gpu_address + tex->htile_offset;
364 }
365
366 if (meta_va) {
367 state[6] |= S_008F28_COMPRESSION_EN(1);
368 state[7] = meta_va >> 8;
369 }
370 }
371
372 if (sscreen->info.chip_class >= GFX9) {
373 state[3] &= C_008F1C_SW_MODE;
374 state[4] &= C_008F20_PITCH_GFX9;
375
376 if (is_stencil) {
377 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
378 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.stencil.epitch);
379 } else {
380 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
381 state[4] |= S_008F20_PITCH_GFX9(tex->surface.u.gfx9.surf.epitch);
382 }
383
384 state[5] &= C_008F24_META_DATA_ADDRESS &
385 C_008F24_META_PIPE_ALIGNED &
386 C_008F24_META_RB_ALIGNED;
387 if (meta_va) {
388 struct gfx9_surf_meta_flags meta;
389
390 if (tex->dcc_offset)
391 meta = tex->surface.u.gfx9.dcc;
392 else
393 meta = tex->surface.u.gfx9.htile;
394
395 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
396 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
397 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
398 }
399 } else {
400 /* SI-CI-VI */
401 unsigned pitch = base_level_info->nblk_x * block_width;
402 unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
403
404 state[3] &= C_008F1C_TILING_INDEX;
405 state[3] |= S_008F1C_TILING_INDEX(index);
406 state[4] &= C_008F20_PITCH_GFX6;
407 state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
408 }
409 }
410
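/* Pick which precomputed sampler state words to write: integer textures and
 * "upgraded depth" textures (tex->upgraded_depth) use dedicated variants of
 * the sampler state.
 */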
411 static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
412 struct si_sampler_view *sview,
413 struct r600_texture *tex,
414 uint32_t *desc)
415 {
416 if (sview && sview->is_integer)
417 memcpy(desc, sstate->integer_val, 4*4);
418 else if (tex && tex->upgraded_depth &&
419 (!sview || !sview->is_stencil_sampler))
420 memcpy(desc, sstate->upgraded_depth_val, 4*4);
421 else
422 memcpy(desc, sstate->val, 4*4);
423 }
424
425 static void si_set_sampler_view_desc(struct si_context *sctx,
426 struct si_sampler_view *sview,
427 struct si_sampler_state *sstate,
428 uint32_t *desc)
429 {
430 struct pipe_sampler_view *view = &sview->base;
431 struct r600_texture *rtex = (struct r600_texture *)view->texture;
432 bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
433
434 if (unlikely(!is_buffer && sview->dcc_incompatible)) {
435 if (vi_dcc_enabled(rtex, view->u.tex.first_level))
436 if (!si_texture_disable_dcc(&sctx->b, rtex))
437 sctx->b.decompress_dcc(&sctx->b.b, rtex);
438
439 sview->dcc_incompatible = false;
440 }
441
442 assert(rtex); /* views with texture == NULL aren't supported */
443 memcpy(desc, sview->state, 8*4);
444
445 if (is_buffer) {
446 si_set_buf_desc_address(&rtex->resource,
447 sview->base.u.buf.offset,
448 desc + 4);
449 } else {
450 bool is_separate_stencil = rtex->db_compatible &&
451 sview->is_stencil_sampler;
452
453 si_set_mutable_tex_desc_fields(sctx->screen, rtex,
454 sview->base_level_info,
455 sview->base_level,
456 sview->base.u.tex.first_level,
457 sview->block_width,
458 is_separate_stencil,
459 desc);
460 }
461
462 if (!is_buffer && rtex->fmask.size) {
463 memcpy(desc + 8, sview->fmask_state, 8*4);
464 } else {
465 /* Disable FMASK and bind sampler state in [12:15]. */
466 memcpy(desc + 8, null_texture_descriptor, 4*4);
467
468 if (sstate)
469 si_set_sampler_state_desc(sstate, sview,
470 is_buffer ? NULL : rtex,
471 desc + 12);
472 }
473 }
474
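/* A color texture needs decompression before shader reads if it has FMASK,
 * or if it has dirty levels while being compressed with CMASK or DCC.
 */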
475 static bool color_needs_decompression(struct r600_texture *rtex)
476 {
477 return rtex->fmask.size ||
478 (rtex->dirty_level_mask &&
479 (rtex->cmask.size || rtex->dcc_offset));
480 }
481
482 static bool depth_needs_decompression(struct r600_texture *rtex)
483 {
484 /* If the depth/stencil texture is TC-compatible, no decompression
485 * will be done. The decompression function will only flush DB caches
486 * to make it coherent with shaders. That's necessary because the driver
487 * doesn't flush DB caches in any other case.
488 */
489 return rtex->db_compatible;
490 }
491
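/* Bind one sampler view: write its image/FMASK/sampler descriptor into the
 * 16-dword slot, update the depth/color decompression masks, and add the
 * backing buffer to the CS.
 */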
492 static void si_set_sampler_view(struct si_context *sctx,
493 unsigned shader,
494 unsigned slot, struct pipe_sampler_view *view,
495 bool disallow_early_out)
496 {
497 struct si_samplers *samplers = &sctx->samplers[shader];
498 struct si_sampler_view *rview = (struct si_sampler_view*)view;
499 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
500 unsigned desc_slot = si_get_sampler_slot(slot);
501 uint32_t *desc = descs->list + desc_slot * 16;
502
503 if (samplers->views[slot] == view && !disallow_early_out)
504 return;
505
506 if (view) {
507 struct r600_texture *rtex = (struct r600_texture *)view->texture;
508
509 si_set_sampler_view_desc(sctx, rview,
510 samplers->sampler_states[slot], desc);
511
512 if (rtex->resource.b.b.target == PIPE_BUFFER) {
513 rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
514 samplers->needs_depth_decompress_mask &= ~(1u << slot);
515 samplers->needs_color_decompress_mask &= ~(1u << slot);
516 } else {
517 if (depth_needs_decompression(rtex)) {
518 samplers->needs_depth_decompress_mask |= 1u << slot;
519 } else {
520 samplers->needs_depth_decompress_mask &= ~(1u << slot);
521 }
522 if (color_needs_decompression(rtex)) {
523 samplers->needs_color_decompress_mask |= 1u << slot;
524 } else {
525 samplers->needs_color_decompress_mask &= ~(1u << slot);
526 }
527
528 if (rtex->dcc_offset &&
529 p_atomic_read(&rtex->framebuffers_bound))
530 sctx->need_check_render_feedback = true;
531 }
532
533 pipe_sampler_view_reference(&samplers->views[slot], view);
534 samplers->enabled_mask |= 1u << slot;
535
536 /* Since this can flush, it must be done after enabled_mask is
537 * updated. */
538 si_sampler_view_add_buffer(sctx, view->texture,
539 RADEON_USAGE_READ,
540 rview->is_stencil_sampler, true);
541 } else {
542 pipe_sampler_view_reference(&samplers->views[slot], NULL);
543 memcpy(desc, null_texture_descriptor, 8*4);
544 /* Only clear the lower dwords of FMASK. */
545 memcpy(desc + 8, null_texture_descriptor, 4*4);
546 /* Re-set the sampler state if we are transitioning from FMASK. */
547 if (samplers->sampler_states[slot])
548 si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL,
549 desc + 12);
550
551 samplers->enabled_mask &= ~(1u << slot);
552 samplers->needs_depth_decompress_mask &= ~(1u << slot);
553 samplers->needs_color_decompress_mask &= ~(1u << slot);
554 }
555
556 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
557 }
558
559 static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
560 unsigned shader)
561 {
562 struct si_samplers *samplers = &sctx->samplers[shader];
563 unsigned shader_bit = 1 << shader;
564
565 if (samplers->needs_depth_decompress_mask ||
566 samplers->needs_color_decompress_mask ||
567 sctx->images[shader].needs_color_decompress_mask)
568 sctx->shader_needs_decompress_mask |= shader_bit;
569 else
570 sctx->shader_needs_decompress_mask &= ~shader_bit;
571 }
572
573 static void si_set_sampler_views(struct pipe_context *ctx,
574 enum pipe_shader_type shader, unsigned start,
575 unsigned count,
576 struct pipe_sampler_view **views)
577 {
578 struct si_context *sctx = (struct si_context *)ctx;
579 int i;
580
581 if (!count || shader >= SI_NUM_SHADERS)
582 return;
583
584 if (views) {
585 for (i = 0; i < count; i++)
586 si_set_sampler_view(sctx, shader, start + i, views[i], false);
587 } else {
588 for (i = 0; i < count; i++)
589 si_set_sampler_view(sctx, shader, start + i, NULL, false);
590 }
591
592 si_update_shader_needs_decompress_mask(sctx, shader);
593 }
594
595 static void
596 si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
597 {
598 unsigned mask = samplers->enabled_mask;
599
600 while (mask) {
601 int i = u_bit_scan(&mask);
602 struct pipe_resource *res = samplers->views[i]->texture;
603
604 if (res && res->target != PIPE_BUFFER) {
605 struct r600_texture *rtex = (struct r600_texture *)res;
606
607 if (color_needs_decompression(rtex)) {
608 samplers->needs_color_decompress_mask |= 1u << i;
609 } else {
610 samplers->needs_color_decompress_mask &= ~(1u << i);
611 }
612 }
613 }
614 }
615
616 /* IMAGE VIEWS */
617
618 static void
619 si_release_image_views(struct si_images *images)
620 {
621 unsigned i;
622
623 for (i = 0; i < SI_NUM_IMAGES; ++i) {
624 struct pipe_image_view *view = &images->views[i];
625
626 pipe_resource_reference(&view->resource, NULL);
627 }
628 }
629
630 static void
631 si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
632 {
633 uint mask = images->enabled_mask;
634
635 /* Add buffers to the CS. */
636 while (mask) {
637 int i = u_bit_scan(&mask);
638 struct pipe_image_view *view = &images->views[i];
639
640 assert(view->resource);
641
642 si_sampler_view_add_buffer(sctx, view->resource,
643 RADEON_USAGE_READWRITE, false, false);
644 }
645 }
646
647 static void
648 si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
649 {
650 struct si_images *images = &ctx->images[shader];
651
652 if (images->enabled_mask & (1u << slot)) {
653 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
654 unsigned desc_slot = si_get_image_slot(slot);
655
656 pipe_resource_reference(&images->views[slot].resource, NULL);
657 images->needs_color_decompress_mask &= ~(1 << slot);
658
659 memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
660 images->enabled_mask &= ~(1u << slot);
661 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
662 }
663 }
664
665 static void
666 si_mark_image_range_valid(const struct pipe_image_view *view)
667 {
668 struct r600_resource *res = (struct r600_resource *)view->resource;
669
670 assert(res && res->b.b.target == PIPE_BUFFER);
671
672 util_range_add(&res->valid_buffer_range,
673 view->u.buf.offset,
674 view->u.buf.offset + view->u.buf.size);
675 }
676
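/* Fill the 8-dword descriptor for a shader image view. Buffer images get a
 * typed buffer descriptor; texture images get a texture descriptor, with DCC
 * disabled or decompressed first when the view is incompatible with it.
 */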
677 static void si_set_shader_image_desc(struct si_context *ctx,
678 const struct pipe_image_view *view,
679 bool skip_decompress,
680 uint32_t *desc)
681 {
682 struct si_screen *screen = ctx->screen;
683 struct r600_resource *res;
684
685 res = (struct r600_resource *)view->resource;
686
687 if (res->b.b.target == PIPE_BUFFER) {
688 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
689 si_mark_image_range_valid(view);
690
691 si_make_buffer_descriptor(screen, res,
692 view->format,
693 view->u.buf.offset,
694 view->u.buf.size, desc);
695 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
696 } else {
697 static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
698 struct r600_texture *tex = (struct r600_texture *)res;
699 unsigned level = view->u.tex.level;
700 unsigned width, height, depth, hw_level;
701 bool uses_dcc = vi_dcc_enabled(tex, level);
702 unsigned access = view->access;
703
704 /* Clear the write flag when writes can't occur.
705 * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
706 * so we don't wanna trigger it.
707 */
708 if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) {
709 assert(!"Z/S and MSAA image stores are not supported");
710 access &= ~PIPE_IMAGE_ACCESS_WRITE;
711 }
712
713 assert(!tex->is_depth);
714 assert(tex->fmask.size == 0);
715
716 if (uses_dcc && !skip_decompress &&
717 (view->access & PIPE_IMAGE_ACCESS_WRITE ||
718 !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
719 /* If DCC can't be disabled, at least decompress it.
720 * The decompression is relatively cheap if the surface
721 * has been decompressed already.
722 */
723 if (!si_texture_disable_dcc(&ctx->b, tex))
724 ctx->b.decompress_dcc(&ctx->b.b, tex);
725 }
726
727 if (ctx->b.chip_class >= GFX9) {
728 /* Always set the base address. The swizzle modes don't
729 * allow setting mipmap level offsets as the base.
730 */
731 width = res->b.b.width0;
732 height = res->b.b.height0;
733 depth = res->b.b.depth0;
734 hw_level = level;
735 } else {
736 /* Always force the base level to the selected level.
737 *
738 * This is required for 3D textures, where otherwise
739 * selecting a single slice for non-layered bindings
740 * fails. It doesn't hurt the other targets.
741 */
742 width = u_minify(res->b.b.width0, level);
743 height = u_minify(res->b.b.height0, level);
744 depth = u_minify(res->b.b.depth0, level);
745 hw_level = 0;
746 }
747
748 si_make_texture_descriptor(screen, tex,
749 false, res->b.b.target,
750 view->format, swizzle,
751 hw_level, hw_level,
752 view->u.tex.first_layer,
753 view->u.tex.last_layer,
754 width, height, depth,
755 desc, NULL);
756 si_set_mutable_tex_desc_fields(screen, tex,
757 &tex->surface.u.legacy.level[level],
758 level, level,
759 util_format_get_blockwidth(view->format),
760 false, desc);
761 }
762 }
763
764 static void si_set_shader_image(struct si_context *ctx,
765 unsigned shader,
766 unsigned slot, const struct pipe_image_view *view,
767 bool skip_decompress)
768 {
769 struct si_images *images = &ctx->images[shader];
770 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
771 struct r600_resource *res;
772 unsigned desc_slot = si_get_image_slot(slot);
773 uint32_t *desc = descs->list + desc_slot * 8;
774
775 if (!view || !view->resource) {
776 si_disable_shader_image(ctx, shader, slot);
777 return;
778 }
779
780 res = (struct r600_resource *)view->resource;
781
782 if (&images->views[slot] != view)
783 util_copy_image_view(&images->views[slot], view);
784
785 si_set_shader_image_desc(ctx, view, skip_decompress, desc);
786
787 if (res->b.b.target == PIPE_BUFFER) {
788 images->needs_color_decompress_mask &= ~(1 << slot);
789 res->bind_history |= PIPE_BIND_SHADER_IMAGE;
790 } else {
791 struct r600_texture *tex = (struct r600_texture *)res;
792 unsigned level = view->u.tex.level;
793
794 if (color_needs_decompression(tex)) {
795 images->needs_color_decompress_mask |= 1 << slot;
796 } else {
797 images->needs_color_decompress_mask &= ~(1 << slot);
798 }
799
800 if (vi_dcc_enabled(tex, level) &&
801 p_atomic_read(&tex->framebuffers_bound))
802 ctx->need_check_render_feedback = true;
803 }
804
805 images->enabled_mask |= 1u << slot;
806 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
807
808 /* Since this can flush, it must be done after enabled_mask is updated. */
809 si_sampler_view_add_buffer(ctx, &res->b.b,
810 (view->access & PIPE_IMAGE_ACCESS_WRITE) ?
811 RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
812 false, true);
813 }
814
815 static void
816 si_set_shader_images(struct pipe_context *pipe,
817 enum pipe_shader_type shader,
818 unsigned start_slot, unsigned count,
819 const struct pipe_image_view *views)
820 {
821 struct si_context *ctx = (struct si_context *)pipe;
822 unsigned i, slot;
823
824 assert(shader < SI_NUM_SHADERS);
825
826 if (!count)
827 return;
828
829 assert(start_slot + count <= SI_NUM_IMAGES);
830
831 if (views) {
832 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
833 si_set_shader_image(ctx, shader, slot, &views[i], false);
834 } else {
835 for (i = 0, slot = start_slot; i < count; ++i, ++slot)
836 si_set_shader_image(ctx, shader, slot, NULL, false);
837 }
838
839 si_update_shader_needs_decompress_mask(ctx, shader);
840 }
841
842 static void
843 si_images_update_needs_color_decompress_mask(struct si_images *images)
844 {
845 unsigned mask = images->enabled_mask;
846
847 while (mask) {
848 int i = u_bit_scan(&mask);
849 struct pipe_resource *res = images->views[i].resource;
850
851 if (res && res->target != PIPE_BUFFER) {
852 struct r600_texture *rtex = (struct r600_texture *)res;
853
854 if (color_needs_decompression(rtex)) {
855 images->needs_color_decompress_mask |= 1 << i;
856 } else {
857 images->needs_color_decompress_mask &= ~(1 << i);
858 }
859 }
860 }
861 }
862
863 /* SAMPLER STATES */
864
865 static void si_bind_sampler_states(struct pipe_context *ctx,
866 enum pipe_shader_type shader,
867 unsigned start, unsigned count, void **states)
868 {
869 struct si_context *sctx = (struct si_context *)ctx;
870 struct si_samplers *samplers = &sctx->samplers[shader];
871 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
872 struct si_sampler_state **sstates = (struct si_sampler_state**)states;
873 int i;
874
875 if (!count || shader >= SI_NUM_SHADERS)
876 return;
877
878 for (i = 0; i < count; i++) {
879 unsigned slot = start + i;
880 unsigned desc_slot = si_get_sampler_slot(slot);
881
882 if (!sstates[i] ||
883 sstates[i] == samplers->sampler_states[slot])
884 continue;
885
886 #ifdef DEBUG
887 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
888 #endif
889 samplers->sampler_states[slot] = sstates[i];
890
891 /* If FMASK is bound, don't overwrite it.
892 * The sampler state will be set after FMASK is unbound.
893 */
894 struct si_sampler_view *sview =
895 (struct si_sampler_view *)samplers->views[slot];
896
897 struct r600_texture *tex = NULL;
898
899 if (sview && sview->base.texture &&
900 sview->base.texture->target != PIPE_BUFFER)
901 tex = (struct r600_texture *)sview->base.texture;
902
903 if (tex && tex->fmask.size)
904 continue;
905
906 si_set_sampler_state_desc(sstates[i], sview, tex,
907 desc->list + desc_slot * 16 + 12);
908
909 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
910 }
911 }
912
913 /* BUFFER RESOURCES */
914
915 static void si_init_buffer_resources(struct si_buffer_resources *buffers,
916 struct si_descriptors *descs,
917 unsigned num_buffers,
918 unsigned shader_userdata_index,
919 enum radeon_bo_usage shader_usage,
920 enum radeon_bo_usage shader_usage_constbuf,
921 enum radeon_bo_priority priority,
922 enum radeon_bo_priority priority_constbuf)
923 {
924 buffers->shader_usage = shader_usage;
925 buffers->shader_usage_constbuf = shader_usage_constbuf;
926 buffers->priority = priority;
927 buffers->priority_constbuf = priority_constbuf;
928 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
929
930 si_init_descriptors(descs, shader_userdata_index, 4, num_buffers);
931 }
932
933 static void si_release_buffer_resources(struct si_buffer_resources *buffers,
934 struct si_descriptors *descs)
935 {
936 int i;
937
938 for (i = 0; i < descs->num_elements; i++) {
939 pipe_resource_reference(&buffers->buffers[i], NULL);
940 }
941
942 FREE(buffers->buffers);
943 }
944
945 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
946 struct si_buffer_resources *buffers)
947 {
948 unsigned mask = buffers->enabled_mask;
949
950 /* Add buffers to the CS. */
951 while (mask) {
952 int i = u_bit_scan(&mask);
953
954 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
955 r600_resource(buffers->buffers[i]),
956 i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
957 buffers->shader_usage_constbuf,
958 i < SI_NUM_SHADER_BUFFERS ? buffers->priority :
959 buffers->priority_constbuf);
960 }
961 }
962
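/* Recover the bound resource, offset, and size for a buffer slot by decoding
 * its descriptor: the base address is in dwords 0-1 and the size in dword 2.
 */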
963 static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
964 struct si_descriptors *descs,
965 unsigned idx, struct pipe_resource **buf,
966 unsigned *offset, unsigned *size)
967 {
968 pipe_resource_reference(buf, buffers->buffers[idx]);
969 if (*buf) {
970 struct r600_resource *res = r600_resource(*buf);
971 const uint32_t *desc = descs->list + idx * 4;
972 uint64_t va;
973
974 *size = desc[2];
975
976 assert(G_008F04_STRIDE(desc[1]) == 0);
977 va = ((uint64_t)desc[1] << 32) | desc[0];
978
979 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
980 *offset = va - res->gpu_address;
981 }
982 }
983
984 /* VERTEX BUFFERS */
985
986 static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
987 {
988 struct si_descriptors *desc = &sctx->vertex_buffers;
989 int count = sctx->vertex_elements ? sctx->vertex_elements->count : 0;
990 int i;
991
992 for (i = 0; i < count; i++) {
993 int vb = sctx->vertex_elements->vertex_buffer_index[i];
994
995 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
996 continue;
997 if (!sctx->vertex_buffer[vb].buffer.resource)
998 continue;
999
1000 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1001 (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
1002 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1003 }
1004
1005 if (!desc->buffer)
1006 return;
1007 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1008 desc->buffer, RADEON_USAGE_READ,
1009 RADEON_PRIO_DESCRIPTORS);
1010 }
1011
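/* Rebuild and upload the vertex buffer descriptor list for the currently
 * bound vertex elements. Each descriptor is 4 dwords: base address and
 * stride, the number of records, and the format word taken from the vertex
 * element state.
 */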
1012 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
1013 {
1014 struct si_vertex_elements *velems = sctx->vertex_elements;
1015 struct si_descriptors *desc = &sctx->vertex_buffers;
1016 unsigned i, count;
1017 unsigned desc_list_byte_size;
1018 unsigned first_vb_use_mask;
1019 uint32_t *ptr;
1020
1021 if (!sctx->vertex_buffers_dirty || !velems)
1022 return true;
1023
1024 count = velems->count;
1025
1026 if (!count)
1027 return true;
1028
1029 desc_list_byte_size = velems->desc_list_byte_size;
1030 first_vb_use_mask = velems->first_vb_use_mask;
1031
1032 /* Vertex buffer descriptors are the only ones which are uploaded
1033 * directly through a staging buffer and don't go through
1034 * the fine-grained upload path.
1035 */
1036 unsigned buffer_offset = 0;
1037 u_upload_alloc(sctx->b.b.const_uploader, 0,
1038 desc_list_byte_size,
1039 si_optimal_tcc_alignment(sctx, desc_list_byte_size),
1040 &buffer_offset,
1041 (struct pipe_resource**)&desc->buffer, (void**)&ptr);
1042 if (!desc->buffer) {
1043 desc->gpu_address = 0;
1044 return false;
1045 }
1046
1047 desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
1048 desc->list = ptr;
1049 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1050 desc->buffer, RADEON_USAGE_READ,
1051 RADEON_PRIO_DESCRIPTORS);
1052
1053 assert(count <= SI_MAX_ATTRIBS);
1054
1055 for (i = 0; i < count; i++) {
1056 struct pipe_vertex_buffer *vb;
1057 struct r600_resource *rbuffer;
1058 unsigned vbo_index = velems->vertex_buffer_index[i];
1059 uint32_t *desc = &ptr[i*4];
1060
1061 vb = &sctx->vertex_buffer[vbo_index];
1062 rbuffer = (struct r600_resource*)vb->buffer.resource;
1063 if (!rbuffer) {
1064 memset(desc, 0, 16);
1065 continue;
1066 }
1067
1068 int64_t offset = (int64_t)((int)vb->buffer_offset) +
1069 velems->src_offset[i];
1070 uint64_t va = rbuffer->gpu_address + offset;
1071
1072 int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset;
1073 if (sctx->b.chip_class != VI && vb->stride) {
1074 /* Round up by rounding down and adding 1 */
1075 num_records = (num_records - velems->format_size[i]) /
1076 vb->stride + 1;
1077 }
1078 assert(num_records >= 0 && num_records <= UINT_MAX);
1079
1080 desc[0] = va;
1081 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1082 S_008F04_STRIDE(vb->stride);
1083 desc[2] = num_records;
1084 desc[3] = velems->rsrc_word3[i];
1085
1086 if (first_vb_use_mask & (1 << i)) {
1087 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1088 (struct r600_resource*)vb->buffer.resource,
1089 RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1090 }
1091 }
1092
1093 /* Don't flush the const cache. It would have a very negative effect
1094 * on performance (confirmed by testing). New descriptors are always
1095 * uploaded to a fresh new buffer, so I don't think flushing the const
1096 * cache is needed. */
1097 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1098 sctx->vertex_buffers_dirty = false;
1099 sctx->vertex_buffer_pointer_dirty = true;
1100 sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
1101 return true;
1102 }
1103
1104
1105 /* CONSTANT BUFFERS */
1106
1107 static unsigned
1108 si_const_and_shader_buffer_descriptors_idx(unsigned shader)
1109 {
1110 return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
1111 SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS;
1112 }
1113
1114 static struct si_descriptors *
1115 si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader)
1116 {
1117 return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
1118 }
1119
1120 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
1121 const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1122 {
1123 void *tmp;
1124
1125 u_upload_alloc(sctx->b.b.const_uploader, 0, size,
1126 si_optimal_tcc_alignment(sctx, size),
1127 const_offset,
1128 (struct pipe_resource**)rbuffer, &tmp);
1129 if (*rbuffer)
1130 util_memcpy_cpu_to_le32(tmp, ptr, size);
1131 }
1132
1133 static void si_set_constant_buffer(struct si_context *sctx,
1134 struct si_buffer_resources *buffers,
1135 unsigned descriptors_idx,
1136 uint slot, const struct pipe_constant_buffer *input)
1137 {
1138 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1139 assert(slot < descs->num_elements);
1140 pipe_resource_reference(&buffers->buffers[slot], NULL);
1141
1142 /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1143 * with a NULL buffer). We need to use a dummy buffer instead. */
1144 if (sctx->b.chip_class == CIK &&
1145 (!input || (!input->buffer && !input->user_buffer)))
1146 input = &sctx->null_const_buf;
1147
1148 if (input && (input->buffer || input->user_buffer)) {
1149 struct pipe_resource *buffer = NULL;
1150 uint64_t va;
1151
1152 /* Upload the user buffer if needed. */
1153 if (input->user_buffer) {
1154 unsigned buffer_offset;
1155
1156 si_upload_const_buffer(sctx,
1157 (struct r600_resource**)&buffer, input->user_buffer,
1158 input->buffer_size, &buffer_offset);
1159 if (!buffer) {
1160 /* Just unbind on failure. */
1161 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
1162 return;
1163 }
1164 va = r600_resource(buffer)->gpu_address + buffer_offset;
1165 } else {
1166 pipe_resource_reference(&buffer, input->buffer);
1167 va = r600_resource(buffer)->gpu_address + input->buffer_offset;
1168 /* Only track usage for non-user buffers. */
1169 r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
1170 }
1171
1172 /* Set the descriptor. */
1173 uint32_t *desc = descs->list + slot*4;
1174 desc[0] = va;
1175 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1176 S_008F04_STRIDE(0);
1177 desc[2] = input->buffer_size;
1178 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1179 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1180 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1181 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1182 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1183 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1184
1185 buffers->buffers[slot] = buffer;
1186 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1187 (struct r600_resource*)buffer,
1188 buffers->shader_usage_constbuf,
1189 buffers->priority_constbuf, true);
1190 buffers->enabled_mask |= 1u << slot;
1191 } else {
1192 /* Clear the descriptor. */
1193 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1194 buffers->enabled_mask &= ~(1u << slot);
1195 }
1196
1197 sctx->descriptors_dirty |= 1u << descriptors_idx;
1198 }
1199
1200 void si_set_rw_buffer(struct si_context *sctx,
1201 uint slot, const struct pipe_constant_buffer *input)
1202 {
1203 si_set_constant_buffer(sctx, &sctx->rw_buffers,
1204 SI_DESCS_RW_BUFFERS, slot, input);
1205 }
1206
1207 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
1208 enum pipe_shader_type shader, uint slot,
1209 const struct pipe_constant_buffer *input)
1210 {
1211 struct si_context *sctx = (struct si_context *)ctx;
1212
1213 if (shader >= SI_NUM_SHADERS)
1214 return;
1215
1216 slot = si_get_constbuf_slot(slot);
1217 si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
1218 si_const_and_shader_buffer_descriptors_idx(shader),
1219 slot, input);
1220 }
1221
1222 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
1223 uint slot, struct pipe_constant_buffer *cbuf)
1224 {
1225 cbuf->user_buffer = NULL;
1226 si_get_buffer_from_descriptors(
1227 &sctx->const_and_shader_buffers[shader],
1228 si_const_and_shader_buffer_descriptors(sctx, shader),
1229 si_get_constbuf_slot(slot),
1230 &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1231 }
1232
1233 /* SHADER BUFFERS */
1234
1235 static void si_set_shader_buffers(struct pipe_context *ctx,
1236 enum pipe_shader_type shader,
1237 unsigned start_slot, unsigned count,
1238 const struct pipe_shader_buffer *sbuffers)
1239 {
1240 struct si_context *sctx = (struct si_context *)ctx;
1241 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1242 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1243 unsigned i;
1244
1245 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1246
1247 for (i = 0; i < count; ++i) {
1248 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1249 struct r600_resource *buf;
1250 unsigned slot = si_get_shaderbuf_slot(start_slot + i);
1251 uint32_t *desc = descs->list + slot * 4;
1252 uint64_t va;
1253
1254 if (!sbuffer || !sbuffer->buffer) {
1255 pipe_resource_reference(&buffers->buffers[slot], NULL);
1256 memset(desc, 0, sizeof(uint32_t) * 4);
1257 buffers->enabled_mask &= ~(1u << slot);
1258 sctx->descriptors_dirty |=
1259 1u << si_const_and_shader_buffer_descriptors_idx(shader);
1260 continue;
1261 }
1262
1263 buf = (struct r600_resource *)sbuffer->buffer;
1264 va = buf->gpu_address + sbuffer->buffer_offset;
1265
1266 desc[0] = va;
1267 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1268 S_008F04_STRIDE(0);
1269 desc[2] = sbuffer->buffer_size;
1270 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1271 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1272 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1273 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1274 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1275 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1276
1277 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1278 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, buf,
1279 buffers->shader_usage,
1280 buffers->priority, true);
1281 buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
1282
1283 buffers->enabled_mask |= 1u << slot;
1284 sctx->descriptors_dirty |=
1285 1u << si_const_and_shader_buffer_descriptors_idx(shader);
1286
1287 util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
1288 sbuffer->buffer_offset + sbuffer->buffer_size);
1289 }
1290 }
1291
1292 void si_get_shader_buffers(struct si_context *sctx,
1293 enum pipe_shader_type shader,
1294 uint start_slot, uint count,
1295 struct pipe_shader_buffer *sbuf)
1296 {
1297 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1298 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1299
1300 for (unsigned i = 0; i < count; ++i) {
1301 si_get_buffer_from_descriptors(
1302 buffers, descs,
1303 si_get_shaderbuf_slot(start_slot + i),
1304 &sbuf[i].buffer, &sbuf[i].buffer_offset,
1305 &sbuf[i].buffer_size);
1306 }
1307 }
1308
1309 /* RING BUFFERS */
1310
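/* Bind an internal ring buffer at an RW-buffer slot. element_size and
 * index_stride are converted to their 2-bit hardware encodings below, and on
 * VI and later the NUM_RECORDS field is programmed as num_records * stride.
 */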
1311 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
1312 struct pipe_resource *buffer,
1313 unsigned stride, unsigned num_records,
1314 bool add_tid, bool swizzle,
1315 unsigned element_size, unsigned index_stride, uint64_t offset)
1316 {
1317 struct si_context *sctx = (struct si_context *)ctx;
1318 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1319 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1320
1321 /* The stride field in the resource descriptor has 14 bits */
1322 assert(stride < (1 << 14));
1323
1324 assert(slot < descs->num_elements);
1325 pipe_resource_reference(&buffers->buffers[slot], NULL);
1326
1327 if (buffer) {
1328 uint64_t va;
1329
1330 va = r600_resource(buffer)->gpu_address + offset;
1331
1332 switch (element_size) {
1333 default:
1334 assert(!"Unsupported ring buffer element size");
1335 case 0:
1336 case 2:
1337 element_size = 0;
1338 break;
1339 case 4:
1340 element_size = 1;
1341 break;
1342 case 8:
1343 element_size = 2;
1344 break;
1345 case 16:
1346 element_size = 3;
1347 break;
1348 }
1349
1350 switch (index_stride) {
1351 default:
1352 assert(!"Unsupported ring buffer index stride");
1353 case 0:
1354 case 8:
1355 index_stride = 0;
1356 break;
1357 case 16:
1358 index_stride = 1;
1359 break;
1360 case 32:
1361 index_stride = 2;
1362 break;
1363 case 64:
1364 index_stride = 3;
1365 break;
1366 }
1367
1368 if (sctx->b.chip_class >= VI && stride)
1369 num_records *= stride;
1370
1371 /* Set the descriptor. */
1372 uint32_t *desc = descs->list + slot*4;
1373 desc[0] = va;
1374 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
1375 S_008F04_STRIDE(stride) |
1376 S_008F04_SWIZZLE_ENABLE(swizzle);
1377 desc[2] = num_records;
1378 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1379 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1380 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1381 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1382 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1383 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1384 S_008F0C_INDEX_STRIDE(index_stride) |
1385 S_008F0C_ADD_TID_ENABLE(add_tid);
1386
1387 if (sctx->b.chip_class >= GFX9)
1388 assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
1389 else
1390 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
1391
1392 pipe_resource_reference(&buffers->buffers[slot], buffer);
1393 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
1394 (struct r600_resource*)buffer,
1395 buffers->shader_usage, buffers->priority);
1396 buffers->enabled_mask |= 1u << slot;
1397 } else {
1398 /* Clear the descriptor. */
1399 memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
1400 buffers->enabled_mask &= ~(1u << slot);
1401 }
1402
1403 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1404 }
1405
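/* After a buffer has been reallocated at a new virtual address, patch a
 * descriptor so that it points at the same offset within the new buffer.
 */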
1406 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
1407 uint32_t *desc, uint64_t old_buf_va,
1408 struct pipe_resource *new_buf)
1409 {
1410 /* Retrieve the buffer offset from the descriptor. */
1411 uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
1412
1413 assert(old_buf_va <= old_desc_va);
1414 uint64_t offset_within_buffer = old_desc_va - old_buf_va;
1415
1416 /* Update the descriptor. */
1417 si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer,
1418 desc);
1419 }
1420
1421 /* INTERNAL CONST BUFFERS */
1422
1423 static void si_set_polygon_stipple(struct pipe_context *ctx,
1424 const struct pipe_poly_stipple *state)
1425 {
1426 struct si_context *sctx = (struct si_context *)ctx;
1427 struct pipe_constant_buffer cb = {};
1428 unsigned stipple[32];
1429 int i;
1430
1431 for (i = 0; i < 32; i++)
1432 stipple[i] = util_bitreverse(state->stipple[i]);
1433
1434 cb.user_buffer = stipple;
1435 cb.buffer_size = sizeof(stipple);
1436
1437 si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1438 }
1439
1440 /* TEXTURE METADATA ENABLE/DISABLE */
1441
1442 static void
1443 si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
1444 {
1445 util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
1446 util_dynarray_clear(&sctx->resident_img_needs_color_decompress);
1447
1448 util_dynarray_foreach(&sctx->resident_tex_handles,
1449 struct si_texture_handle *, tex_handle) {
1450 struct pipe_resource *res = (*tex_handle)->view->texture;
1451 struct r600_texture *rtex;
1452
1453 if (!res || res->target == PIPE_BUFFER)
1454 continue;
1455
1456 rtex = (struct r600_texture *)res;
1457 if (!color_needs_decompression(rtex))
1458 continue;
1459
1460 util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
1461 struct si_texture_handle *, *tex_handle);
1462 }
1463
1464 util_dynarray_foreach(&sctx->resident_img_handles,
1465 struct si_image_handle *, img_handle) {
1466 struct pipe_image_view *view = &(*img_handle)->view;
1467 struct pipe_resource *res = view->resource;
1468 struct r600_texture *rtex;
1469
1470 if (!res || res->target == PIPE_BUFFER)
1471 continue;
1472
1473 rtex = (struct r600_texture *)res;
1474 if (!color_needs_decompression(rtex))
1475 continue;
1476
1477 util_dynarray_append(&sctx->resident_img_needs_color_decompress,
1478 struct si_image_handle *, *img_handle);
1479 }
1480 }
1481
1482 /* CMASK can be enabled (for fast clear) and disabled (for texture export)
1483 * while the texture is bound, possibly by a different context. In that case,
1484 * call this function to update needs_*_decompress_masks.
1485 */
1486 void si_update_needs_color_decompress_masks(struct si_context *sctx)
1487 {
1488 for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1489 si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
1490 si_images_update_needs_color_decompress_mask(&sctx->images[i]);
1491 si_update_shader_needs_decompress_mask(sctx, i);
1492 }
1493
1494 si_resident_handles_update_needs_color_decompress(sctx);
1495 }
1496
1497 /* BUFFER DISCARD/INVALIDATION */
1498
1499 /** Reset descriptors of buffer resources after \p buf has been invalidated. */
1500 static void si_reset_buffer_resources(struct si_context *sctx,
1501 struct si_buffer_resources *buffers,
1502 unsigned descriptors_idx,
1503 unsigned slot_mask,
1504 struct pipe_resource *buf,
1505 uint64_t old_va,
1506 enum radeon_bo_usage usage,
1507 enum radeon_bo_priority priority)
1508 {
1509 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1510 unsigned mask = buffers->enabled_mask & slot_mask;
1511
1512 while (mask) {
1513 unsigned i = u_bit_scan(&mask);
1514 if (buffers->buffers[i] == buf) {
1515 si_desc_reset_buffer_offset(&sctx->b.b,
1516 descs->list + i*4,
1517 old_va, buf);
1518 sctx->descriptors_dirty |= 1u << descriptors_idx;
1519
1520 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1521 (struct r600_resource *)buf,
1522 usage, priority, true);
1523 }
1524 }
1525 }
1526
1527 static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
1528 uint64_t old_va)
1529 {
1530 struct si_context *sctx = (struct si_context*)ctx;
1531 struct r600_resource *rbuffer = r600_resource(buf);
1532 unsigned i, shader;
1533 unsigned num_elems = sctx->vertex_elements ?
1534 sctx->vertex_elements->count : 0;
1535
1536 /* We changed the buffer, now we need to bind it where the old one
1537 * was bound. This consists of 2 things:
1538 * 1) Updating the resource descriptor and dirtying it.
1539 * 2) Adding a relocation to the CS, so that it's usable.
1540 */
1541
1542 /* Vertex buffers. */
1543 if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
1544 for (i = 0; i < num_elems; i++) {
1545 int vb = sctx->vertex_elements->vertex_buffer_index[i];
1546
1547 if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1548 continue;
1549 if (!sctx->vertex_buffer[vb].buffer.resource)
1550 continue;
1551
1552 if (sctx->vertex_buffer[vb].buffer.resource == buf) {
1553 sctx->vertex_buffers_dirty = true;
1554 break;
1555 }
1556 }
1557 }
1558
1559 /* Streamout buffers. (other internal buffers can't be invalidated) */
1560 if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
1561 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
1562 struct si_buffer_resources *buffers = &sctx->rw_buffers;
1563 struct si_descriptors *descs =
1564 &sctx->descriptors[SI_DESCS_RW_BUFFERS];
1565
1566 if (buffers->buffers[i] != buf)
1567 continue;
1568
1569 si_desc_reset_buffer_offset(ctx, descs->list + i*4,
1570 old_va, buf);
1571 sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
1572
1573 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1574 rbuffer, buffers->shader_usage,
1575 RADEON_PRIO_SHADER_RW_BUFFER,
1576 true);
1577
1578 /* Update the streamout state. */
1579 if (sctx->streamout.begin_emitted)
1580 si_emit_streamout_end(sctx);
1581 sctx->streamout.append_bitmask =
1582 sctx->streamout.enabled_mask;
1583 si_streamout_buffers_dirty(sctx);
1584 }
1585 }
1586
1587 /* Constant and shader buffers. */
1588 if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
1589 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1590 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1591 si_const_and_shader_buffer_descriptors_idx(shader),
1592 u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
1593 buf, old_va,
1594 sctx->const_and_shader_buffers[shader].shader_usage_constbuf,
1595 sctx->const_and_shader_buffers[shader].priority_constbuf);
1596 }
1597
1598 if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
1599 for (shader = 0; shader < SI_NUM_SHADERS; shader++)
1600 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
1601 si_const_and_shader_buffer_descriptors_idx(shader),
1602 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
1603 buf, old_va,
1604 sctx->const_and_shader_buffers[shader].shader_usage,
1605 sctx->const_and_shader_buffers[shader].priority);
1606 }
1607
1608 if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
1609 /* Texture buffers - update bindings. */
1610 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1611 struct si_samplers *samplers = &sctx->samplers[shader];
1612 struct si_descriptors *descs =
1613 si_sampler_and_image_descriptors(sctx, shader);
1614 unsigned mask = samplers->enabled_mask;
1615
1616 while (mask) {
1617 unsigned i = u_bit_scan(&mask);
1618 if (samplers->views[i]->texture == buf) {
1619 unsigned desc_slot = si_get_sampler_slot(i);
1620
1621 si_desc_reset_buffer_offset(ctx,
1622 descs->list +
1623 desc_slot * 16 + 4,
1624 old_va, buf);
1625 sctx->descriptors_dirty |=
1626 1u << si_sampler_and_image_descriptors_idx(shader);
1627
1628 radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
1629 rbuffer, RADEON_USAGE_READ,
1630 RADEON_PRIO_SAMPLER_BUFFER,
1631 true);
1632 }
1633 }
1634 }
1635 }
1636
1637 /* Shader images */
1638 if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
1639 for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
1640 struct si_images *images = &sctx->images[shader];
1641 struct si_descriptors *descs =
1642 si_sampler_and_image_descriptors(sctx, shader);
1643 unsigned mask = images->enabled_mask;
1644
1645 while (mask) {
1646 unsigned i = u_bit_scan(&mask);
1647
1648 if (images->views[i].resource == buf) {
1649 unsigned desc_slot = si_get_image_slot(i);
1650
1651 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
1652 si_mark_image_range_valid(&images->views[i]);
1653
1654 si_desc_reset_buffer_offset(
1655 ctx, descs->list + desc_slot * 8 + 4,
1656 old_va, buf);
1657 sctx->descriptors_dirty |=
1658 1u << si_sampler_and_image_descriptors_idx(shader);
1659
1660 radeon_add_to_buffer_list_check_mem(
1661 &sctx->b, &sctx->b.gfx, rbuffer,
1662 RADEON_USAGE_READWRITE,
1663 RADEON_PRIO_SAMPLER_BUFFER, true);
1664 }
1665 }
1666 }
1667 }
1668
1669 /* Bindless texture handles */
1670 if (rbuffer->texture_handle_allocated) {
1671 struct si_descriptors *descs = &sctx->bindless_descriptors;
1672
1673 util_dynarray_foreach(&sctx->resident_tex_handles,
1674 struct si_texture_handle *, tex_handle) {
1675 struct pipe_sampler_view *view = (*tex_handle)->view;
1676 unsigned desc_slot = (*tex_handle)->desc_slot;
1677
1678 if (view->texture == buf) {
1679 si_set_buf_desc_address(rbuffer,
1680 view->u.buf.offset,
1681 descs->list +
1682 desc_slot * 16 + 4);
1683
1684 (*tex_handle)->desc_dirty = true;
1685 sctx->bindless_descriptors_dirty = true;
1686
1687 radeon_add_to_buffer_list_check_mem(
1688 &sctx->b, &sctx->b.gfx, rbuffer,
1689 RADEON_USAGE_READ,
1690 RADEON_PRIO_SAMPLER_BUFFER, true);
1691 }
1692 }
1693 }
1694
1695 /* Bindless image handles */
1696 if (rbuffer->image_handle_allocated) {
1697 struct si_descriptors *descs = &sctx->bindless_descriptors;
1698
1699 util_dynarray_foreach(&sctx->resident_img_handles,
1700 struct si_image_handle *, img_handle) {
1701 struct pipe_image_view *view = &(*img_handle)->view;
1702 unsigned desc_slot = (*img_handle)->desc_slot;
1703
1704 if (view->resource == buf) {
1705 if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1706 si_mark_image_range_valid(view);
1707
1708 si_set_buf_desc_address(rbuffer,
1709 view->u.buf.offset,
1710 descs->list +
1711 desc_slot * 16 + 4);
1712
1713 (*img_handle)->desc_dirty = true;
1714 sctx->bindless_descriptors_dirty = true;
1715
1716 radeon_add_to_buffer_list_check_mem(
1717 &sctx->b, &sctx->b.gfx, rbuffer,
1718 RADEON_USAGE_READWRITE,
1719 RADEON_PRIO_SAMPLER_BUFFER, true);
1720 }
1721 }
1722 }
1723 }
1724
1725 /* Reallocate a buffer and update all resource bindings where the buffer is
1726 * bound.
1727 *
1728 * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
1729 * idle by discarding its contents. Apps usually tell us when to do this using
1730 * map_buffer flags, for example.
1731 */
1732 static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
1733 {
1734 struct si_context *sctx = (struct si_context*)ctx;
1735 struct r600_resource *rbuffer = r600_resource(buf);
1736 uint64_t old_va = rbuffer->gpu_address;
1737
1738 /* Reallocate the buffer in the same pipe_resource. */
1739 si_alloc_resource(sctx->screen, rbuffer);
1740
1741 si_rebind_buffer(ctx, buf, old_va);
1742 }
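/* Illustrative sketch (not part of this file): the usual trigger for the
 * invalidation above is the buffer transfer-map path in the common buffer
 * code, which calls the invalidate_buffer hook installed at the bottom of
 * this file. Very roughly, assuming the generic Gallium transfer flag:
 *
 *	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
 *		// Swap in fresh storage instead of stalling on the GPU,
 *		// then let si_rebind_buffer() patch every live binding.
 *		rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
 *	}
 */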
1743
1744 static void si_upload_bindless_descriptor(struct si_context *sctx,
1745 unsigned desc_slot,
1746 unsigned num_dwords)
1747 {
1748 struct si_descriptors *desc = &sctx->bindless_descriptors;
1749 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1750 unsigned desc_slot_offset = desc_slot * 16;
1751 uint32_t *data;
1752 uint64_t va;
1753
1754 data = desc->list + desc_slot_offset;
1755 va = desc->gpu_address + desc_slot_offset * 4;
1756
1757 radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
1758 radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
1759 S_370_WR_CONFIRM(1) |
1760 S_370_ENGINE_SEL(V_370_ME));
1761 radeon_emit(cs, va);
1762 radeon_emit(cs, va >> 32);
1763 radeon_emit_array(cs, data, num_dwords);
1764 }
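/* Example of the addressing above: bindless slots are 16 dwords wide, so
 * desc_slot = 2 gives desc_slot_offset = 32 dwords and a destination
 * va = desc->gpu_address + 32 * 4 = gpu_address + 128 bytes. The packet
 * then writes num_dwords dwords (16 for texture handles, 8 for image
 * handles, see the callers below) through TC L2.
 */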
1765
1766 static void si_upload_bindless_descriptors(struct si_context *sctx)
1767 {
1768 if (!sctx->bindless_descriptors_dirty)
1769 return;
1770
1771 /* Wait for graphics/compute to be idle before updating the resident
1772 * descriptors directly in memory, in case the GPU is using them.
1773 */
1774 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
1775 SI_CONTEXT_CS_PARTIAL_FLUSH;
1776 si_emit_cache_flush(sctx);
1777
1778 util_dynarray_foreach(&sctx->resident_tex_handles,
1779 struct si_texture_handle *, tex_handle) {
1780 unsigned desc_slot = (*tex_handle)->desc_slot;
1781
1782 if (!(*tex_handle)->desc_dirty)
1783 continue;
1784
1785 si_upload_bindless_descriptor(sctx, desc_slot, 16);
1786 (*tex_handle)->desc_dirty = false;
1787 }
1788
1789 util_dynarray_foreach(&sctx->resident_img_handles,
1790 struct si_image_handle *, img_handle) {
1791 unsigned desc_slot = (*img_handle)->desc_slot;
1792
1793 if (!(*img_handle)->desc_dirty)
1794 continue;
1795
1796 si_upload_bindless_descriptor(sctx, desc_slot, 8);
1797 (*img_handle)->desc_dirty = false;
1798 }
1799
1800 /* Invalidate L1 because it doesn't know that L2 changed. */
1801 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1;
1802 si_emit_cache_flush(sctx);
1803
1804 sctx->bindless_descriptors_dirty = false;
1805 }
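/* Summary of the sequence above: (1) wait for in-flight PS/CS work so the
 * GPU no longer reads the resident descriptors, (2) rewrite the dirty
 * slots in place via CP WRITE_DATA into TC L2, (3) invalidate the scalar
 * L1 cache so shaders don't read stale descriptor words.
 */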
1806
1807 /* Update mutable image descriptor fields of all resident textures. */
1808 static void si_update_bindless_texture_descriptor(struct si_context *sctx,
1809 struct si_texture_handle *tex_handle)
1810 {
1811 struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
1812 struct si_descriptors *desc = &sctx->bindless_descriptors;
1813 unsigned desc_slot_offset = tex_handle->desc_slot * 16;
1814 uint32_t desc_list[16];
1815
1816 if (sview->base.texture->target == PIPE_BUFFER)
1817 return;
1818
1819 memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
1820 si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate,
1821 desc->list + desc_slot_offset);
1822
1823 if (memcmp(desc_list, desc->list + desc_slot_offset,
1824 sizeof(desc_list))) {
1825 tex_handle->desc_dirty = true;
1826 sctx->bindless_descriptors_dirty = true;
1827 }
1828 }
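/* The pattern above (and in the image variant below): snapshot the current
 * slot, rebuild the descriptor in place, then memcmp against the snapshot
 * so the slot is only marked dirty (and later re-uploaded) when a mutable
 * field actually changed.
 */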
1829
1830 static void si_update_bindless_image_descriptor(struct si_context *sctx,
1831 struct si_image_handle *img_handle)
1832 {
1833 struct si_descriptors *desc = &sctx->bindless_descriptors;
1834 unsigned desc_slot_offset = img_handle->desc_slot * 16;
1835 struct pipe_image_view *view = &img_handle->view;
1836 uint32_t desc_list[8];
1837
1838 if (view->resource->target == PIPE_BUFFER)
1839 return;
1840
1841 memcpy(desc_list, desc->list + desc_slot_offset,
1842 sizeof(desc_list));
1843 si_set_shader_image_desc(sctx, view, true,
1844 desc->list + desc_slot_offset);
1845
1846 if (memcmp(desc_list, desc->list + desc_slot_offset,
1847 sizeof(desc_list))) {
1848 img_handle->desc_dirty = true;
1849 sctx->bindless_descriptors_dirty = true;
1850 }
1851 }
1852
1853 static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
1854 {
1855 util_dynarray_foreach(&sctx->resident_tex_handles,
1856 struct si_texture_handle *, tex_handle) {
1857 si_update_bindless_texture_descriptor(sctx, *tex_handle);
1858 }
1859
1860 util_dynarray_foreach(&sctx->resident_img_handles,
1861 struct si_image_handle *, img_handle) {
1862 si_update_bindless_image_descriptor(sctx, *img_handle);
1863 }
1864
1865 si_upload_bindless_descriptors(sctx);
1866 }
1867
1868 /* Update mutable image descriptor fields of all bound textures. */
1869 void si_update_all_texture_descriptors(struct si_context *sctx)
1870 {
1871 unsigned shader;
1872
1873 for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
1874 struct si_samplers *samplers = &sctx->samplers[shader];
1875 struct si_images *images = &sctx->images[shader];
1876 unsigned mask;
1877
1878 /* Images. */
1879 mask = images->enabled_mask;
1880 while (mask) {
1881 unsigned i = u_bit_scan(&mask);
1882 struct pipe_image_view *view = &images->views[i];
1883
1884 if (!view->resource ||
1885 view->resource->target == PIPE_BUFFER)
1886 continue;
1887
1888 si_set_shader_image(sctx, shader, i, view, true);
1889 }
1890
1891 /* Sampler views. */
1892 mask = samplers->enabled_mask;
1893 while (mask) {
1894 unsigned i = u_bit_scan(&mask);
1895 struct pipe_sampler_view *view = samplers->views[i];
1896
1897 if (!view ||
1898 !view->texture ||
1899 view->texture->target == PIPE_BUFFER)
1900 continue;
1901
1902 si_set_sampler_view(sctx, shader, i,
1903 samplers->views[i], true);
1904 }
1905
1906 si_update_shader_needs_decompress_mask(sctx, shader);
1907 }
1908
1909 si_update_all_resident_texture_descriptors(sctx);
1910 }
1911
1912 /* SHADER USER DATA */
1913
1914 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
1915 unsigned shader)
1916 {
1917 sctx->shader_pointers_dirty |=
1918 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
1919 SI_NUM_SHADER_DESCS);
1920
1921 if (shader == PIPE_SHADER_VERTEX)
1922 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1923
1924 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1925 }
1926
1927 static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
1928 {
1929 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
1930 sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
1931 si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
1932 sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1933 sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
1934 }
1935
1936 /* Set a base register address for user data constants in the given shader.
1937 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
1938 */
1939 static void si_set_user_data_base(struct si_context *sctx,
1940 unsigned shader, uint32_t new_base)
1941 {
1942 uint32_t *base = &sctx->shader_pointers.sh_base[shader];
1943
1944 if (*base != new_base) {
1945 *base = new_base;
1946
1947 if (new_base) {
1948 si_mark_shader_pointers_dirty(sctx, shader);
1949
1950 if (shader == PIPE_SHADER_VERTEX)
1951 sctx->last_vs_state = ~0;
1952 }
1953 }
1954 }
1955
1956 /* This must be called when these shaders are changed from non-NULL to NULL
1957 * and vice versa:
1958 * - geometry shader
1959 * - tessellation control shader
1960 * - tessellation evaluation shader
1961 */
1962 void si_shader_change_notify(struct si_context *sctx)
1963 {
1964 /* VS can be bound as VS, ES, or LS. */
1965 if (sctx->tes_shader.cso) {
1966 if (sctx->b.chip_class >= GFX9) {
1967 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1968 R_00B430_SPI_SHADER_USER_DATA_LS_0);
1969 } else {
1970 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1971 R_00B530_SPI_SHADER_USER_DATA_LS_0);
1972 }
1973 } else if (sctx->gs_shader.cso) {
1974 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1975 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1976 } else {
1977 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
1978 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1979 }
1980
1981 /* TES can be bound as ES, VS, or not bound. */
1982 if (sctx->tes_shader.cso) {
1983 if (sctx->gs_shader.cso)
1984 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1985 R_00B330_SPI_SHADER_USER_DATA_ES_0);
1986 else
1987 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
1988 R_00B130_SPI_SHADER_USER_DATA_VS_0);
1989 } else {
1990 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
1991 }
1992 }
1993
1994 static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
1995 struct si_descriptors *desc,
1996 unsigned sh_base,
1997 unsigned pointer_count)
1998 {
1999 radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
2000 radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
2001 }
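/* Worked example for the register index above: with
 * sh_base = R_00B130_SPI_SHADER_USER_DATA_VS_0 and a hypothetical
 * shader_userdata_offset of 16 bytes (user SGPR 4), the second dword of
 * the packet is (R_00B130_SPI_SHADER_USER_DATA_VS_0 + 16 -
 * SI_SH_REG_OFFSET) >> 2, i.e. the dword index of
 * SPI_SHADER_USER_DATA_VS_4 relative to the start of the SH register
 * space, which is what SET_SH_REG expects.
 */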
2002
2003 static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
2004 struct si_descriptors *desc)
2005 {
2006 uint64_t va = desc->gpu_address;
2007
2008 radeon_emit(cs, va);
2009 radeon_emit(cs, va >> 32);
2010 }
2011
2012 static void si_emit_shader_pointer(struct si_context *sctx,
2013 struct si_descriptors *desc,
2014 unsigned sh_base)
2015 {
2016 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2017
2018 si_emit_shader_pointer_head(cs, desc, sh_base, 1);
2019 si_emit_shader_pointer_body(cs, desc);
2020 }
2021
2022 static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
2023 unsigned pointer_mask,
2024 unsigned sh_base)
2025 {
2026 if (!sh_base)
2027 return;
2028
2029 struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2030 unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
2031
2032 while (mask) {
2033 int start, count;
2034 u_bit_scan_consecutive_range(&mask, &start, &count);
2035
2036 struct si_descriptors *descs = &sctx->descriptors[start];
2037
2038 si_emit_shader_pointer_head(cs, descs, sh_base, count);
2039 for (int i = 0; i < count; i++)
2040 si_emit_shader_pointer_body(cs, descs + i);
2041 }
2042 }
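/* Note: this batching relies on consecutive dirty descriptor sets of a
 * shader stage occupying consecutive user-SGPR pairs, so one SET_SH_REG
 * packet (count pointers = count * 2 dwords) can be followed by one
 * 64-bit list address per descriptor set.
 */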
2043
2044 static void si_emit_global_shader_pointers(struct si_context *sctx,
2045 struct si_descriptors *descs)
2046 {
2047 if (sctx->b.chip_class == GFX9) {
2048 /* Broadcast it to all shader stages. */
2049 si_emit_shader_pointer(sctx, descs,
2050 R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
2051 return;
2052 }
2053
2054 si_emit_shader_pointer(sctx, descs,
2055 R_00B030_SPI_SHADER_USER_DATA_PS_0);
2056 si_emit_shader_pointer(sctx, descs,
2057 R_00B130_SPI_SHADER_USER_DATA_VS_0);
2058 si_emit_shader_pointer(sctx, descs,
2059 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2060 si_emit_shader_pointer(sctx, descs,
2061 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2062 si_emit_shader_pointer(sctx, descs,
2063 R_00B430_SPI_SHADER_USER_DATA_HS_0);
2064 si_emit_shader_pointer(sctx, descs,
2065 R_00B530_SPI_SHADER_USER_DATA_LS_0);
2066 }
2067
2068 void si_emit_graphics_shader_pointers(struct si_context *sctx,
2069 struct r600_atom *atom)
2070 {
2071 uint32_t *sh_base = sctx->shader_pointers.sh_base;
2072
2073 if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
2074 si_emit_global_shader_pointers(sctx,
2075 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2076 }
2077
2078 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
2079 sh_base[PIPE_SHADER_VERTEX]);
2080 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
2081 sh_base[PIPE_SHADER_TESS_CTRL]);
2082 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
2083 sh_base[PIPE_SHADER_TESS_EVAL]);
2084 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
2085 sh_base[PIPE_SHADER_GEOMETRY]);
2086 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
2087 sh_base[PIPE_SHADER_FRAGMENT]);
2088
2089 sctx->shader_pointers_dirty &=
2090 ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
2091
2092 if (sctx->vertex_buffer_pointer_dirty) {
2093 si_emit_shader_pointer(sctx, &sctx->vertex_buffers,
2094 sh_base[PIPE_SHADER_VERTEX]);
2095 sctx->vertex_buffer_pointer_dirty = false;
2096 }
2097
2098 if (sctx->graphics_bindless_pointer_dirty) {
2099 si_emit_global_shader_pointers(sctx,
2100 &sctx->bindless_descriptors);
2101 sctx->graphics_bindless_pointer_dirty = false;
2102 }
2103 }
2104
2105 void si_emit_compute_shader_pointers(struct si_context *sctx)
2106 {
2107 unsigned base = R_00B900_COMPUTE_USER_DATA_0;
2108
2109 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
2110 R_00B900_COMPUTE_USER_DATA_0);
2111 sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
2112
2113 if (sctx->compute_bindless_pointer_dirty) {
2114 si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
2115 sctx->compute_bindless_pointer_dirty = false;
2116 }
2117 }
2118
2119 /* BINDLESS */
2120
2121 static void si_init_bindless_descriptors(struct si_context *sctx,
2122 struct si_descriptors *desc,
2123 unsigned shader_userdata_index,
2124 unsigned num_elements)
2125 {
2126 MAYBE_UNUSED unsigned desc_slot;
2127
2128 si_init_descriptors(desc, shader_userdata_index, 16, num_elements);
2129 sctx->bindless_descriptors.num_active_slots = num_elements;
2130
2131 /* The first bindless descriptor is stored at slot 1, because 0 is not
2132 * considered to be a valid handle.
2133 */
2134 sctx->num_bindless_descriptors = 1;
2135
2136 /* Track which bindless slots are used (or not). */
2137 util_idalloc_init(&sctx->bindless_used_slots);
2138 util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
2139
2140 /* Reserve slot 0 because it's an invalid handle for bindless. */
2141 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2142 assert(desc_slot == 0);
2143 }
2144
2145 static void si_release_bindless_descriptors(struct si_context *sctx)
2146 {
2147 si_release_descriptors(&sctx->bindless_descriptors);
2148 util_idalloc_fini(&sctx->bindless_used_slots);
2149 }
2150
2151 static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
2152 {
2153 struct si_descriptors *desc = &sctx->bindless_descriptors;
2154 unsigned desc_slot;
2155
2156 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
2157 if (desc_slot >= desc->num_elements) {
2158 /* The array of bindless descriptors is full, resize it. */
2159 unsigned slot_size = desc->element_dw_size * 4;
2160 unsigned new_num_elements = desc->num_elements * 2;
2161
2162 desc->list = REALLOC(desc->list, desc->num_elements * slot_size,
2163 new_num_elements * slot_size);
2164 desc->num_elements = new_num_elements;
2165 desc->num_active_slots = new_num_elements;
2166 }
2167
2168 assert(desc_slot);
2169 return desc_slot;
2170 }
2171
2172 static unsigned
2173 si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
2174 unsigned size)
2175 {
2176 struct si_descriptors *desc = &sctx->bindless_descriptors;
2177 unsigned desc_slot, desc_slot_offset;
2178
2179 /* Find a free slot. */
2180 desc_slot = si_get_first_free_bindless_slot(sctx);
2181
2182 /* For simplicity, sampler and image bindless descriptors use fixed
2183 * 16-dword slots for now. Image descriptors only need 8 dwords, but this
2184 * doesn't really matter because no real apps use image handles.
2185 */
2186 desc_slot_offset = desc_slot * 16;
2187
2188 /* Copy the descriptor into the array. */
2189 memcpy(desc->list + desc_slot_offset, desc_list, size);
2190
2191 /* Re-upload the whole array of bindless descriptors into a new buffer.
2192 */
2193 if (!si_upload_descriptors(sctx, desc))
2194 return 0;
2195
2196 /* Make sure to re-emit the shader pointers for all stages. */
2197 sctx->graphics_bindless_pointer_dirty = true;
2198 sctx->compute_bindless_pointer_dirty = true;
2199
2200 return desc_slot;
2201 }
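/* Example: the first handle ever created gets desc_slot = 1 (slot 0 is
 * reserved as the invalid handle), so its descriptor occupies dwords
 * [16:31] of the bindless list, and the value returned to the application
 * as the bindless handle is simply 1.
 */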
2202
2203 static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
2204 unsigned desc_slot,
2205 struct pipe_resource *resource,
2206 uint64_t offset,
2207 bool *desc_dirty)
2208 {
2209 struct si_descriptors *desc = &sctx->bindless_descriptors;
2210 struct r600_resource *buf = r600_resource(resource);
2211 unsigned desc_slot_offset = desc_slot * 16;
2212 uint32_t *desc_list = desc->list + desc_slot_offset + 4;
2213 uint64_t old_desc_va;
2214
2215 assert(resource->target == PIPE_BUFFER);
2216
2217 /* Retrieve the old buffer addr from the descriptor. */
2218 old_desc_va = si_desc_extract_buffer_address(desc_list);
2219
2220 if (old_desc_va != buf->gpu_address + offset) {
2221 /* The buffer has been invalidated while the handle wasn't
2222 * resident; update the descriptor and the dirty flag.
2223 */
2224 si_set_buf_desc_address(buf, offset, &desc_list[0]);
2225
2226 *desc_dirty = true;
2227 }
2228 }
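/* Note: desc_list above points at dwords [4:7] of the 16-dword bindless
 * slot, the same sub-range used for buffer views in si_rebind_buffer()
 * (desc_slot * 16 + 4), so only the base address embedded there has to be
 * checked and rewritten when the buffer storage has moved.
 */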
2229
2230 static uint64_t si_create_texture_handle(struct pipe_context *ctx,
2231 struct pipe_sampler_view *view,
2232 const struct pipe_sampler_state *state)
2233 {
2234 struct si_sampler_view *sview = (struct si_sampler_view *)view;
2235 struct si_context *sctx = (struct si_context *)ctx;
2236 struct si_texture_handle *tex_handle;
2237 struct si_sampler_state *sstate;
2238 uint32_t desc_list[16];
2239 uint64_t handle;
2240
2241 tex_handle = CALLOC_STRUCT(si_texture_handle);
2242 if (!tex_handle)
2243 return 0;
2244
2245 memset(desc_list, 0, sizeof(desc_list));
2246 si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);
2247
2248 sstate = ctx->create_sampler_state(ctx, state);
2249 if (!sstate) {
2250 FREE(tex_handle);
2251 return 0;
2252 }
2253
2254 si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
2255 memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
2256 ctx->delete_sampler_state(ctx, sstate);
2257
2258 tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2259 sizeof(desc_list));
2260 if (!tex_handle->desc_slot) {
2261 FREE(tex_handle);
2262 return 0;
2263 }
2264
2265 handle = tex_handle->desc_slot;
2266
2267 if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
2268 tex_handle)) {
2269 FREE(tex_handle);
2270 return 0;
2271 }
2272
2273 pipe_sampler_view_reference(&tex_handle->view, view);
2274
2275 r600_resource(sview->base.texture)->texture_handle_allocated = true;
2276
2277 return handle;
2278 }
2279
2280 static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
2281 {
2282 struct si_context *sctx = (struct si_context *)ctx;
2283 struct si_texture_handle *tex_handle;
2284 struct hash_entry *entry;
2285
2286 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2287 if (!entry)
2288 return;
2289
2290 tex_handle = (struct si_texture_handle *)entry->data;
2291
2292 /* Allow this descriptor slot to be re-used. */
2293 util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);
2294
2295 pipe_sampler_view_reference(&tex_handle->view, NULL);
2296 _mesa_hash_table_remove(sctx->tex_handles, entry);
2297 FREE(tex_handle);
2298 }
2299
2300 static void si_make_texture_handle_resident(struct pipe_context *ctx,
2301 uint64_t handle, bool resident)
2302 {
2303 struct si_context *sctx = (struct si_context *)ctx;
2304 struct si_texture_handle *tex_handle;
2305 struct si_sampler_view *sview;
2306 struct hash_entry *entry;
2307
2308 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)handle);
2309 if (!entry)
2310 return;
2311
2312 tex_handle = (struct si_texture_handle *)entry->data;
2313 sview = (struct si_sampler_view *)tex_handle->view;
2314
2315 if (resident) {
2316 if (sview->base.texture->target != PIPE_BUFFER) {
2317 struct r600_texture *rtex =
2318 (struct r600_texture *)sview->base.texture;
2319
2320 if (depth_needs_decompression(rtex)) {
2321 util_dynarray_append(
2322 &sctx->resident_tex_needs_depth_decompress,
2323 struct si_texture_handle *,
2324 tex_handle);
2325 }
2326
2327 if (color_needs_decompression(rtex)) {
2328 util_dynarray_append(
2329 &sctx->resident_tex_needs_color_decompress,
2330 struct si_texture_handle *,
2331 tex_handle);
2332 }
2333
2334 if (rtex->dcc_offset &&
2335 p_atomic_read(&rtex->framebuffers_bound))
2336 sctx->need_check_render_feedback = true;
2337
2338 si_update_bindless_texture_descriptor(sctx, tex_handle);
2339 } else {
2340 si_update_bindless_buffer_descriptor(sctx,
2341 tex_handle->desc_slot,
2342 sview->base.texture,
2343 sview->base.u.buf.offset,
2344 &tex_handle->desc_dirty);
2345 }
2346
2347 /* Re-upload the descriptor if it has been updated while it
2348 * wasn't resident.
2349 */
2350 if (tex_handle->desc_dirty)
2351 sctx->bindless_descriptors_dirty = true;
2352
2353 /* Add the texture handle to the per-context list. */
2354 util_dynarray_append(&sctx->resident_tex_handles,
2355 struct si_texture_handle *, tex_handle);
2356
2357 /* Add the buffers to the current CS in case si_begin_new_cs()
2358 * is not going to be called.
2359 */
2360 si_sampler_view_add_buffer(sctx, sview->base.texture,
2361 RADEON_USAGE_READ,
2362 sview->is_stencil_sampler, false);
2363 } else {
2364 /* Remove the texture handle from the per-context list. */
2365 util_dynarray_delete_unordered(&sctx->resident_tex_handles,
2366 struct si_texture_handle *,
2367 tex_handle);
2368
2369 if (sview->base.texture->target != PIPE_BUFFER) {
2370 util_dynarray_delete_unordered(
2371 &sctx->resident_tex_needs_depth_decompress,
2372 struct si_texture_handle *, tex_handle);
2373
2374 util_dynarray_delete_unordered(
2375 &sctx->resident_tex_needs_color_decompress,
2376 struct si_texture_handle *, tex_handle);
2377 }
2378 }
2379 }
2380
2381 static uint64_t si_create_image_handle(struct pipe_context *ctx,
2382 const struct pipe_image_view *view)
2383 {
2384 struct si_context *sctx = (struct si_context *)ctx;
2385 struct si_image_handle *img_handle;
2386 uint32_t desc_list[8];
2387 uint64_t handle;
2388
2389 if (!view || !view->resource)
2390 return 0;
2391
2392 img_handle = CALLOC_STRUCT(si_image_handle);
2393 if (!img_handle)
2394 return 0;
2395
2396 memset(desc_list, 0, sizeof(desc_list));
2397 si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
2398
2399 si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
2400
2401 img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
2402 sizeof(desc_list));
2403 if (!img_handle->desc_slot) {
2404 FREE(img_handle);
2405 return 0;
2406 }
2407
2408 handle = img_handle->desc_slot;
2409
2410 if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle,
2411 img_handle)) {
2412 FREE(img_handle);
2413 return 0;
2414 }
2415
2416 util_copy_image_view(&img_handle->view, view);
2417
2418 r600_resource(view->resource)->image_handle_allocated = true;
2419
2420 return handle;
2421 }
2422
2423 static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
2424 {
2425 struct si_context *sctx = (struct si_context *)ctx;
2426 struct si_image_handle *img_handle;
2427 struct hash_entry *entry;
2428
2429 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2430 if (!entry)
2431 return;
2432
2433 img_handle = (struct si_image_handle *)entry->data;
2434
2435 util_copy_image_view(&img_handle->view, NULL);
2436 _mesa_hash_table_remove(sctx->img_handles, entry);
2437 FREE(img_handle);
2438 }
2439
2440 static void si_make_image_handle_resident(struct pipe_context *ctx,
2441 uint64_t handle, unsigned access,
2442 bool resident)
2443 {
2444 struct si_context *sctx = (struct si_context *)ctx;
2445 struct si_image_handle *img_handle;
2446 struct pipe_image_view *view;
2447 struct r600_resource *res;
2448 struct hash_entry *entry;
2449
2450 entry = _mesa_hash_table_search(sctx->img_handles, (void *)handle);
2451 if (!entry)
2452 return;
2453
2454 img_handle = (struct si_image_handle *)entry->data;
2455 view = &img_handle->view;
2456 res = (struct r600_resource *)view->resource;
2457
2458 if (resident) {
2459 if (res->b.b.target != PIPE_BUFFER) {
2460 struct r600_texture *rtex = (struct r600_texture *)res;
2461 unsigned level = view->u.tex.level;
2462
2463 if (color_needs_decompression(rtex)) {
2464 util_dynarray_append(
2465 &sctx->resident_img_needs_color_decompress,
2466 struct si_image_handle *,
2467 img_handle);
2468 }
2469
2470 if (vi_dcc_enabled(rtex, level) &&
2471 p_atomic_read(&rtex->framebuffers_bound))
2472 sctx->need_check_render_feedback = true;
2473
2474 si_update_bindless_image_descriptor(sctx, img_handle);
2475 } else {
2476 si_update_bindless_buffer_descriptor(sctx,
2477 img_handle->desc_slot,
2478 view->resource,
2479 view->u.buf.offset,
2480 &img_handle->desc_dirty);
2481 }
2482
2483 /* Re-upload the descriptor if it has been updated while it
2484 * wasn't resident.
2485 */
2486 if (img_handle->desc_dirty)
2487 sctx->bindless_descriptors_dirty = true;
2488
2489 /* Add the image handle to the per-context list. */
2490 util_dynarray_append(&sctx->resident_img_handles,
2491 struct si_image_handle *, img_handle);
2492
2493 /* Add the buffers to the current CS in case si_begin_new_cs()
2494 * is not going to be called.
2495 */
2496 si_sampler_view_add_buffer(sctx, view->resource,
2497 (access & PIPE_IMAGE_ACCESS_WRITE) ?
2498 RADEON_USAGE_READWRITE :
2499 RADEON_USAGE_READ, false, false);
2500 } else {
2501 /* Remove the image handle from the per-context list. */
2502 util_dynarray_delete_unordered(&sctx->resident_img_handles,
2503 struct si_image_handle *,
2504 img_handle);
2505
2506 if (res->b.b.target != PIPE_BUFFER) {
2507 util_dynarray_delete_unordered(
2508 &sctx->resident_img_needs_color_decompress,
2509 struct si_image_handle *,
2510 img_handle);
2511 }
2512 }
2513 }
2514
2515
2516 void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
2517 {
2518 unsigned num_resident_tex_handles, num_resident_img_handles;
2519
2520 num_resident_tex_handles = sctx->resident_tex_handles.size /
2521 sizeof(struct si_texture_handle *);
2522 num_resident_img_handles = sctx->resident_img_handles.size /
2523 sizeof(struct si_image_handle *);
2524
2525 /* Add all resident texture handles. */
2526 util_dynarray_foreach(&sctx->resident_tex_handles,
2527 struct si_texture_handle *, tex_handle) {
2528 struct si_sampler_view *sview =
2529 (struct si_sampler_view *)(*tex_handle)->view;
2530
2531 si_sampler_view_add_buffer(sctx, sview->base.texture,
2532 RADEON_USAGE_READ,
2533 sview->is_stencil_sampler, false);
2534 }
2535
2536 /* Add all resident image handles. */
2537 util_dynarray_foreach(&sctx->resident_img_handles,
2538 struct si_image_handle *, img_handle) {
2539 struct pipe_image_view *view = &(*img_handle)->view;
2540
2541 si_sampler_view_add_buffer(sctx, view->resource,
2542 RADEON_USAGE_READWRITE,
2543 false, false);
2544 }
2545
2546 sctx->b.num_resident_handles += num_resident_tex_handles +
2547 num_resident_img_handles;
2548 }
2549
2550 /* INIT/DEINIT/UPLOAD */
2551
2552 void si_init_all_descriptors(struct si_context *sctx)
2553 {
2554 int i;
2555
2556 STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2557 STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
2558
2559 for (i = 0; i < SI_NUM_SHADERS; i++) {
2560 bool gfx9_tcs = false;
2561 bool gfx9_gs = false;
2562 unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
2563 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
2564 struct si_descriptors *desc;
2565
2566 if (sctx->b.chip_class >= GFX9) {
2567 gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
2568 gfx9_gs = i == PIPE_SHADER_GEOMETRY;
2569 }
2570
2571 desc = si_const_and_shader_buffer_descriptors(sctx, i);
2572 si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
2573 num_buffer_slots,
2574 gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
2575 gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
2576 SI_SGPR_CONST_AND_SHADER_BUFFERS,
2577 RADEON_USAGE_READWRITE,
2578 RADEON_USAGE_READ,
2579 RADEON_PRIO_SHADER_RW_BUFFER,
2580 RADEON_PRIO_CONST_BUFFER);
2581 desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
2582
2583 desc = si_sampler_and_image_descriptors(sctx, i);
2584 si_init_descriptors(desc,
2585 gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
2586 gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
2587 SI_SGPR_SAMPLERS_AND_IMAGES,
2588 16, num_sampler_slots);
2589
2590 int j;
2591 for (j = 0; j < SI_NUM_IMAGES; j++)
2592 memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
2593 for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
2594 memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
2595 }
2596
2597 si_init_buffer_resources(&sctx->rw_buffers,
2598 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
2599 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
2600 /* The second set of usage/priority is used by
2601 * const buffers in RW buffer slots. */
2602 RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
2603 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
2604 sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
2605
2606 si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
2607 4, SI_NUM_VERTEX_BUFFERS);
2608 FREE(sctx->vertex_buffers.list); /* not used */
2609 sctx->vertex_buffers.list = NULL;
2610
2611 /* Initialize an array of 1024 bindless descriptors; when the limit is
2612 * reached, just make it larger and re-upload the whole array.
2613 */
2614 si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
2615 SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
2616 1024);
2617
2618 sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
2619
2620 /* Set pipe_context functions. */
2621 sctx->b.b.bind_sampler_states = si_bind_sampler_states;
2622 sctx->b.b.set_shader_images = si_set_shader_images;
2623 sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
2624 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
2625 sctx->b.b.set_shader_buffers = si_set_shader_buffers;
2626 sctx->b.b.set_sampler_views = si_set_sampler_views;
2627 sctx->b.b.create_texture_handle = si_create_texture_handle;
2628 sctx->b.b.delete_texture_handle = si_delete_texture_handle;
2629 sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
2630 sctx->b.b.create_image_handle = si_create_image_handle;
2631 sctx->b.b.delete_image_handle = si_delete_image_handle;
2632 sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
2633 sctx->b.invalidate_buffer = si_invalidate_buffer;
2634 sctx->b.rebind_buffer = si_rebind_buffer;
2635
2636 /* Shader user data. */
2637 si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
2638 si_emit_graphics_shader_pointers);
2639
2640 /* Set default and immutable mappings. */
2641 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
2642
2643 if (sctx->b.chip_class >= GFX9) {
2644 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2645 R_00B430_SPI_SHADER_USER_DATA_LS_0);
2646 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2647 R_00B330_SPI_SHADER_USER_DATA_ES_0);
2648 } else {
2649 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
2650 R_00B430_SPI_SHADER_USER_DATA_HS_0);
2651 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
2652 R_00B230_SPI_SHADER_USER_DATA_GS_0);
2653 }
2654 si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
2655 }
2656
2657 static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
2658 {
2659 unsigned dirty = sctx->descriptors_dirty & mask;
2660
2661 /* Assume nothing will go wrong: */
2662 sctx->shader_pointers_dirty |= dirty;
2663
2664 while (dirty) {
2665 unsigned i = u_bit_scan(&dirty);
2666
2667 if (!si_upload_descriptors(sctx, &sctx->descriptors[i]))
2668 return false;
2669 }
2670
2671 sctx->descriptors_dirty &= ~mask;
2672
2673 si_upload_bindless_descriptors(sctx);
2674
2675 return true;
2676 }
2677
2678 bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
2679 {
2680 const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
2681 return si_upload_shader_descriptors(sctx, mask);
2682 }
2683
2684 bool si_upload_compute_shader_descriptors(struct si_context *sctx)
2685 {
2686 /* Does not update rw_buffers, as they are not needed for compute shaders
2687 * and the input buffer uses the same SGPRs anyway.
2688 */
2689 const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
2690 SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
2691 return si_upload_shader_descriptors(sctx, mask);
2692 }
2693
2694 void si_release_all_descriptors(struct si_context *sctx)
2695 {
2696 int i;
2697
2698 for (i = 0; i < SI_NUM_SHADERS; i++) {
2699 si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
2700 si_const_and_shader_buffer_descriptors(sctx, i));
2701 si_release_sampler_views(&sctx->samplers[i]);
2702 si_release_image_views(&sctx->images[i]);
2703 }
2704 si_release_buffer_resources(&sctx->rw_buffers,
2705 &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
2706 for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
2707 pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);
2708
2709 for (i = 0; i < SI_NUM_DESCS; ++i)
2710 si_release_descriptors(&sctx->descriptors[i]);
2711
2712 sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */
2713 si_release_descriptors(&sctx->vertex_buffers);
2714 si_release_bindless_descriptors(sctx);
2715 }
2716
2717 void si_all_descriptors_begin_new_cs(struct si_context *sctx)
2718 {
2719 int i;
2720
2721 for (i = 0; i < SI_NUM_SHADERS; i++) {
2722 si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
2723 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
2724 si_image_views_begin_new_cs(sctx, &sctx->images[i]);
2725 }
2726 si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
2727 si_vertex_buffers_begin_new_cs(sctx);
2728
2729 for (i = 0; i < SI_NUM_DESCS; ++i)
2730 si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
2731 si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors);
2732
2733 si_shader_pointers_begin_new_cs(sctx);
2734 }
2735
2736 void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
2737 uint64_t new_active_mask)
2738 {
2739 struct si_descriptors *desc = &sctx->descriptors[desc_idx];
2740
2741 /* Ignore no-op updates and updates that disable all slots. */
2742 if (!new_active_mask ||
2743 new_active_mask == u_bit_consecutive64(desc->first_active_slot,
2744 desc->num_active_slots))
2745 return;
2746
2747 int first, count;
2748 u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
2749 assert(new_active_mask == 0);
2750
2751 /* Upload/dump descriptors if slots are being enabled. */
2752 if (first < desc->first_active_slot ||
2753 first + count > desc->first_active_slot + desc->num_active_slots)
2754 sctx->descriptors_dirty |= 1u << desc_idx;
2755
2756 desc->first_active_slot = first;
2757 desc->num_active_slots = count;
2758 }
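/* Note: the assert above encodes the assumption that new_active_mask
 * describes exactly one contiguous range of slots;
 * u_bit_scan_consecutive_range64 consumes one such range, so any leftover
 * bits would mean a caller passed a mask with holes.
 */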
2759
2760 void si_set_active_descriptors_for_shader(struct si_context *sctx,
2761 struct si_shader_selector *sel)
2762 {
2763 if (!sel)
2764 return;
2765
2766 si_set_active_descriptors(sctx,
2767 si_const_and_shader_buffer_descriptors_idx(sel->type),
2768 sel->active_const_and_shader_buffers);
2769 si_set_active_descriptors(sctx,
2770 si_sampler_and_image_descriptors_idx(sel->type),
2771 sel->active_samplers_and_images);
2772 }
2773