1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_cmd_buffer.h"
6 #include "nvk_descriptor_set_layout.h"
7 #include "nvk_descriptor_types.h"
8 #include "nvk_shader.h"
9
10 #include "vk_pipeline.h"
11
12 #include "nir_builder.h"
13 #include "nir_deref.h"
14
15 #include "clc397.h"
16 #include "clc597.h"
17
18 struct lower_desc_cbuf {
19 struct nvk_cbuf key;
20
21 uint32_t use_count;
22
23 uint64_t start;
24 uint64_t end;
25 };
26
27 DERIVE_HASH_TABLE(nvk_cbuf);
28
29 static int
30 compar_cbufs(const void *_a, const void *_b)
31 {
32 const struct lower_desc_cbuf *a = _a;
33 const struct lower_desc_cbuf *b = _b;
34
35 #define COMPAR(field, pos) \
36 if (a->field < b->field) return -(pos); \
37 if (a->field > b->field) return (pos);
38
39 /* Sort by most used first */
40 COMPAR(use_count, -1)
41
42 /* Keep the list stable by then sorting by key fields. */
43 COMPAR(key.type, 1)
44 COMPAR(key.desc_set, 1)
45 COMPAR(key.dynamic_idx, 1)
46 COMPAR(key.desc_offset, 1)
47
48 #undef COMPAR
49
50 return 0;
51 }
52
53 struct lower_descriptors_ctx {
54 const struct nv_device_info *dev_info;
55 const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
56
57 bool use_bindless_cbuf;
58 bool use_edb_buffer_views;
59 bool clamp_desc_array_bounds;
60 bool indirect_bind;
61 nir_address_format ubo_addr_format;
62 nir_address_format ssbo_addr_format;
63
64 struct hash_table *cbufs;
65 struct nvk_cbuf_map *cbuf_map;
66 };
67
68 static bool
69 descriptor_type_is_ubo(VkDescriptorType desc_type)
70 {
71 switch (desc_type) {
72 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
73 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
74 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
75 return true;
76
77 default:
78 return false;
79 }
80 }
81
82 static bool
83 descriptor_type_is_ssbo(VkDescriptorType desc_type)
84 {
85 switch (desc_type) {
86 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
87 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
88 return true;
89
90 default:
91 return false;
92 }
93 }
94
95 static void
96 record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
97 struct lower_descriptors_ctx *ctx)
98 {
99 struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
100 if (entry != NULL) {
101 struct lower_desc_cbuf *cbuf = entry->data;
102 cbuf->use_count++;
103 cbuf->start = MIN2(cbuf->start, start);
104 cbuf->end = MAX2(cbuf->end, end);
105 } else {
106 struct lower_desc_cbuf *cbuf =
107 ralloc(ctx->cbufs, struct lower_desc_cbuf);
108 *cbuf = (struct lower_desc_cbuf) {
109 .key = *key,
110 .use_count = 1,
111 .start = start,
112 .end = end,
113 };
114 _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
115 }
116 }
117
118 static const struct nvk_descriptor_set_binding_layout *
119 get_binding_layout(uint32_t set, uint32_t binding,
120 const struct lower_descriptors_ctx *ctx)
121 {
122 assert(set < NVK_MAX_SETS);
123 assert(ctx->set_layouts[set] != NULL);
124
125 const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
126
127 assert(binding < set_layout->binding_count);
128 return &set_layout->binding[binding];
129 }
130
131 static void
132 record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
133 struct lower_descriptors_ctx *ctx)
134 {
135 const struct nvk_descriptor_set_binding_layout *binding_layout =
136 get_binding_layout(set, binding, ctx);
137
138 const struct nvk_cbuf key = {
139 .type = NVK_CBUF_TYPE_DESC_SET,
140 .desc_set = set,
141 };
142
143 uint64_t start, end;
144 if (index == NULL) {
145 /* When we don't have an index, assume 0 */
146 start = binding_layout->offset;
147 end = start + binding_layout->stride;
148 } else if (nir_src_is_const(*index)) {
149 start = binding_layout->offset +
150 nir_src_as_uint(*index) * binding_layout->stride;
151 end = start + binding_layout->stride;
152 } else {
153 start = binding_layout->offset;
154 end = start + binding_layout->array_size * binding_layout->stride;
155 }
156
157 record_cbuf_use(&key, start, end, ctx);
158 }
159
160 static void
161 record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
162 struct lower_descriptors_ctx *ctx)
163 {
164 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
165
166 /* These we'll handle later */
167 if (descriptor_type_is_ubo(nir_intrinsic_desc_type(intrin)))
168 return;
169
170 record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
171 nir_intrinsic_binding(intrin),
172 &intrin->src[0], ctx);
173 }
174
175 static void
176 record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
177 struct lower_descriptors_ctx *ctx)
178 {
179 nir_src *index_src = NULL;
180 if (deref->deref_type == nir_deref_type_array) {
181 index_src = &deref->arr.index;
182 deref = nir_deref_instr_parent(deref);
183 }
184
185 assert(deref->deref_type == nir_deref_type_var);
186 nir_variable *var = deref->var;
187
188 record_descriptor_cbuf_use(var->data.descriptor_set,
189 var->data.binding,
190 index_src, ctx);
191 }
192
193 static void
194 record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
195 struct lower_descriptors_ctx *ctx)
196 {
197 const int texture_src_idx =
198 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
199 const int sampler_src_idx =
200 nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
201
202 if (texture_src_idx >= 0) {
203 nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
204 record_deref_descriptor_cbuf_use(deref, ctx);
205 }
206
207 if (sampler_src_idx >= 0) {
208 nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
209 record_deref_descriptor_cbuf_use(deref, ctx);
210 }
211 }
212
213 static struct nvk_cbuf
214 ubo_deref_to_cbuf(nir_deref_instr *deref,
215 nir_intrinsic_instr **resource_index_out,
216 uint64_t *offset_out,
217 uint64_t *start_out, uint64_t *end_out,
218 const struct lower_descriptors_ctx *ctx)
219 {
220 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
221
222 /* In case we early return */
223 *offset_out = 0;
224 *start_out = 0;
225 *end_out = UINT64_MAX;
226 *resource_index_out = NULL;
227
228 const struct nvk_cbuf invalid = {
229 .type = NVK_CBUF_TYPE_INVALID,
230 };
231
232 uint64_t offset = 0;
233 uint64_t range = glsl_get_explicit_size(deref->type, false);
234 bool offset_valid = true;
235 while (deref->deref_type != nir_deref_type_cast) {
236 nir_deref_instr *parent = nir_deref_instr_parent(deref);
237
238 switch (deref->deref_type) {
239 case nir_deref_type_var:
240 unreachable("Buffers don't use variables in Vulkan");
241
242 case nir_deref_type_array:
243 case nir_deref_type_array_wildcard: {
244 uint32_t stride = nir_deref_instr_array_stride(deref);
245 if (deref->deref_type == nir_deref_type_array &&
246 nir_src_is_const(deref->arr.index)) {
247 offset += nir_src_as_uint(deref->arr.index) * stride;
248 } else {
249 range = glsl_get_length(parent->type) * stride;
250 }
251 break;
252 }
253
254 case nir_deref_type_ptr_as_array:
255 /* All bets are off. We shouldn't see these most of the time
256 * anyway, even with variable pointers.
257 */
258 offset_valid = false;
259 unreachable("Variable pointers aren't allowed on UBOs");
260 break;
261
262 case nir_deref_type_struct: {
263 offset += glsl_get_struct_field_offset(parent->type,
264 deref->strct.index);
265 break;
266 }
267
268 default:
269 unreachable("Unknown deref type");
270 }
271
272 deref = parent;
273 }
274
275 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
276 if (load_desc == NULL ||
277 load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
278 return invalid;
279
280 nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
281 if (res_index == NULL ||
282 res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
283 return invalid;
284
285 /* We try to early return as little as possible prior to this point so we
286 * can return the resource index intrinsic in as many cases as possible.
287 * After this point, though, early returns are fair game.
288 */
289 *resource_index_out = res_index;
290
291 if (!offset_valid || !nir_src_is_const(res_index->src[0]))
292 return invalid;
293
294 uint32_t set = nir_intrinsic_desc_set(res_index);
295 uint32_t binding = nir_intrinsic_binding(res_index);
296 uint32_t index = nir_src_as_uint(res_index->src[0]);
297
298 const struct nvk_descriptor_set_binding_layout *binding_layout =
299 get_binding_layout(set, binding, ctx);
300
301 switch (binding_layout->type) {
302 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
303 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
304 *offset_out = 0;
305 *start_out = offset;
306 *end_out = offset + range;
307 return (struct nvk_cbuf) {
308 .type = NVK_CBUF_TYPE_UBO_DESC,
309 .desc_set = set,
310 .desc_offset = binding_layout->offset +
311 index * binding_layout->stride,
312 };
313 }
314
315 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
316 *offset_out = 0;
317 *start_out = offset;
318 *end_out = offset + range;
319
320 return (struct nvk_cbuf) {
321 .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
322 .desc_set = set,
323 .dynamic_idx = binding_layout->dynamic_buffer_index + index,
324 };
325 }
326
327 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
328 *offset_out = binding_layout->offset;
329 *start_out = binding_layout->offset + offset;
330 *end_out = *start_out + range;
331
332 return (struct nvk_cbuf) {
333 .type = NVK_CBUF_TYPE_DESC_SET,
334 .desc_set = set,
335 };
336 }
337
338 default:
339 return invalid;
340 }
341 }
342
343 static void
344 record_load_ubo_cbuf_uses(nir_deref_instr *deref,
345 struct lower_descriptors_ctx *ctx)
346 {
347 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
348
349 UNUSED uint64_t offset;
350 uint64_t start, end;
351 nir_intrinsic_instr *res_index;
352 struct nvk_cbuf cbuf =
353 ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
354
355 if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
356 record_cbuf_use(&cbuf, start, end, ctx);
357 } else if (res_index != NULL) {
358 record_vulkan_resource_cbuf_use(res_index, ctx);
359 }
360 }
361
362 static bool
363 record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
364 {
365 struct lower_descriptors_ctx *ctx = _ctx;
366
367 switch (instr->type) {
368 case nir_instr_type_intrinsic: {
369 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
370 switch (intrin->intrinsic) {
371 case nir_intrinsic_vulkan_resource_index:
372 record_vulkan_resource_cbuf_use(intrin, ctx);
373 return false;
374
375 case nir_intrinsic_load_deref: {
376 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
377 if (nir_deref_mode_is(deref, nir_var_mem_ubo))
378 record_load_ubo_cbuf_uses(deref, ctx);
379 return false;
380 }
381
382 case nir_intrinsic_image_deref_load:
383 case nir_intrinsic_image_deref_store:
384 case nir_intrinsic_image_deref_atomic:
385 case nir_intrinsic_image_deref_atomic_swap:
386 case nir_intrinsic_image_deref_size:
387 case nir_intrinsic_image_deref_samples: {
388 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
389 record_deref_descriptor_cbuf_use(deref, ctx);
390 return false;
391 }
392
393 default:
394 return false;
395 }
396 unreachable("All cases return false");
397 }
398
399 case nir_instr_type_tex:
400 record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
401 return false;
402
403 default:
404 return false;
405 }
406 }
407
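/* Builds the shader's cbuf binding map: cbuf 0 is always the root
 * descriptor table, cbuf 1 holds inline shader constant data (if any), and
 * the remaining hardware slots are filled with the descriptor-set ranges
 * and UBOs recorded by the analysis helpers above.
 */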
408 static void
409 build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
410 {
411 ctx->cbuf_map->cbuf_count = 0;
412
413 /* Root descriptors always go in cbuf 0 */
414 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
415 .type = NVK_CBUF_TYPE_ROOT_DESC,
416 };
417
418 /* If we have constant data, put it at cbuf 1 */
419 if (nir->constant_data_size > 0) {
420 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
421 .type = NVK_CBUF_TYPE_SHADER_DATA,
422 };
423 }
424
425 if (ctx->indirect_bind)
426 return;
427
428 ctx->cbufs = nvk_cbuf_table_create(NULL);
429 nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
430 nir_metadata_all, (void *)ctx);
431
432 struct lower_desc_cbuf *cbufs =
433 ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
434 _mesa_hash_table_num_entries(ctx->cbufs));
435
436 uint32_t num_cbufs = 0;
437 hash_table_foreach(ctx->cbufs, entry) {
438 struct lower_desc_cbuf *cbuf = entry->data;
439
440 /* We only ever bind cbufs starting at offset 0, so if the used range
441 * starts past the maximum cbuf size, there's no point in listing it.
442 */
443 if (cbuf->start > NVK_MAX_CBUF_SIZE)
444 continue;
445
446 cbufs[num_cbufs++] = *cbuf;
447 }
448
449 qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
450
451 uint8_t max_cbuf_bindings;
452 if (nir->info.stage == MESA_SHADER_COMPUTE ||
453 nir->info.stage == MESA_SHADER_KERNEL) {
454 max_cbuf_bindings = 8;
455 } else {
456 max_cbuf_bindings = 16;
457 }
458
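   /* Greedily promote the most heavily used cbufs into the remaining
    * hardware binding slots, in the order produced by compar_cbufs().
    */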
459 for (uint32_t i = 0; i < num_cbufs; i++) {
460 if (ctx->cbuf_map->cbuf_count >= max_cbuf_bindings)
461 break;
462
463 /* We can't support indirect cbufs in compute yet */
464 if ((nir->info.stage == MESA_SHADER_COMPUTE ||
465 nir->info.stage == MESA_SHADER_KERNEL) &&
466 cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
467 continue;
468
469 /* Prior to Turing, indirect cbufs require splitting the pushbuf and
470 * pushing bits of the descriptor set. Doing this every draw call is
471 * probably more overhead than it's worth.
472 */
473 if (ctx->dev_info->cls_eng3d < TURING_A &&
474 cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
475 continue;
476
477 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = cbufs[i].key;
478 }
479
480 ralloc_free(ctx->cbufs);
481 ctx->cbufs = NULL;
482 }
483
484 static int
485 get_mapped_cbuf_idx(const struct nvk_cbuf *key,
486 const struct lower_descriptors_ctx *ctx)
487 {
488 if (ctx->cbuf_map == NULL)
489 return -1;
490
491 for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
492 if (nvk_cbuf_equal(&ctx->cbuf_map->cbufs[c], key)) {
493 return c;
494 }
495 }
496
497 return -1;
498 }
499
500 static bool
501 lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
502 {
503 const struct lower_descriptors_ctx *ctx = _ctx;
504
505 if (load->intrinsic != nir_intrinsic_load_deref)
506 return false;
507
508 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
509 if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
510 return false;
511
512 uint64_t offset, end;
513 UNUSED uint64_t start;
514 UNUSED nir_intrinsic_instr *res_index;
515 struct nvk_cbuf cbuf_key =
516 ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
517
518 if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
519 return false;
520
521 if (end > NVK_MAX_CBUF_SIZE)
522 return false;
523
524 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
525 if (cbuf_idx < 0)
526 return false;
527
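   /* Rebuild the UBO access as a (cbuf index, byte offset) pair in
    * nir_address_format_32bit_index_offset and let NIR lower the load
    * against that address in place.
    */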
528 b->cursor = nir_before_instr(&load->instr);
529
530 nir_deref_path path;
531 nir_deref_path_init(&path, deref, NULL);
532
533 nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
534 nir_address_format addr_format = nir_address_format_32bit_index_offset;
535 for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
536 addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);
537
538 nir_deref_path_finish(&path);
539
540 nir_lower_explicit_io_instr(b, load, addr, addr_format);
541
542 return true;
543 }
544
545 static bool
546 lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
547 const struct lower_descriptors_ctx *ctx)
548 {
549 assert(load->intrinsic == nir_intrinsic_load_constant);
550
551 const struct nvk_cbuf cbuf_key = {
552 .type = NVK_CBUF_TYPE_SHADER_DATA,
553 };
554 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
555 assert(cbuf_idx >= 0);
556
557 uint32_t base = nir_intrinsic_base(load);
558
559 b->cursor = nir_before_instr(&load->instr);
560
561 nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
562 nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
563 nir_imm_int(b, cbuf_idx), offset,
564 .align_mul = nir_intrinsic_align_mul(load),
565 .align_offset = nir_intrinsic_align_offset(load));
566
567 nir_def_rewrite_uses(&load->def, data);
568
569 return true;
570 }
571
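/* Returns the 64-bit GPU address of descriptor set `set`, read from the
 * root descriptor table (always bound at cbuf 0).
 */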
572 static nir_def *
573 load_descriptor_set_addr(nir_builder *b, uint32_t set,
574 UNUSED const struct lower_descriptors_ctx *ctx)
575 {
576 uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
577 set * sizeof(struct nvk_buffer_address);
578
579 return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
580 nir_imm_int(b, set_addr_offset),
581 .align_mul = 8, .align_offset = 0);
582 }
583
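/* Returns the index of the first dynamic buffer belonging to `set` within
 * the root table's dynamic_buffers array.  This folds to a constant when
 * every lower-numbered set layout is known; otherwise it is loaded from
 * the root descriptor table.
 */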
584 static nir_def *
585 load_dynamic_buffer_start(nir_builder *b, uint32_t set,
586 const struct lower_descriptors_ctx *ctx)
587 {
588 int dynamic_buffer_start_imm = 0;
589 for (uint32_t s = 0; s < set; s++) {
590 if (ctx->set_layouts[s] == NULL) {
591 dynamic_buffer_start_imm = -1;
592 break;
593 }
594
595 dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
596 }
597
598 if (dynamic_buffer_start_imm >= 0) {
599 return nir_imm_int(b, dynamic_buffer_start_imm);
600 } else {
601 uint32_t root_offset =
602 nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
603
604 return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
605 nir_imm_int(b, root_offset),
606 .align_mul = 1, .align_offset = 0));
607 }
608 }
609
610 static nir_def *
611 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
612 uint32_t set, uint32_t binding, nir_def *index,
613 unsigned offset_B, const struct lower_descriptors_ctx *ctx)
614 {
615 const struct nvk_descriptor_set_binding_layout *binding_layout =
616 get_binding_layout(set, binding, ctx);
617
618 if (ctx->clamp_desc_array_bounds)
619 index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
620
621 switch (binding_layout->type) {
622 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
623 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
624 /* Get the index in the root descriptor table dynamic_buffers array. */
625 nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
626
627 index = nir_iadd(b, index,
628 nir_iadd_imm(b, dynamic_buffer_start,
629 binding_layout->dynamic_buffer_index));
630 uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
631 nir_def *root_desc_offset =
632 nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
633 nvk_root_descriptor_offset(dynamic_buffers));
634
635 assert(num_components * bit_size <= desc_size * 8);
636 return nir_ldc_nv(b, num_components, bit_size,
637 nir_imm_int(b, 0), root_desc_offset,
638 .align_mul = 16, .align_offset = 0);
639 }
640
641 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
642 nir_def *base_addr =
643 nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
644 binding_layout->offset);
645
646 assert(binding_layout->stride == 1);
647 const uint32_t binding_size = binding_layout->array_size;
648
649 if (ctx->use_bindless_cbuf) {
650 assert(num_components == 1 && bit_size == 64);
651 const uint32_t size = align(binding_size, 16);
652 return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
653 ((uint64_t)size >> 4) << 45);
654 } else {
655 /* Convert it to nir_address_format_64bit_bounded_global */
656 assert(num_components == 4 && bit_size == 32);
657 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
658 nir_unpack_64_2x32_split_y(b, base_addr),
659 nir_imm_int(b, binding_size),
660 nir_imm_int(b, 0));
661 }
662 }
663
664 default: {
665 assert(binding_layout->stride > 0);
666 nir_def *desc_ubo_offset =
667 nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
668 binding_layout->offset + offset_B);
669
670 uint64_t max_desc_ubo_offset = binding_layout->offset +
671 binding_layout->array_size * binding_layout->stride;
672
673 unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
674 desc_align_mul = MIN2(desc_align_mul, 16);
675 unsigned desc_align_offset = binding_layout->offset + offset_B;
676 desc_align_offset %= desc_align_mul;
677
678 const struct nvk_cbuf cbuf_key = {
679 .type = NVK_CBUF_TYPE_DESC_SET,
680 .desc_set = set,
681 };
682 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
683
684 if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
685 return nir_ldc_nv(b, num_components, bit_size,
686 nir_imm_int(b, cbuf_idx),
687 desc_ubo_offset,
688 .align_mul = desc_align_mul,
689 .align_offset = desc_align_offset);
690 } else {
691 nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
692 return nir_load_global_constant_offset(b, num_components, bit_size,
693 set_addr, desc_ubo_offset,
694 .align_mul = desc_align_mul,
695 .align_offset = desc_align_offset);
696 }
697 }
698 }
699 }
700
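/* Returns true if walking back through any chain of
 * vulkan_resource_reindex intrinsics ends at vulkan_resource_index, i.e.
 * the descriptor pointer was not loaded from memory.
 */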
701 static bool
702 is_idx_intrin(nir_intrinsic_instr *intrin)
703 {
704 while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
705 intrin = nir_src_as_intrinsic(intrin->src[0]);
706 if (intrin == NULL)
707 return false;
708 }
709
710 return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
711 }
712
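/* Converts a bounded-global buffer address vec4 (base_lo, base_hi, size,
 * offset) into the (handle_lo, handle_hi, offset) vec3 used for bindless
 * cbuf (LDC.X) access.
 */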
713 static nir_def *
714 buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
715 {
716 nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
717 nir_def *size = nir_channel(b, addr, 2);
718 nir_def *offset = nir_channel(b, addr, 3);
719
720 nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
721 nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
722 nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);
723
724 /* If we assume the top bits of the address are 0, as well as the bottom
725 * four bits of the size (which we can trust since it's a descriptor), then
726 *
727 * ((size >> 4) << 13) | addr
728 *
729 * is just an imad.
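 *
 * (The bindless cbuf handle packs base_addr >> 4 into its low 45 bits and
 * size >> 4 starting at bit 45, so within the high 32-bit word the size
 * field starts at bit 13; see the inline-uniform path in load_descriptor().)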
730 */
731 nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);
732
733 return nir_vec3(b, addr16_lo, handle_hi, offset);
734 }
735
736 static nir_def *
737 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
738 const struct lower_descriptors_ctx *ctx)
739 {
740 nir_def *index = nir_imm_int(b, 0);
741
742 while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
743 index = nir_iadd(b, index, intrin->src[1].ssa);
744 intrin = nir_src_as_intrinsic(intrin->src[0]);
745 }
746
747 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
748 uint32_t set = nir_intrinsic_desc_set(intrin);
749 uint32_t binding = nir_intrinsic_binding(intrin);
750 index = nir_iadd(b, index, intrin->src[0].ssa);
751
752 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
753 if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
754 nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);
755
756 /* The descriptor is just the handle. NIR also needs an offset. */
757 return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
758 nir_unpack_64_2x32_split_y(b, desc),
759 nir_imm_int(b, 0));
760 } else {
761 nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
762
763 /* We know a priori that the .w component (offset) is zero */
764 return nir_vec4(b, nir_channel(b, desc, 0),
765 nir_channel(b, desc, 1),
766 nir_channel(b, desc, 2),
767 nir_imm_int(b, 0));
768 }
769 }
770
771 static bool
772 try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
773 const struct lower_descriptors_ctx *ctx)
774 {
775 ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
776 b->cursor = nir_before_instr(&intrin->instr);
777
778 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
779 if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
780 assert(descriptor_type_is_ssbo(desc_type));
781 return false;
782 }
783
784 nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
785
786 nir_def_rewrite_uses(&intrin->def, desc);
787
788 return true;
789 }
790
791 static bool
792 _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
793 uint32_t root_table_offset,
794 const struct lower_descriptors_ctx *ctx)
795 {
796 b->cursor = nir_instr_remove(&intrin->instr);
797
798 nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
799 intrin->def.bit_size,
800 nir_imm_int(b, 0), /* Root table */
801 nir_imm_int(b, root_table_offset),
802 .align_mul = 4,
803 .align_offset = 0);
804
805 nir_def_rewrite_uses(&intrin->def, val);
806
807 return true;
808 }
809
810 #define lower_sysval_to_root_table(b, intrin, member, ctx) \
811 _lower_sysval_to_root_table(b, intrin, \
812 nvk_root_descriptor_offset(member), \
813 ctx)
814
815 static bool
816 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
817 const struct lower_descriptors_ctx *ctx)
818 {
819 const uint32_t push_region_offset =
820 nvk_root_descriptor_offset(push);
821 const uint32_t base = nir_intrinsic_base(load);
822
823 b->cursor = nir_before_instr(&load->instr);
824
825 nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
826 push_region_offset + base);
827
828 nir_def *val =
829 nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
830 nir_imm_int(b, 0), offset,
831 .align_mul = load->def.bit_size / 8,
832 .align_offset = 0);
833
834 nir_def_rewrite_uses(&load->def, val);
835
836 return true;
837 }
838
839 static void
840 get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
841 uint32_t *set, uint32_t *binding,
842 nir_def **index)
843 {
844 if (deref->deref_type == nir_deref_type_array) {
845 *index = deref->arr.index.ssa;
846 deref = nir_deref_instr_parent(deref);
847 } else {
848 *index = nir_imm_int(b, 0);
849 }
850
851 assert(deref->deref_type == nir_deref_type_var);
852 nir_variable *var = deref->var;
853
854 *set = var->data.descriptor_set;
855 *binding = var->data.binding;
856 }
857
858 static nir_def *
859 load_resource_deref_desc(nir_builder *b,
860 unsigned num_components, unsigned bit_size,
861 nir_deref_instr *deref, unsigned offset_B,
862 const struct lower_descriptors_ctx *ctx)
863 {
864 uint32_t set, binding;
865 nir_def *index;
866 get_resource_deref_binding(b, deref, &set, &binding, &index);
867 return load_descriptor(b, num_components, bit_size,
868 set, binding, index, offset_B, ctx);
869 }
870
871 static void
872 lower_msaa_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
873 const struct lower_descriptors_ctx *ctx)
874 {
875 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS);
876
877 b->cursor = nir_before_instr(&intrin->instr);
878 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
879 nir_def *desc = load_resource_deref_desc(b, 2, 32, deref, 0, ctx);
880 nir_def *desc0 = nir_channel(b, desc, 0);
881 nir_def *desc1 = nir_channel(b, desc, 1);
882
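   /* The MSAA storage image descriptor consumed here packs the image index
    * into bits [0, 19] of desc0, log2 of the sample grid width and height
    * into bits [20, 21] and [22, 23], and a table of per-sample (x, y)
    * offsets, 4 bits per sample, into desc1.
    */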
883 nir_def *img_index = nir_ubitfield_extract_imm(b, desc0, 0, 20);
884 nir_rewrite_image_intrinsic(intrin, img_index, true);
885
886 nir_def *sw_log2 = nir_ubitfield_extract_imm(b, desc0, 20, 2);
887 nir_def *sh_log2 = nir_ubitfield_extract_imm(b, desc0, 22, 2);
888 nir_def *s_map = desc1;
889
890 nir_def *sw = nir_ishl(b, nir_imm_int(b, 1), sw_log2);
891 nir_def *sh = nir_ishl(b, nir_imm_int(b, 1), sh_log2);
892 nir_def *num_samples = nir_imul(b, sw, sh);
893
894 switch (intrin->intrinsic) {
895 case nir_intrinsic_bindless_image_load:
896 case nir_intrinsic_bindless_image_store:
897 case nir_intrinsic_bindless_image_atomic:
898 case nir_intrinsic_bindless_image_atomic_swap: {
899 nir_def *x = nir_channel(b, intrin->src[1].ssa, 0);
900 nir_def *y = nir_channel(b, intrin->src[1].ssa, 1);
901 nir_def *z = nir_channel(b, intrin->src[1].ssa, 2);
902 nir_def *w = nir_channel(b, intrin->src[1].ssa, 3);
903 nir_def *s = intrin->src[2].ssa;
904
905 nir_def *s_xy = nir_ushr(b, s_map, nir_imul_imm(b, s, 4));
906 nir_def *sx = nir_ubitfield_extract_imm(b, s_xy, 0, 2);
907 nir_def *sy = nir_ubitfield_extract_imm(b, s_xy, 2, 2);
908
909 x = nir_imad(b, x, sw, sx);
910 y = nir_imad(b, y, sh, sy);
911
912 /* Make OOB sample indices OOB X/Y indices */
913 x = nir_bcsel(b, nir_ult(b, s, num_samples), x, nir_imm_int(b, -1));
914
915 nir_src_rewrite(&intrin->src[1], nir_vec4(b, x, y, z, w));
916 nir_src_rewrite(&intrin->src[2], nir_undef(b, 1, 32));
917 break;
918 }
919
920 case nir_intrinsic_bindless_image_size: {
921 b->cursor = nir_after_instr(&intrin->instr);
922
923 nir_def *size = &intrin->def;
924 nir_def *w = nir_channel(b, size, 0);
925 nir_def *h = nir_channel(b, size, 1);
926
927 w = nir_ushr(b, w, sw_log2);
928 h = nir_ushr(b, h, sh_log2);
929
930 size = nir_vector_insert_imm(b, size, w, 0);
931 size = nir_vector_insert_imm(b, size, h, 1);
932
933 nir_def_rewrite_uses_after(&intrin->def, size, size->parent_instr);
934 break;
935 }
936
937 case nir_intrinsic_bindless_image_samples: {
938 /* We need to handle NULL descriptors explicitly */
939 nir_def *samples =
940 nir_bcsel(b, nir_ieq(b, desc0, nir_imm_int(b, 0)),
941 nir_imm_int(b, 0), num_samples);
942 nir_def_rewrite_uses(&intrin->def, samples);
943 break;
944 }
945
946 default:
947 unreachable("Unknown image intrinsic");
948 }
949
950 nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
951 }
952
953 static bool
954 is_edb_buffer_view(nir_deref_instr *deref,
955 const struct lower_descriptors_ctx *ctx)
956 {
957 if (glsl_get_sampler_dim(deref->type) != GLSL_SAMPLER_DIM_BUF)
958 return false;
959
960 if (ctx->use_edb_buffer_views)
961 return true;
962
963 nir_variable *var = nir_deref_instr_get_variable(deref);
964 uint8_t set = var->data.descriptor_set;
965
966 return ctx->set_layouts[set]->flags &
967 VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
968 }
969
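/* An EDB buffer view descriptor is a vec4: .x is the texture/image index
 * (0 for a null view), .y the view's offset into the underlying buffer in
 * elements, .z the view size in elements, and .w the alpha value to return
 * for out-of-bounds reads.  The helpers below pick those fields apart.
 */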
970 static nir_def *
971 edb_buffer_view_is_null(nir_builder *b, nir_def *desc)
972 {
973 assert(desc->num_components == 4);
974 nir_def *index = nir_channel(b, desc, 0);
975 return nir_ieq_imm(b, index, 0);
976 }
977
978 static nir_def *
979 edb_buffer_view_offset_el(nir_builder *b, nir_def *desc)
980 {
981 assert(desc->num_components == 4);
982 return nir_channel(b, desc, 1);
983 }
984
985 static nir_def *
986 edb_buffer_view_size_el(nir_builder *b, nir_def *desc)
987 {
988 assert(desc->num_components == 4);
989 return nir_channel(b, desc, 2);
990 }
991
992 static nir_def *
993 edb_buffer_view_oob_alpha(nir_builder *b, nir_def *desc)
994 {
995 assert(desc->num_components == 4);
996 return nir_channel(b, desc, 3);
997 }
998
999 static nir_def *
1000 edb_buffer_view_coord_is_in_bounds(nir_builder *b, nir_def *desc,
1001 nir_def *coord)
1002 {
1003 assert(desc->num_components == 4);
1004 return nir_ult(b, coord, edb_buffer_view_size_el(b, desc));
1005 }
1006
1007 static nir_def *
1008 edb_buffer_view_index(nir_builder *b, nir_def *desc, nir_def *in_bounds)
1009 {
1010 assert(desc->num_components == 4);
1011 nir_def *index = nir_channel(b, desc, 0);
1012
1013 /* Use the NULL descriptor for OOB access */
1014 return nir_bcsel(b, in_bounds, index, nir_imm_int(b, 0));
1015 }
1016
1017 static nir_def *
1018 adjust_edb_buffer_view_coord(nir_builder *b, nir_def *desc, nir_def *coord)
1019 {
1020 return nir_iadd(b, coord, edb_buffer_view_offset_el(b, desc));
1021 }
1022
1023 static nir_def *
1024 fixup_edb_buffer_view_result(nir_builder *b, nir_def *desc, nir_def *in_bounds,
1025 nir_def *res, nir_alu_type dest_type)
1026 {
1027 if (res->num_components < 4)
1028 return res;
1029
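   /* Out-of-bounds reads on a non-null view return the descriptor's
    * precomputed alpha (component .w) in place of the loaded alpha.
    */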
1030 nir_def *is_null = edb_buffer_view_is_null(b, desc);
1031 nir_def *oob_alpha = edb_buffer_view_oob_alpha(b, desc);
1032
1033 nir_def *a = nir_channel(b, res, 3);
1034 a = nir_bcsel(b, nir_ior(b, in_bounds, is_null), a, oob_alpha);
1035 return nir_vector_insert_imm(b, res, a, 3);
1036 }
1037
1038 static void
1039 lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1040 const struct lower_descriptors_ctx *ctx)
1041 {
1042 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF);
1043
1044 b->cursor = nir_before_instr(&intrin->instr);
1045 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1046 nir_def *desc = load_resource_deref_desc(b, 4, 32, deref, 0, ctx);
1047
1048 switch (intrin->intrinsic) {
1049 case nir_intrinsic_image_deref_load:
1050 case nir_intrinsic_image_deref_store:
1051 case nir_intrinsic_image_deref_atomic:
1052 case nir_intrinsic_image_deref_atomic_swap: {
1053 nir_def *pos = intrin->src[1].ssa;
1054 nir_def *x = nir_channel(b, pos, 0);
1055
1056 nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, x);
1057 nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1058
1059 nir_def *new_x = adjust_edb_buffer_view_coord(b, desc, x);
1060 pos = nir_vector_insert_imm(b, pos, new_x, 0);
1061 nir_src_rewrite(&intrin->src[1], pos);
1062
1063 if (intrin->intrinsic == nir_intrinsic_image_deref_load) {
1064 b->cursor = nir_after_instr(&intrin->instr);
1065 nir_def *res = &intrin->def;
1066 res = fixup_edb_buffer_view_result(b, desc, in_bounds, res,
1067 nir_intrinsic_dest_type(intrin));
1068 nir_def_rewrite_uses_after(&intrin->def, res, res->parent_instr);
1069 }
1070
1071 nir_rewrite_image_intrinsic(intrin, index, true);
1072 break;
1073 }
1074
1075 case nir_intrinsic_image_deref_size: {
1076 assert(intrin->def.num_components == 1);
1077 nir_def *size_el = nir_channel(b, desc, 2);
1078 nir_def_rewrite_uses(&intrin->def, size_el);
1079 break;
1080 }
1081
1082 default:
1083 unreachable("Unknown image intrinsic");
1084 }
1085 }
1086
1087 static bool
1088 lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1089 const struct lower_descriptors_ctx *ctx)
1090 {
1091 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1092
1093 if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_MS) {
1094 lower_msaa_image_intrin(b, intrin, ctx);
1095 } else if (is_edb_buffer_view(deref, ctx)) {
1096 lower_edb_buffer_image_intrin(b, intrin, ctx);
1097 } else {
1098 b->cursor = nir_before_instr(&intrin->instr);
1099 nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
1100 nir_rewrite_image_intrinsic(intrin, desc, true);
1101 }
1102
1103 return true;
1104 }
1105
1106 static bool
1107 lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
1108 const struct lower_descriptors_ctx *ctx)
1109 {
1110 const uint32_t root_table_offset =
1111 nvk_root_descriptor_offset(draw.sample_locations);
1112
1113 nir_def *sample = interp->src[1].ssa;
1114
1115 b->cursor = nir_before_instr(&interp->instr);
1116
1117 nir_def *loc = nir_ldc_nv(b, 1, 64,
1118 nir_imm_int(b, 0), /* Root table */
1119 nir_imm_int(b, root_table_offset),
1120 .align_mul = 8,
1121 .align_offset = 0);
1122
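   /* Sample locations are packed into a single 64-bit word: one byte per
    * sample with x in the low nibble and y in the high nibble, both as
    * 0.4 fixed-point values.
    */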
1123 /* Yay little endian */
1124 loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
1125 nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
1126 nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
1127 nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
1128 nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
1129 nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);
1130
1131 assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
1132 interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
1133 nir_src_rewrite(&interp->src[1], offset);
1134
1135 return true;
1136 }
1137
1138 static bool
1139 try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1140 const struct lower_descriptors_ctx *ctx)
1141 {
1142 switch (intrin->intrinsic) {
1143 case nir_intrinsic_load_constant:
1144 return lower_load_constant(b, intrin, ctx);
1145
1146 case nir_intrinsic_load_vulkan_descriptor:
1147 return try_lower_load_vulkan_descriptor(b, intrin, ctx);
1148
1149 case nir_intrinsic_load_workgroup_size:
1150 unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
1151
1152 case nir_intrinsic_load_num_workgroups:
1153 return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);
1154
1155 case nir_intrinsic_load_base_workgroup_id:
1156 return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);
1157
1158 case nir_intrinsic_load_push_constant:
1159 return lower_load_push_constant(b, intrin, ctx);
1160
1161 case nir_intrinsic_load_base_vertex:
1162 case nir_intrinsic_load_first_vertex:
1163 return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);
1164
1165 case nir_intrinsic_load_base_instance:
1166 return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);
1167
1168 case nir_intrinsic_load_draw_id:
1169 return lower_sysval_to_root_table(b, intrin, draw.draw_index, ctx);
1170
1171 case nir_intrinsic_load_view_index:
1172 return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);
1173
1174 case nir_intrinsic_image_deref_load:
1175 case nir_intrinsic_image_deref_sparse_load:
1176 case nir_intrinsic_image_deref_store:
1177 case nir_intrinsic_image_deref_atomic:
1178 case nir_intrinsic_image_deref_atomic_swap:
1179 case nir_intrinsic_image_deref_size:
1180 case nir_intrinsic_image_deref_samples:
1181 return lower_image_intrin(b, intrin, ctx);
1182
1183 case nir_intrinsic_interp_deref_at_sample:
1184 return lower_interp_at_sample(b, intrin, ctx);
1185
1186 default:
1187 return false;
1188 }
1189 }
1190
1191 static void
1192 lower_edb_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex,
1193 const struct lower_descriptors_ctx *ctx)
1194 {
1195 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_BUF);
1196
1197 b->cursor = nir_before_instr(&tex->instr);
1198
1199 const int texture_src_idx =
1200 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1201 nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1202
1203 nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1204 ASSERTED const uint32_t plane =
1205 plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1206 assert(plane == 0);
1207
1208 nir_def *desc = load_resource_deref_desc(b, 4, 32, texture, 0, ctx);
1209
1210 switch (tex->op) {
1211 case nir_texop_txf: {
1212 const int coord_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1213 assert(coord_src_idx >= 0);
1214 nir_def *coord = tex->src[coord_src_idx].src.ssa;
1215
1216 nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, coord);
1217
1218 nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1219 nir_src_rewrite(&tex->src[texture_src_idx].src, index);
1220 tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1221
1222 nir_def *new_coord = adjust_edb_buffer_view_coord(b, desc, coord);
1223 nir_src_rewrite(&tex->src[coord_src_idx].src, new_coord);
1224
1225 b->cursor = nir_after_instr(&tex->instr);
1226 nir_def *res = &tex->def;
1227 res = fixup_edb_buffer_view_result(b, desc, in_bounds,
1228 res, tex->dest_type);
1229 nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1230 break;
1231 }
1232
1233 case nir_texop_txs: {
1234 assert(tex->def.num_components == 1);
1235 nir_def *size_el = edb_buffer_view_size_el(b, desc);
1236 nir_def_rewrite_uses(&tex->def, size_el);
1237 break;
1238 }
1239
1240 default:
1241 unreachable("Invalid buffer texture op");
1242 }
1243 }
1244
1245 static bool
1246 lower_tex(nir_builder *b, nir_tex_instr *tex,
1247 const struct lower_descriptors_ctx *ctx)
1248 {
1249 const int texture_src_idx =
1250 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1251 const int sampler_src_idx =
1252 nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
1253 if (texture_src_idx < 0) {
1254 assert(sampler_src_idx < 0);
1255 return false;
1256 }
1257
1258 nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1259 nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
1260 nir_src_as_deref(tex->src[sampler_src_idx].src);
1261 assert(texture);
1262
1263 if (is_edb_buffer_view(texture, ctx)) {
1264 lower_edb_buffer_tex_instr(b, tex, ctx);
1265 return true;
1266 }
1267
1268 b->cursor = nir_before_instr(&tex->instr);
1269
1270 nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1271 const uint32_t plane =
1272 plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1273 const uint64_t plane_offset_B =
1274 plane * sizeof(struct nvk_sampled_image_descriptor);
1275
1276 nir_def *texture_desc =
1277 load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
1278
1279 nir_def *combined_handle;
1280 if (texture == sampler) {
1281 combined_handle = texture_desc;
1282 } else {
1283 combined_handle = nir_iand_imm(b, texture_desc,
1284 NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1285
1286 if (sampler != NULL) {
1287 nir_def *sampler_desc =
1288 load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
1289 nir_def *sampler_index =
1290 nir_iand_imm(b, sampler_desc,
1291 NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
1292 combined_handle = nir_ior(b, combined_handle, sampler_index);
1293 }
1294 }
1295
1296 /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
1297 combined_handle = nir_u2u64(b, combined_handle);
1298
1299 /* TODO: The nv50 back-end assumes it gets handles both places, even for
1300 * texelFetch.
1301 */
1302 nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
1303 tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1304
1305 if (sampler_src_idx < 0) {
1306 nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
1307 } else {
1308 nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
1309 tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
1310 }
1311
1312 /* On pre-Volta hardware, we don't have real null descriptors. Null
1313 * descriptors work well enough for sampling but they may not return the
1314 * correct query results.
1315 */
1316 if (ctx->dev_info->cls_eng3d < VOLTA_A && nir_tex_instr_is_query(tex)) {
1317 b->cursor = nir_after_instr(&tex->instr);
1318
1319 /* This should get CSE'd with the earlier load */
1320 nir_def *texture_handle =
1321 nir_iand_imm(b, texture_desc, NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1322 nir_def *is_null = nir_ieq_imm(b, texture_handle, 0);
1323 nir_def *zero = nir_imm_zero(b, tex->def.num_components,
1324 tex->def.bit_size);
1325 nir_def *res = nir_bcsel(b, is_null, zero, &tex->def);
1326 nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1327 }
1328
1329 return true;
1330 }
1331
1332 static bool
1333 try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
1334 void *_data)
1335 {
1336 const struct lower_descriptors_ctx *ctx = _data;
1337
1338 switch (instr->type) {
1339 case nir_instr_type_tex:
1340 return lower_tex(b, nir_instr_as_tex(instr), ctx);
1341 case nir_instr_type_intrinsic:
1342 return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
1343 default:
1344 return false;
1345 }
1346 }
1347
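/* Sentinel used as the high 32 bits of the fake base address produced for
 * dynamic SSBO bindings, whose descriptors live in the root descriptor
 * table rather than in memory.  lower_load_ssbo_descriptor() compares
 * against it to choose between a root-table (cbuf 0) load and a global
 * memory load.
 */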
1348 #define ROOT_DESC_BASE_ADDR_HI 0x0057de3c
1349
1350 static bool
1351 lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
1352 const struct lower_descriptors_ctx *ctx)
1353 {
1354 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1355 return false;
1356
1357 b->cursor = nir_instr_remove(&intrin->instr);
1358
1359 uint32_t set = nir_intrinsic_desc_set(intrin);
1360 uint32_t binding = nir_intrinsic_binding(intrin);
1361 nir_def *index = intrin->src[0].ssa;
1362
1363 const struct nvk_descriptor_set_binding_layout *binding_layout =
1364 get_binding_layout(set, binding, ctx);
1365
1366 nir_def *binding_addr;
1367 uint8_t binding_stride;
1368 switch (binding_layout->type) {
1369 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
1370 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
1371 nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
1372 binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
1373 binding_stride = binding_layout->stride;
1374 break;
1375 }
1376
1377 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1378 nir_def *dynamic_buffer_start =
1379 nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
1380 binding_layout->dynamic_buffer_index);
1381
1382 nir_def *dynamic_binding_offset =
1383 nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
1384 sizeof(struct nvk_buffer_address)),
1385 nvk_root_descriptor_offset(dynamic_buffers));
1386
1387 binding_addr =
1388 nir_pack_64_2x32_split(b, dynamic_binding_offset,
1389 nir_imm_int(b, ROOT_DESC_BASE_ADDR_HI));
1390 binding_stride = sizeof(struct nvk_buffer_address);
1391 break;
1392 }
1393
1394 default:
1395 unreachable("Not an SSBO descriptor");
1396 }
1397
1398 /* Tuck the stride in the top 8 bits of the binding address */
1399 binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
1400
1401 const uint32_t binding_size = binding_layout->array_size * binding_stride;
1402 nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
1403
1404 /* We depend on this when we load descriptors */
1405 assert(binding_layout->array_size >= 1);
1406
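   /* Build the SSBO "descriptor" as (binding_addr_lo, binding_addr_hi,
    * binding_size, offset_in_binding).  The stride rides in the top 8 bits
    * of the binding address; the real buffer descriptor is fetched later by
    * lower_load_ssbo_descriptor().
    */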
1407 nir_def *addr;
1408 switch (ctx->ssbo_addr_format) {
1409 case nir_address_format_64bit_global_32bit_offset:
1410 case nir_address_format_64bit_bounded_global:
1411 addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
1412 nir_unpack_64_2x32_split_y(b, binding_addr),
1413 nir_imm_int(b, binding_size),
1414 offset_in_binding);
1415 break;
1416
1417 default:
1418 unreachable("Unknown address mode");
1419 }
1420
1421 nir_def_rewrite_uses(&intrin->def, addr);
1422
1423 return true;
1424 }
1425
1426 static bool
1427 lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
1428 const struct lower_descriptors_ctx *ctx)
1429 {
1430 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1431 return false;
1432
1433 b->cursor = nir_instr_remove(&intrin->instr);
1434
1435 nir_def *addr = intrin->src[0].ssa;
1436 nir_def *index = intrin->src[1].ssa;
1437
1438 nir_def *addr_high32;
1439 switch (ctx->ssbo_addr_format) {
1440 case nir_address_format_64bit_global_32bit_offset:
1441 case nir_address_format_64bit_bounded_global:
1442 addr_high32 = nir_channel(b, addr, 1);
1443 break;
1444
1445 default:
1446 unreachable("Unknown address mode");
1447 }
1448
1449 nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
1450 nir_def *offset = nir_imul(b, index, stride);
1451
1452 addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
1453 nir_var_mem_ssbo, offset);
1454 nir_def_rewrite_uses(&intrin->def, addr);
1455
1456 return true;
1457 }
1458
1459 static bool
1460 lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1461 const struct lower_descriptors_ctx *ctx)
1462 {
1463 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1464 return false;
1465
1466 b->cursor = nir_instr_remove(&intrin->instr);
1467
1468 nir_def *addr = intrin->src[0].ssa;
1469
1470 nir_def *base, *offset, *size = NULL;
1471 switch (ctx->ssbo_addr_format) {
1472 case nir_address_format_64bit_global_32bit_offset: {
1473 base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1474 offset = nir_channel(b, addr, 3);
1475 break;
1476 }
1477
1478 case nir_address_format_64bit_bounded_global: {
1479 base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1480 size = nir_channel(b, addr, 2);
1481 offset = nir_channel(b, addr, 3);
1482 break;
1483 }
1484
1485 default:
1486 unreachable("Unknown address mode");
1487 }
1488
1489 /* Mask off the binding stride */
1490 base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1491
1492 nir_def *base_lo = nir_unpack_64_2x32_split_x(b, base);
1493 nir_def *base_hi = nir_unpack_64_2x32_split_y(b, base);
1494
1495 nir_def *desc_root, *desc_global;
1496 nir_push_if(b, nir_ieq_imm(b, base_hi, ROOT_DESC_BASE_ADDR_HI));
1497 {
1498 desc_root = nir_load_ubo(b, 4, 32, nir_imm_int(b, 0),
1499 nir_iadd(b, base_lo, offset),
1500 .align_mul = 16, .align_offset = 0,
1501 .range = ~0);
1502 if (size != NULL) {
1503 /* assert(binding_layout->array_size >= 1); */
1504 nir_def *is_oob = nir_ult(b, nir_iadd_imm(b, size, -16), offset);
1505 desc_root = nir_bcsel(b, is_oob, nir_imm_zero(b, 4, 32), desc_root);
1506 }
1507 }
1508 nir_push_else(b, NULL);
1509 {
1510 if (size != NULL) {
1511 desc_global = nir_load_global_constant_bounded(b, 4, 32, base,
1512 offset, size,
1513 .align_mul = 16,
1514 .align_offset = 0);
1515 } else {
1516 desc_global = nir_load_global_constant_offset(b, 4, 32, base,
1517 offset,
1518 .align_mul = 16,
1519 .align_offset = 0);
1520 }
1521 }
1522 nir_pop_if(b, NULL);
1523 nir_def *desc = nir_if_phi(b, desc_root, desc_global);
1524
1525 nir_def_rewrite_uses(&intrin->def, desc);
1526
1527 return true;
1528 }
1529
1530 static bool
1531 lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
1532 void *_data)
1533 {
1534 const struct lower_descriptors_ctx *ctx = _data;
1535
1536 if (instr->type != nir_instr_type_intrinsic)
1537 return false;
1538
1539 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1540 switch (intrin->intrinsic) {
1541 case nir_intrinsic_vulkan_resource_index:
1542 return lower_ssbo_resource_index(b, intrin, ctx);
1543 case nir_intrinsic_vulkan_resource_reindex:
1544 return lower_ssbo_resource_reindex(b, intrin, ctx);
1545 case nir_intrinsic_load_vulkan_descriptor:
1546 return lower_load_ssbo_descriptor(b, intrin, ctx);
1547 default:
1548 return false;
1549 }
1550 }
1551
1552 bool
1553 nvk_nir_lower_descriptors(nir_shader *nir,
1554 const struct nvk_physical_device *pdev,
1555 VkShaderCreateFlagsEXT shader_flags,
1556 const struct vk_pipeline_robustness_state *rs,
1557 uint32_t set_layout_count,
1558 struct vk_descriptor_set_layout * const *set_layouts,
1559 struct nvk_cbuf_map *cbuf_map_out)
1560 {
1561 struct lower_descriptors_ctx ctx = {
1562 .dev_info = &pdev->info,
1563 .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
1564 .use_edb_buffer_views = nvk_use_edb_buffer_views(pdev),
1565 .clamp_desc_array_bounds =
1566 rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1567 rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1568 rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
1569 .indirect_bind =
1570 shader_flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT,
1571 .ssbo_addr_format = nvk_ssbo_addr_format(pdev, rs),
1572 .ubo_addr_format = nvk_ubo_addr_format(pdev, rs),
1573 };
1574
1575 assert(set_layout_count <= NVK_MAX_SETS);
1576 for (uint32_t s = 0; s < set_layout_count; s++) {
1577 if (set_layouts[s] != NULL)
1578 ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
1579 }
1580
1581 /* We run in four passes:
1582 *
1583 * 1. Find ranges of UBOs that we can promote to bound UBOs. Nothing is
1584 * actually lowered in this pass. It's just analysis.
1585 *
1586 * 2. Try to lower UBO loads to cbufs based on the map we just created.
1587 * We need to do this before the main lowering pass because it relies
1588 * on the original descriptor load intrinsics.
1589 *
1590 * 3. Attempt to lower everything with direct descriptors. This may fail
1591 * to lower some SSBO intrinsics when variable pointers are used.
1592 *
1593 * 4. Clean up any SSBO intrinsics which are left and lower them to
1594 * slightly less efficient but variable-pointers-correct versions.
1595 */
1596
1597 bool pass_lower_ubo = false;
1598 if (cbuf_map_out != NULL) {
1599 ctx.cbuf_map = cbuf_map_out;
1600 build_cbuf_map(nir, &ctx);
1601
1602 pass_lower_ubo =
1603 nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
1604 nir_metadata_control_flow,
1605 (void *)&ctx);
1606 }
1607
1608 bool pass_lower_descriptors =
1609 nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
1610 nir_metadata_control_flow,
1611 (void *)&ctx);
1612 bool pass_lower_ssbo =
1613 nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
1614 nir_metadata_control_flow,
1615 (void *)&ctx);
1616 return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
1617 }
1618