1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_cmd_buffer.h"
6 #include "nvk_descriptor_set_layout.h"
7 #include "nvk_descriptor_types.h"
8 #include "nvk_shader.h"
9
10 #include "vk_pipeline.h"
11
12 #include "nir_builder.h"
13 #include "nir_deref.h"
14
15 #include "clc397.h"
16 #include "clc597.h"
17
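/* Tracks a candidate cbuf binding during analysis: how many times the range
* described by `key` is used and the [start, end) byte range touched, so
* build_cbuf_map() can promote the most useful ranges to bound cbufs.
*/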
18 struct lower_desc_cbuf {
19 struct nvk_cbuf key;
20
21 uint32_t use_count;
22
23 uint64_t start;
24 uint64_t end;
25 };
26
27 DERIVE_HASH_TABLE(nvk_cbuf);
28
29 static int
compar_cbufs(const void *_a, const void *_b)
31 {
32 const struct lower_desc_cbuf *a = _a;
33 const struct lower_desc_cbuf *b = _b;
34
35 #define COMPAR(field, pos) \
36 if (a->field < b->field) return -(pos); \
37 if (a->field > b->field) return (pos);
38
39 /* Sort by most used first */
40 COMPAR(use_count, -1)
41
42 /* Keep the list stable by then sorting by key fields. */
43 COMPAR(key.type, 1)
44 COMPAR(key.desc_set, 1)
45 COMPAR(key.dynamic_idx, 1)
46 COMPAR(key.desc_offset, 1)
47
48 #undef COMPAR
49
50 return 0;
51 }
52
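/* Shared state for the lowering passes below: device info, the bound
* descriptor set layouts, feature and robustness flags, and (during
* analysis) the hash table and output map used to build the cbuf
* assignment.
*/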
53 struct lower_descriptors_ctx {
54 const struct nv_device_info *dev_info;
55 const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
56
57 bool use_bindless_cbuf;
58 bool use_edb_buffer_views;
59 bool clamp_desc_array_bounds;
60 bool indirect_bind;
61 nir_address_format ubo_addr_format;
62 nir_address_format ssbo_addr_format;
63
64 struct hash_table *cbufs;
65 struct nvk_cbuf_map *cbuf_map;
66 };
67
68 static bool
descriptor_type_is_ubo(VkDescriptorType desc_type)
70 {
71 switch (desc_type) {
72 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
73 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
74 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
75 return true;
76
77 default:
78 return false;
79 }
80 }
81
82 static bool
descriptor_type_is_ssbo(VkDescriptorType desc_type)
84 {
85 switch (desc_type) {
86 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
87 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
88 return true;
89
90 default:
91 return false;
92 }
93 }
94
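/* Record one use of the given cbuf key, bumping the use count and growing
* the tracked byte range if we've seen this key before.
*/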
95 static void
record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
97 struct lower_descriptors_ctx *ctx)
98 {
99 struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
100 if (entry != NULL) {
101 struct lower_desc_cbuf *cbuf = entry->data;
102 cbuf->use_count++;
103 cbuf->start = MIN2(cbuf->start, start);
104 cbuf->end = MAX2(cbuf->end, end);
105 } else {
106 struct lower_desc_cbuf *cbuf =
107 ralloc(ctx->cbufs, struct lower_desc_cbuf);
108 *cbuf = (struct lower_desc_cbuf) {
109 .key = *key,
110 .use_count = 1,
111 .start = start,
112 .end = end,
113 };
114 _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
115 }
116 }
117
118 static const struct nvk_descriptor_set_binding_layout *
get_binding_layout(uint32_t set, uint32_t binding,
120 const struct lower_descriptors_ctx *ctx)
121 {
122 assert(set < NVK_MAX_SETS);
123 assert(ctx->set_layouts[set] != NULL);
124
125 const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
126
127 assert(binding < set_layout->binding_count);
128 return &set_layout->binding[binding];
129 }
130
131 static void
record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
133 struct lower_descriptors_ctx *ctx)
134 {
135 const struct nvk_descriptor_set_binding_layout *binding_layout =
136 get_binding_layout(set, binding, ctx);
137
138 const struct nvk_cbuf key = {
139 .type = NVK_CBUF_TYPE_DESC_SET,
140 .desc_set = set,
141 };
142
143 uint64_t start, end;
144 if (index == NULL) {
145 /* When we don't have an index, assume 0 */
146 start = binding_layout->offset;
147 end = start + binding_layout->stride;
148 } else if (nir_src_is_const(*index)) {
149 start = binding_layout->offset +
150 nir_src_as_uint(*index) * binding_layout->stride;
151 end = start + binding_layout->stride;
152 } else {
153 start = binding_layout->offset;
154 end = start + binding_layout->array_size * binding_layout->stride;
155 }
156
157 record_cbuf_use(&key, start, end, ctx);
158 }
159
160 static void
record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
162 struct lower_descriptors_ctx *ctx)
163 {
164 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
165
166 /* These we'll handle later */
167 if (descriptor_type_is_ubo(nir_intrinsic_desc_type(intrin)))
168 return;
169
170 record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
171 nir_intrinsic_binding(intrin),
172 &intrin->src[0], ctx);
173 }
174
175 static void
record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
177 struct lower_descriptors_ctx *ctx)
178 {
179 nir_src *index_src = NULL;
180 if (deref->deref_type == nir_deref_type_array) {
181 index_src = &deref->arr.index;
182 deref = nir_deref_instr_parent(deref);
183 }
184
185 assert(deref->deref_type == nir_deref_type_var);
186 nir_variable *var = deref->var;
187
188 record_descriptor_cbuf_use(var->data.descriptor_set,
189 var->data.binding,
190 index_src, ctx);
191 }
192
193 static void
record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
195 struct lower_descriptors_ctx *ctx)
196 {
197 const int texture_src_idx =
198 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
199 const int sampler_src_idx =
200 nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
201
202 if (texture_src_idx >= 0) {
203 nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
204 record_deref_descriptor_cbuf_use(deref, ctx);
205 }
206
207 if (sampler_src_idx >= 0) {
208 nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
209 record_deref_descriptor_cbuf_use(deref, ctx);
210 }
211 }
212
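/* Walk a UBO deref chain back to its vulkan_resource_index intrinsic and
* translate it into an nvk_cbuf key plus the byte range accessed relative
* to the start of that cbuf.  Returns NVK_CBUF_TYPE_INVALID if the access
* can't be pinned to a single binding with a constant array index.
*/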
213 static struct nvk_cbuf
ubo_deref_to_cbuf(nir_deref_instr *deref,
215 nir_intrinsic_instr **resource_index_out,
216 uint64_t *offset_out,
217 uint64_t *start_out, uint64_t *end_out,
218 const struct lower_descriptors_ctx *ctx)
219 {
220 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
221
222 /* In case we early return */
223 *offset_out = 0;
224 *start_out = 0;
225 *end_out = UINT64_MAX;
226 *resource_index_out = NULL;
227
228 const struct nvk_cbuf invalid = {
229 .type = NVK_CBUF_TYPE_INVALID,
230 };
231
232 uint64_t offset = 0;
233 uint64_t range = glsl_get_explicit_size(deref->type, false);
234 bool offset_valid = true;
235 while (deref->deref_type != nir_deref_type_cast) {
236 nir_deref_instr *parent = nir_deref_instr_parent(deref);
237
238 switch (deref->deref_type) {
239 case nir_deref_type_var:
240 unreachable("Buffers don't use variables in Vulkan");
241
242 case nir_deref_type_array:
243 case nir_deref_type_array_wildcard: {
244 uint32_t stride = nir_deref_instr_array_stride(deref);
245 if (deref->deref_type == nir_deref_type_array &&
246 nir_src_is_const(deref->arr.index)) {
247 offset += nir_src_as_uint(deref->arr.index) * stride;
248 } else {
249 range = glsl_get_length(parent->type) * stride;
250 }
251 break;
252 }
253
254 case nir_deref_type_ptr_as_array:
255 /* All bets are off. We shouldn't see these most of the time
256 * anyway, even with variable pointers.
257 */
258 offset_valid = false;
259 unreachable("Variable pointers aren't allowed on UBOs");
260 break;
261
262 case nir_deref_type_struct: {
263 offset += glsl_get_struct_field_offset(parent->type,
264 deref->strct.index);
265 break;
266 }
267
268 default:
269 unreachable("Unknown deref type");
270 }
271
272 deref = parent;
273 }
274
275 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
276 if (load_desc == NULL ||
277 load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
278 return invalid;
279
280 nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
281 if (res_index == NULL ||
282 res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
283 return invalid;
284
285 /* We try to early return as little as possible prior to this point so we
286 * can return the resource index intrinsic in as many cases as possible.
287 * After this point, though, early returns are fair game.
288 */
289 *resource_index_out = res_index;
290
291 if (!offset_valid || !nir_src_is_const(res_index->src[0]))
292 return invalid;
293
294 uint32_t set = nir_intrinsic_desc_set(res_index);
295 uint32_t binding = nir_intrinsic_binding(res_index);
296 uint32_t index = nir_src_as_uint(res_index->src[0]);
297
298 const struct nvk_descriptor_set_binding_layout *binding_layout =
299 get_binding_layout(set, binding, ctx);
300
301 switch (binding_layout->type) {
302 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
303 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
304 *offset_out = 0;
305 *start_out = offset;
306 *end_out = offset + range;
307 return (struct nvk_cbuf) {
308 .type = NVK_CBUF_TYPE_UBO_DESC,
309 .desc_set = set,
310 .desc_offset = binding_layout->offset +
311 index * binding_layout->stride,
312 };
313 }
314
315 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
316 *offset_out = 0;
317 *start_out = offset;
318 *end_out = offset + range;
319
320 return (struct nvk_cbuf) {
321 .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
322 .desc_set = set,
323 .dynamic_idx = binding_layout->dynamic_buffer_index + index,
324 };
325 }
326
327 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
328 *offset_out = binding_layout->offset;
329 *start_out = binding_layout->offset + offset;
330 *end_out = *start_out + range;
331
332 return (struct nvk_cbuf) {
333 .type = NVK_CBUF_TYPE_DESC_SET,
334 .desc_set = set,
335 };
336 }
337
338 default:
339 return invalid;
340 }
341 }
342
343 static void
record_load_ubo_cbuf_uses(nir_deref_instr *deref,
345 struct lower_descriptors_ctx *ctx)
346 {
347 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
348
349 UNUSED uint64_t offset;
350 uint64_t start, end;
351 nir_intrinsic_instr *res_index;
352 struct nvk_cbuf cbuf =
353 ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
354
355 if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
356 record_cbuf_use(&cbuf, start, end, ctx);
357 } else if (res_index != NULL) {
358 record_vulkan_resource_cbuf_use(res_index, ctx);
359 }
360 }
361
362 static bool
record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
364 {
365 struct lower_descriptors_ctx *ctx = _ctx;
366
367 switch (instr->type) {
368 case nir_instr_type_intrinsic: {
369 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
370 switch (intrin->intrinsic) {
371 case nir_intrinsic_vulkan_resource_index:
372 record_vulkan_resource_cbuf_use(intrin, ctx);
373 return false;
374
375 case nir_intrinsic_load_deref: {
376 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
377 if (nir_deref_mode_is(deref, nir_var_mem_ubo))
378 record_load_ubo_cbuf_uses(deref, ctx);
379 return false;
380 }
381
382 case nir_intrinsic_image_deref_load:
383 case nir_intrinsic_image_deref_store:
384 case nir_intrinsic_image_deref_atomic:
385 case nir_intrinsic_image_deref_atomic_swap:
386 case nir_intrinsic_image_deref_size:
387 case nir_intrinsic_image_deref_samples: {
388 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
389 record_deref_descriptor_cbuf_use(deref, ctx);
390 return false;
391 }
392
393 default:
394 return false;
395 }
396 unreachable("All cases return false");
397 }
398
399 case nir_instr_type_tex:
400 record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
401 return false;
402
403 default:
404 return false;
405 }
406 }
407
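/* Build the shader's cbuf map: cbuf 0 is always the root descriptor table
* and cbuf 1 holds inline shader constant data (if any).  Unless the shader
* is indirectly bindable, the remaining slots are filled with the most
* frequently used descriptor ranges found by the analysis above.
*/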
408 static void
build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
410 {
411 ctx->cbuf_map->cbuf_count = 0;
412
413 /* Root descriptors always go in cbuf 0 */
414 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
415 .type = NVK_CBUF_TYPE_ROOT_DESC,
416 };
417
418 /* If we have constant data, put it at cbuf 1 */
419 if (nir->constant_data_size > 0) {
420 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
421 .type = NVK_CBUF_TYPE_SHADER_DATA,
422 };
423 }
424
425 if (ctx->indirect_bind)
426 return;
427
428 ctx->cbufs = nvk_cbuf_table_create(NULL);
429 nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
430 nir_metadata_all, (void *)ctx);
431
432 struct lower_desc_cbuf *cbufs =
433 ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
434 _mesa_hash_table_num_entries(ctx->cbufs));
435
436 uint32_t num_cbufs = 0;
437 hash_table_foreach(ctx->cbufs, entry) {
438 struct lower_desc_cbuf *cbuf = entry->data;
439
/* We only ever bind cbufs starting at offset 0, so if the used range
* starts beyond the maximum cbuf size, there's no point in including it
* in the list.
442 */
443 if (cbuf->start > NVK_MAX_CBUF_SIZE)
444 continue;
445
446 cbufs[num_cbufs++] = *cbuf;
447 }
448
449 qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
450
451 uint8_t max_cbuf_bindings;
452 if (nir->info.stage == MESA_SHADER_COMPUTE ||
453 nir->info.stage == MESA_SHADER_KERNEL) {
454 max_cbuf_bindings = 8;
455 } else {
456 max_cbuf_bindings = 16;
457 }
458
459 for (uint32_t i = 0; i < num_cbufs; i++) {
460 if (ctx->cbuf_map->cbuf_count >= max_cbuf_bindings)
461 break;
462
463 /* We can't support indirect cbufs in compute yet */
464 if ((nir->info.stage == MESA_SHADER_COMPUTE ||
465 nir->info.stage == MESA_SHADER_KERNEL) &&
466 cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
467 continue;
468
469 /* Prior to Turing, indirect cbufs require splitting the pushbuf and
470 * pushing bits of the descriptor set. Doing this every draw call is
471 * probably more overhead than it's worth.
472 */
473 if (ctx->dev_info->cls_eng3d < TURING_A &&
474 cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
475 continue;
476
477 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = cbufs[i].key;
478 }
479
480 ralloc_free(ctx->cbufs);
481 ctx->cbufs = NULL;
482 }
483
484 static int
get_mapped_cbuf_idx(const struct nvk_cbuf *key,
486 const struct lower_descriptors_ctx *ctx)
487 {
488 if (ctx->cbuf_map == NULL)
489 return -1;
490
491 for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
492 if (nvk_cbuf_equal(&ctx->cbuf_map->cbufs[c], key)) {
493 return c;
494 }
495 }
496
497 return -1;
498 }
499
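/* If a UBO deref load maps to one of the cbufs we chose to bind, rewrite it
* as a direct load from that cbuf using 32-bit index/offset addressing.
*/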
500 static bool
lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
502 {
503 const struct lower_descriptors_ctx *ctx = _ctx;
504
505 if (load->intrinsic != nir_intrinsic_load_deref)
506 return false;
507
508 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
509 if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
510 return false;
511
512 uint64_t offset, end;
513 UNUSED uint64_t start;
514 UNUSED nir_intrinsic_instr *res_index;
515 struct nvk_cbuf cbuf_key =
516 ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
517
518 if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
519 return false;
520
521 if (end > NVK_MAX_CBUF_SIZE)
522 return false;
523
524 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
525 if (cbuf_idx < 0)
526 return false;
527
528 b->cursor = nir_before_instr(&load->instr);
529
530 nir_deref_path path;
531 nir_deref_path_init(&path, deref, NULL);
532
533 nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
534 nir_address_format addr_format = nir_address_format_32bit_index_offset;
535 for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
536 addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);
537
538 nir_deref_path_finish(&path);
539
540 nir_lower_explicit_io_instr(b, load, addr, addr_format);
541
542 return true;
543 }
544
545 static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
547 const struct lower_descriptors_ctx *ctx)
548 {
549 assert(load->intrinsic == nir_intrinsic_load_constant);
550
551 const struct nvk_cbuf cbuf_key = {
552 .type = NVK_CBUF_TYPE_SHADER_DATA,
553 };
554 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
555 assert(cbuf_idx >= 0);
556
557 uint32_t base = nir_intrinsic_base(load);
558
559 b->cursor = nir_before_instr(&load->instr);
560
561 nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
562 nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
563 nir_imm_int(b, cbuf_idx), offset,
564 .align_mul = nir_intrinsic_align_mul(load),
565 .align_offset = nir_intrinsic_align_offset(load));
566
567 nir_def_rewrite_uses(&load->def, data);
568
569 return true;
570 }
571
572 static nir_def *
load_descriptor_set_addr(nir_builder *b, uint32_t set,
574 UNUSED const struct lower_descriptors_ctx *ctx)
575 {
576 uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
577 set * sizeof(struct nvk_buffer_address);
578
579 return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
580 nir_imm_int(b, set_addr_offset),
581 .align_mul = 8, .align_offset = 0);
582 }
583
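/* Return the index of the first dynamic buffer belonging to `set`.  This is
* a compile-time constant when all lower-numbered set layouts are known;
* otherwise it is read from the root descriptor table.
*/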
584 static nir_def *
load_dynamic_buffer_start(nir_builder *b, uint32_t set,
586 const struct lower_descriptors_ctx *ctx)
587 {
588 int dynamic_buffer_start_imm = 0;
589 for (uint32_t s = 0; s < set; s++) {
590 if (ctx->set_layouts[s] == NULL) {
591 dynamic_buffer_start_imm = -1;
592 break;
593 }
594
595 dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
596 }
597
598 if (dynamic_buffer_start_imm >= 0) {
599 return nir_imm_int(b, dynamic_buffer_start_imm);
600 } else {
601 uint32_t root_offset =
602 nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
603
604 return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
605 nir_imm_int(b, root_offset),
606 .align_mul = 1, .align_offset = 0));
607 }
608 }
609
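/* Load a descriptor for (set, binding, index).  Dynamic buffers come from
* the dynamic_buffers array in the root table, inline uniform blocks are
* returned as a pointer into the descriptor set itself, and everything else
* is read from descriptor set memory, preferring a bound cbuf over a global
* load when one is available.
*/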
610 static nir_def *
load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
612 uint32_t set, uint32_t binding, nir_def *index,
613 unsigned offset_B, const struct lower_descriptors_ctx *ctx)
614 {
615 const struct nvk_descriptor_set_binding_layout *binding_layout =
616 get_binding_layout(set, binding, ctx);
617
618 if (ctx->clamp_desc_array_bounds)
619 index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
620
621 switch (binding_layout->type) {
622 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
623 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
624 /* Get the index in the root descriptor table dynamic_buffers array. */
625 nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
626
627 index = nir_iadd(b, index,
628 nir_iadd_imm(b, dynamic_buffer_start,
629 binding_layout->dynamic_buffer_index));
630 uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
631 nir_def *root_desc_offset =
632 nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
633 nvk_root_descriptor_offset(dynamic_buffers));
634
635 assert(num_components * bit_size <= desc_size * 8);
636 return nir_ldc_nv(b, num_components, bit_size,
637 nir_imm_int(b, 0), root_desc_offset,
638 .align_mul = 16, .align_offset = 0);
639 }
640
641 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
642 nir_def *base_addr =
643 nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
644 binding_layout->offset);
645
646 assert(binding_layout->stride == 1);
647 const uint32_t binding_size = binding_layout->array_size;
648
649 if (ctx->use_bindless_cbuf) {
650 assert(num_components == 1 && bit_size == 64);
651 const uint32_t size = align(binding_size, 16);
652 return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
653 ((uint64_t)size >> 4) << 45);
654 } else {
655 /* Convert it to nir_address_format_64bit_bounded_global */
656 assert(num_components == 4 && bit_size == 32);
657 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
658 nir_unpack_64_2x32_split_y(b, base_addr),
659 nir_imm_int(b, binding_size),
660 nir_imm_int(b, 0));
661 }
662 }
663
664 default: {
665 assert(binding_layout->stride > 0);
666 nir_def *desc_ubo_offset =
667 nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
668 binding_layout->offset + offset_B);
669
670 uint64_t max_desc_ubo_offset = binding_layout->offset +
671 binding_layout->array_size * binding_layout->stride;
672
673 unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
674 desc_align_mul = MIN2(desc_align_mul, 16);
675 unsigned desc_align_offset = binding_layout->offset + offset_B;
676 desc_align_offset %= desc_align_mul;
677
678 const struct nvk_cbuf cbuf_key = {
679 .type = NVK_CBUF_TYPE_DESC_SET,
680 .desc_set = set,
681 };
682 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
683
684 if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
685 return nir_ldc_nv(b, num_components, bit_size,
686 nir_imm_int(b, cbuf_idx),
687 desc_ubo_offset,
688 .align_mul = desc_align_mul,
689 .align_offset = desc_align_offset);
690 } else {
691 nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
692 return nir_load_global_constant_offset(b, num_components, bit_size,
693 set_addr, desc_ubo_offset,
694 .align_mul = desc_align_mul,
695 .align_offset = desc_align_offset);
696 }
697 }
698 }
699 }
700
701 static bool
is_idx_intrin(nir_intrinsic_instr *intrin)
703 {
704 while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
705 intrin = nir_src_as_intrinsic(intrin->src[0]);
706 if (intrin == NULL)
707 return false;
708 }
709
710 return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
711 }
712
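/* Pack a (base_lo, base_hi, size, offset) buffer address vector into the
* bindless cbuf handle layout: a 16-byte-aligned base address with the size
* folded into the upper bits, returned as (handle_lo, handle_hi, offset).
*/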
713 static nir_def *
buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
715 {
716 nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
717 nir_def *size = nir_channel(b, addr, 2);
718 nir_def *offset = nir_channel(b, addr, 3);
719
720 nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
721 nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
722 nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);
723
/* If we assume the top bits of the address are 0, as well as the bottom two
* bits of the size (we can trust this since it's a descriptor), then
726 *
727 * ((size >> 4) << 13) | addr
728 *
729 * is just an imad.
730 */
731 nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);
732
733 return nir_vec3(b, addr16_lo, handle_hi, offset);
734 }
735
736 static nir_def *
load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
738 const struct lower_descriptors_ctx *ctx)
739 {
740 nir_def *index = nir_imm_int(b, 0);
741
742 while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
743 index = nir_iadd(b, index, intrin->src[1].ssa);
744 intrin = nir_src_as_intrinsic(intrin->src[0]);
745 }
746
747 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
748 uint32_t set = nir_intrinsic_desc_set(intrin);
749 uint32_t binding = nir_intrinsic_binding(intrin);
750 index = nir_iadd(b, index, intrin->src[0].ssa);
751
752 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
753 if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
754 nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);
755
756 /* The descriptor is just the handle. NIR also needs an offset. */
757 return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
758 nir_unpack_64_2x32_split_y(b, desc),
759 nir_imm_int(b, 0));
760 } else {
761 nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
762
/* We know a priori that the .w component (offset) is zero */
764 return nir_vec4(b, nir_channel(b, desc, 0),
765 nir_channel(b, desc, 1),
766 nir_channel(b, desc, 2),
767 nir_imm_int(b, 0));
768 }
769 }
770
771 static bool
try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
773 const struct lower_descriptors_ctx *ctx)
774 {
775 ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
776 b->cursor = nir_before_instr(&intrin->instr);
777
778 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
779 if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
780 assert(descriptor_type_is_ssbo(desc_type));
781 return false;
782 }
783
784 nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
785
786 nir_def_rewrite_uses(&intrin->def, desc);
787
788 return true;
789 }
790
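/* Replace a system-value intrinsic with a load from the given offset in the
* root descriptor table, which is always bound as cbuf 0.
*/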
791 static bool
_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
793 uint32_t root_table_offset,
794 const struct lower_descriptors_ctx *ctx)
795 {
796 b->cursor = nir_instr_remove(&intrin->instr);
797
798 nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
799 intrin->def.bit_size,
800 nir_imm_int(b, 0), /* Root table */
801 nir_imm_int(b, root_table_offset),
802 .align_mul = 4,
803 .align_offset = 0);
804
805 nir_def_rewrite_uses(&intrin->def, val);
806
807 return true;
808 }
809
810 #define lower_sysval_to_root_table(b, intrin, member, ctx) \
811 _lower_sysval_to_root_table(b, intrin, \
812 nvk_root_descriptor_offset(member), \
813 ctx)
814
815 static bool
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
817 const struct lower_descriptors_ctx *ctx)
818 {
819 const uint32_t push_region_offset =
820 nvk_root_descriptor_offset(push);
821 const uint32_t base = nir_intrinsic_base(load);
822
823 b->cursor = nir_before_instr(&load->instr);
824
825 nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
826 push_region_offset + base);
827
828 nir_def *val =
829 nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
830 nir_imm_int(b, 0), offset,
831 .align_mul = load->def.bit_size / 8,
832 .align_offset = 0);
833
834 nir_def_rewrite_uses(&load->def, val);
835
836 return true;
837 }
838
839 static void
get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
841 uint32_t *set, uint32_t *binding,
842 nir_def **index)
843 {
844 if (deref->deref_type == nir_deref_type_array) {
845 *index = deref->arr.index.ssa;
846 deref = nir_deref_instr_parent(deref);
847 } else {
848 *index = nir_imm_int(b, 0);
849 }
850
851 assert(deref->deref_type == nir_deref_type_var);
852 nir_variable *var = deref->var;
853
854 *set = var->data.descriptor_set;
855 *binding = var->data.binding;
856 }
857
858 static nir_def *
load_resource_deref_desc(nir_builder *b,
860 unsigned num_components, unsigned bit_size,
861 nir_deref_instr *deref, unsigned offset_B,
862 const struct lower_descriptors_ctx *ctx)
863 {
864 uint32_t set, binding;
865 nir_def *index;
866 get_resource_deref_binding(b, deref, &set, &binding, &index);
867 return load_descriptor(b, num_components, bit_size,
868 set, binding, index, offset_B, ctx);
869 }
870
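/* MSAA storage images are implemented as 2D images with the samples laid
* out in a grid.  The descriptor packs the image index, the log2 grid
* dimensions, and a map from sample index to position in the grid; fold the
* sample index into the x/y coordinate and fix up size/samples queries.
*/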
871 static void
lower_msaa_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
873 const struct lower_descriptors_ctx *ctx)
874 {
875 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS);
876
877 b->cursor = nir_before_instr(&intrin->instr);
878 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
879 nir_def *desc = load_resource_deref_desc(b, 2, 32, deref, 0, ctx);
880 nir_def *desc0 = nir_channel(b, desc, 0);
881 nir_def *desc1 = nir_channel(b, desc, 1);
882
883 nir_def *img_index = nir_ubitfield_extract_imm(b, desc0, 0, 20);
884 nir_rewrite_image_intrinsic(intrin, img_index, true);
885
886 nir_def *sw_log2 = nir_ubitfield_extract_imm(b, desc0, 20, 2);
887 nir_def *sh_log2 = nir_ubitfield_extract_imm(b, desc0, 22, 2);
888 nir_def *s_map = desc1;
889
890 nir_def *sw = nir_ishl(b, nir_imm_int(b, 1), sw_log2);
891 nir_def *sh = nir_ishl(b, nir_imm_int(b, 1), sh_log2);
892 nir_def *num_samples = nir_imul(b, sw, sh);
893
894 switch (intrin->intrinsic) {
895 case nir_intrinsic_bindless_image_load:
896 case nir_intrinsic_bindless_image_sparse_load:
897 case nir_intrinsic_bindless_image_store:
898 case nir_intrinsic_bindless_image_atomic:
899 case nir_intrinsic_bindless_image_atomic_swap: {
900 nir_def *x = nir_channel(b, intrin->src[1].ssa, 0);
901 nir_def *y = nir_channel(b, intrin->src[1].ssa, 1);
902 nir_def *z = nir_channel(b, intrin->src[1].ssa, 2);
903 nir_def *w = nir_channel(b, intrin->src[1].ssa, 3);
904 nir_def *s = intrin->src[2].ssa;
905
906 nir_def *s_xy = nir_ushr(b, s_map, nir_imul_imm(b, s, 4));
907 nir_def *sx = nir_ubitfield_extract_imm(b, s_xy, 0, 2);
908 nir_def *sy = nir_ubitfield_extract_imm(b, s_xy, 2, 2);
909
910 x = nir_imad(b, x, sw, sx);
911 y = nir_imad(b, y, sh, sy);
912
913 /* Make OOB sample indices OOB X/Y indices */
914 x = nir_bcsel(b, nir_ult(b, s, num_samples), x, nir_imm_int(b, -1));
915
916 nir_src_rewrite(&intrin->src[1], nir_vec4(b, x, y, z, w));
917 nir_src_rewrite(&intrin->src[2], nir_undef(b, 1, 32));
918 break;
919 }
920
921 case nir_intrinsic_bindless_image_size: {
922 b->cursor = nir_after_instr(&intrin->instr);
923
924 nir_def *size = &intrin->def;
925 nir_def *w = nir_channel(b, size, 0);
926 nir_def *h = nir_channel(b, size, 1);
927
928 w = nir_ushr(b, w, sw_log2);
929 h = nir_ushr(b, h, sh_log2);
930
931 size = nir_vector_insert_imm(b, size, w, 0);
932 size = nir_vector_insert_imm(b, size, h, 1);
933
934 nir_def_rewrite_uses_after(&intrin->def, size, size->parent_instr);
935 break;
936 }
937
938 case nir_intrinsic_bindless_image_samples: {
939 /* We need to handle NULL descriptors explicitly */
940 nir_def *samples =
941 nir_bcsel(b, nir_ieq(b, desc0, nir_imm_int(b, 0)),
942 nir_imm_int(b, 0), num_samples);
943 nir_def_rewrite_uses(&intrin->def, samples);
944 break;
945 }
946
947 default:
948 unreachable("Unknown image intrinsic");
949 }
950
951 nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
952 }
953
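/* EDB buffer views are texel buffer views that get bounds-checked in the
* shader: the descriptor is a uvec4 of (image index, offset in elements,
* size in elements, out-of-bounds alpha value), with index 0 reserved for
* the null descriptor.
*/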
954 static bool
is_edb_buffer_view(nir_deref_instr *deref,
956 const struct lower_descriptors_ctx *ctx)
957 {
958 if (glsl_get_sampler_dim(deref->type) != GLSL_SAMPLER_DIM_BUF)
959 return false;
960
961 if (ctx->use_edb_buffer_views)
962 return true;
963
964 nir_variable *var = nir_deref_instr_get_variable(deref);
965 uint8_t set = var->data.descriptor_set;
966
967 return (ctx->set_layouts[set]->flags &
968 VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) &&
969 !(ctx->set_layouts[set]->flags &
970 VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT);
971 }
972
973 static nir_def *
edb_buffer_view_is_null(nir_builder *b, nir_def *desc)
975 {
976 assert(desc->num_components == 4);
977 nir_def *index = nir_channel(b, desc, 0);
978 return nir_ieq_imm(b, index, 0);
979 }
980
981 static nir_def *
edb_buffer_view_offset_el(nir_builder *b, nir_def *desc)
983 {
984 assert(desc->num_components == 4);
985 return nir_channel(b, desc, 1);
986 }
987
988 static nir_def *
edb_buffer_view_size_el(nir_builder *b, nir_def *desc)
990 {
991 assert(desc->num_components == 4);
992 return nir_channel(b, desc, 2);
993 }
994
995 static nir_def *
edb_buffer_view_oob_alpha(nir_builder *b, nir_def *desc)
997 {
998 assert(desc->num_components == 4);
999 return nir_channel(b, desc, 3);
1000 }
1001
1002 static nir_def *
edb_buffer_view_coord_is_in_bounds(nir_builder *b, nir_def *desc,
1004 nir_def *coord)
1005 {
1006 assert(desc->num_components == 4);
1007 return nir_ult(b, coord, edb_buffer_view_size_el(b, desc));
1008 }
1009
1010 static nir_def *
edb_buffer_view_index(nir_builder *b, nir_def *desc, nir_def *in_bounds)
1012 {
1013 assert(desc->num_components == 4);
1014 nir_def *index = nir_channel(b, desc, 0);
1015
1016 /* Use the NULL descriptor for OOB access */
1017 return nir_bcsel(b, in_bounds, index, nir_imm_int(b, 0));
1018 }
1019
1020 static nir_def *
adjust_edb_buffer_view_coord(nir_builder *b, nir_def *desc, nir_def *coord)
1022 {
1023 return nir_iadd(b, coord, edb_buffer_view_offset_el(b, desc));
1024 }
1025
1026 static nir_def *
fixup_edb_buffer_view_result(nir_builder *b, nir_def *desc, nir_def *in_bounds,
1028 nir_def *res, nir_alu_type dest_type)
1029 {
1030 if (res->num_components < 4)
1031 return res;
1032
1033 nir_def *is_null = edb_buffer_view_is_null(b, desc);
1034 nir_def *oob_alpha = edb_buffer_view_oob_alpha(b, desc);
1035
1036 nir_def *a = nir_channel(b, res, 3);
1037 a = nir_bcsel(b, nir_ior(b, in_bounds, is_null), a, oob_alpha);
1038 return nir_vector_insert_imm(b, res, a, 3);
1039 }
1040
1041 static void
lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1043 const struct lower_descriptors_ctx *ctx)
1044 {
1045 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF);
1046
1047 b->cursor = nir_before_instr(&intrin->instr);
1048 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1049 nir_def *desc = load_resource_deref_desc(b, 4, 32, deref, 0, ctx);
1050
1051 switch (intrin->intrinsic) {
1052 case nir_intrinsic_image_deref_load:
1053 case nir_intrinsic_image_deref_sparse_load:
1054 case nir_intrinsic_image_deref_store:
1055 case nir_intrinsic_image_deref_atomic:
1056 case nir_intrinsic_image_deref_atomic_swap: {
1057 nir_def *pos = intrin->src[1].ssa;
1058 nir_def *x = nir_channel(b, pos, 0);
1059
1060 nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, x);
1061 nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1062
1063 nir_def *new_x = adjust_edb_buffer_view_coord(b, desc, x);
1064 pos = nir_vector_insert_imm(b, pos, new_x, 0);
1065 nir_src_rewrite(&intrin->src[1], pos);
1066
1067 if (intrin->intrinsic == nir_intrinsic_image_deref_load ||
1068 intrin->intrinsic == nir_intrinsic_image_deref_sparse_load) {
1069 b->cursor = nir_after_instr(&intrin->instr);
1070 nir_def *res = &intrin->def;
1071 res = fixup_edb_buffer_view_result(b, desc, in_bounds, res,
1072 nir_intrinsic_dest_type(intrin));
1073 nir_def_rewrite_uses_after(&intrin->def, res, res->parent_instr);
1074 }
1075
1076 nir_rewrite_image_intrinsic(intrin, index, true);
1077 break;
1078 }
1079
1080 case nir_intrinsic_image_deref_size: {
1081 assert(intrin->def.num_components == 1);
1082 nir_def *size_el = nir_channel(b, desc, 2);
1083 nir_def_rewrite_uses(&intrin->def, size_el);
1084 break;
1085 }
1086
1087 default:
1088 unreachable("Unknown image intrinsic");
1089 }
1090 }
1091
1092 static bool
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1094 const struct lower_descriptors_ctx *ctx)
1095 {
1096 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1097
1098 if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_MS) {
1099 lower_msaa_image_intrin(b, intrin, ctx);
1100 } else if (is_edb_buffer_view(deref, ctx)) {
1101 lower_edb_buffer_image_intrin(b, intrin, ctx);
1102 } else {
1103 b->cursor = nir_before_instr(&intrin->instr);
1104 nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
1105 nir_rewrite_image_intrinsic(intrin, desc, true);
1106 }
1107
1108 return true;
1109 }
1110
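/* interpolateAtSample() is lowered to interpolateAtOffset() by looking up
* the requested sample's position in the packed 4-bit sample locations
* stored in the root descriptor table.
*/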
1111 static bool
lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
1113 const struct lower_descriptors_ctx *ctx)
1114 {
1115 const uint32_t root_table_offset =
1116 nvk_root_descriptor_offset(draw.sample_locations);
1117
1118 nir_def *sample = interp->src[1].ssa;
1119
1120 b->cursor = nir_before_instr(&interp->instr);
1121
1122 nir_def *loc = nir_ldc_nv(b, 1, 64,
1123 nir_imm_int(b, 0), /* Root table */
1124 nir_imm_int(b, root_table_offset),
1125 .align_mul = 8,
1126 .align_offset = 0);
1127
1128 /* Yay little endian */
1129 loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
1130 nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
1131 nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
1132 nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
1133 nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
1134 nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);
1135
1136 assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
1137 interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
1138 nir_src_rewrite(&interp->src[1], offset);
1139
1140 return true;
1141 }
1142
1143 static bool
try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1145 const struct lower_descriptors_ctx *ctx)
1146 {
1147 switch (intrin->intrinsic) {
1148 case nir_intrinsic_load_constant:
1149 return lower_load_constant(b, intrin, ctx);
1150
1151 case nir_intrinsic_load_vulkan_descriptor:
1152 return try_lower_load_vulkan_descriptor(b, intrin, ctx);
1153
1154 case nir_intrinsic_load_workgroup_size:
1155 unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
1156
1157 case nir_intrinsic_load_num_workgroups:
1158 return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);
1159
1160 case nir_intrinsic_load_base_workgroup_id:
1161 return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);
1162
1163 case nir_intrinsic_load_push_constant:
1164 return lower_load_push_constant(b, intrin, ctx);
1165
1166 case nir_intrinsic_load_base_vertex:
1167 case nir_intrinsic_load_first_vertex:
1168 return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);
1169
1170 case nir_intrinsic_load_base_instance:
1171 return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);
1172
1173 case nir_intrinsic_load_draw_id:
1174 return lower_sysval_to_root_table(b, intrin, draw.draw_index, ctx);
1175
1176 case nir_intrinsic_load_view_index:
1177 return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);
1178
1179 case nir_intrinsic_image_deref_load:
1180 case nir_intrinsic_image_deref_sparse_load:
1181 case nir_intrinsic_image_deref_store:
1182 case nir_intrinsic_image_deref_atomic:
1183 case nir_intrinsic_image_deref_atomic_swap:
1184 case nir_intrinsic_image_deref_size:
1185 case nir_intrinsic_image_deref_samples:
1186 return lower_image_intrin(b, intrin, ctx);
1187
1188 case nir_intrinsic_interp_deref_at_sample:
1189 return lower_interp_at_sample(b, intrin, ctx);
1190
1191 default:
1192 return false;
1193 }
1194 }
1195
1196 static void
lower_edb_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex,
1198 const struct lower_descriptors_ctx *ctx)
1199 {
1200 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_BUF);
1201
1202 b->cursor = nir_before_instr(&tex->instr);
1203
1204 const int texture_src_idx =
1205 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1206 nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1207
1208 nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1209 ASSERTED const uint32_t plane =
1210 plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1211 assert(plane == 0);
1212
1213 nir_def *desc = load_resource_deref_desc(b, 4, 32, texture, 0, ctx);
1214
1215 switch (tex->op) {
1216 case nir_texop_txf: {
1217 const int coord_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1218 assert(coord_src_idx >= 0);
1219 nir_def *coord = tex->src[coord_src_idx].src.ssa;
1220
1221 nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, coord);
1222
1223 nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1224 nir_def *new_coord = adjust_edb_buffer_view_coord(b, desc, coord);
1225 nir_def *u = nir_undef(b, 1, 32);
1226
1227 /* The tricks we play for EDB use very large texel buffer views. These
1228 * don't seem to play nicely with the tld instruction which thinks
1229 * buffers are a 1D texture. However, suld seems fine with it so we'll
1230 * rewrite to use that.
1231 */
1232 nir_def *res = nir_bindless_image_load(b, tex->def.num_components,
1233 tex->def.bit_size,
1234 index,
1235 nir_vec4(b, new_coord, u, u, u),
1236 u, /* sample_id */
1237 nir_imm_int(b, 0), /* LOD */
1238 .image_dim = GLSL_SAMPLER_DIM_BUF,
1239 .image_array = false,
1240 .format = PIPE_FORMAT_NONE,
1241 .access = ACCESS_NON_WRITEABLE |
1242 ACCESS_CAN_REORDER,
1243 .dest_type = tex->dest_type);
1244 if (tex->is_sparse) {
1245 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(res->parent_instr);
1246 intr->intrinsic = nir_intrinsic_bindless_image_sparse_load;
1247 }
1248
1249 res = fixup_edb_buffer_view_result(b, desc, in_bounds,
1250 res, tex->dest_type);
1251
1252 nir_def_rewrite_uses(&tex->def, res);
1253 break;
1254 }
1255
1256 case nir_texop_txs: {
1257 assert(tex->def.num_components == 1);
1258 nir_def *size_el = edb_buffer_view_size_el(b, desc);
1259 nir_def_rewrite_uses(&tex->def, size_el);
1260 break;
1261 }
1262
1263 default:
1264 unreachable("Invalid buffer texture op");
1265 }
1266 }
1267
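/* Lower a texture instruction by replacing its texture/sampler derefs with
* a combined bindless handle built from the image and sampler descriptor
* words.  EDB buffer views take a separate path, and queries on pre-Volta
* hardware get an explicit null-descriptor check.
*/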
1268 static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
1270 const struct lower_descriptors_ctx *ctx)
1271 {
1272 const int texture_src_idx =
1273 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1274 const int sampler_src_idx =
1275 nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
1276 if (texture_src_idx < 0) {
1277 assert(sampler_src_idx < 0);
1278 return false;
1279 }
1280
1281 nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1282 nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
1283 nir_src_as_deref(tex->src[sampler_src_idx].src);
1284 assert(texture);
1285
1286 if (is_edb_buffer_view(texture, ctx)) {
1287 lower_edb_buffer_tex_instr(b, tex, ctx);
1288 return true;
1289 }
1290
1291 b->cursor = nir_before_instr(&tex->instr);
1292
1293 nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1294 const uint32_t plane =
1295 plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1296 const uint64_t plane_offset_B =
1297 plane * sizeof(struct nvk_sampled_image_descriptor);
1298
1299 nir_def *texture_desc =
1300 load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
1301
1302 nir_def *combined_handle;
1303 if (texture == sampler) {
1304 combined_handle = texture_desc;
1305 } else {
1306 combined_handle = nir_iand_imm(b, texture_desc,
1307 NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1308
1309 if (sampler != NULL) {
1310 nir_def *sampler_desc =
1311 load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
1312 nir_def *sampler_index =
1313 nir_iand_imm(b, sampler_desc,
1314 NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
1315 combined_handle = nir_ior(b, combined_handle, sampler_index);
1316 }
1317 }
1318
1319 /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
1320 combined_handle = nir_u2u64(b, combined_handle);
1321
1322 /* TODO: The nv50 back-end assumes it gets handles both places, even for
1323 * texelFetch.
1324 */
1325 nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
1326 tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1327
1328 if (sampler_src_idx < 0) {
1329 nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
1330 } else {
1331 nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
1332 tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
1333 }
1334
1335 /* On pre-Volta hardware, we don't have real null descriptors. Null
1336 * descriptors work well enough for sampling but they may not return the
1337 * correct query results.
1338 */
1339 if (ctx->dev_info->cls_eng3d < VOLTA_A && nir_tex_instr_is_query(tex)) {
1340 b->cursor = nir_after_instr(&tex->instr);
1341
1342 /* This should get CSE'd with the earlier load */
1343 nir_def *texture_handle =
1344 nir_iand_imm(b, texture_desc, NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1345 nir_def *is_null = nir_ieq_imm(b, texture_handle, 0);
1346 nir_def *zero = nir_imm_zero(b, tex->def.num_components,
1347 tex->def.bit_size);
1348 nir_def *res = nir_bcsel(b, is_null, zero, &tex->def);
1349 nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1350 }
1351
1352 return true;
1353 }
1354
1355 static bool
try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
1357 void *_data)
1358 {
1359 const struct lower_descriptors_ctx *ctx = _data;
1360
1361 switch (instr->type) {
1362 case nir_instr_type_tex:
1363 return lower_tex(b, nir_instr_as_tex(instr), ctx);
1364 case nir_instr_type_intrinsic:
1365 return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
1366 default:
1367 return false;
1368 }
1369 }
1370
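/* Sentinel used as the high 32 bits of the "base address" for dynamic SSBO
* bindings.  lower_load_ssbo_descriptor() checks for it to decide whether a
* descriptor lives in the root table (cbuf 0) or at a real global address.
*/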
1371 #define ROOT_DESC_BASE_ADDR_HI 0x0057de3c
1372
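/* Build a (base, size, offset) SSBO "address" without loading the
* descriptor yet: regular SSBOs point into descriptor set memory while
* dynamic SSBOs point into the root table via the sentinel above.  The
* binding stride is stashed in the top 8 bits so reindex can recover it.
*/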
1373 static bool
lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
1375 const struct lower_descriptors_ctx *ctx)
1376 {
1377 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1378 return false;
1379
1380 b->cursor = nir_instr_remove(&intrin->instr);
1381
1382 uint32_t set = nir_intrinsic_desc_set(intrin);
1383 uint32_t binding = nir_intrinsic_binding(intrin);
1384 nir_def *index = intrin->src[0].ssa;
1385
1386 const struct nvk_descriptor_set_binding_layout *binding_layout =
1387 get_binding_layout(set, binding, ctx);
1388
1389 nir_def *binding_addr;
1390 uint8_t binding_stride;
1391 switch (binding_layout->type) {
1392 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
1393 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
1394 nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
1395 binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
1396 binding_stride = binding_layout->stride;
1397 break;
1398 }
1399
1400 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1401 nir_def *dynamic_buffer_start =
1402 nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
1403 binding_layout->dynamic_buffer_index);
1404
1405 nir_def *dynamic_binding_offset =
1406 nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
1407 sizeof(struct nvk_buffer_address)),
1408 nvk_root_descriptor_offset(dynamic_buffers));
1409
1410 binding_addr =
1411 nir_pack_64_2x32_split(b, dynamic_binding_offset,
1412 nir_imm_int(b, ROOT_DESC_BASE_ADDR_HI));
1413 binding_stride = sizeof(struct nvk_buffer_address);
1414 break;
1415 }
1416
1417 default:
1418 unreachable("Not an SSBO descriptor");
1419 }
1420
1421 /* Tuck the stride in the top 8 bits of the binding address */
1422 binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
1423
1424 const uint32_t binding_size = binding_layout->array_size * binding_stride;
1425 nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
1426
/* We depend on this when we load descriptors */
1428 assert(binding_layout->array_size >= 1);
1429
1430 nir_def *addr;
1431 switch (ctx->ssbo_addr_format) {
1432 case nir_address_format_64bit_global_32bit_offset:
1433 case nir_address_format_64bit_bounded_global:
1434 addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
1435 nir_unpack_64_2x32_split_y(b, binding_addr),
1436 nir_imm_int(b, binding_size),
1437 offset_in_binding);
1438 break;
1439
1440 default:
1441 unreachable("Unknown address mode");
1442 }
1443
1444 nir_def_rewrite_uses(&intrin->def, addr);
1445
1446 return true;
1447 }
1448
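/* vulkan_resource_reindex just advances the offset by index times the
* binding stride that lower_ssbo_resource_index() stashed in the top 8 bits
* of the binding address.
*/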
1449 static bool
lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
1451 const struct lower_descriptors_ctx *ctx)
1452 {
1453 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1454 return false;
1455
1456 b->cursor = nir_instr_remove(&intrin->instr);
1457
1458 nir_def *addr = intrin->src[0].ssa;
1459 nir_def *index = intrin->src[1].ssa;
1460
1461 nir_def *addr_high32;
1462 switch (ctx->ssbo_addr_format) {
1463 case nir_address_format_64bit_global_32bit_offset:
1464 case nir_address_format_64bit_bounded_global:
1465 addr_high32 = nir_channel(b, addr, 1);
1466 break;
1467
1468 default:
1469 unreachable("Unknown address mode");
1470 }
1471
1472 nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
1473 nir_def *offset = nir_imul(b, index, stride);
1474
1475 addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
1476 nir_var_mem_ssbo, offset);
1477 nir_def_rewrite_uses(&intrin->def, addr);
1478
1479 return true;
1480 }
1481
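/* Load the 16-byte buffer descriptor behind an SSBO address.  If the high
* address bits match ROOT_DESC_BASE_ADDR_HI, it is read from the root table
* with a UBO load; otherwise it is fetched from global memory, bounds-checked
* against the binding size when one is present.
*/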
1482 static bool
lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1484 const struct lower_descriptors_ctx *ctx)
1485 {
1486 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1487 return false;
1488
1489 b->cursor = nir_instr_remove(&intrin->instr);
1490
1491 nir_def *addr = intrin->src[0].ssa;
1492
1493 nir_def *base, *offset, *size = NULL;
1494 switch (ctx->ssbo_addr_format) {
1495 case nir_address_format_64bit_global_32bit_offset: {
1496 base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1497 offset = nir_channel(b, addr, 3);
1498 break;
1499 }
1500
1501 case nir_address_format_64bit_bounded_global: {
1502 base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1503 size = nir_channel(b, addr, 2);
1504 offset = nir_channel(b, addr, 3);
1505 break;
1506 }
1507
1508 default:
1509 unreachable("Unknown address mode");
1510 }
1511
1512 /* Mask off the binding stride */
1513 base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1514
1515 nir_def *base_lo = nir_unpack_64_2x32_split_x(b, base);
1516 nir_def *base_hi = nir_unpack_64_2x32_split_y(b, base);
1517
1518 nir_def *desc_root, *desc_global;
1519 nir_push_if(b, nir_ieq_imm(b, base_hi, ROOT_DESC_BASE_ADDR_HI));
1520 {
1521 desc_root = nir_load_ubo(b, 4, 32, nir_imm_int(b, 0),
1522 nir_iadd(b, base_lo, offset),
1523 .align_mul = 16, .align_offset = 0,
1524 .range = ~0);
1525 if (size != NULL) {
1526 /* assert(binding_layout->array_size >= 1); */
1527 nir_def *is_oob = nir_ult(b, nir_iadd_imm(b, size, -16), offset);
1528 desc_root = nir_bcsel(b, is_oob, nir_imm_zero(b, 4, 32), desc_root);
1529 }
1530 }
1531 nir_push_else(b, NULL);
1532 {
1533 if (size != NULL) {
1534 desc_global = nir_load_global_constant_bounded(b, 4, 32, base,
1535 offset, size,
1536 .align_mul = 16,
1537 .align_offset = 0);
1538 } else {
1539 desc_global = nir_load_global_constant_offset(b, 4, 32, base,
1540 offset,
1541 .align_mul = 16,
1542 .align_offset = 0);
1543 }
1544 }
1545 nir_pop_if(b, NULL);
1546 nir_def *desc = nir_if_phi(b, desc_root, desc_global);
1547
1548 nir_def_rewrite_uses(&intrin->def, desc);
1549
1550 return true;
1551 }
1552
1553 static bool
lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
1555 void *_data)
1556 {
1557 const struct lower_descriptors_ctx *ctx = _data;
1558
1559 if (instr->type != nir_instr_type_intrinsic)
1560 return false;
1561
1562 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1563 switch (intrin->intrinsic) {
1564 case nir_intrinsic_vulkan_resource_index:
1565 return lower_ssbo_resource_index(b, intrin, ctx);
1566 case nir_intrinsic_vulkan_resource_reindex:
1567 return lower_ssbo_resource_reindex(b, intrin, ctx);
1568 case nir_intrinsic_load_vulkan_descriptor:
1569 return lower_load_ssbo_descriptor(b, intrin, ctx);
1570 default:
1571 return false;
1572 }
1573 }
1574
1575 bool
nvk_nir_lower_descriptors(nir_shader *nir,
1577 const struct nvk_physical_device *pdev,
1578 VkShaderCreateFlagsEXT shader_flags,
1579 const struct vk_pipeline_robustness_state *rs,
1580 uint32_t set_layout_count,
1581 struct vk_descriptor_set_layout * const *set_layouts,
1582 struct nvk_cbuf_map *cbuf_map_out)
1583 {
1584 struct lower_descriptors_ctx ctx = {
1585 .dev_info = &pdev->info,
1586 .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
1587 .use_edb_buffer_views = nvk_use_edb_buffer_views(pdev),
1588 .clamp_desc_array_bounds =
1589 rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1590 rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1591 rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
1592 .indirect_bind =
1593 shader_flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT,
1594 .ssbo_addr_format = nvk_ssbo_addr_format(pdev, rs),
1595 .ubo_addr_format = nvk_ubo_addr_format(pdev, rs),
1596 };
1597
1598 assert(set_layout_count <= NVK_MAX_SETS);
1599 for (uint32_t s = 0; s < set_layout_count; s++) {
1600 if (set_layouts[s] != NULL)
1601 ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
1602 }
1603
1604 /* We run in four passes:
1605 *
1606 * 1. Find ranges of UBOs that we can promote to bound UBOs. Nothing is
1607 * actually lowered in this pass. It's just analysis.
1608 *
1609 * 2. Try to lower UBO loads to cbufs based on the map we just created.
1610 * We need to do this before the main lowering pass because it relies
1611 * on the original descriptor load intrinsics.
1612 *
1613 * 3. Attempt to lower everything with direct descriptors. This may fail
1614 * to lower some SSBO intrinsics when variable pointers are used.
1615 *
1616 * 4. Clean up any SSBO intrinsics which are left and lower them to
* slightly less efficient but variable-pointers-correct versions.
1618 */
1619
1620 bool pass_lower_ubo = false;
1621 if (cbuf_map_out != NULL) {
1622 ctx.cbuf_map = cbuf_map_out;
1623 build_cbuf_map(nir, &ctx);
1624
1625 pass_lower_ubo =
1626 nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
1627 nir_metadata_control_flow,
1628 (void *)&ctx);
1629 }
1630
1631 bool pass_lower_descriptors =
1632 nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
1633 nir_metadata_control_flow,
1634 (void *)&ctx);
1635 bool pass_lower_ssbo =
1636 nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
1637 nir_metadata_none,
1638 (void *)&ctx);
1639 return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
1640 }
1641