/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"
#include "nvk_descriptor_set.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_shader.h"

#include "vk_pipeline.h"

#include "nir_builder.h"
#include "nir_deref.h"

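/* Tracks how often a given cbuf is referenced by the shader and the range of
 * bytes accessed, gathered during the analysis pass below.
 */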
struct lower_desc_cbuf {
   struct nvk_cbuf key;

   uint32_t use_count;

   uint64_t start;
   uint64_t end;
};

static uint32_t
hash_cbuf(const void *data)
{
   return _mesa_hash_data(data, sizeof(struct nvk_cbuf));
}

static bool
cbufs_equal(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct nvk_cbuf)) == 0;
}

static int
compar_cbufs(const void *_a, const void *_b)
{
   const struct lower_desc_cbuf *a = _a;
   const struct lower_desc_cbuf *b = _b;

#define COMPAR(field, pos) \
   if (a->field < b->field) return -(pos); \
   if (a->field > b->field) return (pos);

   /* Sort by most used first */
   COMPAR(use_count, -1)

   /* Keep the list stable by then sorting by key fields. */
   COMPAR(key.type, 1)
   COMPAR(key.desc_set, 1)
   COMPAR(key.dynamic_idx, 1)
   COMPAR(key.desc_offset, 1)

#undef COMPAR

   return 0;
}

struct lower_descriptors_ctx {
   const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];

   bool clamp_desc_array_bounds;
   nir_address_format ubo_addr_format;
   nir_address_format ssbo_addr_format;

   struct hash_table *cbufs;
   struct nvk_cbuf_map *cbuf_map;
};

static void
record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
                struct lower_descriptors_ctx *ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
   if (entry != NULL) {
      struct lower_desc_cbuf *cbuf = entry->data;
      cbuf->use_count++;
      cbuf->start = MIN2(cbuf->start, start);
      cbuf->end = MAX2(cbuf->end, end);
   } else {
      struct lower_desc_cbuf *cbuf =
         ralloc(ctx->cbufs, struct lower_desc_cbuf);
      *cbuf = (struct lower_desc_cbuf) {
         .key = *key,
         .use_count = 1,
         .start = start,
         .end = end,
      };
      _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
   }
}

static const struct nvk_descriptor_set_binding_layout *
get_binding_layout(uint32_t set, uint32_t binding,
                   const struct lower_descriptors_ctx *ctx)
{
   assert(set < NVK_MAX_SETS);
   assert(ctx->set_layouts[set] != NULL);

   const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];

   assert(binding < set_layout->binding_count);
   return &set_layout->binding[binding];
}

static void
record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
                           struct lower_descriptors_ctx *ctx)
{
   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   const struct nvk_cbuf key = {
      .type = NVK_CBUF_TYPE_DESC_SET,
      .desc_set = set,
   };

   uint64_t start, end;
   if (index == NULL) {
      /* When we don't have an index, assume 0 */
      start = binding_layout->offset;
      end = start + binding_layout->stride;
   } else if (nir_src_is_const(*index)) {
      start = binding_layout->offset +
              nir_src_as_uint(*index) * binding_layout->stride;
      end = start + binding_layout->stride;
   } else {
      start = binding_layout->offset;
      end = start + binding_layout->array_size * binding_layout->stride;
   }

   record_cbuf_use(&key, start, end, ctx);
}

static void
record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
                                struct lower_descriptors_ctx *ctx)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);

   /* These we'll handle later */
   if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
       desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
      return;

   record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
                              nir_intrinsic_binding(intrin),
                              &intrin->src[0], ctx);
}

static void
record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
                                 struct lower_descriptors_ctx *ctx)
{
   nir_src *index_src = NULL;
   if (deref->deref_type == nir_deref_type_array) {
      index_src = &deref->arr.index;
      deref = nir_deref_instr_parent(deref);
   }

   assert(deref->deref_type == nir_deref_type_var);
   nir_variable *var = deref->var;

   record_descriptor_cbuf_use(var->data.descriptor_set,
                              var->data.binding,
                              index_src, ctx);
}

static void
record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
                               struct lower_descriptors_ctx *ctx)
{
   const int texture_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   const int sampler_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);

   if (texture_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
      record_deref_descriptor_cbuf_use(deref, ctx);
   }

   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      record_deref_descriptor_cbuf_use(deref, ctx);
   }
}

static struct nvk_cbuf
ubo_deref_to_cbuf(nir_deref_instr *deref,
                  nir_intrinsic_instr **resource_index_out,
                  uint64_t *offset_out,
                  uint64_t *start_out, uint64_t *end_out,
                  const struct lower_descriptors_ctx *ctx)
{
   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

   /* In case we early return */
   *offset_out = 0;
   *start_out = 0;
   *end_out = UINT64_MAX;
   *resource_index_out = NULL;

   const struct nvk_cbuf invalid = {
      .type = NVK_CBUF_TYPE_INVALID,
   };

   uint64_t offset = 0;
   uint64_t range = glsl_get_explicit_size(deref->type, false);
   bool offset_valid = true;
   while (deref->deref_type != nir_deref_type_cast) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_var:
         unreachable("Buffers don't use variables in Vulkan");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         uint32_t stride = nir_deref_instr_array_stride(deref);
         if (deref->deref_type == nir_deref_type_array &&
             nir_src_is_const(deref->arr.index)) {
            offset += nir_src_as_uint(deref->arr.index) * stride;
         } else {
            range = glsl_get_length(parent->type) * stride;
         }
         break;
      }

      case nir_deref_type_ptr_as_array:
         /* All bets are off. We shouldn't see these most of the time
          * anyway, even with variable pointers.
          */
         offset_valid = false;
         unreachable("Variable pointers aren't allowed on UBOs");
         break;

      case nir_deref_type_struct: {
         offset += glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
         break;
      }

      default:
         unreachable("Unknown deref type");
      }

      deref = parent;
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   if (load_desc == NULL ||
       load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return invalid;

   nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
   if (res_index == NULL ||
       res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
      return invalid;

   /* We try to early return as little as possible prior to this point so we
    * can return the resource index intrinsic in as many cases as possible.
    * After this point, though, early returns are fair game.
    */
   *resource_index_out = res_index;

   if (!offset_valid || !nir_src_is_const(res_index->src[0]))
      return invalid;

   uint32_t set = nir_intrinsic_desc_set(res_index);
   uint32_t binding = nir_intrinsic_binding(res_index);
   uint32_t index = nir_src_as_uint(res_index->src[0]);

   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
      *offset_out = 0;
      *start_out = offset;
      *end_out = offset + range;
      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_UBO_DESC,
         .desc_set = set,
         .desc_offset = binding_layout->offset +
                        index * binding_layout->stride,
      };
   }

   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
      *offset_out = 0;
      *start_out = offset;
      *end_out = offset + range;

      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
         .desc_set = set,
         .dynamic_idx = binding_layout->dynamic_buffer_index + index,
      };
   }

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
      *offset_out = binding_layout->offset;
      *start_out = binding_layout->offset + offset;
      *end_out = *start_out + range;

      return (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_DESC_SET,
         .desc_set = set,
      };
   }

   default:
      return invalid;
   }
}

static void
record_load_ubo_cbuf_uses(nir_deref_instr *deref,
                          struct lower_descriptors_ctx *ctx)
{
   assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

   UNUSED uint64_t offset;
   uint64_t start, end;
   nir_intrinsic_instr *res_index;
   struct nvk_cbuf cbuf =
      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);

   if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
      record_cbuf_use(&cbuf, start, end, ctx);
   } else if (res_index != NULL) {
      record_vulkan_resource_cbuf_use(res_index, ctx);
   }
}

static bool
record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
{
   struct lower_descriptors_ctx *ctx = _ctx;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         record_vulkan_resource_cbuf_use(intrin, ctx);
         return false;

      case nir_intrinsic_load_deref: {
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         if (nir_deref_mode_is(deref, nir_var_mem_ubo))
            record_load_ubo_cbuf_uses(deref, ctx);
         return false;
      }

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel: {
         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         record_deref_descriptor_cbuf_use(deref, ctx);
         return false;
      }

      default:
         return false;
      }
      unreachable("All cases return false");
   }

   case nir_instr_type_tex:
      record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
      return false;

   default:
      return false;
   }
}

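/* Analysis pass: walk the shader, record every potential cbuf use, and pick
 * which ones get promoted to bound cbufs.  Cbuf 0 always holds the root
 * descriptor table and, if the shader has constant data, the next slot holds
 * that; the remaining slots are filled with the most-used descriptor ranges.
 */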
static void
build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
{
   ctx->cbufs = _mesa_hash_table_create(NULL, hash_cbuf, cbufs_equal);

   nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
                                nir_metadata_all, (void *)ctx);

   struct lower_desc_cbuf *cbufs =
      ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
                   _mesa_hash_table_num_entries(ctx->cbufs));

   uint32_t num_cbufs = 0;
   hash_table_foreach(ctx->cbufs, entry) {
      struct lower_desc_cbuf *cbuf = entry->data;

      /* We currently only start cbufs at the beginning so if it starts after
       * the max cbuf size, there's no point in including it in the list.
       */
      if (cbuf->start > NVK_MAX_CBUF_SIZE)
         continue;

      cbufs[num_cbufs++] = *cbuf;
   }

   qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);

   uint32_t mapped_cbuf_count = 0;

   /* Root descriptors always go in cbuf 0 */
   ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
      .type = NVK_CBUF_TYPE_ROOT_DESC,
   };

   if (nir->constant_data_size > 0) {
      ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
         .type = NVK_CBUF_TYPE_SHADER_DATA,
      };
   }

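   /* Compute shaders get fewer bound cbuf slots than 3D stages, so the
    * number of descriptor ranges we can promote depends on the stage.
    */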
   uint8_t max_cbuf_bindings;
   if (nir->info.stage == MESA_SHADER_COMPUTE ||
       nir->info.stage == MESA_SHADER_KERNEL) {
      max_cbuf_bindings = 8;
   } else {
      max_cbuf_bindings = 16;
   }

   for (uint32_t i = 0; i < num_cbufs; i++) {
      if (mapped_cbuf_count >= max_cbuf_bindings)
         break;

      /* We can't support indirect cbufs in compute yet */
      if ((nir->info.stage == MESA_SHADER_COMPUTE ||
           nir->info.stage == MESA_SHADER_KERNEL) &&
          cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
         continue;

      ctx->cbuf_map->cbufs[mapped_cbuf_count++] = cbufs[i].key;
   }
   ctx->cbuf_map->cbuf_count = mapped_cbuf_count;

   ralloc_free(ctx->cbufs);
   ctx->cbufs = NULL;
}

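/* Returns the bound cbuf index chosen for @key by build_cbuf_map(), or -1 if
 * that cbuf was not promoted or no cbuf map is being built.
 */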
static int
get_mapped_cbuf_idx(const struct nvk_cbuf *key,
                    const struct lower_descriptors_ctx *ctx)
{
   if (ctx->cbuf_map == NULL)
      return -1;

   for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
      if (cbufs_equal(&ctx->cbuf_map->cbufs[c], key)) {
         return c;
      }
   }

   return -1;
}

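/* Rewrites a UBO load_deref whose buffer maps to a bound cbuf into an
 * explicit 32bit_index_offset load from that cbuf.  Anything that doesn't
 * map to a bound cbuf is left for the later descriptor-lowering passes.
 */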
static bool
lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
{
   const struct lower_descriptors_ctx *ctx = _ctx;

   if (load->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
      return false;

   uint64_t offset, end;
   UNUSED uint64_t start;
   UNUSED nir_intrinsic_instr *res_index;
   struct nvk_cbuf cbuf_key =
      ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);

   if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
      return false;

   if (end > NVK_MAX_CBUF_SIZE)
      return false;

   int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
   if (cbuf_idx < 0)
      return false;

   b->cursor = nir_before_instr(&load->instr);

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
   nir_address_format addr_format = nir_address_format_32bit_index_offset;
   for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
      addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);

   nir_deref_path_finish(&path);

   nir_lower_explicit_io_instr(b, load, addr, addr_format);

   return true;
}

static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
                    const struct lower_descriptors_ctx *ctx)
{
   assert(load->intrinsic == nir_intrinsic_load_constant);

   const struct nvk_cbuf cbuf_key = {
      .type = NVK_CBUF_TYPE_SHADER_DATA,
   };
   int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
   assert(cbuf_idx >= 0);

   uint32_t base = nir_intrinsic_base(load);
   uint32_t range = nir_intrinsic_range(load);

   b->cursor = nir_before_instr(&load->instr);

   nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
   nir_def *data = nir_load_ubo(b, load->def.num_components, load->def.bit_size,
                                nir_imm_int(b, cbuf_idx), offset,
                                .align_mul = nir_intrinsic_align_mul(load),
                                .align_offset = nir_intrinsic_align_offset(load),
                                .range_base = base, .range = range);

   nir_def_rewrite_uses(&load->def, data);

   return true;
}

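/* The GPU addresses of all bound descriptor sets live in the root descriptor
 * table, which is always cbuf 0.  This loads the 64-bit base address of
 * descriptor set @set from there.
 */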
static nir_def *
load_descriptor_set_addr(nir_builder *b, uint32_t set,
                         UNUSED const struct lower_descriptors_ctx *ctx)
{
   uint32_t set_addr_offset =
      nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);

   return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
                       nir_imm_int(b, set_addr_offset),
                       .align_mul = 8, .align_offset = 0, .range = ~0);
}

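/* Computes the index of the first dynamic buffer belonging to @set.  If all
 * lower-numbered set layouts are known at compile time this is an immediate;
 * otherwise it is read from the root descriptor table.
 */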
static nir_def *
load_dynamic_buffer_start(nir_builder *b, uint32_t set,
                          const struct lower_descriptors_ctx *ctx)
{
   int dynamic_buffer_start_imm = 0;
   for (uint32_t s = 0; s < set; s++) {
      if (ctx->set_layouts[s] == NULL) {
         dynamic_buffer_start_imm = -1;
         break;
      }

      dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
   }

   if (dynamic_buffer_start_imm >= 0) {
      return nir_imm_int(b, dynamic_buffer_start_imm);
   } else {
      uint32_t root_offset =
         nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;

      return nir_u2u32(b, nir_load_ubo(b, 1, 8, nir_imm_int(b, 0),
                                       nir_imm_int(b, root_offset),
                                       .align_mul = 1, .align_offset = 0,
                                       .range = ~0));
   }
}

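/* Loads the raw descriptor data for (set, binding, index).  Dynamic buffers
 * come from the root descriptor table, inline uniform blocks are converted
 * to a bounded-global pointer into the set, and everything else is read from
 * the descriptor set itself, through a bound cbuf when one is mapped.
 */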
static nir_def *
load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
                uint32_t set, uint32_t binding, nir_def *index,
                unsigned offset_B, const struct lower_descriptors_ctx *ctx)
{
   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   if (ctx->clamp_desc_array_bounds)
      index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
      /* Get the index in the root descriptor table dynamic_buffers array. */
      nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);

      index = nir_iadd(b, index,
                       nir_iadd_imm(b, dynamic_buffer_start,
                                    binding_layout->dynamic_buffer_index));

      nir_def *root_desc_offset =
         nir_iadd_imm(b, nir_imul_imm(b, index, sizeof(struct nvk_buffer_address)),
                      nvk_root_descriptor_offset(dynamic_buffers));

      assert(num_components == 4 && bit_size == 32);
      nir_def *desc =
         nir_load_ubo(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
                      .align_mul = 16, .align_offset = 0, .range = ~0);
      /* We know a priori that the .w component (offset) is zero */
      return nir_vec4(b, nir_channel(b, desc, 0),
                      nir_channel(b, desc, 1),
                      nir_channel(b, desc, 2),
                      nir_imm_int(b, 0));
   }

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
      nir_def *base_addr =
         nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
                      binding_layout->offset);

      assert(binding_layout->stride == 1);
      const uint32_t binding_size = binding_layout->array_size;

      /* Convert it to nir_address_format_64bit_bounded_global */
      assert(num_components == 4 && bit_size == 32);
      return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
                      nir_unpack_64_2x32_split_y(b, base_addr),
                      nir_imm_int(b, binding_size),
                      nir_imm_int(b, 0));
   }

   default: {
      assert(binding_layout->stride > 0);
      nir_def *desc_ubo_offset =
         nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
                      binding_layout->offset + offset_B);

      unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
      desc_align_mul = MIN2(desc_align_mul, 16);
      unsigned desc_align_offset = binding_layout->offset + offset_B;
      desc_align_offset %= desc_align_mul;

      const struct nvk_cbuf cbuf_key = {
         .type = NVK_CBUF_TYPE_DESC_SET,
         .desc_set = set,
      };
      int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);

      nir_def *desc;
      if (cbuf_idx >= 0) {
         desc = nir_load_ubo(b, num_components, bit_size,
                             nir_imm_int(b, cbuf_idx),
                             desc_ubo_offset,
                             .align_mul = desc_align_mul,
                             .align_offset = desc_align_offset,
                             .range = ~0);
      } else {
         nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
         desc = nir_load_global_constant_offset(b, num_components, bit_size,
                                                set_addr, desc_ubo_offset,
                                                .align_mul = desc_align_mul,
                                                .align_offset = desc_align_offset);
      }
      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
         /* We know a priori that the .w component (offset) is zero */
         assert(num_components == 4 && bit_size == 32);
         desc = nir_vec4(b, nir_channel(b, desc, 0),
                         nir_channel(b, desc, 1),
                         nir_channel(b, desc, 2),
                         nir_imm_int(b, 0));
      }
      return desc;
   }
   }
}

static bool
is_idx_intrin(nir_intrinsic_instr *intrin)
{
   while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      intrin = nir_src_as_intrinsic(intrin->src[0]);
      if (intrin == NULL)
         return false;
   }

   return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
}

static nir_def *
load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                               const struct lower_descriptors_ctx *ctx)
{
   nir_def *index = nir_imm_int(b, 0);

   while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
      index = nir_iadd(b, index, intrin->src[1].ssa);
      intrin = nir_src_as_intrinsic(intrin->src[0]);
   }

   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   index = nir_iadd(b, index, intrin->src[0].ssa);

   return load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
}

static bool
try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
                                 const struct lower_descriptors_ctx *ctx)
{
   ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   b->cursor = nir_before_instr(&intrin->instr);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
   if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
      assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
             desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
      return false;
   }

   nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);

   nir_def_rewrite_uses(&intrin->def, desc);

   return true;
}

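/* Loads a system value that the driver stashes in the root descriptor table
 * (cbuf 0) at @root_table_offset.
 */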
static bool
_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
                            uint32_t root_table_offset,
                            const struct lower_descriptors_ctx *ctx)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *val = nir_load_ubo(b, intrin->def.num_components,
                               intrin->def.bit_size,
                               nir_imm_int(b, 0), /* Root table */
                               nir_imm_int(b, root_table_offset),
                               .align_mul = 4,
                               .align_offset = 0,
                               .range = root_table_offset + 3 * 4);

   nir_def_rewrite_uses(&intrin->def, val);

   return true;
}

#define lower_sysval_to_root_table(b, intrin, member, ctx)           \
   _lower_sysval_to_root_table(b, intrin,                            \
                               nvk_root_descriptor_offset(member),   \
                               ctx)

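/* Push constants live in the push region of the root descriptor table, so a
 * push-constant load becomes a cbuf 0 load at push_region_offset + offset.
 */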
static bool
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
                         const struct lower_descriptors_ctx *ctx)
{
   const uint32_t push_region_offset =
      nvk_root_descriptor_offset(push);
   const uint32_t base = nir_intrinsic_base(load);

   b->cursor = nir_before_instr(&load->instr);

   nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
                                  push_region_offset + base);

   nir_def *val =
      nir_load_ubo(b, load->def.num_components, load->def.bit_size,
                   nir_imm_int(b, 0), offset,
                   .align_mul = load->def.bit_size / 8,
                   .align_offset = 0,
                   .range = push_region_offset + base +
                            nir_intrinsic_range(load));

   nir_def_rewrite_uses(&load->def, val);

   return true;
}

static void
get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
                           uint32_t *set, uint32_t *binding,
                           nir_def **index)
{
   if (deref->deref_type == nir_deref_type_array) {
      *index = deref->arr.index.ssa;
      deref = nir_deref_instr_parent(deref);
   } else {
      *index = nir_imm_int(b, 0);
   }

   assert(deref->deref_type == nir_deref_type_var);
   nir_variable *var = deref->var;

   *set = var->data.descriptor_set;
   *binding = var->data.binding;
}

static nir_def *
load_resource_deref_desc(nir_builder *b,
                         unsigned num_components, unsigned bit_size,
                         nir_deref_instr *deref, unsigned offset_B,
                         const struct lower_descriptors_ctx *ctx)
{
   uint32_t set, binding;
   nir_def *index;
   get_resource_deref_binding(b, deref, &set, &binding, &index);
   return load_descriptor(b, num_components, bit_size,
                          set, binding, index, offset_B, ctx);
}

static bool
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                   const struct lower_descriptors_ctx *ctx)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
   nir_rewrite_image_intrinsic(intrin, desc, true);

   /* We treat 3D images as 2D arrays */
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_3D) {
      assert(!nir_intrinsic_image_array(intrin));
      nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
      nir_intrinsic_set_image_array(intrin, true);
   }

   return true;
}

static bool
lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
                       const struct lower_descriptors_ctx *ctx)
{
   const uint32_t root_table_offset =
      nvk_root_descriptor_offset(draw.sample_locations);

   nir_def *sample = interp->src[1].ssa;

   b->cursor = nir_before_instr(&interp->instr);

   nir_def *loc = nir_load_ubo(b, 1, 64,
                               nir_imm_int(b, 0), /* Root table */
                               nir_imm_int(b, root_table_offset),
                               .align_mul = 8,
                               .align_offset = 0,
                               .range = root_table_offset + 8);

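   /* Sample locations are packed one byte per sample: 4 bits of x in the low
    * nibble and 4 bits of y in the high nibble, in units of 1/16 of a pixel.
    * Shift the selected byte down and convert it to a floating-point offset
    * from the pixel center.
    */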
   /* Yay little endian */
   loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
   nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
   nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
   nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
   nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
   nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);

   assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
   interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
   nir_src_rewrite(&interp->src[1], offset);

   return true;
}

static bool
try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
                 const struct lower_descriptors_ctx *ctx)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_constant:
      return lower_load_constant(b, intrin, ctx);

   case nir_intrinsic_load_vulkan_descriptor:
      return try_lower_load_vulkan_descriptor(b, intrin, ctx);

   case nir_intrinsic_load_workgroup_size:
      unreachable("Should have been lowered by nir_lower_cs_intrinsics()");

   case nir_intrinsic_load_num_workgroups:
      return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);

   case nir_intrinsic_load_base_workgroup_id:
      return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);

   case nir_intrinsic_load_push_constant:
      return lower_load_push_constant(b, intrin, ctx);

   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
      return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);

   case nir_intrinsic_load_base_instance:
      return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);

   case nir_intrinsic_load_draw_id:
      return lower_sysval_to_root_table(b, intrin, draw.draw_id, ctx);

   case nir_intrinsic_load_view_index:
      return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_deref_load_raw_intel:
   case nir_intrinsic_image_deref_store_raw_intel:
      return lower_image_intrin(b, intrin, ctx);

   case nir_intrinsic_interp_deref_at_sample:
      return lower_interp_at_sample(b, intrin, ctx);

   default:
      return false;
   }
}

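/* Lowers texture and sampler derefs to bindless handles.  When the texture
 * and sampler come from different bindings, the image index and sampler
 * index are masked out of their respective descriptors and OR'd into a
 * single combined handle, which is then used for both the texture and
 * sampler handle sources.
 */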
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          const struct lower_descriptors_ctx *ctx)
{
   b->cursor = nir_before_instr(&tex->instr);

   const int texture_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   const int sampler_src_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (texture_src_idx < 0) {
      assert(sampler_src_idx < 0);
      return false;
   }

   nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
   nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
                              nir_src_as_deref(tex->src[sampler_src_idx].src);
   assert(texture);

   nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
   const uint32_t plane =
      plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
   const uint64_t plane_offset_B = plane * sizeof(struct nvk_image_descriptor);

   nir_def *combined_handle;
   if (texture == sampler) {
      combined_handle = load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
   } else {
      nir_def *texture_desc =
         load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
      combined_handle = nir_iand_imm(b, texture_desc,
                                     NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);

      if (sampler != NULL) {
         nir_def *sampler_desc =
            load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
         nir_def *sampler_index =
            nir_iand_imm(b, sampler_desc,
                         NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
         combined_handle = nir_ior(b, combined_handle, sampler_index);
      }
   }

   /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
   combined_handle = nir_u2u64(b, combined_handle);

   /* TODO: The nv50 back-end assumes it gets handles both places, even for
    * texelFetch.
    */
   nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
   tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;

   if (sampler_src_idx < 0) {
      nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
   } else {
      nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   return true;
}

static bool
try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
                            void *_data)
{
   const struct lower_descriptors_ctx *ctx = _data;

   switch (instr->type) {
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), ctx);
   case nir_instr_type_intrinsic:
      return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
   default:
      return false;
   }
}

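/* SSBO resource indices are lowered to a pointer to the binding's descriptor
 * array: the 64-bit GPU address of the binding with the descriptor stride
 * packed into the top 8 bits, plus a per-element offset.  The stride is
 * recovered in lower_ssbo_resource_reindex() and masked off again in
 * lower_load_ssbo_descriptor().
 */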
static bool
lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
                          const struct lower_descriptors_ctx *ctx)
{
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
       desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   nir_def *index = intrin->src[0].ssa;

   const struct nvk_descriptor_set_binding_layout *binding_layout =
      get_binding_layout(set, binding, ctx);

   nir_def *binding_addr;
   uint8_t binding_stride;
   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
      nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
      binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
      binding_stride = binding_layout->stride;
      break;
   }

   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
      const uint32_t root_desc_addr_offset =
         nvk_root_descriptor_offset(root_desc_addr);

      nir_def *root_desc_addr =
         nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
                      nir_imm_int(b, root_desc_addr_offset),
                      .align_mul = 8, .align_offset = 0, .range = ~0);

      nir_def *dynamic_buffer_start =
         nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
                      binding_layout->dynamic_buffer_index);

      nir_def *dynamic_binding_offset =
         nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
                                      sizeof(struct nvk_buffer_address)),
                      nvk_root_descriptor_offset(dynamic_buffers));

      binding_addr = nir_iadd(b, root_desc_addr,
                              nir_u2u64(b, dynamic_binding_offset));
      binding_stride = sizeof(struct nvk_buffer_address);
      break;
   }

   default:
      unreachable("Not an SSBO descriptor");
   }

   /* Tuck the stride in the top 8 bits of the binding address */
   binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);

   const uint32_t binding_size = binding_layout->array_size * binding_stride;
   nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);

   nir_def *addr;
   switch (ctx->ssbo_addr_format) {
   case nir_address_format_64bit_global:
      addr = nir_iadd(b, binding_addr, nir_u2u64(b, offset_in_binding));
      break;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
                      nir_unpack_64_2x32_split_y(b, binding_addr),
                      nir_imm_int(b, binding_size),
                      offset_in_binding);
      break;

   default:
      unreachable("Unknown address mode");
   }

   nir_def_rewrite_uses(&intrin->def, addr);

   return true;
}

static bool
lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
                            const struct lower_descriptors_ctx *ctx)
{
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
       desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *addr = intrin->src[0].ssa;
   nir_def *index = intrin->src[1].ssa;

   nir_def *addr_high32;
   switch (ctx->ssbo_addr_format) {
   case nir_address_format_64bit_global:
      addr_high32 = nir_unpack_64_2x32_split_y(b, addr);
      break;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      addr_high32 = nir_channel(b, addr, 1);
      break;

   default:
      unreachable("Unknown address mode");
   }

   nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
   nir_def *offset = nir_imul(b, index, stride);

   addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
                              nir_var_mem_ssbo, offset);
   nir_def_rewrite_uses(&intrin->def, addr);

   return true;
}

static bool
lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
                           const struct lower_descriptors_ctx *ctx)
{
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
       desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *addr = intrin->src[0].ssa;

   nir_def *desc;
   switch (ctx->ssbo_addr_format) {
   case nir_address_format_64bit_global:
      /* Mask off the binding stride */
      addr = nir_iand_imm(b, addr, BITFIELD64_MASK(56));
      desc = nir_build_load_global_constant(b, 1, 64, addr,
                                            .align_mul = 16,
                                            .align_offset = 0);
      break;

   case nir_address_format_64bit_global_32bit_offset: {
      nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
      nir_def *offset = nir_channel(b, addr, 3);
      /* Mask off the binding stride */
      base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
      desc = nir_load_global_constant_offset(b, 4, 32, base, offset,
                                             .align_mul = 16,
                                             .align_offset = 0);
      break;
   }

   case nir_address_format_64bit_bounded_global: {
      nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
      nir_def *size = nir_channel(b, addr, 2);
      nir_def *offset = nir_channel(b, addr, 3);
      /* Mask off the binding stride */
      base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
      desc = nir_load_global_constant_bounded(b, 4, 32, base, offset, size,
                                              .align_mul = 16,
                                              .align_offset = 0);
      break;
   }

   default:
      unreachable("Unknown address mode");
   }

   nir_def_rewrite_uses(&intrin->def, desc);

   return true;
}

static bool
lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
                            void *_data)
{
   const struct lower_descriptors_ctx *ctx = _data;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_vulkan_resource_index:
      return lower_ssbo_resource_index(b, intrin, ctx);
   case nir_intrinsic_vulkan_resource_reindex:
      return lower_ssbo_resource_reindex(b, intrin, ctx);
   case nir_intrinsic_load_vulkan_descriptor:
      return lower_load_ssbo_descriptor(b, intrin, ctx);
   default:
      return false;
   }
}

bool
nvk_nir_lower_descriptors(nir_shader *nir,
                          const struct vk_pipeline_robustness_state *rs,
                          uint32_t set_layout_count,
                          struct vk_descriptor_set_layout * const *set_layouts,
                          struct nvk_cbuf_map *cbuf_map_out)
{
   struct lower_descriptors_ctx ctx = {
      .clamp_desc_array_bounds =
         rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
         rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
         rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
      .ssbo_addr_format = nvk_buffer_addr_format(rs->storage_buffers),
      .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
   };

   assert(set_layout_count <= NVK_MAX_SETS);
   for (uint32_t s = 0; s < set_layout_count; s++) {
      if (set_layouts[s] != NULL)
         ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
   }

   /* We run in four passes:
    *
    * 1. Find ranges of UBOs that we can promote to bound UBOs. Nothing is
    *    actually lowered in this pass. It's just analysis.
    *
    * 2. Try to lower UBO loads to cbufs based on the map we just created.
    *    We need to do this before the main lowering pass because it relies
    *    on the original descriptor load intrinsics.
    *
    * 3. Attempt to lower everything with direct descriptors. This may fail
    *    to lower some SSBO intrinsics when variable pointers are used.
    *
    * 4. Clean up any SSBO intrinsics which are left and lower them to
    *    slightly less efficient but variable-pointers-correct versions.
    */

   bool pass_lower_ubo = false;
   if (cbuf_map_out != NULL) {
      ctx.cbuf_map = cbuf_map_out;
      build_cbuf_map(nir, &ctx);

      pass_lower_ubo =
         nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
                                    nir_metadata_block_index |
                                    nir_metadata_dominance,
                                    (void *)&ctx);
   }

   bool pass_lower_descriptors =
      nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
                                   nir_metadata_block_index |
                                   nir_metadata_dominance,
                                   (void *)&ctx);
   bool pass_lower_ssbo =
      nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
                                   nir_metadata_block_index |
                                   nir_metadata_dominance,
                                   (void *)&ctx);
   return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
}