1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_cmd_buffer.h"
6 #include "nvk_descriptor_set.h"
7 #include "nvk_descriptor_set_layout.h"
8 #include "nvk_shader.h"
9 
10 #include "vk_pipeline.h"
11 
12 #include "nir_builder.h"
13 #include "nir_deref.h"
14 
15 struct lower_desc_cbuf {
16    struct nvk_cbuf key;
17 
18    uint32_t use_count;
19 
20    uint64_t start;
21    uint64_t end;
22 };
23 
24 static uint32_t
25 hash_cbuf(const void *data)
26 {
27    return _mesa_hash_data(data, sizeof(struct nvk_cbuf));
28 }
29 
30 static bool
31 cbufs_equal(const void *a, const void *b)
32 {
33    return memcmp(a, b, sizeof(struct nvk_cbuf)) == 0;
34 }
35 
36 static int
37 compar_cbufs(const void *_a, const void *_b)
38 {
39    const struct lower_desc_cbuf *a = _a;
40    const struct lower_desc_cbuf *b = _b;
41 
42 #define COMPAR(field, pos) \
43    if (a->field < b->field) return -(pos); \
44    if (a->field > b->field) return (pos);
45 
46    /* Sort by most used first */
47    COMPAR(use_count, -1)
48 
49    /* Keep the list stable by then sorting by key fields. */
50    COMPAR(key.type, 1)
51    COMPAR(key.desc_set, 1)
52    COMPAR(key.dynamic_idx, 1)
53    COMPAR(key.desc_offset, 1)
54 
55 #undef COMPAR
56 
57    return 0;
58 }
59 
60 struct lower_descriptors_ctx {
61    const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
62 
63    bool clamp_desc_array_bounds;
64    nir_address_format ubo_addr_format;
65    nir_address_format ssbo_addr_format;
66 
67    struct hash_table *cbufs;
68    struct nvk_cbuf_map *cbuf_map;
69 };
70 
71 static void
72 record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
73                 struct lower_descriptors_ctx *ctx)
74 {
75    struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
76    if (entry != NULL) {
77       struct lower_desc_cbuf *cbuf = entry->data;
78       cbuf->use_count++;
79       cbuf->start = MIN2(cbuf->start, start);
80       cbuf->end = MAX2(cbuf->end, end);
81    } else {
82       struct lower_desc_cbuf *cbuf =
83          ralloc(ctx->cbufs, struct lower_desc_cbuf);
84       *cbuf = (struct lower_desc_cbuf) {
85          .key = *key,
86          .use_count = 1,
87          .start = start,
88          .end = end,
89       };
90       _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
91    }
92 }
93 
94 static const struct nvk_descriptor_set_binding_layout *
95 get_binding_layout(uint32_t set, uint32_t binding,
96                    const struct lower_descriptors_ctx *ctx)
97 {
98    assert(set < NVK_MAX_SETS);
99    assert(ctx->set_layouts[set] != NULL);
100 
101    const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
102 
103    assert(binding < set_layout->binding_count);
104    return &set_layout->binding[binding];
105 }
106 
107 static void
108 record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
109                            struct lower_descriptors_ctx *ctx)
110 {
111    const struct nvk_descriptor_set_binding_layout *binding_layout =
112       get_binding_layout(set, binding, ctx);
113 
114    const struct nvk_cbuf key = {
115       .type = NVK_CBUF_TYPE_DESC_SET,
116       .desc_set = set,
117    };
118 
119    uint64_t start, end;
120    if (index == NULL) {
121       /* When we don't have an index, assume 0 */
122       start = binding_layout->offset;
123       end = start + binding_layout->stride;
124    } else if (nir_src_is_const(*index)) {
125       start = binding_layout->offset +
126               nir_src_as_uint(*index) * binding_layout->stride;
127       end = start + binding_layout->stride;
128    } else {
129       start = binding_layout->offset;
130       end = start + binding_layout->array_size * binding_layout->stride;
131    }
132 
133    record_cbuf_use(&key, start, end, ctx);
134 }
135 
136 static void
137 record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
138                                 struct lower_descriptors_ctx *ctx)
139 {
140    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
141    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
142 
143    /* These we'll handle later */
144    if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
145        desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
146       return;
147 
148    record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
149                               nir_intrinsic_binding(intrin),
150                               &intrin->src[0], ctx);
151 }
152 
153 static void
154 record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
155                                  struct lower_descriptors_ctx *ctx)
156 {
157    nir_src *index_src = NULL;
158    if (deref->deref_type == nir_deref_type_array) {
159       index_src = &deref->arr.index;
160       deref = nir_deref_instr_parent(deref);
161    }
162 
163    assert(deref->deref_type == nir_deref_type_var);
164    nir_variable *var = deref->var;
165 
166    record_descriptor_cbuf_use(var->data.descriptor_set,
167                               var->data.binding,
168                               index_src, ctx);
169 }
170 
171 static void
172 record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
173                                struct lower_descriptors_ctx *ctx)
174 {
175    const int texture_src_idx =
176       nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
177    const int sampler_src_idx =
178       nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
179 
180    if (texture_src_idx >= 0) {
181       nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
182       record_deref_descriptor_cbuf_use(deref, ctx);
183    }
184 
185    if (sampler_src_idx >= 0) {
186       nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
187       record_deref_descriptor_cbuf_use(deref, ctx);
188    }
189 }
190 
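/* Walks a UBO deref chain back to its vulkan_resource_index intrinsic and
 * translates it into an nvk_cbuf key plus the byte range it touches within
 * that cbuf.  Returns a key of type NVK_CBUF_TYPE_INVALID when the access
 * cannot be described as a cbuf.
 */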
191 static struct nvk_cbuf
192 ubo_deref_to_cbuf(nir_deref_instr *deref,
193                   nir_intrinsic_instr **resource_index_out,
194                   uint64_t *offset_out,
195                   uint64_t *start_out, uint64_t *end_out,
196                   const struct lower_descriptors_ctx *ctx)
197 {
198    assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
199 
200    /* In case we early return */
201    *offset_out = 0;
202    *start_out = 0;
203    *end_out = UINT64_MAX;
204    *resource_index_out = NULL;
205 
206    const struct nvk_cbuf invalid = {
207       .type = NVK_CBUF_TYPE_INVALID,
208    };
209 
210    uint64_t offset = 0;
211    uint64_t range = glsl_get_explicit_size(deref->type, false);
212    bool offset_valid = true;
213    while (deref->deref_type != nir_deref_type_cast) {
214       nir_deref_instr *parent = nir_deref_instr_parent(deref);
215 
216       switch (deref->deref_type) {
217       case nir_deref_type_var:
218          unreachable("Buffers don't use variables in Vulkan");
219 
220       case nir_deref_type_array:
221       case nir_deref_type_array_wildcard: {
222          uint32_t stride = nir_deref_instr_array_stride(deref);
223          if (deref->deref_type == nir_deref_type_array &&
224              nir_src_is_const(deref->arr.index)) {
225             offset += nir_src_as_uint(deref->arr.index) * stride;
226          } else {
227             range = glsl_get_length(parent->type) * stride;
228          }
229          break;
230       }
231 
232       case nir_deref_type_ptr_as_array:
233          /* All bets are off.  We shouldn't see these most of the time
234           * anyway, even with variable pointers.
235           */
236          offset_valid = false;
237          unreachable("Variable pointers aren't allowed on UBOs");
238          break;
239 
240       case nir_deref_type_struct: {
241          offset += glsl_get_struct_field_offset(parent->type,
242                                                 deref->strct.index);
243          break;
244       }
245 
246       default:
247          unreachable("Unknown deref type");
248       }
249 
250       deref = parent;
251    }
252 
253    nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
254    if (load_desc == NULL ||
255        load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
256       return invalid;
257 
258    nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
259    if (res_index == NULL ||
260        res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
261       return invalid;
262 
263    /* We try to early return as little as possible prior to this point so we
264     * can return the resource index intrinsic in as many cases as possible.
265     * After this point, though, early returns are fair game.
266     */
267    *resource_index_out = res_index;
268 
269    if (!offset_valid || !nir_src_is_const(res_index->src[0]))
270       return invalid;
271 
272    uint32_t set = nir_intrinsic_desc_set(res_index);
273    uint32_t binding = nir_intrinsic_binding(res_index);
274    uint32_t index = nir_src_as_uint(res_index->src[0]);
275 
276    const struct nvk_descriptor_set_binding_layout *binding_layout =
277       get_binding_layout(set, binding, ctx);
278 
279    switch (binding_layout->type) {
280    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
281       *offset_out = 0;
282       *start_out = offset;
283       *end_out = offset + range;
284       return (struct nvk_cbuf) {
285          .type = NVK_CBUF_TYPE_UBO_DESC,
286          .desc_set = set,
287          .desc_offset = binding_layout->offset +
288                         index * binding_layout->stride,
289       };
290    }
291 
292    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
293       *offset_out = 0;
294       *start_out = offset;
295       *end_out = offset + range;
296 
297       return (struct nvk_cbuf) {
298          .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
299          .desc_set = set,
300          .dynamic_idx = binding_layout->dynamic_buffer_index + index,
301       };
302    }
303 
304    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
305       *offset_out = binding_layout->offset;
306       *start_out = binding_layout->offset + offset;
307       *end_out = *start_out + range;
308 
309       return (struct nvk_cbuf) {
310          .type = NVK_CBUF_TYPE_DESC_SET,
311          .desc_set = set,
312       };
313    }
314 
315    default:
316       return invalid;
317    }
318 }
319 
320 static void
321 record_load_ubo_cbuf_uses(nir_deref_instr *deref,
322                           struct lower_descriptors_ctx *ctx)
323 {
324    assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
325 
326    UNUSED uint64_t offset;
327    uint64_t start, end;
328    nir_intrinsic_instr *res_index;
329    struct nvk_cbuf cbuf =
330       ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
331 
332    if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
333       record_cbuf_use(&cbuf, start, end, ctx);
334    } else if (res_index != NULL) {
335       record_vulkan_resource_cbuf_use(res_index, ctx);
336    }
337 }
338 
339 static bool
340 record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
341 {
342    struct lower_descriptors_ctx *ctx = _ctx;
343 
344    switch (instr->type) {
345    case nir_instr_type_intrinsic: {
346       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
347       switch (intrin->intrinsic) {
348       case nir_intrinsic_vulkan_resource_index:
349          record_vulkan_resource_cbuf_use(intrin, ctx);
350          return false;
351 
352       case nir_intrinsic_load_deref: {
353          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
354          if (nir_deref_mode_is(deref, nir_var_mem_ubo))
355             record_load_ubo_cbuf_uses(deref, ctx);
356          return false;
357       }
358 
359       case nir_intrinsic_image_deref_load:
360       case nir_intrinsic_image_deref_store:
361       case nir_intrinsic_image_deref_atomic:
362       case nir_intrinsic_image_deref_atomic_swap:
363       case nir_intrinsic_image_deref_size:
364       case nir_intrinsic_image_deref_samples:
365       case nir_intrinsic_image_deref_load_param_intel:
366       case nir_intrinsic_image_deref_load_raw_intel:
367       case nir_intrinsic_image_deref_store_raw_intel: {
368          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
369          record_deref_descriptor_cbuf_use(deref, ctx);
370          return false;
371       }
372 
373       default:
374          return false;
375       }
376       unreachable("All cases return false");
377    }
378 
379    case nir_instr_type_tex:
380       record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
381       return false;
382 
383    default:
384       return false;
385    }
386 }
387 
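/* Walks the shader recording every potential cbuf use, then fills in
 * ctx->cbuf_map with the most-used candidates.  Cbuf 0 always holds the root
 * descriptor table and, when the shader has constant data, the next slot
 * holds the shader data cbuf.
 */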
388 static void
389 build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
390 {
391    ctx->cbufs = _mesa_hash_table_create(NULL, hash_cbuf, cbufs_equal);
392 
393    nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
394                                 nir_metadata_all, (void *)ctx);
395 
396    struct lower_desc_cbuf *cbufs =
397       ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
398                    _mesa_hash_table_num_entries(ctx->cbufs));
399 
400    uint32_t num_cbufs = 0;
401    hash_table_foreach(ctx->cbufs, entry) {
402       struct lower_desc_cbuf *cbuf = entry->data;
403 
404       /* We currently only start cbufs at the beginning so if it starts after
405        * the max cbuf size, there's no point in including it in the list.
406        */
407       if (cbuf->start > NVK_MAX_CBUF_SIZE)
408          continue;
409 
410       cbufs[num_cbufs++] = *cbuf;
411    }
412 
413    qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
414 
415    uint32_t mapped_cbuf_count = 0;
416 
417    /* Root descriptors always go in cbuf 0 */
418    ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
419       .type = NVK_CBUF_TYPE_ROOT_DESC,
420    };
421 
422    if (nir->constant_data_size > 0) {
423       ctx->cbuf_map->cbufs[mapped_cbuf_count++] = (struct nvk_cbuf) {
424          .type = NVK_CBUF_TYPE_SHADER_DATA,
425       };
426    }
427 
428    uint8_t max_cbuf_bindings;
429    if (nir->info.stage == MESA_SHADER_COMPUTE ||
430        nir->info.stage == MESA_SHADER_KERNEL) {
431       max_cbuf_bindings = 8;
432    } else {
433       max_cbuf_bindings = 16;
434    }
435 
436    for (uint32_t i = 0; i < num_cbufs; i++) {
437       if (mapped_cbuf_count >= max_cbuf_bindings)
438          break;
439 
440       /* We can't support indirect cbufs in compute yet */
441       if ((nir->info.stage == MESA_SHADER_COMPUTE ||
442            nir->info.stage == MESA_SHADER_KERNEL) &&
443           cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
444          continue;
445 
446       ctx->cbuf_map->cbufs[mapped_cbuf_count++] = cbufs[i].key;
447    }
448    ctx->cbuf_map->cbuf_count = mapped_cbuf_count;
449 
450    ralloc_free(ctx->cbufs);
451    ctx->cbufs = NULL;
452 }
453 
454 static int
455 get_mapped_cbuf_idx(const struct nvk_cbuf *key,
456                     const struct lower_descriptors_ctx *ctx)
457 {
458    if (ctx->cbuf_map == NULL)
459       return -1;
460 
461    for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
462       if (cbufs_equal(&ctx->cbuf_map->cbufs[c], key)) {
463          return c;
464       }
465    }
466 
467    return -1;
468 }
469 
470 static bool
471 lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
472 {
473    const struct lower_descriptors_ctx *ctx = _ctx;
474 
475    if (load->intrinsic != nir_intrinsic_load_deref)
476       return false;
477 
478    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
479    if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
480       return false;
481 
482    uint64_t offset, end;
483    UNUSED uint64_t start;
484    UNUSED nir_intrinsic_instr *res_index;
485    struct nvk_cbuf cbuf_key =
486       ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
487 
488    if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
489       return false;
490 
491    if (end > NVK_MAX_CBUF_SIZE)
492       return false;
493 
494    int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
495    if (cbuf_idx < 0)
496       return false;
497 
498    b->cursor = nir_before_instr(&load->instr);
499 
500    nir_deref_path path;
501    nir_deref_path_init(&path, deref, NULL);
502 
503    nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
504    nir_address_format addr_format = nir_address_format_32bit_index_offset;
505    for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
506       addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);
507 
508    nir_deref_path_finish(&path);
509 
510    nir_lower_explicit_io_instr(b, load, addr, addr_format);
511 
512    return true;
513 }
514 
515 static bool
516 lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
517                     const struct lower_descriptors_ctx *ctx)
518 {
519    assert(load->intrinsic == nir_intrinsic_load_constant);
520 
521    const struct nvk_cbuf cbuf_key = {
522       .type = NVK_CBUF_TYPE_SHADER_DATA,
523    };
524    int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
525    assert(cbuf_idx >= 0);
526 
527    uint32_t base = nir_intrinsic_base(load);
528    uint32_t range = nir_intrinsic_range(load);
529 
530    b->cursor = nir_before_instr(&load->instr);
531 
532    nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
533    nir_def *data = nir_load_ubo(b, load->def.num_components, load->def.bit_size,
534                                 nir_imm_int(b, cbuf_idx), offset,
535                                 .align_mul = nir_intrinsic_align_mul(load),
536                                 .align_offset = nir_intrinsic_align_offset(load),
537                                 .range_base = base, .range = range);
538 
539    nir_def_rewrite_uses(&load->def, data);
540 
541    return true;
542 }
543 
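/* Loads the 64-bit base address of descriptor set `set` from the root
 * descriptor table, which is always bound as cbuf 0.
 */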
544 static nir_def *
545 load_descriptor_set_addr(nir_builder *b, uint32_t set,
546                          UNUSED const struct lower_descriptors_ctx *ctx)
547 {
548    uint32_t set_addr_offset =
549       nvk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
550 
551    return nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
552                        nir_imm_int(b, set_addr_offset),
553                        .align_mul = 8, .align_offset = 0, .range = ~0);
554 }
555 
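/* Returns the index of the first dynamic buffer belonging to `set`: an
 * immediate when every lower-numbered set layout is known at compile time,
 * otherwise a value loaded from the root descriptor table.
 */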
556 static nir_def *
557 load_dynamic_buffer_start(nir_builder *b, uint32_t set,
558                           const struct lower_descriptors_ctx *ctx)
559 {
560    int dynamic_buffer_start_imm = 0;
561    for (uint32_t s = 0; s < set; s++) {
562       if (ctx->set_layouts[s] == NULL) {
563          dynamic_buffer_start_imm = -1;
564          break;
565       }
566 
567       dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
568    }
569 
570    if (dynamic_buffer_start_imm >= 0) {
571       return nir_imm_int(b, dynamic_buffer_start_imm);
572    } else {
573       uint32_t root_offset =
574          nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
575 
576       return nir_u2u32(b, nir_load_ubo(b, 1, 8, nir_imm_int(b, 0),
577                                        nir_imm_int(b, root_offset),
578                                        .align_mul = 1, .align_offset = 0,
579                                        .range = ~0));
580    }
581 }
582 
583 static nir_def *
584 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
585                 uint32_t set, uint32_t binding, nir_def *index,
586                 unsigned offset_B, const struct lower_descriptors_ctx *ctx)
587 {
588    const struct nvk_descriptor_set_binding_layout *binding_layout =
589       get_binding_layout(set, binding, ctx);
590 
591    if (ctx->clamp_desc_array_bounds)
592       index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
593 
594    switch (binding_layout->type) {
595    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
596    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
597       /* Get the index in the root descriptor table dynamic_buffers array. */
598       nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
599 
600       index = nir_iadd(b, index,
601                        nir_iadd_imm(b, dynamic_buffer_start,
602                                     binding_layout->dynamic_buffer_index));
603 
604       nir_def *root_desc_offset =
605          nir_iadd_imm(b, nir_imul_imm(b, index, sizeof(struct nvk_buffer_address)),
606                       nvk_root_descriptor_offset(dynamic_buffers));
607 
608       assert(num_components == 4 && bit_size == 32);
609       nir_def *desc =
610          nir_load_ubo(b, 4, 32, nir_imm_int(b, 0), root_desc_offset,
611                       .align_mul = 16, .align_offset = 0, .range = ~0);
612       /* We know a priori that the .w component (offset) is zero */
613       return nir_vec4(b, nir_channel(b, desc, 0),
614                          nir_channel(b, desc, 1),
615                          nir_channel(b, desc, 2),
616                          nir_imm_int(b, 0));
617    }
618 
619    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
620       nir_def *base_addr =
621          nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
622                           binding_layout->offset);
623 
624       assert(binding_layout->stride == 1);
625       const uint32_t binding_size = binding_layout->array_size;
626 
627       /* Convert it to nir_address_format_64bit_bounded_global */
628       assert(num_components == 4 && bit_size == 32);
629       return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
630                          nir_unpack_64_2x32_split_y(b, base_addr),
631                          nir_imm_int(b, binding_size),
632                          nir_imm_int(b, 0));
633    }
634 
635    default: {
636       assert(binding_layout->stride > 0);
637       nir_def *desc_ubo_offset =
638          nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
639                          binding_layout->offset + offset_B);
640 
641       unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
642       desc_align_mul = MIN2(desc_align_mul, 16);
643       unsigned desc_align_offset = binding_layout->offset + offset_B;
644       desc_align_offset %= desc_align_mul;
645 
646       const struct nvk_cbuf cbuf_key = {
647          .type = NVK_CBUF_TYPE_DESC_SET,
648          .desc_set = set,
649       };
650       int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
651 
652       nir_def *desc;
653       if (cbuf_idx >= 0) {
654          desc = nir_load_ubo(b, num_components, bit_size,
655                              nir_imm_int(b, cbuf_idx),
656                              desc_ubo_offset,
657                              .align_mul = desc_align_mul,
658                              .align_offset = desc_align_offset,
659                              .range = ~0);
660       } else {
661          nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
662          desc = nir_load_global_constant_offset(b, num_components, bit_size,
663                                                 set_addr, desc_ubo_offset,
664                                                 .align_mul = desc_align_mul,
665                                                 .align_offset = desc_align_offset);
666       }
667       if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
668           binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
669          /* We know a priori that the .w component (offset) is zero */
670          assert(num_components == 4 && bit_size == 32);
671          desc = nir_vec4(b, nir_channel(b, desc, 0),
672                             nir_channel(b, desc, 1),
673                             nir_channel(b, desc, 2),
674                             nir_imm_int(b, 0));
675       }
676       return desc;
677    }
678    }
679 }
680 
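/* Returns true if following the vulkan_resource_reindex chain eventually
 * reaches a vulkan_resource_index intrinsic.
 */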
681 static bool
682 is_idx_intrin(nir_intrinsic_instr *intrin)
683 {
684    while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
685       intrin = nir_src_as_intrinsic(intrin->src[0]);
686       if (intrin == NULL)
687          return false;
688    }
689 
690    return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
691 }
692 
693 static nir_def *
694 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
695                                const struct lower_descriptors_ctx *ctx)
696 {
697    nir_def *index = nir_imm_int(b, 0);
698 
699    while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
700       index = nir_iadd(b, index, intrin->src[1].ssa);
701       intrin = nir_src_as_intrinsic(intrin->src[0]);
702    }
703 
704    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
705    uint32_t set = nir_intrinsic_desc_set(intrin);
706    uint32_t binding = nir_intrinsic_binding(intrin);
707    index = nir_iadd(b, index, intrin->src[0].ssa);
708 
709    return load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
710 }
711 
712 static bool
713 try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
714                                  const struct lower_descriptors_ctx *ctx)
715 {
716    ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
717    b->cursor = nir_before_instr(&intrin->instr);
718 
719    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
720    if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
721       assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
722              desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
723       return false;
724    }
725 
726    nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
727 
728    nir_def_rewrite_uses(&intrin->def, desc);
729 
730    return true;
731 }
732 
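/* Replaces a system-value intrinsic with a UBO load from the given offset
 * into the root descriptor table (cbuf 0).
 */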
733 static bool
734 _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
735                             uint32_t root_table_offset,
736                             const struct lower_descriptors_ctx *ctx)
737 {
738    b->cursor = nir_instr_remove(&intrin->instr);
739 
740    nir_def *val = nir_load_ubo(b, intrin->def.num_components,
741                                intrin->def.bit_size,
742                                nir_imm_int(b, 0), /* Root table */
743                                nir_imm_int(b, root_table_offset),
744                                .align_mul = 4,
745                                .align_offset = 0,
746                                .range = root_table_offset + 3 * 4);
747 
748    nir_def_rewrite_uses(&intrin->def, val);
749 
750    return true;
751 }
752 
753 #define lower_sysval_to_root_table(b, intrin, member, ctx)           \
754    _lower_sysval_to_root_table(b, intrin,                            \
755                                nvk_root_descriptor_offset(member),   \
756                                ctx)
757 
758 static bool
759 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
760                          const struct lower_descriptors_ctx *ctx)
761 {
762    const uint32_t push_region_offset =
763       nvk_root_descriptor_offset(push);
764    const uint32_t base = nir_intrinsic_base(load);
765 
766    b->cursor = nir_before_instr(&load->instr);
767 
768    nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
769                                          push_region_offset + base);
770 
771    nir_def *val =
772       nir_load_ubo(b, load->def.num_components, load->def.bit_size,
773                    nir_imm_int(b, 0), offset,
774                    .align_mul = load->def.bit_size / 8,
775                    .align_offset = 0,
776                    .range = push_region_offset + base +
777                             nir_intrinsic_range(load));
778 
779    nir_def_rewrite_uses(&load->def, val);
780 
781    return true;
782 }
783 
784 static void
785 get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
786                            uint32_t *set, uint32_t *binding,
787                            nir_def **index)
788 {
789    if (deref->deref_type == nir_deref_type_array) {
790       *index = deref->arr.index.ssa;
791       deref = nir_deref_instr_parent(deref);
792    } else {
793       *index = nir_imm_int(b, 0);
794    }
795 
796    assert(deref->deref_type == nir_deref_type_var);
797    nir_variable *var = deref->var;
798 
799    *set = var->data.descriptor_set;
800    *binding = var->data.binding;
801 }
802 
803 static nir_def *
804 load_resource_deref_desc(nir_builder *b,
805                          unsigned num_components, unsigned bit_size,
806                          nir_deref_instr *deref, unsigned offset_B,
807                          const struct lower_descriptors_ctx *ctx)
808 {
809    uint32_t set, binding;
810    nir_def *index;
811    get_resource_deref_binding(b, deref, &set, &binding, &index);
812    return load_descriptor(b, num_components, bit_size,
813                           set, binding, index, offset_B, ctx);
814 }
815 
816 static bool
817 lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
818                    const struct lower_descriptors_ctx *ctx)
819 {
820    b->cursor = nir_before_instr(&intrin->instr);
821    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
822    nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
823    nir_rewrite_image_intrinsic(intrin, desc, true);
824 
825    /* We treat 3D images as 2D arrays */
826    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_3D) {
827       assert(!nir_intrinsic_image_array(intrin));
828       nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
829       nir_intrinsic_set_image_array(intrin, true);
830    }
831 
832    return true;
833 }
834 
835 static bool
836 lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
837                        const struct lower_descriptors_ctx *ctx)
838 {
839    const uint32_t root_table_offset =
840       nvk_root_descriptor_offset(draw.sample_locations);
841 
842    nir_def *sample = interp->src[1].ssa;
843 
844    b->cursor = nir_before_instr(&interp->instr);
845 
846    nir_def *loc = nir_load_ubo(b, 1, 64,
847                                nir_imm_int(b, 0), /* Root table */
848                                nir_imm_int(b, root_table_offset),
849                                .align_mul = 8,
850                                .align_offset = 0,
851                                .range = root_table_offset + 8);
852 
853    /* Yay little endian */
854    loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
855    nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
856    nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
857    nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
858    nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
859    nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);
860 
861    assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
862    interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
863    nir_src_rewrite(&interp->src[1], offset);
864 
865    return true;
866 }
867 
868 static bool
869 try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
870                  const struct lower_descriptors_ctx *ctx)
871 {
872    switch (intrin->intrinsic) {
873    case nir_intrinsic_load_constant:
874       return lower_load_constant(b, intrin, ctx);
875 
876    case nir_intrinsic_load_vulkan_descriptor:
877       return try_lower_load_vulkan_descriptor(b, intrin, ctx);
878 
879    case nir_intrinsic_load_workgroup_size:
880       unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
881 
882    case nir_intrinsic_load_num_workgroups:
883       return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);
884 
885    case nir_intrinsic_load_base_workgroup_id:
886       return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);
887 
888    case nir_intrinsic_load_push_constant:
889       return lower_load_push_constant(b, intrin, ctx);
890 
891    case nir_intrinsic_load_base_vertex:
892    case nir_intrinsic_load_first_vertex:
893       return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);
894 
895    case nir_intrinsic_load_base_instance:
896       return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);
897 
898    case nir_intrinsic_load_draw_id:
899       return lower_sysval_to_root_table(b, intrin, draw.draw_id, ctx);
900 
901    case nir_intrinsic_load_view_index:
902       return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);
903 
904    case nir_intrinsic_image_deref_load:
905    case nir_intrinsic_image_deref_store:
906    case nir_intrinsic_image_deref_atomic:
907    case nir_intrinsic_image_deref_atomic_swap:
908    case nir_intrinsic_image_deref_size:
909    case nir_intrinsic_image_deref_samples:
910    case nir_intrinsic_image_deref_load_param_intel:
911    case nir_intrinsic_image_deref_load_raw_intel:
912    case nir_intrinsic_image_deref_store_raw_intel:
913       return lower_image_intrin(b, intrin, ctx);
914 
915    case nir_intrinsic_interp_deref_at_sample:
916       return lower_interp_at_sample(b, intrin, ctx);
917 
918    default:
919       return false;
920    }
921 }
922 
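/* Rewrites texture/sampler derefs into a combined texture+sampler handle
 * loaded from the descriptor set, widened to 64 bits because the nv50
 * back-end assumes 64-bit handles (see the TODOs below).
 */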
923 static bool
924 lower_tex(nir_builder *b, nir_tex_instr *tex,
925           const struct lower_descriptors_ctx *ctx)
926 {
927    b->cursor = nir_before_instr(&tex->instr);
928 
929    const int texture_src_idx =
930       nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
931    const int sampler_src_idx =
932       nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
933    if (texture_src_idx < 0) {
934       assert(sampler_src_idx < 0);
935       return false;
936    }
937 
938    nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
939    nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
940                               nir_src_as_deref(tex->src[sampler_src_idx].src);
941    assert(texture);
942 
943    nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
944    const uint32_t plane =
945       plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
946    const uint64_t plane_offset_B = plane * sizeof(struct nvk_image_descriptor);
947 
948    nir_def *combined_handle;
949    if (texture == sampler) {
950       combined_handle = load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
951    } else {
952       nir_def *texture_desc =
953          load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
954       combined_handle = nir_iand_imm(b, texture_desc,
955                                      NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
956 
957       if (sampler != NULL) {
958          nir_def *sampler_desc =
959             load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
960          nir_def *sampler_index =
961             nir_iand_imm(b, sampler_desc,
962                          NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
963          combined_handle = nir_ior(b, combined_handle, sampler_index);
964       }
965    }
966 
967    /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
968    combined_handle = nir_u2u64(b, combined_handle);
969 
970    /* TODO: The nv50 back-end assumes it gets handles both places, even for
971     * texelFetch.
972     */
973    nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
974    tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
975 
976    if (sampler_src_idx < 0) {
977       nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
978    } else {
979       nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
980       tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
981    }
982 
983    return true;
984 }
985 
986 static bool
987 try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
988                             void *_data)
989 {
990    const struct lower_descriptors_ctx *ctx = _data;
991 
992    switch (instr->type) {
993    case nir_instr_type_tex:
994       return lower_tex(b, nir_instr_as_tex(instr), ctx);
995    case nir_instr_type_intrinsic:
996       return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
997    default:
998       return false;
999    }
1000 }
1001 
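/* Lowers vulkan_resource_index for SSBOs to an address in
 * ctx->ssbo_addr_format, with the binding stride tucked into the top 8 bits
 * of the 64-bit binding address.
 */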
1002 static bool
1003 lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
1004                           const struct lower_descriptors_ctx *ctx)
1005 {
1006    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1007    if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
1008        desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
1009       return false;
1010 
1011    b->cursor = nir_instr_remove(&intrin->instr);
1012 
1013    uint32_t set = nir_intrinsic_desc_set(intrin);
1014    uint32_t binding = nir_intrinsic_binding(intrin);
1015    nir_def *index = intrin->src[0].ssa;
1016 
1017    const struct nvk_descriptor_set_binding_layout *binding_layout =
1018       get_binding_layout(set, binding, ctx);
1019 
1020    nir_def *binding_addr;
1021    uint8_t binding_stride;
1022    switch (binding_layout->type) {
1023    case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
1024    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
1025       nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
1026       binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
1027       binding_stride = binding_layout->stride;
1028       break;
1029    }
1030 
1031    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1032       const uint32_t root_desc_addr_offset =
1033          nvk_root_descriptor_offset(root_desc_addr);
1034 
1035       nir_def *root_desc_addr =
1036          nir_load_ubo(b, 1, 64, nir_imm_int(b, 0),
1037                       nir_imm_int(b, root_desc_addr_offset),
1038                       .align_mul = 8, .align_offset = 0, .range = ~0);
1039 
1040       nir_def *dynamic_buffer_start =
1041          nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
1042                       binding_layout->dynamic_buffer_index);
1043 
1044       nir_def *dynamic_binding_offset =
1045          nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
1046                                       sizeof(struct nvk_buffer_address)),
1047                       nvk_root_descriptor_offset(dynamic_buffers));
1048 
1049       binding_addr = nir_iadd(b, root_desc_addr,
1050                                  nir_u2u64(b, dynamic_binding_offset));
1051       binding_stride = sizeof(struct nvk_buffer_address);
1052       break;
1053    }
1054 
1055    default:
1056       unreachable("Not an SSBO descriptor");
1057    }
1058 
1059    /* Tuck the stride in the top 8 bits of the binding address */
1060    binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
1061 
1062    const uint32_t binding_size = binding_layout->array_size * binding_stride;
1063    nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
1064 
1065    nir_def *addr;
1066    switch (ctx->ssbo_addr_format) {
1067    case nir_address_format_64bit_global:
1068       addr = nir_iadd(b, binding_addr, nir_u2u64(b, offset_in_binding));
1069       break;
1070 
1071    case nir_address_format_64bit_global_32bit_offset:
1072    case nir_address_format_64bit_bounded_global:
1073       addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
1074                          nir_unpack_64_2x32_split_y(b, binding_addr),
1075                          nir_imm_int(b, binding_size),
1076                          offset_in_binding);
1077       break;
1078 
1079    default:
1080       unreachable("Unknown address mode");
1081    }
1082 
1083    nir_def_rewrite_uses(&intrin->def, addr);
1084 
1085    return true;
1086 }
1087 
1088 static bool
1089 lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
1090                             const struct lower_descriptors_ctx *ctx)
1091 {
1092    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1093    if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
1094        desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
1095       return false;
1096 
1097    b->cursor = nir_instr_remove(&intrin->instr);
1098 
1099    nir_def *addr = intrin->src[0].ssa;
1100    nir_def *index = intrin->src[1].ssa;
1101 
1102    nir_def *addr_high32;
1103    switch (ctx->ssbo_addr_format) {
1104    case nir_address_format_64bit_global:
1105       addr_high32 = nir_unpack_64_2x32_split_y(b, addr);
1106       break;
1107 
1108    case nir_address_format_64bit_global_32bit_offset:
1109    case nir_address_format_64bit_bounded_global:
1110       addr_high32 = nir_channel(b, addr, 1);
1111       break;
1112 
1113    default:
1114       unreachable("Unknown address mode");
1115    }
1116 
1117    nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
1118    nir_def *offset = nir_imul(b, index, stride);
1119 
1120    addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
1121                               nir_var_mem_ssbo, offset);
1122    nir_def_rewrite_uses(&intrin->def, addr);
1123 
1124    return true;
1125 }
1126 
1127 static bool
1128 lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1129                            const struct lower_descriptors_ctx *ctx)
1130 {
1131    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1132    if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
1133        desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
1134       return false;
1135 
1136    b->cursor = nir_instr_remove(&intrin->instr);
1137 
1138    nir_def *addr = intrin->src[0].ssa;
1139 
1140    nir_def *desc;
1141    switch (ctx->ssbo_addr_format) {
1142    case nir_address_format_64bit_global:
1143       /* Mask off the binding stride */
1144       addr = nir_iand_imm(b, addr, BITFIELD64_MASK(56));
1145       desc = nir_build_load_global_constant(b, 1, 64, addr,
1146                                             .align_mul = 16,
1147                                             .align_offset = 0);
1148       break;
1149 
1150    case nir_address_format_64bit_global_32bit_offset: {
1151       nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1152       nir_def *offset = nir_channel(b, addr, 3);
1153       /* Mask off the binding stride */
1154       base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1155       desc = nir_load_global_constant_offset(b, 4, 32, base, offset,
1156                                              .align_mul = 16,
1157                                              .align_offset = 0);
1158       break;
1159    }
1160 
1161    case nir_address_format_64bit_bounded_global: {
1162       nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1163       nir_def *size = nir_channel(b, addr, 2);
1164       nir_def *offset = nir_channel(b, addr, 3);
1165       /* Mask off the binding stride */
1166       base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1167       desc = nir_load_global_constant_bounded(b, 4, 32, base, offset, size,
1168                                               .align_mul = 16,
1169                                               .align_offset = 0);
1170       break;
1171    }
1172 
1173    default:
1174       unreachable("Unknown address mode");
1175    }
1176 
1177    nir_def_rewrite_uses(&intrin->def, desc);
1178 
1179    return true;
1180 }
1181 
1182 static bool
1183 lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
1184                             void *_data)
1185 {
1186    const struct lower_descriptors_ctx *ctx = _data;
1187 
1188    if (instr->type != nir_instr_type_intrinsic)
1189       return false;
1190 
1191    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1192    switch (intrin->intrinsic) {
1193    case nir_intrinsic_vulkan_resource_index:
1194       return lower_ssbo_resource_index(b, intrin, ctx);
1195    case nir_intrinsic_vulkan_resource_reindex:
1196       return lower_ssbo_resource_reindex(b, intrin, ctx);
1197    case nir_intrinsic_load_vulkan_descriptor:
1198       return lower_load_ssbo_descriptor(b, intrin, ctx);
1199    default:
1200       return false;
1201    }
1202 }
1203 
1204 bool
1205 nvk_nir_lower_descriptors(nir_shader *nir,
1206                           const struct vk_pipeline_robustness_state *rs,
1207                           uint32_t set_layout_count,
1208                           struct vk_descriptor_set_layout * const *set_layouts,
1209                           struct nvk_cbuf_map *cbuf_map_out)
1210 {
1211    struct lower_descriptors_ctx ctx = {
1212       .clamp_desc_array_bounds =
1213          rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1214          rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1215          rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
1216       .ssbo_addr_format = nvk_buffer_addr_format(rs->storage_buffers),
1217       .ubo_addr_format = nvk_buffer_addr_format(rs->uniform_buffers),
1218    };
1219 
1220    assert(set_layout_count <= NVK_MAX_SETS);
1221    for (uint32_t s = 0; s < set_layout_count; s++) {
1222       if (set_layouts[s] != NULL)
1223          ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
1224    }
1225 
1226    /* We run in four passes:
1227     *
1228     *  1. Find ranges of UBOs that we can promote to bound UBOs.  Nothing is
1229     *     actually lowered in this pass.  It's just analysis.
1230     *
1231     *  2. Try to lower UBO loads to cbufs based on the map we just created.
1232     *     We need to do this before the main lowering pass because it relies
1233     *     on the original descriptor load intrinsics.
1234     *
1235     *  3. Attempt to lower everything with direct descriptors.  This may fail
1236     *     to lower some SSBO intrinsics when variable pointers are used.
1237     *
1238     *  4. Clean up any SSBO intrinsics which are left and lower them to
1239     *     slightly less efficient but variable-pointers-correct versions.
1240     */
1241 
1242    bool pass_lower_ubo = false;
1243    if (cbuf_map_out != NULL) {
1244       ctx.cbuf_map = cbuf_map_out;
1245       build_cbuf_map(nir, &ctx);
1246 
1247       pass_lower_ubo =
1248          nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
1249                                     nir_metadata_block_index |
1250                                     nir_metadata_dominance,
1251                                     (void *)&ctx);
1252    }
1253 
1254    bool pass_lower_descriptors =
1255       nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
1256                                    nir_metadata_block_index |
1257                                    nir_metadata_dominance,
1258                                    (void *)&ctx);
1259    bool pass_lower_ssbo =
1260       nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
1261                                    nir_metadata_block_index |
1262                                    nir_metadata_dominance,
1263                                    (void *)&ctx);
1264    return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
1265 }
1266